#!/usr/bin/perl
#
# Reads a list of URLs (one per line) from the file given with --file,
# fetches each URL, and records the SHA-256 signature of the response body
# in the "visit" table of the SQLite database "history.db". A URL is
# "imported" only when the (url, signature) pair has not been seen before.
#
# NOTE(review): the "visit" table (columns: url, signature) is assumed to
# exist already; nothing in this script creates it.

use strict;
use warnings;

use Digest::SHA qw(sha256_hex);
use LWP::UserAgent;
use DBI qw(:sql_types);
use Getopt::Long;
use pQuery;

# Open a connection to the SQLite history database.
#
# Returns: a DBI database handle with RaiseError enabled.
# Dies if the connection cannot be established.
sub init {
    my $driver   = "SQLite";
    my $database = "history.db";
    my $dsn      = "DBI:$driver:dbname=$database";

    # DBI->connect takes (dsn, username, password, \%attr). The attribute
    # hash must be the fourth argument; passing it second makes DBI treat it
    # as the username and RaiseError is never turned on.
    my $dbh = DBI->connect($dsn, "", "", { RaiseError => 1 })
        or die $DBI::errstr;

    print "Opened database successfully\n";
    return $dbh;
}

# Record a (url, signature) pair in the visit table.
#
# Params:  $url       - URL that was fetched
#          $signature - hex digest of the fetched content
#          $dbh       - open DBI handle
# Returns: the value returned by $dbh->do (rows affected).
# Dies on database error.
sub add_History {
    my ($url, $signature, $dbh) = @_;

    # Bind values instead of interpolating them into the SQL text, so that
    # quotes or SQL fragments inside a URL cannot alter the statement.
    my $rv = $dbh->do(
        'INSERT INTO visit (url, signature) VALUES (?, ?)',
        undef, $url, $signature,
    ) or die $DBI::errstr;

    return $rv;
}

# Fetch a URL over HTTP.
#
# Params:  $url - URL to fetch
# Returns: the raw (undecoded) response body, or undef when the request
#          did not succeed (a warning is emitted in that case).
sub get_html {
    # List assignment: "my $url = @_" would store the argument count.
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(120);

    my $response = $ua->get($url);
    unless ($response->is_success) {
        warn "Failed to fetch $url: " . $response->status_line . "\n";
        return;
    }

    # Raw bytes on purpose: sha256_hex operates on octets.
    return $response->content;
}

# Fetch a URL and compute the signature of its content.
#
# Params:  $url - URL to fetch
#          $dbh - DBI handle (accepted for call compatibility; not used)
# Returns: SHA-256 hex digest of the body, or undef if the fetch failed.
sub visit {
    my ($url, $dbh) = @_;

    my $content = get_html($url);
    return unless defined $content;

    return sha256_hex($content);
}

# Tell whether a (url, signature) pair is absent from the visit table.
#
# Params:  $url, $signature - pair to look up
#          $dbh             - open DBI handle
# Returns: 1 if no matching row exists, 0 otherwise.
sub isNew {
    my ($url, $signature, $dbh) = @_;

    my ($count) = $dbh->selectrow_array(
        'SELECT COUNT(signature) FROM visit WHERE signature = ? AND url = ?',
        undef, $signature, $url,
    );

    return $count ? 0 : 1;
}

# Fetch a URL and store its signature when it has not been seen before.
#
# Params:  $url - URL to check
#          $dbh - open DBI handle
# Returns: 1 if a new (url, signature) pair was recorded, 0 otherwise
#          (including when the URL could not be fetched).
sub registerIfNew {
    my ($url, $dbh) = @_;

    my $signature = visit($url, $dbh);

    # A failed fetch yields no signature; do not record it as a visit.
    return 0 unless defined $signature;

    if (isNew($url, $signature, $dbh)) {
        add_History($url, $signature, $dbh);
        return 1;
    }

    return 0;
}

# Import the content behind a URL. Currently this only reports the URL.
#
# Params:  $url - URL to import
sub importFromUrl {
    my ($url) = @_;

    # Lines are chomped by main, so terminate the message explicitly.
    print "importing $url\n";
    return;
}

# Check a single URL against the history and import it if it is new.
# Opens its own database connection and closes it before returning.
#
# Params:  $url - URL to check
sub checkUrl {
    my ($url) = @_;

    my $dbh = init();
    if (registerIfNew($url, $dbh)) {
        importFromUrl($url);
    }
    $dbh->disconnect();

    return;
}

# Placeholder with no implementation; kept so the name stays defined.
sub get_content {
    return;
}

# Process every URL listed in a file, one URL per line.
#
# Params:  $file - path of the file containing the URLs
# Dies if no file was given or the file cannot be opened.
sub main {
    my ($file) = @_;

    die "Usage: $0 --file <path>\n" unless defined $file;

    print "Using file $file\n";

    open(my $fh, '<', $file) or die "Cannot open $file: $!";

    # Read the handle line by line; an empty while() condition is always
    # true and would loop forever without reading anything.
    while (my $url = <$fh>) {
        # Strip the line terminator and surrounding whitespace so it does
        # not become part of the URL or of the database key.
        $url =~ s/^\s+|\s+$//g;
        next unless length $url;

        print "Checking $url\n";
        checkUrl($url);
    }

    close($fh);
    return;
}

my $file;
GetOptions('file=s' => \$file)
    or die "Usage: $0 --file <path>\n";
main($file);