12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- #!/usr/bin/perl
use strict;
use warnings;

use DBI qw(:sql_types);
use Digest::SHA qw(sha256_hex);
use Getopt::Long;
use LWP::UserAgent;
use pQuery;
-
# Open the SQLite history database and return the connected handle.
#
# Returns: a DBI database handle for "history.db" (resolved relative to the
#          current working directory), connected with RaiseError enabled.
# Dies:    with the DBI error string when the connection cannot be opened.
sub init {
    my $driver   = "SQLite";
    my $database = "history.db";
    my $dsn      = "DBI:$driver:dbname=$database";

    # DBI->connect takes (dsn, user, password, \%attr). The attribute hash
    # has to be the fourth argument; passed second it is treated as the
    # user name and RaiseError is never switched on.
    my $dbh = DBI->connect($dsn, "", "", { RaiseError => 1 })
        or die $DBI::errstr;
    print "Opened database successfully\n";
    return $dbh;
}
-
# Record one (url, signature) pair in the "visit" table.
#
# Parameters:
#   $url       - address that was fetched
#   $signature - digest of the content fetched from $url
#   $dbh       - open DBI database handle
# Returns: the value returned by $dbh->do.
# Dies:    with the DBI error string when the INSERT fails.
sub add_History($$$) {
    my ($url, $signature, $dbh) = @_;

    # Bind the values instead of interpolating them into the SQL text: a
    # URL containing a double quote would otherwise break the statement or
    # inject SQL of its own.
    my $stmt = q{INSERT INTO visit (url,signature) VALUES (?, ?)};
    my $rv   = $dbh->do($stmt, undef, $url, $signature)
        or die $DBI::errstr;
    return $rv;
}
-
# Fetch a URL with an HTTP GET and return the response content.
#
# Parameters:
#   $url - address to fetch
# Returns: the raw content carried by the response. The content is returned
#          even when the request did not succeed; a warning is emitted in
#          that case so the failure is visible.
sub get_html($) {
    # List assignment is required here: "my $url = @_" evaluates @_ in
    # scalar context and yields the argument count (1) instead of the URL.
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(120);

    my $response = $ua->get($url);

    # Report failures without changing what callers receive.
    warn "GET $url failed: " . $response->status_line . "\n"
        unless $response->is_success;

    return $response->content();
}
-
# Download a URL and reduce what came back to a SHA-256 hex digest.
#
# Parameters:
#   $url - address to fetch (stringified before it is handed to get_html)
#   $dbh - accepted but not used by this sub
# Returns: sha256_hex of whatever get_html returned.
sub visit($$) {
    my ($url, $dbh) = @_;

    my $html = get_html("$url");
    my $digest = sha256_hex($html);

    return $digest;
}
-
# Tell whether a (url, signature) pair is absent from the "visit" table.
#
# Parameters:
#   $url       - address to look up
#   $signature - content digest to look up
#   $dbh       - open DBI database handle
# Returns: 1 when the count query yields a false value (no matching row),
#          0 otherwise.
sub isNew($$$) {
    my ($url, $signature, $dbh) = @_;

    my $query = $dbh->prepare("SELECT COUNT(signature) FROM visit WHERE signature=?1 AND url=?2");

    # Bind order follows the numbered placeholders: ?1 = signature, ?2 = url.
    $query->execute($signature, $url);

    my $rows  = $query->fetchall_arrayref();
    my $count = $rows->[0][0];

    return 1 unless $count;
    return 0;
}
-
# Fetch a URL and store its content signature unless that exact
# (url, signature) pair has already been recorded.
#
# Parameters:
#   $url - address to fetch
#   $dbh - open DBI database handle
# Returns: 1 when a new row was added, 0 when the pair was already known.
sub registerIfNew($$) {
    my ($url, $dbh) = @_;

    my $digest = visit($url, $dbh);

    # Already recorded: nothing to store.
    return 0 unless isNew($url, $digest, $dbh);

    add_History($url, $digest, $dbh);
    return 1;
}
-
# Announce that a URL is being imported. At present this sub only prints
# the message.
#
# Parameters:
#   $url - address being imported; one trailing newline, if present, is
#          ignored
# Returns: the result of print.
sub importFromUrl($) {
    my ($url) = @_;

    # Work on a copy and end the message with exactly one newline, so the
    # output is one line per URL whether or not the caller stripped the
    # line terminator.
    chomp(my $shown = $url);
    return print "importing $shown\n";
}
-
# Process one URL: open the history database, register the URL's current
# content signature, and run the import when the signature was not yet known.
#
# Parameters:
#   $url - address to check
# Returns: the result of disconnecting the database handle.
sub checkUrl($) {
    my $url = shift;
    my $dbh = init();

    importFromUrl($url) if registerIfNew($url, $dbh);

    $dbh->disconnect();
}
-
# Stub: takes two arguments and does nothing with them.
# Returns: nothing (empty list in list context, undef in scalar context).
sub get_content($$) {
    return;
}
-
# Read a file of URLs, one per line, and run checkUrl on each of them.
#
# Parameters:
#   $file - path of the text file listing the URLs
# Returns: the result of closing the file.
# Dies:    with a usage message when $file is missing or empty, or with the
#          system error when the file cannot be opened.
sub main($) {
    my ($file) = @_;

    die "Usage: $0 --file <path>\n"
        unless defined $file && length $file;

    print "Using file $file\n";

    # Lexical handle: it cannot collide with another FH in the program.
    open(my $fh, '<', $file) or die "Cannot open '$file': $!\n";

    # A named loop variable is used rather than $_, which is a global that
    # the code called from checkUrl is free to overwrite.
    while (my $line = <$fh>) {
        # Drop the line terminator and surrounding blanks so they do not
        # become part of the URL that is fetched and stored.
        (my $url = $line) =~ s/\A\s+|\s+\z//g;

        # A blank line is not a URL.
        next if $url eq '';

        print "Checking $url\n";
        checkUrl($url);
    }

    return close($fh);
}
-
# Script entry point: read the --file option and process the named file.
my $file;

# GetOptions returns false when the command line could not be parsed;
# stop with a usage message instead of continuing with partial options.
GetOptions('file=s' => \$file)
    or die "Usage: $0 --file <path>\n";

# --file is required: without it there is nothing to read.
die "Usage: $0 --file <path>\n" unless defined $file;

main($file);
|