|
@@ -0,0 +1,94 @@
|
|
1
|
+#!/usr/bin/perl
|
|
2
|
+use strict;
|
|
3
|
+use Digest::SHA qw(sha256_hex);
|
|
4
|
+use LWP::UserAgent;
|
|
5
|
+use DBI qw(:sql_types);
|
|
6
|
+use Getopt::Long;
|
|
7
|
+use pQuery;
|
|
8
|
+
|
|
9
|
# Open the SQLite history database and return the connected handle.
#
# Connects to "history.db" (resolved against the current working directory)
# with RaiseError enabled, so that later DBI failures on this handle die
# instead of returning false silently. Prints a confirmation line to STDOUT
# once the connection is open.
#
# Returns: the DBI database handle.
# Dies:    with the DBI error string when the connection cannot be opened.
sub init {
    my $driver   = "SQLite";
    my $database = "history.db";
    my $dsn      = "DBI:$driver:dbname=$database";

    # DBI->connect takes ($dsn, $user, $password, \%attr). The attribute hash
    # has to be the fourth argument; passed second it lands in the user-name
    # slot and RaiseError is never switched on.
    my $dbh = DBI->connect($dsn, "", "", { RaiseError => 1 })
        or die $DBI::errstr;

    print "Opened database successfully\n";
    return $dbh;
}
|
|
18
|
+
|
|
19
|
# Record one visit (URL plus content signature) in the "visit" table.
#
# Parameters:
#   $url       - URL that was fetched
#   $signature - signature string stored alongside the URL
#   $dbh       - connected DBI database handle
#
# Returns: whatever $dbh->do returns for the INSERT.
# Dies:    with the DBI error string when the INSERT fails.
sub add_History($$$) {
    my ($url, $signature, $dbh) = @_;

    # Values are bound through placeholders rather than interpolated into the
    # SQL text: a URL containing quotes can then neither break the statement
    # nor inject SQL of its own.
    my $stmt = q{INSERT INTO visit (url,signature) VALUES (?, ?)};
    my $rv   = $dbh->do($stmt, undef, $url, $signature)
        or die $DBI::errstr;

    return $rv;
}
|
|
25
|
+
|
|
26
|
# Fetch a URL over HTTP and return the response body.
#
# Parameters:
#   $url - address to request with GET
#
# Returns: the content of the response. The response status is not inspected,
#          so for a failed request this is whatever body the response object
#          carries.
sub get_html($) {
    # List assignment is required here: "my $url = @_" evaluates @_ in scalar
    # context and yields the number of arguments instead of the URL.
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(120);    # seconds

    my $response = $ua->get($url);
    return $response->content();
}
|
|
35
|
+
|
|
36
|
# Download the page at $url and return the SHA-256 hex digest of its content.
#
# Parameters:
#   $url - address to fetch (stringified before it is handed to get_html)
#   $dbh - accepted for signature compatibility; not used by this sub
#
# Returns: the hex digest string computed by sha256_hex.
sub visit($$) {
    my ($url, $dbh) = @_;

    my $html   = get_html("$url");
    my $digest = sha256_hex($html);

    return $digest;
}
|
|
41
|
+
|
|
42
|
# Tell whether the (url, signature) pair is absent from the "visit" table.
#
# Parameters:
#   $url       - URL to look up
#   $signature - signature to look up
#   $dbh       - connected DBI database handle
#
# Returns: 1 when no matching row is counted, 0 otherwise.
sub isNew($$$) {
    my ($url, $signature, $dbh) = @_;

    my $query = "SELECT COUNT(signature) FROM visit WHERE signature=?1 AND url=?2";
    my $sth   = $dbh->prepare($query);

    # Bind order follows the numbered placeholders: ?1 = signature, ?2 = url.
    $sth->execute($signature, $url);

    my $rows  = $sth->fetchall_arrayref();
    my $count = $rows->[0][0];

    return 0 if $count;
    return 1;
}
|
|
49
|
+
|
|
50
|
# Fetch $url, and store its signature in the history if that (url, signature)
# pair has not been recorded yet.
#
# Parameters:
#   $url - address to check
#   $dbh - connected DBI database handle
#
# Returns: 1 when a new history row was added, 0 when the pair already existed.
sub registerIfNew($$) {
    my ($url, $dbh) = @_;

    my $signature = visit($url, $dbh);

    # Nothing to do when this exact content was already seen for the URL.
    return 0 unless isNew($url, $signature, $dbh);

    add_History($url, $signature, $dbh);
    return 1;
}
|
|
60
|
+
|
|
61
|
# Announce on STDOUT (the currently selected handle) that $url is being
# imported. No import work is performed here beyond printing the message.
#
# Parameters:
#   $url - address being imported
#
# Returns: the result of print.
sub importFromUrl($) {
    my ($url) = @_;

    # Strip any trailing whitespace/line ending and always end the message
    # with exactly one newline, so the output is one line per URL whether or
    # not the caller passed the URL with its line terminator still attached.
    (my $shown = $url) =~ s/\s+\z//;

    print "importing $shown\n";
}
|
|
65
|
+
|
|
66
|
# Process a single URL: open the history database, register the URL if its
# current content is new, trigger the import for new content, then close the
# database connection again.
#
# Parameters:
#   $url - address to check
#
# Returns: the result of the final disconnect call.
sub checkUrl($) {
    my ($url) = @_;

    my $dbh    = init();
    my $is_new = registerIfNew($url, $dbh);

    importFromUrl($url) if $is_new;

    $dbh->disconnect();
}
|
|
74
|
+
|
|
75
|
# Placeholder taking two arguments; it has no implementation yet and returns
# nothing (undef in scalar context, the empty list in list context).
sub get_content($$) {
    return;
}
|
|
78
|
+
|
|
79
|
# Run the URL check for every entry of a URL list file.
#
# The file is read line by line; each non-blank line is taken as one URL,
# announced on STDOUT and handed to checkUrl.
#
# Parameters:
#   $file - path of the text file holding one URL per line
#
# Returns: the result of closing the file.
# Dies:    when no file name is given or the file cannot be opened.
sub main($) {
    my ($file) = @_;
    die "No input file given\n" unless defined $file;

    print "Using file $file\n";

    # Lexical handle and a named line variable: a bareword FH and $_ are
    # globals that code called from inside the loop could clobber.
    open(my $fh, '<', $file) or die "Cannot open $file: $!";

    while (my $line = <$fh>) {
        # Drop the line terminator (LF or CRLF) and other trailing whitespace
        # so the URL is passed on as bare text.
        $line =~ s/\s+\z//;

        # Blank lines carry no URL.
        next unless length $line;

        print "Checking $line\n";
        checkUrl($line);
    }

    close($fh);
}
|
|
90
|
+
|
|
91
|
# Script entry point: read the mandatory --file option and process the URL
# list it names.
my $file;

# GetOptions returns false on a malformed command line; stop with a usage
# message then, and also when --file was not supplied, instead of passing an
# undefined path on to main.
GetOptions('file=s' => \$file)
    or die "Usage: $0 --file <path>\n";
die "Usage: $0 --file <path>\n" unless defined $file;

main($file);
|