Batterie de scripts servant à synchroniser du contenu web
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

syncWeb.pl 1.9KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #!/usr/bin/perl
  2. use strict;
  3. use Digest::SHA qw(sha256_hex);
  4. use LWP::UserAgent;
  5. use DBI qw(:sql_types);
  6. use Getopt::Long;
  7. use pQuery;
  # Connect to the SQLite database file "history.db" through DBI.
  #
  # Takes no arguments.
  # Returns the connected database handle.
  # Dies with the DBI error string if the connection cannot be made.
  # Prints a confirmation line on standard output once connected.
  sub init {
      my $driver   = "SQLite";
      my $database = "history.db";
      my $dsn      = "DBI:$driver:dbname=$database";

      # DBI->connect expects (dsn, username, password, \%attributes). The
      # attribute hash has to sit in the fourth slot: passed second it is
      # taken for the username and RaiseError is never switched on.
      my $dbh = DBI->connect($dsn, "", "", { RaiseError => 1 })
          or die $DBI::errstr;

      print "Opened database successfully\n";
      return $dbh;
  }
  # Record one (url, signature) pair in the "visit" table.
  #
  # Arguments:
  #   $url       - address that was fetched
  #   $signature - digest of the content fetched from that address
  #   $dbh       - database handle providing a DBI-style do() method
  # Returns the value returned by $dbh->do.
  # Dies with the DBI error string if the insert reports failure.
  sub add_History($$$){
      my ($url, $signature, $dbh) = @_;

      # Values are bound through placeholders rather than interpolated into
      # the statement text, so quotes or SQL fragments inside a URL cannot
      # alter or break the query.
      my $stmt = q{INSERT INTO visit (url,signature) VALUES (?, ?)};
      my $rv = $dbh->do($stmt, undef, $url, $signature)
          or die $DBI::errstr;
      return $rv;
  }
  # Fetch a URL with an HTTP GET and return the response body.
  #
  # Arguments:
  #   $url - address to request
  # Returns whatever the response's content() method yields; this is
  # returned even when the request was not successful, in which case a
  # warning carrying the status line is emitted on standard error.
  sub get_html($) {
      # List assignment is required here: assigning @_ to a plain scalar
      # would store the number of arguments (1) instead of the URL.
      my ($url) = @_;

      my $ua = LWP::UserAgent->new;
      $ua->timeout(120);    # seconds

      my $request  = HTTP::Request->new('GET', $url);
      my $response = $ua->request($request);

      # The body is still handed back on failure (callers hash it as-is),
      # but the failure is no longer completely silent.
      warn "get_html: request failed: " . $response->status_line . "\n"
          unless $response->is_success;

      return $response->content();
  }
  # Download the page behind a URL and reduce it to a SHA-256 hex digest.
  #
  # Arguments:
  #   $url - address to fetch (passed on to get_html in string form)
  #   $dbh - accepted for signature compatibility; not used here
  # Returns the hexadecimal SHA-256 digest of the fetched content.
  sub visit($$){
      my $url = shift;
      my $dbh = shift;

      my $page = get_html("$url");
      my $digest = sha256_hex($page);
      return $digest;
  }
  # Tell whether a (url, signature) pair is absent from the "visit" table.
  #
  # Arguments:
  #   $url       - address to look up
  #   $signature - content digest to look up
  #   $dbh       - database handle providing a DBI-style prepare() method
  # Returns 1 when no matching row is counted, 0 otherwise.
  sub isNew($$$){
      my ($url, $signature, $dbh) = @_;

      my $query = $dbh->prepare("SELECT COUNT(signature) FROM visit WHERE signature=?1 AND url=?2");

      # Bind order follows the numbered placeholders: ?1 = signature, ?2 = url
      $query->execute($signature, $url);

      # First column of the first row holds the count
      my $matches = $query->fetchall_arrayref()->[0][0];

      return 0 if $matches;
      return 1;
  }
  # Fetch a URL, and store its content signature if that exact
  # (url, signature) pair has not been recorded before.
  #
  # Arguments:
  #   $url - address to check
  #   $dbh - database handle passed through to visit, isNew and add_History
  # Returns 1 when a new entry was stored, 0 when the pair already existed.
  sub registerIfNew($$){
      my ($url, $dbh) = @_;

      my $signature = visit($url, $dbh);

      # Nothing to store when this content was already seen for this URL
      return 0 unless isNew($url, $signature, $dbh);

      add_History($url, $signature, $dbh);
      return 1;
  }
  # Placeholder for the import step: for now it only announces the URL on
  # standard output. The message itself carries no trailing newline.
  #
  # Arguments:
  #   $url - address whose content would be imported
  sub importFromUrl($){
      my $url = shift;
      print "importing $url";
  }
  # Process a single URL: open the database, register the URL's current
  # content signature, run the import step when the signature is new, and
  # close the database again.
  #
  # Arguments:
  #   $url - address to check
  sub checkUrl($){
      my $url = shift;

      # A fresh connection is opened for every URL
      my $dbh = init();

      importFromUrl($url) if registerIfNew($url, $dbh);

      $dbh->disconnect();
  }
  # Unimplemented stub: declared with two scalar parameters, does nothing
  # with them and returns nothing.
  sub get_content($$){
      return;
  }
  # Read a list of URLs from a file, one per line, and run checkUrl on each.
  #
  # Arguments:
  #   $file - path of the text file holding the URLs
  # Dies with a usage message when no file name is given, and with the file
  # name and system error when the file cannot be opened.
  sub main($){
      my ($file) = @_;

      # Three-argument open with an undefined name opens an anonymous
      # temporary file instead of failing, so a missing --file option would
      # otherwise look like a successful run over zero URLs.
      die "Usage: $0 --file <path-to-url-list>\n"
          unless defined $file && length $file;

      print "Using file $file\n";

      # Lexical handle: scoped to this sub instead of a global bareword
      open(my $fh, '<', $file) or die "Cannot open '$file': $!\n";

      while (my $line = <$fh>) {
          # Blank or whitespace-only lines carry no URL to check
          next unless $line =~ /\S/;

          # The line is passed on unmodified (line terminator included),
          # exactly as it was read.
          print "Checking $line";
          checkUrl($line);
      }

      close($fh);
  }
  # Script entry point: read the --file option and process the URL list
  # it names.
  my $file;

  # GetOptions returns false when the command line could not be parsed
  # (for example an unknown option); stop instead of carrying on.
  GetOptions ('file=s' => \$file)
      or die "Usage: $0 --file <path-to-url-list>\n";

  main( $file );