Batterie de script servant à synchroniser du contenu web
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

syncWeb.pl 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. #!/usr/bin/perl
  2. use strict;
  3. use Digest::SHA qw(sha256_hex);
  4. use HTTP::Tiny;
  5. use DBI qw(:sql_types);
  6. use Getopt::Long;
  7. use Mojo::DOM;
  8. use HTML::WikiConverter;
  9. use Switch;
  10. use String::Escape;
  11. use Data::Dumper::Perltidy;
  12. my $import_list;
  13. my $export;
  14. my $config;
  15. sub trim {
  16. (my $s = $_[0]) =~ s/^\s+|\s+$//g;
  17. return $s;
  18. }
  19. sub connect_db($$$$$){
  20. my ($driver, $database, $user, $password, $host) = @_;
  21. my $dbh = 0;
  22. my $dsn = "DBI:$driver:dbname=$database";
  23. if( $user && $password){
  24. $dbh = DBI->connect($dsn, $user, $password, { RaiseError => 1 })
  25. or die $DBI::errstr;
  26. }elsif( $user ){
  27. $dbh = DBI->connect($dsn, $user, { RaiseError => 1 })
  28. or die $DBI::errstr;
  29. }else{
  30. $dbh = DBI->connect($dsn, { RaiseError => 1 })
  31. or die $DBI::errstr;
  32. }
  33. return $dbh;
  34. }
  35. sub to_mariadb($$){
  36. my ($export, $content) = @_;
  37. my $dbh = connect_db(
  38. "mysql",
  39. "$config->{'export'}->{$export}->{'connexion'}->{'base'}",
  40. "$config->{'export'}->{$export}->{'connexion'}->{'user'}",
  41. "$config->{'export'}->{$export}->{'connexion'}->{'password'}",
  42. "$config->{'export'}->{$export}->{'connexion'}->{'host'}"
  43. );
  44. print Dumper $config->{'export'}->{$export}->{'column_default'};
  45. my $columns_default = "";
  46. my $columns_default_value = "";
  47. if( exists($config->{'export'}->{$export}->{'column_default'}) ){
  48. $columns_default = "";
  49. foreach my $column ( keys %{$config->{'export'}->{$export}->{'column_default'}} ){
  50. $columns_default .= ", $column";
  51. print "\n- deal with $column\n";
  52. $columns_default_value .= ", \"$config->{'export'}->{$export}->{'column_default'}->{$column}\"";
  53. }
  54. }
  55. my $sql = "INSERT INTO $config->{'export'}->{$export}->{'content'}->{'table'} (
  56. $config->{'export'}->{$export}->{'content'}->{'column'} $columns_default
  57. )
  58. VALUES ('$content' $columns_default_value);";
  59. print $sql;
  60. exit;
  61. my $sth = $dbh->prepare( $sql );
  62. $sth->execute();
  63. $dbh->disconnect();
  64. }
  65. sub to_file($$){
  66. my($path, $content) = @_;
  67. my $filename = "$path";
  68. open(my $fh, '>', $filename) or die "Could not open file '$filename' $!";
  69. print $fh "$content";
  70. close $fh;
  71. print "done\n";
  72. }
  73. sub init (){
  74. return connect_db("SQLite", "history.db", "", "", "");
  75. }
  76. sub add_History($$$){
  77. my($url, $signature, $dbh) = @_;
  78. my $stmt = qq(INSERT INTO visit (url,signature)
  79. VALUES ("$url", "$signature"));
  80. my $rv = $dbh->do($stmt) or die $DBI::errstr;
  81. }
  82. sub get_html($){
  83. my ($url) = @_;
  84. my $response = HTTP::Tiny->new->get($url);
  85. if ($response->{success}) {
  86. return $response->{content};
  87. }
  88. return 0;
  89. }
  90. sub get_content($$){
  91. my($url, $selector) = @_;
  92. my $content = get_html( $url );
  93. my $dom = Mojo::DOM->new( $content );
  94. return $dom->at( "$selector" );
  95. }
  96. sub visit($$){
  97. my ($url, $dbh)= @_;
  98. my $content = get_html "$url" ;
  99. return sha256_hex( $content );
  100. }
  101. sub isNew($$$){
  102. my($url, $signature, $dbh) = @_;
  103. my $sth = $dbh->prepare("SELECT count(signature) AS COUNT FROM visit WHERE signature='$signature' AND url='$url';");
  104. #$sth->execute("$signature", "$url");
  105. $sth->execute();
  106. my $refs = $sth->fetchrow_arrayref()->[0];
  107. return !($refs);
  108. }
  109. sub registerIfNew($$$){
  110. my($url, $selector,$dbh) = @_;
  111. my $signature = sha256_hex( get_content($url, $selector) );
  112. if( isNew($url, $signature ,$dbh)){
  113. add_History($url, $signature ,$dbh);
  114. return 1;
  115. }
  116. return 0;
  117. }
  118. sub convert_strategy($$$){
  119. my ($content, $fromFormat, $toFormat) = @_;
  120. switch("$fromFormat $toFormat") {
  121. case "html md" { return htmlToMd( $content ) }
  122. case "a" { print "string a" }
  123. else { return $content }
  124. }
  125. }
  126. sub importContent($$$$){
  127. my ($url, $selector, $fromFormat, $toFormat) = @_;
  128. my $content = get_content( $url, $selector );
  129. if( $content ){
  130. return convertStrategy( $content, $fromFormat, $toFormat );
  131. }
  132. return 0;
  133. }
  134. sub checkUrl($$){
  135. my($url, $selector) = @_;
  136. my $dbh = init;
  137. if( registerIfNew($url, $selector,$dbh) ){
  138. get_content( $url, $selector )
  139. }
  140. $dbh->disconnect();
  141. }
  142. sub htmlToMd($){
  143. my($html) = @_;
  144. my $wc = new HTML::WikiConverter( dialect => 'Markdown' );
  145. return $wc->html2wiki( html => $html );
  146. }
  147. sub export_strategy($$$){
  148. my ($export, $format, $content) = @_;
  149. print "export straty de $export $config->{'export'}->{$export}->{'type'}";
  150. $content = convert_strategy(
  151. $content,
  152. $format,
  153. "$config->{'export'}->{$export}->{'format'}"
  154. );
  155. $content = String::Escape::backslash( $content );
  156. if( $config->{'export'}->{$export}->{'type'} eq "mariadb" ){
  157. print "\nis $config->{'export'}->{$export}->{'type'} mariadb?\n";
  158. to_mariadb( $export, $content );
  159. }else{
  160. to_file( "$config->{'export'}->{$export}->{'path'}", $content );
  161. }
  162. }
  163. sub main($$){
  164. my $dbh = init;
  165. my($import_list, $export) = @_;
  166. $config=do("./$export");
  167. open(FH, '<', $import_list) or die $!;
  168. while(<FH>){
  169. my($url, $selector, $fromFormat, $toFormat, $export) = split("\t");
  170. $fromFormat = trim( $fromFormat );
  171. $toFormat = trim( $toFormat );
  172. $export = trim( $export );
  173. my $content = get_content( $url, $selector );
  174. export_strategy( $export, $toFormat, $content );
  175. if( registerIfNew( $url, $selector, $dbh ) ){
  176. export_strategy( $export, $toFormat, $content );
  177. }
  178. }
  179. close(FH);
  180. }
  181. GetOptions (
  182. 'import-list=s' => \$import_list,
  183. 'export=s' => \$export
  184. );
  185. main( $import_list, $export );