#!/usr/bin/perl
# Fetches fragments of web pages listed in a tab-separated import list,
# converts them between formats and exports each fragment either to a file
# or to a MariaDB table, as described by a Perl configuration file.
# A SQLite database (history.db) stores one signature per exported fragment
# so that unchanged content is not exported again.
#
# Usage: script --import-list <file.tsv> --export <config.pl>
#
# Each import-list line holds five tab-separated fields:
#   url, CSS selector, source format, target format, export target name.
use strict;
use warnings;

use Digest::SHA qw(sha256_hex);
use HTTP::Tiny;
use DBI qw(:sql_types);
use Getopt::Long;
use Mojo::DOM;
use HTML::WikiConverter;
# NOTE(review): Switch is a source filter and nothing below relies on it any
# longer; the import is kept only to leave the dependency list untouched.
use Switch;
use String::Escape;
use Data::Dumper::Perltidy;

my $import_list;
my $export;
my $config;

# Returns a copy of the argument without leading and trailing whitespace.
# An undefined argument is returned unchanged.
sub trim {
    my ($text) = @_;
    return $text unless defined $text;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}

# Opens a DBI connection with RaiseError enabled and returns the handle.
#   $driver   - DBD driver name ("SQLite", "mysql", ...)
#   $database - database name or file
#   $user, $password, $host - optional; empty or undefined means "not given"
# Dies with the DBI error string when the connection cannot be established.
sub connect_db {
    my ($driver, $database, $user, $password, $host) = @_;

    my $dsn = "DBI:$driver:dbname=$database";
    $dsn .= ";host=$host" if defined $host && length $host;

    # Missing credentials are passed as undef so the attribute hash always
    # lands in the fourth argument, where DBI expects it.
    my $login  = (defined $user     && length $user)     ? $user     : undef;
    my $secret = (defined $password && length $password) ? $password : undef;

    my $dbh = DBI->connect($dsn, $login, $secret, { RaiseError => 1 })
        or die $DBI::errstr;
    return $dbh;
}

# Inserts $content into the table configured for export target $export.
# The content column and every entry of the optional 'column_default' map
# are written in a single INSERT. All values are bound through placeholders,
# so $content must be the raw (unescaped) text. Table and column names come
# from the configuration file and are interpolated as-is.
sub to_mariadb {
    my ($export, $content) = @_;

    my $target    = $config->{'export'}->{$export};
    my $connexion = $target->{'connexion'};
    my $dbh       = connect_db(
        "mysql",
        $connexion->{'base'},
        $connexion->{'user'},
        $connexion->{'password'},
        $connexion->{'host'}
    );

    my @columns = ($target->{'content'}->{'column'});
    my @values  = ($content);
    if (exists $target->{'column_default'}) {
        # Sorted so the generated statement is stable from run to run.
        foreach my $column (sort keys %{ $target->{'column_default'} }) {
            push @columns, $column;
            push @values,  $target->{'column_default'}->{$column};
        }
    }

    my $sql = sprintf(
        "INSERT INTO %s (%s) VALUES (%s)",
        $target->{'content'}->{'table'},
        join(", ", @columns),
        join(", ", ("?") x scalar(@columns))
    );

    my $sth = $dbh->prepare($sql);
    $sth->execute(@values);
    $dbh->disconnect();
    return;
}

# Writes $content to the file $path, replacing any previous content.
# Dies when the file cannot be opened or the write cannot be flushed.
sub to_file {
    my ($path, $content) = @_;
    my $filename = "$path";

    open(my $fh, '>', $filename)
        or die "Could not open file '$filename' $!";
    print $fh "$content";
    # Buffered write errors only surface when the handle is closed.
    close($fh) or die "Could not close file '$filename' $!";

    print "done\n";
    return;
}

# Opens the SQLite history database (history.db in the current directory).
# The 'visit' table with 'url' and 'signature' columns must already exist.
sub init {
    return connect_db("SQLite", "history.db", "", "", "");
}

# Records that the content identified by $signature was seen at $url.
sub add_History {
    my ($url, $signature, $dbh) = @_;
    my $stmt = "INSERT INTO visit (url, signature) VALUES (?, ?)";
    my $rv   = $dbh->do($stmt, undef, $url, $signature)
        or die $DBI::errstr;
    return $rv;
}

# Downloads $url and returns the response body, or 0 when the request fails.
sub get_html {
    my ($url) = @_;
    my $response = HTTP::Tiny->new->get($url);
    return $response->{content} if $response->{success};

    warn "GET $url failed: $response->{status} $response->{reason}\n";
    return 0;
}

# Downloads $url and returns the first element matching the CSS $selector
# as a Mojo::DOM object. Returns nothing when the download fails or when no
# element matches.
sub get_content {
    my ($url, $selector) = @_;
    my $html = get_html($url);
    return unless $html;

    my $node = Mojo::DOM->new($html)->at("$selector");
    return unless defined $node;
    return $node;
}

# Returns the SHA-256 hex digest of the whole page at $url.
# The $dbh argument is accepted but not used.
sub visit {
    my ($url, $dbh) = @_;
    my $content = get_html($url);
    return sha256_hex($content);
}

# Returns true when no history row exists for this ($url, $signature) pair.
sub isNew {
    my ($url, $signature, $dbh) = @_;
    my $sth = $dbh->prepare(
        "SELECT count(signature) AS COUNT FROM visit WHERE signature = ? AND url = ?"
    );
    $sth->execute($signature, $url);
    my $count = $sth->fetchrow_arrayref()->[0];
    return !$count;
}

# Returns the SHA-256 hex digest of the stringified content.
sub _signature_of {
    my ($content) = @_;
    my $text = "$content";
    # sha256_hex croaks on characters above 0xFF (decoded HTML entities, for
    # instance). Only such strings are encoded, so digests of byte-only
    # content are the same as they always were.
    utf8::encode($text) if $text =~ /[^\x00-\xFF]/;
    return sha256_hex($text);
}

# Records the current content of ($url, $selector) in the history when it
# has not been seen before.
#   $content - optional, already-fetched content; fetched here when omitted.
# Returns 1 when the content is new and was recorded, 0 otherwise (including
# when there is no content to sign).
sub registerIfNew {
    my ($url, $selector, $dbh, $content) = @_;
    $content = get_content($url, $selector) unless defined $content;
    return 0 unless defined $content;

    my $signature = _signature_of($content);
    return 0 unless isNew($url, $signature, $dbh);

    add_History($url, $signature, $dbh);
    return 1;
}

# Converts $content from $fromFormat to $toFormat.
# Only "html" to "md" is supported; any other pair returns $content as-is.
sub convert_strategy {
    my ($content, $fromFormat, $toFormat) = @_;
    $fromFormat = "" unless defined $fromFormat;
    $toFormat   = "" unless defined $toFormat;

    return htmlToMd($content) if "$fromFormat $toFormat" eq "html md";
    return $content;
}

# Fetches the fragment at ($url, $selector) and converts it from $fromFormat
# to $toFormat. Returns 0 when nothing could be fetched.
sub importContent {
    my ($url, $selector, $fromFormat, $toFormat) = @_;
    my $content = get_content($url, $selector);
    return 0 unless $content;
    return convert_strategy($content, $fromFormat, $toFormat);
}

# Checks ($url, $selector) against the history and returns the fragment when
# it is new; returns nothing otherwise.
sub checkUrl {
    my ($url, $selector) = @_;
    my $dbh     = init();
    my $content = get_content($url, $selector);
    my $is_new  = registerIfNew($url, $selector, $dbh, $content);
    $dbh->disconnect();

    return $content if $is_new;
    return;
}

# Converts an HTML fragment to Markdown.
sub htmlToMd {
    my ($html) = @_;
    my $wc = HTML::WikiConverter->new(dialect => 'Markdown');
    return $wc->html2wiki(html => "$html");
}

# Converts $content from $format to the format of export target $export and
# writes it to that target: the MariaDB table when the target type is
# "mariadb", the configured file path otherwise.
# Dies when $export is not declared in the configuration.
sub export_strategy {
    my ($export, $format, $content) = @_;

    my $target = $config->{'export'}->{$export};
    die "Unknown export target '$export'\n" unless ref($target) eq 'HASH';

    my $type = defined $target->{'type'} ? $target->{'type'} : "";
    print "export straty de $export \n$type";

    $content = convert_strategy($content, $format, $target->{'format'});
    $content = defined $content ? "$content" : "";

    if ($type eq "mariadb") {
        print "\nis $type mariadb?\n";
        # Values are bound through placeholders: no manual escaping needed.
        to_mariadb($export, $content);
    }
    else {
        # NOTE(review): file output has always been backslash-escaped; this
        # is kept as-is, confirm it is wanted for files.
        to_file("$target->{'path'}", String::Escape::backslash($content));
    }
    return;
}

# Loads the export configuration $export, then processes every line of the
# import list $import_list: a fragment is exported only when its signature
# is not yet in the history.
sub main {
    my ($import_list, $export) = @_;
    unless (defined $import_list && defined $export) {
        die "Usage: $0 --import-list <file.tsv> --export <config.pl>\n";
    }

    $config = do("./$export");
    unless ($config) {
        my $reason = $@ || $! || "file did not return a true value";
        die "Could not load export configuration './$export': $reason\n";
    }

    my $dbh = init();
    open(my $fh, '<', $import_list)
        or die "Could not open import list '$import_list': $!";

    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ /\S/;

        my ($url, $selector, $fromFormat, $toFormat, $export_name) =
            split(/\t/, $line);
        $fromFormat  = trim($fromFormat);
        $toFormat    = trim($toFormat);
        $export_name = trim($export_name);
        unless (defined $export_name && length $export_name) {
            warn "Skipping malformed line $. of '$import_list'\n";
            next;
        }

        # Fetch once; the same fragment is signed and exported.
        my $raw = get_content($url, $selector);
        next unless defined $raw;
        next unless registerIfNew($url, $selector, $dbh, $raw);

        # The fragment is in $fromFormat: bring it to $toFormat before the
        # export target applies its own conversion from $toFormat.
        my $content = convert_strategy($raw, $fromFormat, $toFormat);
        export_strategy($export_name, $toFormat, $content);
    }

    close($fh);
    $dbh->disconnect();
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require/do without side effects.
unless (caller) {
    GetOptions(
        'import-list=s' => \$import_list,
        'export=s'      => \$export
    ) or die "Usage: $0 --import-list <file.tsv> --export <config.pl>\n";
    main($import_list, $export);
}