X-Git-Url: https://git.jankratochvil.net/?p=kewensis.git;a=blobdiff_plain;f=kewensis-collect.pl;h=8fd1d541cdf6815b65c07b5a8b23847254926b80;hp=ba61527b73003c6b25e06f8ca56d9d055e732403;hb=e332ad4bfdbfe8d2764271bdb52e15cb0d8c5c18;hpb=95acaf4907648517bd0f10047d107464e137ca70 diff --git a/kewensis-collect.pl b/kewensis-collect.pl index ba61527..8fd1d54 100755 --- a/kewensis-collect.pl +++ b/kewensis-collect.pl @@ -5,52 +5,16 @@ use warnings; use Cwd; use Data::Dumper; +use DBI; -my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$maxsize,$filename,$doimport,$import_xlate); +my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate); -# $maxsize=0x40000; %DB=(); $D=0; $debugmatch=0; $doimport=1; $import_xlate=1; -sub rec_to_name -{ -my( %rec )=@_; - - return $rec{"name"}; -} - -sub htmlquote -{ -my($class); -($_,$class)=@_; - - s/&/&/g; - s//>/g; - s/\n/&nl;/g; - s/"/"/g; - return "$_" if defined $class; - return $_; -} - -sub format_record -{ -my( $preinsert,$postinsert,%rec )=@_; - - my($r)=$preinsert.htmlquote(rec_to_name(%rec),"name").$postinsert; - $r.="\n".htmlquote($rec{"Publ. Author"},"author") if (exists($rec{"Publ. Author"})); - $r.="
\n".htmlquote($rec{"Publication"},"publication") if (exists($rec{"Publication"})); - $r.="
\n".htmlquote($rec{"Notes"},"notes") if (exists($rec{"Notes"})); - if ($doimport && exists($rec{"html"})) { - my($import)="import: [".$rec{"id"}."]"; - $r.="
\n
\n".$rec{"html"}."\n
\n"; - } - return $r; -} - sub extract_year { ($_)=@_; @@ -124,6 +88,7 @@ remarks: .*)?(

Type

)?()?(?: my($attrsbody)=$11; ($rec{"id"}=$file)=~s#^($id)\.html$#$1#os or failed($file); $rec{"html"}=~s#$ipniservletwordthree#$3#osg if $import_xlate && exists $rec{"html"}; + my $score=0; # -: upper, +: lower while (defined($attrsbody) && $attrsbody=~s%^(?:$attrpat)%%os) { # nomenclatural synonym: id=$2 # basionym: id=$5 @@ -137,7 +102,12 @@ remarks: .*)?(

Type

)?(
)?(?: while (@refs) { push(@{$rec{"refs"}},$_) if defined ($_=shift @refs); } + $score+=-10 if defined $5; # basionym: id=$5 + $score+=+10 if defined $13; # Is a basionym: id=$13 + $score+=+10 if defined $16; # later publication: id=$16 + $score+=-10 if defined $18; # Is a later publication: id=$18 } + $rec{"score"}=$score; if ($attrsbody) { failed($file); return; @@ -197,122 +167,74 @@ print Data::Dumper->Dump([\%DB],["%DB"]) if $D; %OWNS=map { $_=>[] } keys(%DB); for $id (keys %OWNS) { - my($refid); - for $refid (@{$DB{$id}{"refs"}}) { + next if !exists $OWNS{$id}; + for my $refid (@{$DB{$id}{"refs"}}) { next if !exists $OWNS{$refid}; push(@{$OWNS{$id}},$refid,@{$OWNS{$refid}}); delete $OWNS{$refid}; + print "connected: id=$id,refid=$refid\n" if $D; } } print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D; -#foreach $key (keys %DB) { -# $DB{$key}{"Publication"}=extract_year($DB{$key}{"Publication"})." ::: ".$DB{$key}{"Publication"} -# if (exists($DB{$key}{"Publication"})); -# } - foreach $key (keys %OWNS) { my(@keys)=@{$OWNS{$key}}; delete($OWNS{$key}); unshift(@keys,$key); - @keys=sort { extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys; + @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"} + || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys; my($pkey)=shift(@keys); $OWNS{$pkey}=\@keys; } -sub print_header -{ -my($header)=@_; - - print OUT -" - - -Kewensis $header - - - - -\n"; -} +my($db_driver,$db_host,$db_user,$db_pwd,$DB_PWD,$db_name,$db); +my($tb_tree); -sub OUT_flush -{ - print OUT "\n"; - close(OUT); -} +$db_driver="mysql"; +$db_host=""; +#$db_user="short"; +#$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd"; +$db_name="short"; +$tb_tree="kewensis_tree"; -if (defined $maxsize) { - my($fileno)=-1; - my($filename,$fileid); - foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { - my( $child ); - - if ($fileno<0 || tell(OUT)>=$maxsize) { - OUT_flush() if ($fileno>=0); - $fileid=sprintf("%04d",++$fileno); - $filename="kew-$fileid.html"; - open(OUT,">$filename") or die "Cannot open \"$filename\": $!"; - print_header("chunk $fileid"); - } - - if ($debugmatch) { - print STDERR "($owner):"; - foreach $child (@{$OWNS{$owner}}) - { print STDERR " ($child)"; } - print STDERR "\n"; - } - print OUT format_record("

","

\n",%{$DB{$owner}}); - if (@{$OWNS{$owner}}) { - print OUT "
\n"; - foreach $child (@{$OWNS{$owner}}) - { print OUT format_record("

","

\n",%{$DB{$child}}); } - print OUT "
\n"; - } - print OUT "\n"; - $DB{$owner}{"_filename"}=$filename; - } - OUT_flush(); - open(OUT,">kew-index.html") or die "Cannot open \"kew-index.html\": $!"; - print_header("Index"); - } -else { - open(OUT,">kewensis.html") or die "Cannot open \"kewensis.html\": $!"; - print_header("Full"); - } +$db=DBI->connect("DBI:$db_driver:database=$db_name;host=$db_host",$db_user,$db_pwd) or die "Database open fail: $!"; -sub format_href +sub db_do { -my($preinsert,$postinsert,%rec)=@_; +my( $cmd )=@_; - return htmlquote(rec_to_name(%rec)); + $db->do($cmd) or die("SQL command \"$cmd\" failed: $!"); } -my($printrecref)=(defined $maxsize ? \&format_href : \&format_record ); +eval { &db_do("drop table $tb_tree") }; + +&db_do("create table $tb_tree (" + ."id char(10) not null," + ."family_id char(10) not null," + ."family_order int not null," + ."name varchar(100) not null," + ."Publication text null," + ."Notes text null," + ."html text null" + .")"); + +&db_do("alter table $tb_tree add unique (id)"); +&db_do("alter table $tb_tree add index (name)"); +&db_do("alter table $tb_tree add unique (family_id,family_order)"); + +my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,Publication,Notes,html) values (?,?,?,?,?,?,?)") + or die "Prepare fail: $!"; foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { -my( $child ); - - print OUT "

"; - print OUT "" if defined $maxsize; - print OUT &{$printrecref}("","",%{$DB{$owner}}); - print OUT "" if defined $maxsize; - print OUT "

"; - if (@{$OWNS{$owner}}) { - print OUT ""; + my @family=@{$OWNS{$owner}}; + unshift(@family,$owner); + my $family_id=$DB{$owner}{"id"}; + for my $family_order (0..$#family) { + my $id=$family[$family_order]; + print "insert:$id,".$DB{$id}{"name"}."\n" if $D; + $insert_tb_tree->execute($id,$family_id,$family_order, + $DB{$id}{"name"},$DB{$id}{"Publication"},$DB{$id}{"Notes"},$DB{$id}{"html"} + ) or die "SQL insert failure: $!"; } - print OUT "\n"; } -OUT_flush();