X-Git-Url: https://git.jankratochvil.net/?p=kewensis.git;a=blobdiff_plain;f=kewensis-collect.pl;h=61b6d884c708cb376a6844f5feca63ae2bea241b;hp=009be1e2e6d2ab646f9d3fbca933c214bd5d56bb;hb=670bf3198dd46a70d44d922162078a0784244811;hpb=cc9a120fa1095fe32c186606799e005456fccb54 diff --git a/kewensis-collect.pl b/kewensis-collect.pl index 009be1e..61b6d88 100755 --- a/kewensis-collect.pl +++ b/kewensis-collect.pl @@ -7,10 +7,10 @@ use Cwd; use Data::Dumper; use DBI; -my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate); +my(%DB,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate); %DB=(); -$D=0; +$D=1; $debugmatch=0; $doimport=1; $import_xlate=1; @@ -64,7 +64,7 @@ my($file)=@_;
Orchidaceae ($word) ($word)
-((?: ($word) ($word)|$bigword(?:
\n$word)*)?

($bigword)?

($word)?

(?: +(?: ($word) ($word)|($bigword(?:
\n$word)*))?(

($bigword)?

($word)?

(?: remarks: .*)?(

Type

)?()?(?:

Linked Records

((?:$attrpat)*))?(
@@ -83,11 +83,17 @@ remarks: .*)?(

Type

)?(
)?(?: my(%rec); $rec{"name"}=$1; $rec{"Publ. Author"}=$2; - $rec{"Publication"}="$5 $6" if defined($5) && defined($6); - $rec{"html"}=$3 if defined $3; - my($attrsbody)=$11; + my $publ=""; + $publ.="$4 $5" if defined($4) && defined($5); + $publ.=$6 if defined $6; + $rec{"html"}=$7 if defined $7; + my($attrsbody)=$12; + # catch-array destroyed here! + $publ.=s/\//g; + $rec{"Publication"}=$publ if $publ ne ""; ($rec{"id"}=$file)=~s#^($id)\.html$#$1#os or failed($file); $rec{"html"}=~s#$ipniservletwordthree#$3#osg if $import_xlate && exists $rec{"html"}; + $rec{"refs"}=[]; my $score=0; # -: upper, +: lower while (defined($attrsbody) && $attrsbody=~s%^(?:$attrpat)%%os) { # nomenclatural synonym: id=$2 @@ -98,15 +104,15 @@ remarks: .*)?(

Type

)?(
)?(?: # later publication: id=$16 # Is a later publication: id=$18 my(@refs)=($2,$5,$8,$11,$13,$16,$18); - $rec{"refs"}=[]; while (@refs) { push(@{$rec{"refs"}},$_) if defined ($_=shift @refs); } - $score+=-10 if defined $5; # basionym: id=$5 - $score+=+10 if defined $13; # Is a basionym: id=$13 - $score+=+10 if defined $16; # later publication: id=$16 - $score+=-10 if defined $18; # Is a later publication: id=$18 + $score+=+10 if defined $5; # basionym: id=$5 + $score+=-10 if defined $13; # Is a basionym: id=$13 + $score+=- 4 if defined $16; # later publication: id=$16 + $score+=+ 4 if defined $18; # Is a later publication: id=$18 } + $score+=+9 if $rec{"Publ. Author"}=~/^\s*\(/; $rec{"score"}=$score; if ($attrsbody) { failed($file); @@ -141,18 +147,15 @@ my( $entry )=@_; process_file($entry) if -f $entry; } -%OWNS=(); - foreach (@ARGV) { process_entry($_); } -my($id); -for $id (keys %DB) { +for my $id (keys %DB) { my($refid); my(@refs); for $refid (@{$DB{$id}{"refs"}}) { if (!exists $DB{$refid}) { - warn "Undefined ref id \"$refid\" from id \"$id\"" if $D; + print "Undefined ref id \"$refid\" from id \"$id\"\n" if $D>=1; next; } next if $id eq $refid; # self-ref @@ -162,28 +165,43 @@ for $id (keys %DB) { $DB{$id}{"refs"}=\@refs; } -print Data::Dumper->Dump([\%DB],["%DB"]) if $D; +print Data::Dumper->Dump([\%DB],["%DB"]) if $D>=2; + +my %OWNS=map { $_=>$DB{$_}{"refs"}; } keys(%DB); +my %NAMES=(); -%OWNS=map { $_=>[] } keys(%DB); +for my $id (keys %DB) { + $NAMES{$DB{$id}{"name"}}=[] if !exists $NAMES{$DB{$id}{"name"}}; + push @{$NAMES{$DB{$id}{"name"}}},$id; + } -for $id (keys %OWNS) { +for my $id (keys %DB) { next if !exists $OWNS{$id}; - for my $refid (@{$DB{$id}{"refs"}}) { + my @queue=($id,@{$OWNS{$id}}); + $OWNS{$id}=[]; + while (my $refid=shift @queue) { + if (exists $NAMES{$DB{$refid}{"name"}}) { + push(@queue,@{$NAMES{$DB{$refid}{"name"}}}); + delete $NAMES{$DB{$refid}{"name"}}; + } + next if $refid eq $id; next if !exists $OWNS{$refid}; - push(@{$OWNS{$id}},$refid,@{$OWNS{$refid}}); + push @queue,@{$OWNS{$refid}}; delete $OWNS{$refid}; - print "connected: id=$id,refid=$refid\n" if $D; + print "processed connect id=$id <- refid=$refid\n" if $D>=1; + push @{$OWNS{$id}},$refid; } } -print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D; +print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D>=2; foreach $key (keys %OWNS) { my(@keys)=@{$OWNS{$key}}; delete($OWNS{$key}); unshift(@keys,$key); - @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"} - || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys; +# @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"} +# || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys; + @keys=sort { $DB{$a}{"name"} cmp $DB{$b}{"name"}; } @keys; my($pkey)=shift(@keys); $OWNS{$pkey}=\@keys; } @@ -194,7 +212,8 @@ my($tb_tree); $db_driver="mysql"; $db_host=""; $db_user="short"; -$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd"; +#$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd"; +$db_pwd="short"; $db_name="short"; $tb_tree="kewensis_tree"; @@ -216,7 +235,6 @@ eval { &db_do("drop table $tb_tree") }; ."name varchar(100) not null," ."PublAuthor text null," ."Publication text null," - ."Notes text null," ."html text null" .")"); @@ -224,7 +242,7 @@ eval { &db_do("drop table $tb_tree") }; &db_do("alter table $tb_tree add index (name)"); &db_do("alter table $tb_tree add unique (family_id,family_order)"); -my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,PublAuthor,Publication,Notes,html) values (?,?,?,?,?,?,?)") +my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,PublAuthor,Publication,html) values (?,?,?,?,?,?,?)") or die "Prepare fail: $!"; foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { @@ -233,9 +251,9 @@ foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { my $family_id=$DB{$owner}{"id"}; for my $family_order (0..$#family) { my $id=$family[$family_order]; - print "insert:$id,".$DB{$id}{"name"}."\n" if $D; + print "insert:$id,".$DB{$id}{"name"}."\n" if $D>=2; $insert_tb_tree->execute($id,$family_id,$family_order, - $DB{$id}{"name"},$DB{$id}{"Publ. Author"},$DB{$id}{"Publication"},$DB{$id}{"Notes"},$DB{$id}{"html"} + $DB{$id}{"name"},$DB{$id}{"Publ. Author"},$DB{$id}{"Publication"},$DB{$id}{"html"} ) or die "SQL insert failure: $!"; } }