X-Git-Url: https://git.jankratochvil.net/?p=kewensis.git;a=blobdiff_plain;f=kewensis-collect.pl;h=61b6d884c708cb376a6844f5feca63ae2bea241b;hp=009be1e2e6d2ab646f9d3fbca933c214bd5d56bb;hb=5576eb19ac5c37ed1629abcb68db6b5b528a259b;hpb=cc9a120fa1095fe32c186606799e005456fccb54
diff --git a/kewensis-collect.pl b/kewensis-collect.pl
index 009be1e..61b6d88 100755
--- a/kewensis-collect.pl
+++ b/kewensis-collect.pl
@@ -7,10 +7,10 @@ use Cwd;
use Data::Dumper;
use DBI;
-my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate);
+my(%DB,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate);
%DB=();
-$D=0;
+$D=1;
$debugmatch=0;
$doimport=1;
$import_xlate=1;
@@ -64,7 +64,7 @@ my($file)=@_;
Orchidaceae ($word) ($word)
-((?: ($word) ($word)|$bigword(?:
\n$word)*)?($bigword)?
($word)?
(?:
+(?: ($word) ($word)|($bigword(?:
\n$word)*))?(($bigword)?
($word)?
(?:
remarks: .*)?(
Type
)?()?(?:
Linked Records
((?:$attrpat)*))?(
@@ -83,11 +83,17 @@ remarks: .*)?(
Type
)?(
)?(?:
my(%rec);
$rec{"name"}=$1;
$rec{"Publ. Author"}=$2;
- $rec{"Publication"}="$5 $6" if defined($5) && defined($6);
- $rec{"html"}=$3 if defined $3;
- my($attrsbody)=$11;
+ my $publ="";
+ $publ.="$4 $5" if defined($4) && defined($5);
+ $publ.=$6 if defined $6;
+ $rec{"html"}=$7 if defined $7;
+ my($attrsbody)=$12;
+ # catch-array destroyed here!
+ $publ.=s/\
//g;
+ $rec{"Publication"}=$publ if $publ ne "";
($rec{"id"}=$file)=~s#^($id)\.html$#$1#os or failed($file);
$rec{"html"}=~s#$ipniservletwordthree#$3#osg if $import_xlate && exists $rec{"html"};
+ $rec{"refs"}=[];
my $score=0; # -: upper, +: lower
while (defined($attrsbody) && $attrsbody=~s%^(?:$attrpat)%%os) {
# nomenclatural synonym: id=$2
@@ -98,15 +104,15 @@ remarks: .*)?(
Type
)?()?(?:
# later publication: id=$16
# Is a later publication: id=$18
my(@refs)=($2,$5,$8,$11,$13,$16,$18);
- $rec{"refs"}=[];
while (@refs) {
push(@{$rec{"refs"}},$_) if defined ($_=shift @refs);
}
- $score+=-10 if defined $5; # basionym: id=$5
- $score+=+10 if defined $13; # Is a basionym: id=$13
- $score+=+10 if defined $16; # later publication: id=$16
- $score+=-10 if defined $18; # Is a later publication: id=$18
+ $score+=+10 if defined $5; # basionym: id=$5
+ $score+=-10 if defined $13; # Is a basionym: id=$13
+ $score+=- 4 if defined $16; # later publication: id=$16
+ $score+=+ 4 if defined $18; # Is a later publication: id=$18
}
+ $score+=+9 if $rec{"Publ. Author"}=~/^\s*\(/;
$rec{"score"}=$score;
if ($attrsbody) {
failed($file);
@@ -141,18 +147,15 @@ my( $entry )=@_;
process_file($entry) if -f $entry;
}
-%OWNS=();
-
foreach (@ARGV)
{ process_entry($_); }
-my($id);
-for $id (keys %DB) {
+for my $id (keys %DB) {
my($refid);
my(@refs);
for $refid (@{$DB{$id}{"refs"}}) {
if (!exists $DB{$refid}) {
- warn "Undefined ref id \"$refid\" from id \"$id\"" if $D;
+ print "Undefined ref id \"$refid\" from id \"$id\"\n" if $D>=1;
next;
}
next if $id eq $refid; # self-ref
@@ -162,28 +165,43 @@ for $id (keys %DB) {
$DB{$id}{"refs"}=\@refs;
}
-print Data::Dumper->Dump([\%DB],["%DB"]) if $D;
+print Data::Dumper->Dump([\%DB],["%DB"]) if $D>=2;
+
+my %OWNS=map { $_=>$DB{$_}{"refs"}; } keys(%DB);
+my %NAMES=();
-%OWNS=map { $_=>[] } keys(%DB);
+for my $id (keys %DB) {
+ $NAMES{$DB{$id}{"name"}}=[] if !exists $NAMES{$DB{$id}{"name"}};
+ push @{$NAMES{$DB{$id}{"name"}}},$id;
+ }
-for $id (keys %OWNS) {
+for my $id (keys %DB) {
next if !exists $OWNS{$id};
- for my $refid (@{$DB{$id}{"refs"}}) {
+ my @queue=($id,@{$OWNS{$id}});
+ $OWNS{$id}=[];
+ while (my $refid=shift @queue) {
+ if (exists $NAMES{$DB{$refid}{"name"}}) {
+ push(@queue,@{$NAMES{$DB{$refid}{"name"}}});
+ delete $NAMES{$DB{$refid}{"name"}};
+ }
+ next if $refid eq $id;
next if !exists $OWNS{$refid};
- push(@{$OWNS{$id}},$refid,@{$OWNS{$refid}});
+ push @queue,@{$OWNS{$refid}};
delete $OWNS{$refid};
- print "connected: id=$id,refid=$refid\n" if $D;
+ print "processed connect id=$id <- refid=$refid\n" if $D>=1;
+ push @{$OWNS{$id}},$refid;
}
}
-print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D;
+print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D>=2;
foreach $key (keys %OWNS) {
my(@keys)=@{$OWNS{$key}};
delete($OWNS{$key});
unshift(@keys,$key);
- @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"}
- || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys;
+# @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"}
+# || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys;
+ @keys=sort { $DB{$a}{"name"} cmp $DB{$b}{"name"}; } @keys;
my($pkey)=shift(@keys);
$OWNS{$pkey}=\@keys;
}
@@ -194,7 +212,8 @@ my($tb_tree);
$db_driver="mysql";
$db_host="";
$db_user="short";
-$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd";
+#$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd";
+$db_pwd="short";
$db_name="short";
$tb_tree="kewensis_tree";
@@ -216,7 +235,6 @@ eval { &db_do("drop table $tb_tree") };
."name varchar(100) not null,"
."PublAuthor text null,"
."Publication text null,"
- ."Notes text null,"
."html text null"
.")");
@@ -224,7 +242,7 @@ eval { &db_do("drop table $tb_tree") };
&db_do("alter table $tb_tree add index (name)");
&db_do("alter table $tb_tree add unique (family_id,family_order)");
-my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,PublAuthor,Publication,Notes,html) values (?,?,?,?,?,?,?)")
+my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,PublAuthor,Publication,html) values (?,?,?,?,?,?,?)")
or die "Prepare fail: $!";
foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) {
@@ -233,9 +251,9 @@ foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) {
my $family_id=$DB{$owner}{"id"};
for my $family_order (0..$#family) {
my $id=$family[$family_order];
- print "insert:$id,".$DB{$id}{"name"}."\n" if $D;
+ print "insert:$id,".$DB{$id}{"name"}."\n" if $D>=2;
$insert_tb_tree->execute($id,$family_id,$family_order,
- $DB{$id}{"name"},$DB{$id}{"Publ. Author"},$DB{$id}{"Publication"},$DB{$id}{"Notes"},$DB{$id}{"html"}
+ $DB{$id}{"name"},$DB{$id}{"Publ. Author"},$DB{$id}{"Publication"},$DB{$id}{"html"}
) or die "SQL insert failure: $!";
}
}