#! /usr/bin/perl use strict; use warnings; use Cwd; use Data::Dumper; use DBI; my(%DB,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate); %DB=(); $D=1; $debugmatch=0; $doimport=1; $import_xlate=1; sub extract_year { ($_)=@_; if (defined($_)) { s/\(([0-9]{4}) publ. [0-9]{4}\)/($1)/g; while (/\(([0-9]{4})(-[0-9]+)?\)/) { return $1 if ($1>=1700 && $1<=2010); $_=$'; } } return -1; } sub failed { my($file)=@_; print("-$file----------------$'--------------\n"); warn "Unable to match file \"".getcwd()."/$file\"."; } sub process_file { my($file)=@_; if (!open(FI,$file)) { warn("Unable to open file \"$file\" in dir \"".getcwd()."\": $!"); return; } undef $/; my($fi)=; close(FI); my($word)='[^<]*'; my($bigword)='[^<]*(?:)?[^<]*(?:)?[^<]*(?:<1000 m)?[^<]*'; my($idone)='[-\d]'; my($id)="$idone+"; my($any)='[\x00-\xFF]*'; my($ipniservletword)="($word)"; my($idquoted)=$id; $idquoted=~s%\W%\\$&%g; my($ipniservletwordthree)=$ipniservletword; $ipniservletwordthree=~s%\($idquoted\)%((?:$idone){3})$&%os or die; my($attrpat)="

nomenclatural synonym(\\(Main Record\\))?:$ipniservletword

|

basionym(\\(\\d+\\))?:$ipniservletword

|

basionym:($word)|

replaced synonym:$ipniservletword

|

replaced synonym:($word)|

Is a replaced synonym of:$ipniservletword

|

Is a basionym of:$ipniservletword

|

later publication of(\\(\\d+\\))?:$ipniservletword

|

Is a later publication of of:$ipniservletword

"; $|=1; ""=~/^/; if ($fi!~m%^ IPNI Query Results
Orchidaceae ($word) ($word)
(?: ($word) ($word)|($bigword(?:
\n$word)*))?(

($bigword)?

($word)?

(?: remarks: .*)?(

Type

)?()?(?:

Linked Records

((?:$attrpat)*))?(

Original Data

(?:basionym: ($word)
)?(?:hybrid parentage: ($word)
)?(?:replaced synonym: ($word)
)?(?:distribution: ($word)
)?(?:Notes: ($word))?)?)(?:

Back to Search Page

)$%os) { failed($file); return; } my(%rec); $rec{"name"}=$1; $rec{"Publ. Author"}=$2; my $publ=""; $publ.="$4 $5" if defined($4) && defined($5); $publ.=$6 if defined $6; $rec{"html"}=$7 if defined $7; my($attrsbody)=$12; # catch-array destroyed here! $publ.=s/\//g; $rec{"Publication"}=$publ if $publ ne ""; ($rec{"id"}=$file)=~s#^($id)\.html$#$1#os or failed($file); $rec{"html"}=~s#$ipniservletwordthree#$3#osg if $import_xlate && exists $rec{"html"}; $rec{"refs"}=[]; my $score=0; # -: upper, +: lower while (defined($attrsbody) && $attrsbody=~s%^(?:$attrpat)%%os) { # nomenclatural synonym: id=$2 # basionym: id=$5 # replaced synonym: id=$8 # Is a replaced synonym: id=$11 # Is a basionym: id=$13 # later publication: id=$16 # Is a later publication: id=$18 my(@refs)=($2,$5,$8,$11,$13,$16,$18); while (@refs) { push(@{$rec{"refs"}},$_) if defined ($_=shift @refs); } $score+=+10 if defined $5; # basionym: id=$5 $score+=-10 if defined $13; # Is a basionym: id=$13 $score+=- 4 if defined $16; # later publication: id=$16 $score+=+ 4 if defined $18; # Is a later publication: id=$18 } $score+=+9 if $rec{"Publ. Author"}=~/^\s*\(/; $rec{"score"}=$score; if ($attrsbody) { failed($file); return; } $DB{$rec{"id"}}=\%rec; } sub process_dir { my( $dir )=@_; my( $old )=getcwd(); if (!chdir($dir)) { warn("Unable to change to dir \"$dir\" from dir \"$old\": $!"); return; } opendir(DIR,".") or die("Cannot open . in \"".getcwd()."\": $!"); foreach (sort readdir(DIR)) { process_entry($_) if ($_!~/^\./); } closedir(DIR); chdir($old) or warn("Unable to retreat to dir \"$old\": $!"); } sub process_entry { my( $entry )=@_; process_dir ($entry) if -d $entry; process_file($entry) if -f $entry; } foreach (@ARGV) { process_entry($_); } for my $id (keys %DB) { my($refid); my(@refs); for $refid (@{$DB{$id}{"refs"}}) { if (!exists $DB{$refid}) { print "Undefined ref id \"$refid\" from id \"$id\"\n" if $D>=1; next; } next if $id eq $refid; # self-ref # push(@refs,$DB{$refid}); push(@refs,$refid); } $DB{$id}{"refs"}=\@refs; } print Data::Dumper->Dump([\%DB],["%DB"]) if $D>=2; my %OWNS=map { $_=>$DB{$_}{"refs"}; } keys(%DB); my %NAMES=(); for my $id (keys %DB) { $NAMES{$DB{$id}{"name"}}=[] if !exists $NAMES{$DB{$id}{"name"}}; push @{$NAMES{$DB{$id}{"name"}}},$id; } for my $id (keys %DB) { next if !exists $OWNS{$id}; my @queue=($id,@{$OWNS{$id}}); $OWNS{$id}=[]; while (my $refid=shift @queue) { if (exists $NAMES{$DB{$refid}{"name"}}) { push(@queue,@{$NAMES{$DB{$refid}{"name"}}}); delete $NAMES{$DB{$refid}{"name"}}; } next if $refid eq $id; next if !exists $OWNS{$refid}; push @queue,@{$OWNS{$refid}}; delete $OWNS{$refid}; print "processed connect id=$id <- refid=$refid\n" if $D>=1; push @{$OWNS{$id}},$refid; } } print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D>=2; foreach $key (keys %OWNS) { my(@keys)=@{$OWNS{$key}}; delete($OWNS{$key}); unshift(@keys,$key); # @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"} # || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys; @keys=sort { $DB{$a}{"name"} cmp $DB{$b}{"name"}; } @keys; my($pkey)=shift(@keys); $OWNS{$pkey}=\@keys; } my($db_driver,$db_host,$db_user,$db_pwd,$DB_PWD,$db_name,$db); my($tb_tree); $db_driver="mysql"; $db_host=""; $db_user="short"; #$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd"; $db_pwd="short"; $db_name="short"; $tb_tree="kewensis_tree"; $db=DBI->connect("DBI:$db_driver:database=$db_name;host=$db_host",$db_user,$db_pwd) or die "Database open fail: $!"; sub db_do { my( $cmd )=@_; $db->do($cmd) or die("SQL command \"$cmd\" failed: $!"); } eval { &db_do("drop table $tb_tree") }; &db_do("create table $tb_tree (" ."id char(10) not null," ."family_id char(10) not null," ."family_order int not null," ."name varchar(100) not null," ."PublAuthor text null," ."Publication text null," ."html text null" .")"); &db_do("alter table $tb_tree add unique (id)"); &db_do("alter table $tb_tree add index (name)"); &db_do("alter table $tb_tree add unique (family_id,family_order)"); my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,PublAuthor,Publication,html) values (?,?,?,?,?,?,?)") or die "Prepare fail: $!"; foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { my @family=@{$OWNS{$owner}}; unshift(@family,$owner); my $family_id=$DB{$owner}{"id"}; for my $family_order (0..$#family) { my $id=$family[$family_order]; print "insert:$id,".$DB{$id}{"name"}."\n" if $D>=2; $insert_tb_tree->execute($id,$family_id,$family_order, $DB{$id}{"name"},$DB{$id}{"Publ. Author"},$DB{$id}{"Publication"},$DB{$id}{"html"} ) or die "SQL insert failure: $!"; } }