init orig
authorshort <>
Sun, 30 Sep 2001 21:09:31 +0000 (21:09 +0000)
committershort <>
Sun, 30 Sep 2001 21:09:31 +0000 (21:09 +0000)
kewensis-collect.pl [new file with mode: 0755]

diff --git a/kewensis-collect.pl b/kewensis-collect.pl
new file mode 100755 (executable)
index 0000000..f71d434
--- /dev/null
@@ -0,0 +1,125 @@
+#! /usr/bin/perl -w
+
+use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref/;
+
+%DB=();
+%CURR=();
+$debugparse=0;
+$printdupl=0;
+$debugmatch=0;
+
+sub name_to_key
+{
+my( $r )=@_;
+
+       $r=~tr/A-Z/a-z/;
+       $r=~tr/a-z0-9//cd;
+       return $r;
+}
+
+sub rec_to_name
+{
+my( %rec )=@_;
+my( @list )=("Rank","Infrafam.","Genus","Species");
+my( $item,$r );
+
+       $r="";
+       while ($item=shift @list)
+               { $r.=" ".$rec{$item} if exists $rec{$item}; }
+       $r=~s/^ //;
+       return $r;
+}
+
+sub flush_CURR
+{
+my( $key );
+
+       $key=&name_to_key(&rec_to_name(%CURR));
+       return if (!defined $key);
+       if (exists $DB{$key})
+               { print STDERR "Key \"$key\" is duplicated!\n" if $printdupl; }
+       else {
+               print STDERR "ADDKEY: $key\n" if $debugparse;
+               %{$DB{$key}}=%CURR;
+               }
+       %CURR=();
+}
+
+while (<>) {
+       tr/\r\n//d;
+       if (/^ *\304\301\304/) {
+               &flush_CURR();
+               print STDERR "---\n" if $debugparse;
+               }
+       elsif (/^([^³]*[^ ]) *³ *(.*)$/) {
+               print STDERR "FIELD: $1, VALUE: $2\n" if $debugparse;
+               $last_field=$1;
+               $CURR{$1}=$2;
+               }
+       elsif (/^ *³ *(.*)$/) {
+               print STDERR "APPEND: $1\n" if $debugparse;
+               $CURR{$last_field}.=" ".$1;
+               }
+       else {
+               print STDERR "DISCARD: $_\n" if $debugparse;
+               }
+       }
+&flush_CURR();
+undef %CURR;
+
+%OWNS=();
+%PARENT=();
+
+sub try_reparent
+{
+my( $reparent,$refkey );
+
+       $refkey=&name_to_key($ref);
+       return 0 if ($refkey eq "");
+       return 0 if ($parent eq $refkey);
+       return 0 if !exists $DB{$refkey};
+       print STDERR "try_reparent: SUCCESS: key=\"$key\", refkey=\"$refkey\"\n" if $debugmatch;
+
+       foreach $reparent (@{$OWNS{$parent}}) {
+               $PARENT{$reparent}=$refkey;
+               }
+       @{$OWNS{$parent}}=() if (!defined $OWNS{$parent});
+       @{$OWNS{$refkey}}=() if (!defined $OWNS{$refkey});
+       @{$OWNS{$refkey}}=(@{$OWNS{$refkey}},$parent,@{$OWNS{$parent}});
+       delete $OWNS{$parent};
+
+       return 1;
+}
+
+MATCH: foreach $key (keys %DB) {
+       $parent=$PARENT{$key};
+       $parent=$key if (!defined $parent); 
+       $ref=$DB{$key}{"Notes"};
+       next MATCH if (!defined $ref);
+       $ref=~tr/()//d;
+       $ref=~s/^=//;
+       $ref=~s/^O\. *//i;
+       $ref=~s/^Orchidaceae *//i;
+       $ref=~s/\..*$//;
+       next MATCH if &try_reparent();
+       $ref=$DB{$key}{"Rank"}." $ref" if exists $DB{$key}{"Rank"};
+       next MATCH if &try_reparent();
+       $ref=$DB{$key}{"Genus"}." $ref" if exists $DB{$key}{"Genus"};
+       next MATCH if &try_reparent();
+       }
+undef %PARENT;
+
+foreach $owner (keys %OWNS) {
+my( $child );
+
+       if ($debugmatch) {
+               print STDERR "($owner):";
+               foreach $child (@{$OWNS{$owner}})
+                       { print STDERR " ($child)"; }
+               print STDERR "\n";
+               }
+       print "(".&rec_to_name(%{$DB{$owner}}).")";
+       foreach $child (@{$OWNS{$owner}})
+               { print " (".&rec_to_name(%{$DB{$child}}).")"; }
+       print "\n";
+       }