From: short <> Date: Sun, 30 Sep 2001 21:09:31 +0000 (+0000) Subject: init X-Git-Tag: orig X-Git-Url: https://git.jankratochvil.net/?p=kewensis.git;a=commitdiff_plain;h=525245c5f9c7f716b224a04a62fd9b4a3b501bb8 init --- 525245c5f9c7f716b224a04a62fd9b4a3b501bb8 diff --git a/kewensis-collect.pl b/kewensis-collect.pl new file mode 100755 index 0000000..f71d434 --- /dev/null +++ b/kewensis-collect.pl @@ -0,0 +1,125 @@ +#! /usr/bin/perl -w + +use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref/; + +%DB=(); +%CURR=(); +$debugparse=0; +$printdupl=0; +$debugmatch=0; + +sub name_to_key +{ +my( $r )=@_; + + $r=~tr/A-Z/a-z/; + $r=~tr/a-z0-9//cd; + return $r; +} + +sub rec_to_name +{ +my( %rec )=@_; +my( @list )=("Rank","Infrafam.","Genus","Species"); +my( $item,$r ); + + $r=""; + while ($item=shift @list) + { $r.=" ".$rec{$item} if exists $rec{$item}; } + $r=~s/^ //; + return $r; +} + +sub flush_CURR +{ +my( $key ); + + $key=&name_to_key(&rec_to_name(%CURR)); + return if (!defined $key); + if (exists $DB{$key}) + { print STDERR "Key \"$key\" is duplicated!\n" if $printdupl; } + else { + print STDERR "ADDKEY: $key\n" if $debugparse; + %{$DB{$key}}=%CURR; + } + %CURR=(); +} + +while (<>) { + tr/\r\n//d; + if (/^ *\304\301\304/) { + &flush_CURR(); + print STDERR "---\n" if $debugparse; + } + elsif (/^([^³]*[^ ]) *³ *(.*)$/) { + print STDERR "FIELD: $1, VALUE: $2\n" if $debugparse; + $last_field=$1; + $CURR{$1}=$2; + } + elsif (/^ *³ *(.*)$/) { + print STDERR "APPEND: $1\n" if $debugparse; + $CURR{$last_field}.=" ".$1; + } + else { + print STDERR "DISCARD: $_\n" if $debugparse; + } + } +&flush_CURR(); +undef %CURR; + +%OWNS=(); +%PARENT=(); + +sub try_reparent +{ +my( $reparent,$refkey ); + + $refkey=&name_to_key($ref); + return 0 if ($refkey eq ""); + return 0 if ($parent eq $refkey); + return 0 if !exists $DB{$refkey}; + print STDERR "try_reparent: SUCCESS: key=\"$key\", refkey=\"$refkey\"\n" if $debugmatch; + + foreach $reparent (@{$OWNS{$parent}}) { + $PARENT{$reparent}=$refkey; + } + @{$OWNS{$parent}}=() if (!defined $OWNS{$parent}); + @{$OWNS{$refkey}}=() if (!defined $OWNS{$refkey}); + @{$OWNS{$refkey}}=(@{$OWNS{$refkey}},$parent,@{$OWNS{$parent}}); + delete $OWNS{$parent}; + + return 1; +} + +MATCH: foreach $key (keys %DB) { + $parent=$PARENT{$key}; + $parent=$key if (!defined $parent); + $ref=$DB{$key}{"Notes"}; + next MATCH if (!defined $ref); + $ref=~tr/()//d; + $ref=~s/^=//; + $ref=~s/^O\. *//i; + $ref=~s/^Orchidaceae *//i; + $ref=~s/\..*$//; + next MATCH if &try_reparent(); + $ref=$DB{$key}{"Rank"}." $ref" if exists $DB{$key}{"Rank"}; + next MATCH if &try_reparent(); + $ref=$DB{$key}{"Genus"}." $ref" if exists $DB{$key}{"Genus"}; + next MATCH if &try_reparent(); + } +undef %PARENT; + +foreach $owner (keys %OWNS) { +my( $child ); + + if ($debugmatch) { + print STDERR "($owner):"; + foreach $child (@{$OWNS{$owner}}) + { print STDERR " ($child)"; } + print STDERR "\n"; + } + print "(".&rec_to_name(%{$DB{$owner}}).")"; + foreach $child (@{$OWNS{$owner}}) + { print " (".&rec_to_name(%{$DB{$child}}).")"; } + print "\n"; + }