#! /usr/bin/perl use strict; use warnings; use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref $note_rest $printdupl $maxsize $fileno $fileid $filename/; $maxsize=0x40000; %DB=(); %CURR=(); $debugparse=0; $printdupl=0; $debugmatch=0; sub name_to_key { my( $r )=@_; $r=~tr/A-Z/a-z/; $r=~tr/a-z0-9//cd; return $r; } sub rec_to_name { my( %rec )=@_; my( @list )=("Genus","Species"); my( $item,$r ); $r=""; while ($item=shift @list) { $r.=" ".$rec{$item} if exists $rec{$item}; } $r=~s/^ //; return $r; } sub htmlquote { ($_)=@_; s/&/&/g; s//>/g; s/\n/&nl;/g; s/"/"/g; return $_; } sub format_record { my( $preinsert,$postinsert,%rec )=@_; my($r)="

".$preinsert.&htmlquote(&rec_to_name(%rec)).$postinsert.""; $r.="\n".htmlquote($rec{"Publ. Author"}) if (exists($rec{"Publ. Author"})); $r.="
\n".htmlquote($rec{"Publication"}) if (exists($rec{"Publication"})); $r.="
\n".htmlquote($rec{"Notes"}) if (exists($rec{"Notes"})); $r.="

\n"; return($r); } sub extract_year { ($_)=@_; if (defined($_)) { s/\(([0-9]{4}) publ. [0-9]{4}\)/($1)/g; while (/\(([0-9]{4})(-[0-9]+)?\)/) { return $1 if ($1>=1700 && $1<=2010); $_=$'; } } return(-1); } sub flush_CURR { my( $key ); $key=&name_to_key(&rec_to_name(%CURR)); return if (!$key); delete $CURR{"Family"}; if (exists $DB{$key}) { print STDERR "Key \"$key\" is duplicated!\n" if $printdupl; } else { print STDERR "ADDKEY: $key\n" if $debugparse; %{$DB{$key}}=%CURR; # my($year)=&extract_year($CURR{"Notes"}); # %{$DB{$key}}{"year"}=$year if (defined($year)); } %CURR=(); @{$OWNS{$key}}=(); } %OWNS=(); while (<>) { tr/\r\n//d; if (/^ *\304\301\304/) { &flush_CURR(); print STDERR "---\n" if $debugparse; } elsif (/^([^]*[^ ]) * *(.*)$/) { print STDERR "FIELD: $1, VALUE: $2\n" if $debugparse; $last_field=$1; $CURR{$1}=$2; } elsif (/^ * *(.*)$/) { print STDERR "APPEND: $1\n" if $debugparse; $CURR{$last_field}.=" ".$1; } else { print STDERR "DISCARD: $_\n" if $debugparse; } } &flush_CURR(); undef %CURR; %PARENT=(); sub try_reparent { my( $reparent,$refkey ); $refkey=&name_to_key($ref); return 0 if ($refkey eq ""); return 0 if ($parent eq $refkey); return 0 if !exists $DB{$refkey}; print STDERR "try_reparent: SUCCESS: key=\"$key\", refkey=\"$refkey\"\n" if $debugmatch; if ($note_rest) { $DB{$key}{"Notes"}=$note_rest; } else { delete $DB{$key}{"Notes"}; } foreach $reparent (@{$OWNS{$parent}}) { $PARENT{$reparent}=$refkey; } @{$OWNS{$parent}}=() if (!exists $OWNS{$parent}); @{$OWNS{$refkey}}=() if (!exists $OWNS{$refkey}); @{$OWNS{$refkey}}=(@{$OWNS{$refkey}},$parent,@{$OWNS{$parent}}); delete $OWNS{$parent}; return 1; } MATCH: foreach $key (keys %DB) { $parent=$PARENT{$key}; $parent=$key if (!defined $parent); $ref=$DB{$key}{"Notes"}; next MATCH if (!defined $ref); $ref=~tr/()//d; $ref=~s/^=//; $ref=~s/^O\. *//i; $ref=~s/^Orchidaceae *//i; $ref=~s/\. *(.*)$//; $note_rest=$1; next MATCH if &try_reparent(); $ref=$DB{$key}{"Rank"}." $ref" if exists $DB{$key}{"Rank"}; next MATCH if &try_reparent(); $ref=$DB{$key}{"Genus"}." $ref" if exists $DB{$key}{"Genus"}; next MATCH if &try_reparent(); } undef %PARENT; #foreach $key (keys %DB) { # $DB{$key}{"Publication"}=&extract_year($DB{$key}{"Publication"})." ::: ".$DB{$key}{"Publication"} # if (exists($DB{$key}{"Publication"})); # } foreach $key (keys %OWNS) { my(@keys)=@{$OWNS{$key}}; delete($OWNS{$key}); unshift(@keys,$key); @keys=sort { &extract_year($DB{$b}{"Publication"}) <=> &extract_year($DB{$a}{"Publication"}); } @keys; my($pkey)=shift(@keys); @{$OWNS{$pkey}}=@keys; } sub print_header { my($header)=@_; print OUT " Kewensis $header \n"; } $fileno=-1; sub OUT_flush { print OUT "\n"; close(OUT); } my( $filename ); foreach $owner (sort keys %OWNS) { my( $child ); if ($fileno<0 || tell(OUT)>=$maxsize) { &OUT_flush() if ($fileno>=0); $fileid=sprintf("%04d",++$fileno); $filename="kew-$fileid.html"; open(OUT,">$filename") or die "Cannot open \"$filename\": $!"; &print_header("chunk $fileid"); } if ($debugmatch) { print STDERR "($owner):"; foreach $child (@{$OWNS{$owner}}) { print STDERR " ($child)"; } print STDERR "\n"; } print OUT &format_record("","",%{$DB{$owner}}); if (@{$OWNS{$owner}}) { print OUT "
\n"; foreach $child (@{$OWNS{$owner}}) { print OUT &format_record("","",%{$DB{$child}}); } print OUT "
\n"; } print OUT "\n"; $DB{$owner}{"_filename"}=$filename; } &OUT_flush(); open(OUT,">kew-index.html") or die "Cannot open \"kew-index.html\": $!"; &print_header("Index"); sub print_href { my($owner)=@_; my(%rec)=%{$DB{$owner}}; print OUT &htmlquote(&rec_to_name(%rec)); } foreach $owner (sort keys %OWNS) { my( $child ); print OUT "

"; &print_href($owner); print OUT "

"; if (@{$OWNS{$owner}}) { print OUT ""; } print OUT "\n"; } OUT_flush();