X-Git-Url: https://git.jankratochvil.net/?p=kewensis.git;a=blobdiff_plain;f=kewensis-collect.pl;h=c6f399bfa009f6789d1be3ca2e022ea90e9e8f54;hp=f71d434fd081e9eb486011795d39bdc8f2674913;hb=cf0dae5bac9c0070c767b3b798a83f8e9f4acd04;hpb=525245c5f9c7f716b224a04a62fd9b4a3b501bb8 diff --git a/kewensis-collect.pl b/kewensis-collect.pl index f71d434..c6f399b 100755 --- a/kewensis-collect.pl +++ b/kewensis-collect.pl @@ -1,7 +1,11 @@ -#! /usr/bin/perl -w +#! /usr/bin/perl -use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref/; +use strict; +use warnings; +use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref $note_rest $printdupl $maxsize $fileno $fileid $filename/; + +$maxsize=0x40000; %DB=(); %CURR=(); $debugparse=0; @@ -20,7 +24,7 @@ my( $r )=@_; sub rec_to_name { my( %rec )=@_; -my( @list )=("Rank","Infrafam.","Genus","Species"); +my( @list )=("Genus","Species"); my( $item,$r ); $r=""; @@ -30,21 +34,64 @@ my( $item,$r ); return $r; } +sub htmlquote +{ +($_)=@_; + + s/&/&/g; + s//>/g; + s/\n/&nl;/g; + s/"/"/g; + return $_; +} + +sub format_record +{ +my( $preinsert,$postinsert,%rec )=@_; + + my($r)="

".$preinsert.&htmlquote(&rec_to_name(%rec)).$postinsert.""; + $r.="\n".htmlquote($rec{"Publ. Author"}) if (exists($rec{"Publ. Author"})); + $r.="
\n".htmlquote($rec{"Publication"}) if (exists($rec{"Publication"})); + $r.="
\n".htmlquote($rec{"Notes"}) if (exists($rec{"Notes"})); + $r.="

\n"; + return($r); +} + +sub extract_year +{ + ($_)=@_; + if (defined($_)) { + s/\(([0-9]{4}) publ. [0-9]{4}\)/($1)/g; + while (/\(([0-9]{4})(-[0-9]+)?\)/) { + return $1 if ($1>=1700 && $1<=2010); + $_=$'; + } + } + return(-1); +} + sub flush_CURR { my( $key ); $key=&name_to_key(&rec_to_name(%CURR)); - return if (!defined $key); + return if (!$key); + delete $CURR{"Family"}; if (exists $DB{$key}) { print STDERR "Key \"$key\" is duplicated!\n" if $printdupl; } else { print STDERR "ADDKEY: $key\n" if $debugparse; %{$DB{$key}}=%CURR; +# my($year)=&extract_year($CURR{"Notes"}); +# %{$DB{$key}}{"year"}=$year if (defined($year)); } %CURR=(); + @{$OWNS{$key}}=(); } +%OWNS=(); + while (<>) { tr/\r\n//d; if (/^ *\304\301\304/) { @@ -67,7 +114,6 @@ while (<>) { &flush_CURR(); undef %CURR; -%OWNS=(); %PARENT=(); sub try_reparent @@ -79,12 +125,16 @@ my( $reparent,$refkey ); return 0 if ($parent eq $refkey); return 0 if !exists $DB{$refkey}; print STDERR "try_reparent: SUCCESS: key=\"$key\", refkey=\"$refkey\"\n" if $debugmatch; + if ($note_rest) + { $DB{$key}{"Notes"}=$note_rest; } + else + { delete $DB{$key}{"Notes"}; } foreach $reparent (@{$OWNS{$parent}}) { $PARENT{$reparent}=$refkey; } - @{$OWNS{$parent}}=() if (!defined $OWNS{$parent}); - @{$OWNS{$refkey}}=() if (!defined $OWNS{$refkey}); + @{$OWNS{$parent}}=() if (!exists $OWNS{$parent}); + @{$OWNS{$refkey}}=() if (!exists $OWNS{$refkey}); @{$OWNS{$refkey}}=(@{$OWNS{$refkey}},$parent,@{$OWNS{$parent}}); delete $OWNS{$parent}; @@ -100,7 +150,8 @@ MATCH: foreach $key (keys %DB) { $ref=~s/^=//; $ref=~s/^O\. *//i; $ref=~s/^Orchidaceae *//i; - $ref=~s/\..*$//; + $ref=~s/\. *(.*)$//; + $note_rest=$1; next MATCH if &try_reparent(); $ref=$DB{$key}{"Rank"}." $ref" if exists $DB{$key}{"Rank"}; next MATCH if &try_reparent(); @@ -109,17 +160,99 @@ MATCH: foreach $key (keys %DB) { } undef %PARENT; -foreach $owner (keys %OWNS) { +#foreach $key (keys %DB) { +# $DB{$key}{"Publication"}=&extract_year($DB{$key}{"Publication"})." ::: ".$DB{$key}{"Publication"} +# if (exists($DB{$key}{"Publication"})); +# } + +foreach $key (keys %OWNS) { + my(@keys)=@{$OWNS{$key}}; + delete($OWNS{$key}); + unshift(@keys,$key); + @keys=sort { &extract_year($DB{$b}{"Publication"}) <=> &extract_year($DB{$a}{"Publication"}); } @keys; + my($pkey)=shift(@keys); + @{$OWNS{$pkey}}=@keys; + } + +sub print_header +{ +my($header)=@_; + + print OUT +" + + +Kewensis $header + + +\n"; +} + +$fileno=-1; + +sub OUT_flush +{ + print OUT "\n"; + close(OUT); +} + +my( $filename ); +foreach $owner (sort keys %OWNS) { my( $child ); + if ($fileno<0 || tell(OUT)>=$maxsize) { + &OUT_flush() if ($fileno>=0); + $fileid=sprintf("%04d",++$fileno); + $filename="kew-$fileid.html"; + open(OUT,">$filename") or die "Cannot open \"$filename\": $!"; + &print_header("chunk $fileid"); + } + if ($debugmatch) { print STDERR "($owner):"; foreach $child (@{$OWNS{$owner}}) { print STDERR " ($child)"; } print STDERR "\n"; } - print "(".&rec_to_name(%{$DB{$owner}}).")"; - foreach $child (@{$OWNS{$owner}}) - { print " (".&rec_to_name(%{$DB{$child}}).")"; } - print "\n"; + print OUT &format_record("","",%{$DB{$owner}}); + if (@{$OWNS{$owner}}) { + print OUT "
\n"; + foreach $child (@{$OWNS{$owner}}) + { print OUT &format_record("","",%{$DB{$child}}); } + print OUT "
\n"; + } + print OUT "\n"; + $DB{$owner}{"_filename"}=$filename; + } +&OUT_flush(); +open(OUT,">kew-index.html") or die "Cannot open \"kew-index.html\": $!"; +&print_header("Index"); + +sub print_href +{ +my($owner)=@_; +my(%rec)=%{$DB{$owner}}; + + print OUT &htmlquote(&rec_to_name(%rec)); +} + +foreach $owner (sort keys %OWNS) { +my( $child ); + + print OUT "

"; + &print_href($owner); + print OUT "

"; + if (@{$OWNS{$owner}}) { + print OUT ""; + } + print OUT "\n"; } +OUT_flush();