X-Git-Url: https://git.jankratochvil.net/?p=kewensis.git;a=blobdiff_plain;f=kewensis-collect.pl;h=c6f399bfa009f6789d1be3ca2e022ea90e9e8f54;hp=f71d434fd081e9eb486011795d39bdc8f2674913;hb=cf0dae5bac9c0070c767b3b798a83f8e9f4acd04;hpb=525245c5f9c7f716b224a04a62fd9b4a3b501bb8
diff --git a/kewensis-collect.pl b/kewensis-collect.pl
index f71d434..c6f399b 100755
--- a/kewensis-collect.pl
+++ b/kewensis-collect.pl
@@ -1,7 +1,11 @@
-#! /usr/bin/perl -w
+#! /usr/bin/perl
-use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref/;
+use strict;
+use warnings;
+use vars qw/%DB %OWNS %PARENT %CURR $last_field $debugmatch $debugparse $key $owner $parent $ref $note_rest $printdupl $maxsize $fileno $fileid $filename/;
+
+$maxsize=0x40000;
%DB=();
%CURR=();
$debugparse=0;
@@ -20,7 +24,7 @@ my( $r )=@_;
sub rec_to_name
{
my( %rec )=@_;
-my( @list )=("Rank","Infrafam.","Genus","Species");
+my( @list )=("Genus","Species");
my( $item,$r );
$r="";
@@ -30,21 +34,64 @@ my( $item,$r );
return $r;
}
+sub htmlquote
+{
+($_)=@_;
+
+ s/&/&/g;
+ s/</g;
+ s/>/>/g;
+ s/\n/&nl;/g;
+ s/"/"/g;
+ return $_;
+}
+
+sub format_record
+{
+my( $preinsert,$postinsert,%rec )=@_;
+
+ my($r)="
".$preinsert.&htmlquote(&rec_to_name(%rec)).$postinsert."";
+ $r.="\n".htmlquote($rec{"Publ. Author"}) if (exists($rec{"Publ. Author"}));
+ $r.="
\n".htmlquote($rec{"Publication"}) if (exists($rec{"Publication"}));
+ $r.="
\n".htmlquote($rec{"Notes"}) if (exists($rec{"Notes"}));
+ $r.="
\n";
+ return($r);
+}
+
+sub extract_year
+{
+ ($_)=@_;
+ if (defined($_)) {
+ s/\(([0-9]{4}) publ. [0-9]{4}\)/($1)/g;
+ while (/\(([0-9]{4})(-[0-9]+)?\)/) {
+ return $1 if ($1>=1700 && $1<=2010);
+ $_=$';
+ }
+ }
+ return(-1);
+}
+
sub flush_CURR
{
my( $key );
$key=&name_to_key(&rec_to_name(%CURR));
- return if (!defined $key);
+ return if (!$key);
+ delete $CURR{"Family"};
if (exists $DB{$key})
{ print STDERR "Key \"$key\" is duplicated!\n" if $printdupl; }
else {
print STDERR "ADDKEY: $key\n" if $debugparse;
%{$DB{$key}}=%CURR;
+# my($year)=&extract_year($CURR{"Notes"});
+# %{$DB{$key}}{"year"}=$year if (defined($year));
}
%CURR=();
+ @{$OWNS{$key}}=();
}
+%OWNS=();
+
while (<>) {
tr/\r\n//d;
if (/^ *\304\301\304/) {
@@ -67,7 +114,6 @@ while (<>) {
&flush_CURR();
undef %CURR;
-%OWNS=();
%PARENT=();
sub try_reparent
@@ -79,12 +125,16 @@ my( $reparent,$refkey );
return 0 if ($parent eq $refkey);
return 0 if !exists $DB{$refkey};
print STDERR "try_reparent: SUCCESS: key=\"$key\", refkey=\"$refkey\"\n" if $debugmatch;
+ if ($note_rest)
+ { $DB{$key}{"Notes"}=$note_rest; }
+ else
+ { delete $DB{$key}{"Notes"}; }
foreach $reparent (@{$OWNS{$parent}}) {
$PARENT{$reparent}=$refkey;
}
- @{$OWNS{$parent}}=() if (!defined $OWNS{$parent});
- @{$OWNS{$refkey}}=() if (!defined $OWNS{$refkey});
+ @{$OWNS{$parent}}=() if (!exists $OWNS{$parent});
+ @{$OWNS{$refkey}}=() if (!exists $OWNS{$refkey});
@{$OWNS{$refkey}}=(@{$OWNS{$refkey}},$parent,@{$OWNS{$parent}});
delete $OWNS{$parent};
@@ -100,7 +150,8 @@ MATCH: foreach $key (keys %DB) {
$ref=~s/^=//;
$ref=~s/^O\. *//i;
$ref=~s/^Orchidaceae *//i;
- $ref=~s/\..*$//;
+ $ref=~s/\. *(.*)$//;
+ $note_rest=$1;
next MATCH if &try_reparent();
$ref=$DB{$key}{"Rank"}." $ref" if exists $DB{$key}{"Rank"};
next MATCH if &try_reparent();
@@ -109,17 +160,99 @@ MATCH: foreach $key (keys %DB) {
}
undef %PARENT;
-foreach $owner (keys %OWNS) {
+#foreach $key (keys %DB) {
+# $DB{$key}{"Publication"}=&extract_year($DB{$key}{"Publication"})." ::: ".$DB{$key}{"Publication"}
+# if (exists($DB{$key}{"Publication"}));
+# }
+
+foreach $key (keys %OWNS) {
+ my(@keys)=@{$OWNS{$key}};
+ delete($OWNS{$key});
+ unshift(@keys,$key);
+ @keys=sort { &extract_year($DB{$b}{"Publication"}) <=> &extract_year($DB{$a}{"Publication"}); } @keys;
+ my($pkey)=shift(@keys);
+ @{$OWNS{$pkey}}=@keys;
+ }
+
+sub print_header
+{
+my($header)=@_;
+
+ print OUT
+"
+
+
+Kewensis $header
+
+
+\n";
+}
+
+$fileno=-1;
+
+sub OUT_flush
+{
+ print OUT "\n";
+ close(OUT);
+}
+
+my( $filename );
+foreach $owner (sort keys %OWNS) {
my( $child );
+ if ($fileno<0 || tell(OUT)>=$maxsize) {
+ &OUT_flush() if ($fileno>=0);
+ $fileid=sprintf("%04d",++$fileno);
+ $filename="kew-$fileid.html";
+ open(OUT,">$filename") or die "Cannot open \"$filename\": $!";
+ &print_header("chunk $fileid");
+ }
+
if ($debugmatch) {
print STDERR "($owner):";
foreach $child (@{$OWNS{$owner}})
{ print STDERR " ($child)"; }
print STDERR "\n";
}
- print "(".&rec_to_name(%{$DB{$owner}}).")";
- foreach $child (@{$OWNS{$owner}})
- { print " (".&rec_to_name(%{$DB{$child}}).")"; }
- print "\n";
+ print OUT &format_record("","",%{$DB{$owner}});
+ if (@{$OWNS{$owner}}) {
+ print OUT "\n";
+ foreach $child (@{$OWNS{$owner}})
+ { print OUT &format_record("","",%{$DB{$child}}); }
+ print OUT "
\n";
+ }
+ print OUT "\n";
+ $DB{$owner}{"_filename"}=$filename;
+ }
+&OUT_flush();
+open(OUT,">kew-index.html") or die "Cannot open \"kew-index.html\": $!";
+&print_header("Index");
+
+sub print_href
+{
+my($owner)=@_;
+my(%rec)=%{$DB{$owner}};
+
+ print OUT &htmlquote(&rec_to_name(%rec));
+}
+
+foreach $owner (sort keys %OWNS) {
+my( $child );
+
+ print OUT "";
+ &print_href($owner);
+ print OUT "
";
+ if (@{$OWNS{$owner}}) {
+ print OUT "\n";
+ foreach $child (@{$OWNS{$owner}}) {
+ print OUT "- ";
+ &print_href($child);
+ print OUT "
\n";
+ }
+ print OUT "
";
+ }
+ print OUT "\n";
}
+OUT_flush();