#! /usr/bin/perl use strict; use warnings; use Cwd; use Data::Dumper; my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$maxsize,$filename,$doimport,$import_xlate); # $maxsize=0x40000; %DB=(); $D=0; $debugmatch=0; $doimport=1; $import_xlate=1; sub rec_to_name { my( %rec )=@_; return $rec{"name"}; } sub htmlquote { my($class); ($_,$class)=@_; s/&/&/g; s//>/g; s/\n/&nl;/g; s/"/"/g; return "$_" if defined $class; return $_; } sub format_record { my( $preinsert,$postinsert,%rec )=@_; my($r)=$preinsert.htmlquote(rec_to_name(%rec),"name").$postinsert; $r.="\n".htmlquote($rec{"Publ. Author"},"author") if (exists($rec{"Publ. Author"})); $r.="
\n".htmlquote($rec{"Publication"},"publication") if (exists($rec{"Publication"})); $r.="
\n".htmlquote($rec{"Notes"},"notes") if (exists($rec{"Notes"})); if ($doimport && exists($rec{"html"})) { my($import)="import: [".$rec{"id"}."]"; $r.="
\n
\n".$rec{"html"}."\n
\n"; } return $r; } sub extract_year { ($_)=@_; if (defined($_)) { s/\(([0-9]{4}) publ. [0-9]{4}\)/($1)/g; while (/\(([0-9]{4})(-[0-9]+)?\)/) { return $1 if ($1>=1700 && $1<=2010); $_=$'; } } return -1; } sub failed { my($file)=@_; print("-$file----------------$'--------------\n"); warn "Unable to match file \"".getcwd()."/$file\"."; } sub process_file { my($file)=@_; if (!open(FI,$file)) { warn("Unable to open file \"$file\" in dir \"".getcwd()."\": $!"); return; } undef $/; my($fi)=; close(FI); my($word)='[^<]*'; my($bigword)='[^<]*(?:)?[^<]*(?:)?[^<]*(?:<1000 m)?[^<]*'; my($idone)='[-\d]'; my($id)="$idone+"; my($any)='[\x00-\xFF]*'; my($ipniservletword)="($word)"; my($idquoted)=$id; $idquoted=~s%\W%\\$&%g; my($ipniservletwordthree)=$ipniservletword; $ipniservletwordthree=~s%\($idquoted\)%((?:$idone){3})$&%os or die; my($attrpat)="

nomenclatural synonym(\\(Main Record\\))?:$ipniservletword

|

basionym(\\(\\d+\\))?:$ipniservletword

|

basionym:($word)|

replaced synonym:$ipniservletword

|

replaced synonym:($word)|

Is a replaced synonym of:$ipniservletword

|

Is a basionym of:$ipniservletword

|

later publication of(\\(\\d+\\))?:$ipniservletword

|

Is a later publication of of:$ipniservletword

"; $|=1; ""=~/^/; if ($fi!~m%^ IPNI Query Results
Orchidaceae ($word) ($word)
((?: ($word) ($word)|$bigword(?:
\n$word)*)?

($bigword)?

($word)?

(?: remarks: .*)?(

Type

)?()?(?:

Linked Records

((?:$attrpat)*))?(

Original Data

(?:basionym: ($word)
)?(?:hybrid parentage: ($word)
)?(?:replaced synonym: ($word)
)?(?:distribution: ($word)
)?(?:Notes: ($word))?)?)(?:

Back to Search Page

)$%os) { failed($file); return; } my(%rec); $rec{"name"}=$1; $rec{"Publ. Author"}=$2; $rec{"Publication"}="$5 $6" if defined($5) && defined($6); $rec{"html"}=$3 if defined $3; my($attrsbody)=$11; ($rec{"id"}=$file)=~s#^($id)\.html$#$1#os or failed($file); $rec{"html"}=~s#$ipniservletwordthree#$3#osg if $import_xlate && exists $rec{"html"}; while (defined($attrsbody) && $attrsbody=~s%^(?:$attrpat)%%os) { # nomenclatural synonym: id=$2 # basionym: id=$5 # replaced synonym: id=$8 # Is a replaced synonym: id=$11 # Is a basionym: id=$13 # later publication: id=$16 # Is a later publication: id=$18 my(@refs)=($2,$5,$8,$11,$13,$16,$18); $rec{"refs"}=[]; while (@refs) { push(@{$rec{"refs"}},$_) if defined ($_=shift @refs); } } if ($attrsbody) { failed($file); return; } $DB{$rec{"id"}}=\%rec; } sub process_dir { my( $dir )=@_; my( $old )=getcwd(); if (!chdir($dir)) { warn("Unable to change to dir \"$dir\" from dir \"$old\": $!"); return; } opendir(DIR,".") or die("Cannot open . in \"".getcwd()."\": $!"); foreach (sort readdir(DIR)) { process_entry($_) if ($_!~/^\./); } closedir(DIR); chdir($old) or warn("Unable to retreat to dir \"$old\": $!"); } sub process_entry { my( $entry )=@_; process_dir ($entry) if -d $entry; process_file($entry) if -f $entry; } %OWNS=(); foreach (@ARGV) { process_entry($_); } my($id); for $id (keys %DB) { my($refid); my(@refs); for $refid (@{$DB{$id}{"refs"}}) { if (!exists $DB{$refid}) { warn "Undefined ref id \"$refid\" from id \"$id\"" if $D; next; } next if $id eq $refid; # self-ref # push(@refs,$DB{$refid}); push(@refs,$refid); } $DB{$id}{"refs"}=\@refs; } print Data::Dumper->Dump([\%DB],["%DB"]) if $D; %OWNS=map { $_=>[] } keys(%DB); for $id (keys %OWNS) { my($refid); for $refid (@{$DB{$id}{"refs"}}) { next if !exists $OWNS{$refid}; push(@{$OWNS{$id}},$refid,@{$OWNS{$refid}}); delete $OWNS{$refid}; } } print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D; #foreach $key (keys %DB) { # $DB{$key}{"Publication"}=extract_year($DB{$key}{"Publication"})." ::: ".$DB{$key}{"Publication"} # if (exists($DB{$key}{"Publication"})); # } foreach $key (keys %OWNS) { my(@keys)=@{$OWNS{$key}}; delete($OWNS{$key}); unshift(@keys,$key); @keys=sort { extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys; my($pkey)=shift(@keys); $OWNS{$pkey}=\@keys; } sub print_header { my($header)=@_; print OUT "Kewensis $header \n"; } sub OUT_flush { print OUT "\n"; close(OUT); } if (defined $maxsize) { my($fileno)=-1; my($filename,$fileid); foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { my( $child ); if ($fileno<0 || tell(OUT)>=$maxsize) { OUT_flush() if ($fileno>=0); $fileid=sprintf("%04d",++$fileno); $filename="kew-$fileid.html"; open(OUT,">$filename") or die "Cannot open \"$filename\": $!"; print_header("chunk $fileid"); } if ($debugmatch) { print STDERR "($owner):"; foreach $child (@{$OWNS{$owner}}) { print STDERR " ($child)"; } print STDERR "\n"; } print OUT format_record("

","

\n",%{$DB{$owner}}); if (@{$OWNS{$owner}}) { print OUT "
\n"; foreach $child (@{$OWNS{$owner}}) { print OUT format_record("

","

\n",%{$DB{$child}}); } print OUT "
\n"; } print OUT "\n"; $DB{$owner}{"_filename"}=$filename; } OUT_flush(); open(OUT,">kew-index.html") or die "Cannot open \"kew-index.html\": $!"; print_header("Index"); } else { open(OUT,">kewensis.html") or die "Cannot open \"kewensis.html\": $!"; print_header("Full"); } sub format_href { my($preinsert,$postinsert,%rec)=@_; return htmlquote(rec_to_name(%rec)); } my($printrecref)=(defined $maxsize ? \&format_href : \&format_record ); foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) { my( $child ); print OUT "

"; print OUT "" if defined $maxsize; print OUT &{$printrecref}("","",%{$DB{$owner}}); print OUT "" if defined $maxsize; print OUT "

"; if (@{$OWNS{$owner}}) { print OUT "
    \n"; foreach $child (@{$OWNS{$owner}}) { print OUT "
  • "; print OUT &{$printrecref}("","",%{$DB{$child}}); print OUT "
  • \n"; } print OUT "
"; } print OUT "\n"; } OUT_flush();