From e332ad4bfdbfe8d2764271bdb52e15cb0d8c5c18 Mon Sep 17 00:00:00 2001
From: short <>
Date: Mon, 24 Dec 2001 12:14:51 +0000
Subject: [PATCH 1/1] Implemented DBI database backend Fixed %OWNS matcher
duplications
---
kewensis-collect.pl | 182 +++++++++++++++-------------------------------------
1 file changed, 52 insertions(+), 130 deletions(-)
diff --git a/kewensis-collect.pl b/kewensis-collect.pl
index ba61527..8fd1d54 100755
--- a/kewensis-collect.pl
+++ b/kewensis-collect.pl
@@ -5,52 +5,16 @@ use warnings;
use Cwd;
use Data::Dumper;
+use DBI;
-my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$maxsize,$filename,$doimport,$import_xlate);
+my(%DB,%OWNS,$debugmatch,$D,$key,$owner,$ref,$filename,$doimport,$import_xlate);
-# $maxsize=0x40000;
%DB=();
$D=0;
$debugmatch=0;
$doimport=1;
$import_xlate=1;
-sub rec_to_name
-{
-my( %rec )=@_;
-
- return $rec{"name"};
-}
-
-sub htmlquote
-{
-my($class);
-($_,$class)=@_;
-
- s/&/&/g;
- s/</g;
- s/>/>/g;
- s/\n/&nl;/g;
- s/"/"/g;
- return "$_" if defined $class;
- return $_;
-}
-
-sub format_record
-{
-my( $preinsert,$postinsert,%rec )=@_;
-
- my($r)=$preinsert.htmlquote(rec_to_name(%rec),"name").$postinsert;
- $r.="\n".htmlquote($rec{"Publ. Author"},"author") if (exists($rec{"Publ. Author"}));
- $r.="
\n".htmlquote($rec{"Publication"},"publication") if (exists($rec{"Publication"}));
- $r.="
\n".htmlquote($rec{"Notes"},"notes") if (exists($rec{"Notes"}));
- if ($doimport && exists($rec{"html"})) {
- my($import)="import: [".$rec{"id"}."]";
- $r.="
\n
\n".$rec{"html"}."\n
\n";
- }
- return $r;
-}
-
sub extract_year
{
($_)=@_;
@@ -124,6 +88,7 @@ remarks: .*)?(
Type
)?()?(?:
my($attrsbody)=$11;
($rec{"id"}=$file)=~s#^($id)\.html$#$1#os or failed($file);
$rec{"html"}=~s#$ipniservletwordthree#$3#osg if $import_xlate && exists $rec{"html"};
+ my $score=0; # -: upper, +: lower
while (defined($attrsbody) && $attrsbody=~s%^(?:$attrpat)%%os) {
# nomenclatural synonym: id=$2
# basionym: id=$5
@@ -137,7 +102,12 @@ remarks: .*)?(
Type
)?()?(?:
while (@refs) {
push(@{$rec{"refs"}},$_) if defined ($_=shift @refs);
}
+ $score+=-10 if defined $5; # basionym: id=$5
+ $score+=+10 if defined $13; # Is a basionym: id=$13
+ $score+=+10 if defined $16; # later publication: id=$16
+ $score+=-10 if defined $18; # Is a later publication: id=$18
}
+ $rec{"score"}=$score;
if ($attrsbody) {
failed($file);
return;
@@ -197,122 +167,74 @@ print Data::Dumper->Dump([\%DB],["%DB"]) if $D;
%OWNS=map { $_=>[] } keys(%DB);
for $id (keys %OWNS) {
- my($refid);
- for $refid (@{$DB{$id}{"refs"}}) {
+ next if !exists $OWNS{$id};
+ for my $refid (@{$DB{$id}{"refs"}}) {
next if !exists $OWNS{$refid};
push(@{$OWNS{$id}},$refid,@{$OWNS{$refid}});
delete $OWNS{$refid};
+ print "connected: id=$id,refid=$refid\n" if $D;
}
}
print Data::Dumper->Dump([\%OWNS],["%OWNS"]) if $D;
-#foreach $key (keys %DB) {
-# $DB{$key}{"Publication"}=extract_year($DB{$key}{"Publication"})." ::: ".$DB{$key}{"Publication"}
-# if (exists($DB{$key}{"Publication"}));
-# }
-
foreach $key (keys %OWNS) {
my(@keys)=@{$OWNS{$key}};
delete($OWNS{$key});
unshift(@keys,$key);
- @keys=sort { extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys;
+ @keys=sort { $DB{$a}{"score"} <=> $DB{$b}{"score"}
+ || extract_year($DB{$b}{"Publication"}) <=> extract_year($DB{$a}{"Publication"}); } @keys;
my($pkey)=shift(@keys);
$OWNS{$pkey}=\@keys;
}
-sub print_header
-{
-my($header)=@_;
-
- print OUT
-"
-
-
-Kewensis $header
-
-
-
-
-\n";
-}
+my($db_driver,$db_host,$db_user,$db_pwd,$DB_PWD,$db_name,$db);
+my($tb_tree);
-sub OUT_flush
-{
- print OUT "\n";
- close(OUT);
-}
+$db_driver="mysql";
+$db_host="";
+#$db_user="short";
+#$DB_PWD=$ENV{"HOME"}."/priv/mysql.${db_user}.pwd";
+$db_name="short";
+$tb_tree="kewensis_tree";
-if (defined $maxsize) {
- my($fileno)=-1;
- my($filename,$fileid);
- foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) {
- my( $child );
-
- if ($fileno<0 || tell(OUT)>=$maxsize) {
- OUT_flush() if ($fileno>=0);
- $fileid=sprintf("%04d",++$fileno);
- $filename="kew-$fileid.html";
- open(OUT,">$filename") or die "Cannot open \"$filename\": $!";
- print_header("chunk $fileid");
- }
-
- if ($debugmatch) {
- print STDERR "($owner):";
- foreach $child (@{$OWNS{$owner}})
- { print STDERR " ($child)"; }
- print STDERR "\n";
- }
- print OUT format_record("","
\n",%{$DB{$owner}});
- if (@{$OWNS{$owner}}) {
- print OUT "\n";
- foreach $child (@{$OWNS{$owner}})
- { print OUT format_record("","
\n",%{$DB{$child}}); }
- print OUT "
\n";
- }
- print OUT "\n";
- $DB{$owner}{"_filename"}=$filename;
- }
- OUT_flush();
- open(OUT,">kew-index.html") or die "Cannot open \"kew-index.html\": $!";
- print_header("Index");
- }
-else {
- open(OUT,">kewensis.html") or die "Cannot open \"kewensis.html\": $!";
- print_header("Full");
- }
+$db=DBI->connect("DBI:$db_driver:database=$db_name;host=$db_host",$db_user,$db_pwd) or die "Database open fail: $!";
-sub format_href
+sub db_do
{
-my($preinsert,$postinsert,%rec)=@_;
+my( $cmd )=@_;
- return htmlquote(rec_to_name(%rec));
+ $db->do($cmd) or die("SQL command \"$cmd\" failed: $!");
}
-my($printrecref)=(defined $maxsize ? \&format_href : \&format_record );
+eval { &db_do("drop table $tb_tree") };
+
+&db_do("create table $tb_tree ("
+ ."id char(10) not null,"
+ ."family_id char(10) not null,"
+ ."family_order int not null,"
+ ."name varchar(100) not null,"
+ ."Publication text null,"
+ ."Notes text null,"
+ ."html text null"
+ .")");
+
+&db_do("alter table $tb_tree add unique (id)");
+&db_do("alter table $tb_tree add index (name)");
+&db_do("alter table $tb_tree add unique (family_id,family_order)");
+
+my $insert_tb_tree=$db->prepare("insert into $tb_tree (id,family_id,family_order,name,Publication,Notes,html) values (?,?,?,?,?,?,?)")
+ or die "Prepare fail: $!";
foreach $owner (sort { $DB{$b}{"name"} cmp $DB{$a}{"name"}; } keys %OWNS) {
-my( $child );
-
- print OUT "";
- print OUT "" if defined $maxsize;
- print OUT &{$printrecref}("","",%{$DB{$owner}});
- print OUT "" if defined $maxsize;
- print OUT "
";
- if (@{$OWNS{$owner}}) {
- print OUT "\n";
- foreach $child (@{$OWNS{$owner}}) {
- print OUT "- ";
- print OUT &{$printrecref}("","",%{$DB{$child}});
- print OUT "
\n";
- }
- print OUT "
";
+ my @family=@{$OWNS{$owner}};
+ unshift(@family,$owner);
+ my $family_id=$DB{$owner}{"id"};
+ for my $family_order (0..$#family) {
+ my $id=$family[$family_order];
+ print "insert:$id,".$DB{$id}{"name"}."\n" if $D;
+ $insert_tb_tree->execute($id,$family_id,$family_order,
+ $DB{$id}{"name"},$DB{$id}{"Publication"},$DB{$id}{"Notes"},$DB{$id}{"html"}
+ ) or die "SQL insert failure: $!";
}
- print OUT "\n";
}
-OUT_flush();
--
1.8.3.1