Resolve amiguous english->kana translations.
authorshort <>
Sun, 22 May 2005 09:36:41 +0000 (09:36 +0000)
committershort <>
Sun, 22 May 2005 09:36:41 +0000 (09:36 +0000)
Other fixes.

src/jaxam

index cce6e85..261428c 100755 (executable)
--- a/src/jaxam
+++ b/src/jaxam
@@ -39,13 +39,18 @@ die if !GetOptions(
 $opt_log||=$opt_edict.".log";
 
 my %exam=(
-       "kana"   =>{"prob"=>1,"to"=>"english"},
-       "english"=>{"prob"=>1,"to"=>"kana"},
+       "kana"   =>{"to"=>"english"},   # "prob"=>1,
+       "english"=>{"to"=>"kana"},      # "prob"=>1,
+       );
+my %ok=(
+       0=>-1,
+       1=>+4,
        );
 
 my $T;
 my @R;
 my %identify;
+my %kana_to_r;
 
 sub schedule($$%)
 {
@@ -65,7 +70,7 @@ sub schedule($$%)
                # $sum: small..soon
                # 1/log(2+$age): big..  big importance
                # 1/log(2+$age):   0..small importance
-               $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1);
+               $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
                }
        my $arr=[];
        if ($T->exists($sum)) {
@@ -103,7 +108,11 @@ sub best()
        $T->delete($min);
        my $r=shift @$arr;
        $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
-       print STDERR "min=$min\n" if $opt_debug;
+       if ($opt_debug) {
+               print STDERR "min=$min";
+               print STDERR " of ".(1+@$arr) if @$arr;
+               print STDERR "\n";
+       }
        return $r;
 }
 
@@ -119,22 +128,23 @@ sub identify($%)
                        (!$args{"at"} ? () : '@'.$r->{"line"});
 }
 
-sub result($$$)
+sub result($$$%)
 {
-       my($r,$exam_from,$ok)=@_;
-
-       local *LOG_APPEND;
-       open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
-       print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
-                       ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
-       close LOG_APPEND or die "close \"$opt_log\": $!";
+       my($r,$exam_from,$ok,%args)=@_;
+
+       if ($args{"write"}) {
+               local *LOG_APPEND;
+               open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
+               print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
+                               ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
+               close LOG_APPEND or die "close \"$opt_log\": $!";
+       }
 
        push @{$r->{"result"}},{
                "time"=>time(),
                "exam_from"=>$exam_from,
                "ok"=>$ok,
                };
-       schedule $r,$exam_from if $T;
 }
 
 my $errors_wait_key_count;
@@ -151,14 +161,14 @@ sub edict_line_parse(;$)
 
        $s=$_ if !defined $s;
        $line=$. if !defined $s;
+       chomp $s;
        local $_=$s;
-       chomp;
        s/;.*//;
        return if /^\s*$/;
        #〒 [ゆうびん] /(n) mail/postal service/
        #Tシャツ /T-shirt/
        m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
-                       or do { warn "Unparsable: $_\n"; $errors_wait_key_count++; return; };
+                       or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
        my $r;
        my $rest;
        ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
@@ -168,14 +178,27 @@ sub edict_line_parse(;$)
        }
        while ($rest) {
                $rest=~s{^\s*([^/]+?)\s*/}{}
-                               or do { warn "Unparsable english: $_\n"; $errors_wait_key_count++; return; };
+                               or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
                my($english)=($1);
                push @{$r->{"english"}},$english;
        }
-       $r->{"line"}=$.;
+       $r->{"line"}=$line;
+       $r->{"orig"}=$s;
        return $r;
 }
 
+sub to_chk($$)
+{
+       my($s,$type)=@_;
+
+       local $_=$s;
+       $_=lc $_;
+       s/\bto\b//g if $type eq "english";
+       s/\s//g;
+       s/[(][^)]*[)]//g;
+       return $_;
+}
+
 sub edict_read($)
 {
        my($pathname)=@_;
@@ -187,10 +210,11 @@ sub edict_read($)
                #"kana"=>{},    # exists...
                "identify"=>{},
                );
-       LINE:
        while (<EDICT>) {
                my $r=edict_line_parse() or next;
+               next if /\Q(laceno)\E/;
                push @R,$r;
+               push @{$kana_to_r{$r->{"kana"}}},$r;
                keys(%check);
                while (my($field,$hashref)=each(%check)) {
                        my $val;
@@ -229,11 +253,7 @@ sub log_read($)
                my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
                my $r=$identify{$identify}
                                or do { warn "Word not found from line: $_\n"; next; };
-               push @{$r->{"result"}},{
-                       "time"=>$gmtime,
-                       "exam_from"=>$exam_from,
-                       "ok"=>$ok,
-                       };
+               result $r,$exam_from,$ok,"write"=>0;
        }
        close LOG_READ or die "close \"$pathname\": $!";
 }
@@ -246,22 +266,10 @@ sub wrong_read()
                my $w=edict_line_parse() or next;
                my $r=$identify{identify $w};
                do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
-               result $r,"kana",0;
+               result $r,"kana",0,"write"=>1;
        }
 }
 
-sub to_chk($$)
-{
-       my($s,$type)=@_;
-
-       local $_=$s;
-       $_=lc $_;
-       s/\bto\b//g if $type eq "english";
-       s/\s//g;
-       s/[(][^)]*[)]//g;
-       return $_;
-}
-
 sub word_out($)
 {
        my($s)=@_;
@@ -283,7 +291,6 @@ sub exam()
                my $got=<STDIN>;
                chomp $got;
                my $want=$r->{$exam_to};
-               print "-------> ".word_out($want)."\n";
                my $got_chk=to_chk($got,$exam_to);
                my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
                my $ok;
@@ -293,6 +300,23 @@ sub exam()
                        $ok=1 if $got_chk eq $want_chk;
                        $substr=1 if $want_chk=~/\Q$got_chk\E/;
                }
+               my $aliased;
+               if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
+                               && (my $arrref=$kana_to_r{$got})) {
+                       my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
+                       my $intended;
+                       for my $kana_alias (@$arrref) {
+                               for my $kana_alias_english (@{$kana_alias->{"english"}}) {
+                                       my $chk_english=to_chk $kana_alias_english,"english" or next;
+                                       next if !$chk_english_from{$chk_english};
+                                       print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
+                                       print "OK if alias -> ".$kana_alias->{"orig"}."\n";
+                                       $ok=1;
+                                       $aliased=1;
+                               }
+                       }
+               }
+               print "-------------> ".word_out($want)."\n" if !$aliased;
                if (!$ok) {
                        if ($exam_to eq "english" && $got=~/\S/ && $substr) {
                                my $key;
@@ -314,9 +338,12 @@ sub exam()
                        }
                }
                $ok||=0;
-               print STDERR "result: ".($ok ? "ok" : "WRONG");
-               result $r,$exam_from,$ok;
-               print "\n";
+               if (!$aliased) {
+                       result $r,$exam_from,$ok,"write"=>1;
+                       print STDERR "result: ".($ok ? "ok" : "WRONG");
+                       schedule $r,$exam_from;
+                       print "\n";
+               }
                print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
        }
 }