+make
[nethome.git] / src / jaxam
index 362cecd..1c0b62c 100755 (executable)
--- a/src/jaxam
+++ b/src/jaxam
@@ -16,32 +16,57 @@ binmode STDERR,":utf8";     # only STDIN and STDOUT covered by: use encoding "utf8";
 
 
 our $opt_debug=1;
-my $opt_edict="jaxam.edict";
+my $opt_edict="$0.edict";
 my $opt_log;
+my $opt_wrong;
+my $opt_errors_wait_key;
+my $opt_tee;
 my $opt_fast;
+my $opt_cycle=10;
 
 $Getopt::Long::ignorecase=0;
 $Getopt::Long::bundling=1;
 die if !GetOptions(
-               "d|debug+" =>\$opt_debug,
-               "e|edict=s"=>\$opt_edict,
-               "l|log=s"  =>\$opt_log,
-                 "fast!"  =>\$opt_fast,
+               "d|debug+"         =>\$opt_debug,
+               "e|edict=s"        =>\$opt_edict,
+               "l|log=s"          =>\$opt_log,
+               "w|wrong"          =>\$opt_wrong,               # Filter the input lines.
+                 "errors-wait-key"=>\$opt_errors_wait_key,     # Only for -w|--wrong.
+                 "tee!"           =>\$opt_tee,                 # Output all the read lines. Only for -w|--wrong.
+                 "fast!"          =>\$opt_fast,
+                 "cycle=s"        =>\$opt_cycle,
                );
 $opt_log||=$opt_edict.".log";
 
 my %exam=(
-       "kana"   =>{"prob"=>1,"to"=>"english"},
-       "english"=>{"prob"=>1,"to"=>"kana"},
+       "kana"   =>{"to"=>"english"},   # "prob"=>1,
+       "english"=>{"to"=>"kana"},      # "prob"=>1,
+       );
+my %ok=(
+       0=>-1,
+       1=>+4,
        );
 
 my $T;
 my @R;
+my %identify;
+my %kana_to_r;
+my $result_count=0;
+
+sub siglog($)
+{
+       my($a)=@_;
+
+       return 0 if !$a;
+       my $sig=($a>0)-($a<0);
+       return $sig*log(abs $a);
+}
 
 sub schedule($$%)
 {
        my($r,$exam_from,%args)=@_;
 
+       return if $r->{"orig"}=~/\Q(laceno)\E/;
        my $time=$args{"time"}||time();
        my $sum=0;
        for my $result (@{$r->{"result"}}) {
@@ -56,8 +81,20 @@ sub schedule($$%)
                # $sum: small..soon
                # 1/log(2+$age): big..  big importance
                # 1/log(2+$age):   0..small importance
-               $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1);
+               $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
+               if (!$result->{"ok"}) {
+                       my $count_diff=$result_count-$result->{"count"};
+                       $count_diff-=$opt_cycle;
+                       $sum-=siglog($count_diff);
+                       }
                }
+       my $scale=1;
+       if ($r->{"pri"}) {
+               my $abspri=$r->{"pri"};
+               $abspri=1/(-$abspri) if $abspri<0;
+               $abspri=2 if $abspri<2;
+               $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1);
+       }
        my $arr=[];
        if ($T->exists($sum)) {
                $arr=$T->select($sum)->{"arr"};
@@ -94,7 +131,11 @@ sub best()
        $T->delete($min);
        my $r=shift @$arr;
        $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
-       print STDERR "min=$min\n" if $opt_debug;
+       if ($opt_debug) {
+               print STDERR "min=$min";
+               print STDERR " of ".(1+@$arr) if @$arr;
+               print STDERR "\n";
+       }
        return $r;
 }
 
@@ -110,39 +151,48 @@ sub identify($%)
                        (!$args{"at"} ? () : '@'.$r->{"line"});
 }
 
-sub result($$$)
+sub result($$$%)
 {
-       my($r,$exam_from,$ok)=@_;
-
-       local *LOG_APPEND;
-       open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
-       print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
-                       ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
-       close LOG_APPEND or die "close \"$opt_log\": $!";
+       my($r,$exam_from,$ok,%args)=@_;
 
+       if ($args{"write"}) {
+               local *LOG_APPEND;
+               open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
+               print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
+                               ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
+               close LOG_APPEND or die "close \"$opt_log\": $!";
+       }
+       $args{"time"}||=time();
        push @{$r->{"result"}},{
-               "time"=>time(),
+               "time"=>$args{"time"},
                "exam_from"=>$exam_from,
                "ok"=>$ok,
+               "count"=>$result_count++,
                };
-       schedule $r,$exam_from;
 }
 
-local *EDICT;
-open EDICT,"<:utf8",$opt_edict or die "open \"$opt_edict\": $!";
-my %check=(
-       #"kanji"=>{},           # exists...
-       #"kana"=>{},    # exists...
-       "identify"=>{},
-       );
-LINE:
-while (<EDICT>) {
-       chomp;
-       next if /^\t [^\t]*$/;
-       next if /^\s*$/;
+my $errors_wait_key_count;
+END {
+       if ($errors_wait_key_count && $opt_errors_wait_key) {
+               print STDERR "Errors occured. Press any key to continue...";
+               <STDIN>;
+       }
+}
+
+sub edict_line_parse(;$)
+{
+       my($s,$line)=@_;
+
+       $s=$_ if !defined $s;
+       $line=$. if !defined $line;
+       chomp $s;
+       local $_=$s;
+       s/;.*//;
+       return if /^\s*$/;
        #〒 [ゆうびん] /(n) mail/postal service/
        #Tシャツ /T-shirt/
-       m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} or do { warn "Unparsable: $_\n"; next LINE; };
+       m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
+                       or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
        my $r;
        my $rest;
        ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
@@ -150,33 +200,70 @@ while (<EDICT>) {
                $r->{"kana"}=$r->{"kanji"};
                delete $r->{"kanji"};
        }
+       $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e;
        while ($rest) {
-               $rest=~s{^\s*([^/]+?)\s*/}{} or do { warn "Unparsable english: $_\n"; next LINE; };
+               $rest=~s{^\s*([^/]+?)\s*/}{}
+                               or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
                my($english)=($1);
                push @{$r->{"english"}},$english;
        }
-       $r->{"line"}=$.;
-       push @R,$r;
-       keys(%check);
-       while (my($field,$hashref)=each(%check)) {
-               my $val;
-               $val=identify $r if $field eq "identify";
-               $val||=$r->{$field};
-               next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
-               my $origvalref=\$hashref->{$val};
-               warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
-                               if $$origvalref;
-               $$origvalref=$r;
+       $r->{"line"}=$line;
+       $r->{"orig"}=$s;
+       return $r;
+}
+
+sub to_chk($$)
+{
+       my($s,$type)=@_;
+
+       local $_=$s;
+       $_=lc $_;
+       s/\bto\b//g if $type eq "english";
+       s/\s//g;
+       s/[(][^)]*[)]//g;
+       return $_;
+}
+
+sub edict_read($)
+{
+       my($pathname)=@_;
+
+       local *EDICT;
+       open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
+       my %check=(
+               #"kanji"=>{},   # exists...
+               #"kana"=>{},    # exists...
+               "identify"=>{},
+               );
+       while (<EDICT>) {
+               my $r=edict_line_parse() or next;
+               push @R,$r;
+               push @{$kana_to_r{$r->{"kana"}}},$r;
+               keys(%check);
+               while (my($field,$hashref)=each(%check)) {
+                       my $val;
+                       $val=identify $r if $field eq "identify";
+                       $val||=$r->{$field};
+                       next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
+                       my $origvalref=\$hashref->{$val};
+                       warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
+                                       if $$origvalref;
+                       $$origvalref=$r;
+               }
        }
+       close EDICT or die "close \"$pathname\": $!";
+       %identify=map((identify($_)=>$_),@R);
 }
-close EDICT or die "close \"$opt_edict\": $!";
 
-my %identify=map((identify($_)=>$_),@R);
+sub log_read($)
+{
+       my($pathname)=@_;
 
-local *LOG_READ;
-if (!open LOG_READ,"<:utf8",$opt_log) {
-       warn "open \"$opt_log\": $!";
-} else {
+       local *LOG_READ;
+       if (!open LOG_READ,"<:utf8",$pathname) {
+               warn "open \"$pathname\": $!";
+               return;
+       }
        while (<LOG_READ>) {
                chomp;
                # 2002-04-25T21:17:52+0900 ok=1: identify($r)
@@ -190,27 +277,21 @@ if (!open LOG_READ,"<:utf8",$opt_log) {
                my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
                my $r=$identify{$identify}
                                or do { warn "Word not found from line: $_\n"; next; };
-               push @{$r->{"result"}},{
-                       "time"=>$gmtime,
-                       "exam_from"=>$exam_from,
-                       "ok"=>$ok,
-                       };
+               result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime;
        }
-       close LOG_READ or die "close \"$opt_log\": $!";
+       close LOG_READ or die "close \"$pathname\": $!";
 }
 
-reschedule $opt_debug>=2;      # init
-
-sub to_chk($$)
+sub wrong_read()
 {
-       my($s,$type)=@_;
-
-       local $_=$s;
-       $_=lc $_;
-       s/\bto\b//g if $type eq "english";
-       s/\s//g;
-       s/[(][^)]*[)]//g;
-       return $_;
+       while (<>) {
+               print if $opt_tee;
+               chomp;
+               my $w=edict_line_parse() or next;
+               my $r=$identify{identify $w};
+               do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
+               result $r,"kana",0,"write"=>1;
+       }
 }
 
 sub word_out($)
@@ -221,6 +302,76 @@ sub word_out($)
        return join("",map("$_/",@$s));
 }
 
+sub exam()
+{
+       my @exam=keys(%exam);
+       for (;;) {
+               reschedule() if !$opt_fast;
+               my $besthashref=best();
+               my $r=$besthashref->{"r"};
+               my $exam_from=$besthashref->{"exam_from"};
+               my $exam_to=$exam{$exam_from}{"to"};
+               print word_out($r->{$exam_from}).": ";
+               my $got=<STDIN>;
+               chomp $got;
+               my $want=$r->{$exam_to};
+               my $got_chk=to_chk($got,$exam_to);
+               my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
+               my $ok;
+               my $substr;
+               for my $want_chk (@want_chk) {
+                       next if !$want_chk;     # discard patterns like: /(P)/
+                       $ok=1 if $got_chk eq $want_chk;
+                       $substr=1 if $want_chk=~/\Q$got_chk\E/;
+               }
+               my $aliased;
+               if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
+                               && (my $arrref=$kana_to_r{$got})) {
+                       my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
+                       my $intended;
+                       for my $kana_alias (@$arrref) {
+                               for my $kana_alias_english (@{$kana_alias->{"english"}}) {
+                                       my $chk_english=to_chk $kana_alias_english,"english" or next;
+                                       next if !$chk_english_from{$chk_english};
+                                       print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
+                                       print "OK if alias -> ".$kana_alias->{"orig"}."\n";
+                                       $ok=1;
+                                       $aliased=1;
+                               }
+                       }
+               }
+               print "-------------> ".word_out($want)."\n" if !$aliased;
+               if (!$ok) {
+                       if ($exam_to eq "english" && $got=~/\S/ && $substr) {
+                               my $key;
+                               do {
+                                       print STDERR "Is your answer correct? [y/n] ";
+                                       sub restore
+                                       {
+                                               Term::ReadKey::ReadMode(0);
+                                       }
+                                       local $SIG{"__DIE__"}=\&restore;
+                                       Term::ReadKey::ReadMode(4);
+                                       $key=Term::ReadKey::ReadKey(0);
+                                       print STDERR "\n";      # no echo
+                                       restore();
+                                       die if $key eq "\x03";  # ctrl-c
+                                       $ok=1 if $key eq "y";
+                                       $ok=0 if $key eq "n";
+                               } while (!defined $ok);
+                       }
+               }
+               $ok||=0;
+               if (!$aliased) {
+                       result $r,$exam_from,$ok,"write"=>1;
+                       print STDERR "result: ".($ok ? "ok" : "WRONG");
+                       schedule $r,$exam_from;
+                       print "\n";
+               }
+               print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
+       }
+}
+
 # FIXME: balanced $exam_from:
 # my $exam_prob_sum=0;
 # $exam_prob_sum+=$_->{"prob"} for values(%exam);
@@ -236,50 +387,12 @@ sub word_out($)
 #      }
 #      die "INTERNAL" if !$exam_from;
 # }
-my @exam=keys(%exam);
-for (;;) {
-       reschedule() if !$opt_fast;
-       my $besthashref=best();
-       my $r=$besthashref->{"r"};
-       my $exam_from=$besthashref->{"exam_from"};
-       my $exam_to=$exam{$exam_from}{"to"};
-       print word_out($r->{$exam_from}).": ";
-       my $got=<STDIN>;
-       chomp $got;
-       my $want=$r->{$exam_to};
-       print "-------> ".word_out($want)."\n";
-       my $got_chk=to_chk($got,$exam_to);
-       my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
-       my $ok;
-       my $substr;
-       for my $want_chk (@want_chk) {
-               next if !$want_chk;     # discard patterns like: /(P)/
-               $ok=1 if $got_chk eq $want_chk;
-               $substr=1 if $want_chk=~/\Q$got_chk\E/;
-       }
-       if (!$ok) {
-               if ($exam_to eq "english" && $got=~/\S/ && $substr) {
-                       my $key;
-                       do {
-                               print STDERR "Is your answer correct? [y/n] ";
-                               sub restore
-                               {
-                                       Term::ReadKey::ReadMode(0);
-                               }
-                               local $SIG{"__DIE__"}=\&restore;
-                               Term::ReadKey::ReadMode(4);
-                               $key=Term::ReadKey::ReadKey(0);
-                               print STDERR "\n";      # no echo
-                               restore();
-                               die if $key eq "\x03";  # ctrl-c
-                               $ok=1 if $key eq "y";
-                               $ok=0 if $key eq "n";
-                       } while (!defined $ok);
-               }
-       }
-       $ok||=0;
-       print STDERR "result: ".($ok ? "ok" : "WRONG");
-       result $r,$exam_from,$ok;
-       print "\n";
-       print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
-}
+
+edict_read $opt_edict;
+
+do { wrong_read(); exit; } if $opt_wrong;
+
+log_read $opt_log;
+reschedule $opt_debug>=2;      # init
+
+exam();