mocksetup: /unsafe+/hdd -> /quad
[nethome.git] / src / jaxam
index 596a688..1c0b62c 100755 (executable)
--- a/src/jaxam
+++ b/src/jaxam
@@ -10,29 +10,68 @@ require Tree::Binary::Search;
 use Getopt::Long;
 require Time::Piece::ISO;
 require POSIX;
+use utf8;
+use encoding "utf8";
+binmode STDERR,":utf8";        # only STDIN and STDOUT covered by: use encoding "utf8";
 
 
 our $opt_debug=1;
-my $opt_dict=$ENV{"HOME"}."/priv/japan-lang/jfe_1-9.csv";
-my $opt_log=$opt_dict.".log";
+my $opt_edict="$0.edict";
+my $opt_log;
+my $opt_wrong;
+my $opt_errors_wait_key;
+my $opt_tee;
+my $opt_fast;
+my $opt_cycle=10;
 
 $Getopt::Long::ignorecase=0;
 $Getopt::Long::bundling=1;
 die if !GetOptions(
-               "d|debug+"=>\$opt_debug,
-               "c|dict=s"=>\$opt_dict,
-               "l|log=s" =>\$opt_log,
+               "d|debug+"         =>\$opt_debug,
+               "e|edict=s"        =>\$opt_edict,
+               "l|log=s"          =>\$opt_log,
+               "w|wrong"          =>\$opt_wrong,               # Filter the input lines.
+                 "errors-wait-key"=>\$opt_errors_wait_key,     # Only for -w|--wrong.
+                 "tee!"           =>\$opt_tee,                 # Output all the read lines. Only for -w|--wrong.
+                 "fast!"          =>\$opt_fast,
+                 "cycle=s"        =>\$opt_cycle,
                );
+$opt_log||=$opt_edict.".log";
+
+my %exam=(
+       "kana"   =>{"to"=>"english"},   # "prob"=>1,
+       "english"=>{"to"=>"kana"},      # "prob"=>1,
+       );
+my %ok=(
+       0=>-1,
+       1=>+4,
+       );
 
 my $T;
+my @R;
+my %identify;
+my %kana_to_r;
+my $result_count=0;
 
-sub schedule($)
+sub siglog($)
 {
-       my($r)=@_;
+       my($a)=@_;
+
+       return 0 if !$a;
+       my $sig=($a>0)-($a<0);
+       return $sig*log(abs $a);
+}
 
+sub schedule($$%)
+{
+       my($r,$exam_from,%args)=@_;
+
+       return if $r->{"orig"}=~/\Q(laceno)\E/;
+       my $time=$args{"time"}||time();
        my $sum=0;
        for my $result (@{$r->{"result"}}) {
-               my $age=time()-$result->{"time"};
+               next if $result->{"exam_from"} ne $exam_from;
+               my $age=$time-$result->{"time"};
                $age=0 if $age<0;
                # $age:   0..  big importance
                # $age: big..small importance
@@ -42,18 +81,48 @@ sub schedule($)
                # $sum: small..soon
                # 1/log(2+$age): big..  big importance
                # 1/log(2+$age):   0..small importance
-               $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1);
+               $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
+               if (!$result->{"ok"}) {
+                       my $count_diff=$result_count-$result->{"count"};
+                       $count_diff-=$opt_cycle;
+                       $sum-=siglog($count_diff);
+                       }
                }
+       my $scale=1;
+       if ($r->{"pri"}) {
+               my $abspri=$r->{"pri"};
+               $abspri=1/(-$abspri) if $abspri<0;
+               $abspri=2 if $abspri<2;
+               $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1);
+       }
        my $arr=[];
        if ($T->exists($sum)) {
                $arr=$T->select($sum)->{"arr"};
                $T->delete($sum);
        }
-       splice @$arr,int(rand(@$arr+1)),0,$r;
+       splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
        $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
        print STDERR "; schedule=$sum" if $opt_debug;
 }
 
+sub reschedule(;$)
+{
+       my($opt_debug_force)=@_;
+
+       $T=Tree::Binary::Search->new();
+       $T->useNumericComparison();
+       my $time=time();
+       for my $r (@R) {
+               local $opt_debug=0 if $opt_debug<2;
+               local $opt_debug=$opt_debug_force if defined $opt_debug_force;
+               print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
+               for my $exam_from (keys(%exam)) {
+                       schedule $r,$exam_from,"time"=>$time;
+               }
+               print STDERR "\n" if $opt_debug;
+       }
+}
+
 sub best()
 {
        return if $T->isEmpty();
@@ -62,77 +131,144 @@ sub best()
        $T->delete($min);
        my $r=shift @$arr;
        $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
-       print STDERR "min=$min\n" if $opt_debug;
+       if ($opt_debug) {
+               print STDERR "min=$min";
+               print STDERR " of ".(1+@$arr) if @$arr;
+               print STDERR "\n";
+       }
        return $r;
 }
 
-sub identify($)
+sub identify($%)
 {
-       my($r)=@_;
+       my($r,%args)=@_;
 
-       return join " ",map(($r->{$_}||"-"),qw(
-                       written
-                       phonetic
-                       ));
+       return join " ",
+                       map(($r->{$_}||()),qw(
+                                       kanji
+                                       kana
+                                       )),
+                       (!$args{"at"} ? () : '@'.$r->{"line"});
 }
 
-sub result($$)
+sub result($$$%)
 {
-       my($r,$ok)=@_;
-
-       local *LOG_APPEND;
-       open LOG_APPEND,">>".$opt_log or die "append \"$opt_log\": $!";
-       print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())." ok=$ok: ".identify($r)."\n";
-       close LOG_APPEND or die "close \"$opt_log\": $!";
+       my($r,$exam_from,$ok,%args)=@_;
 
+       if ($args{"write"}) {
+               local *LOG_APPEND;
+               open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
+               print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
+                               ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
+               close LOG_APPEND or die "close \"$opt_log\": $!";
+       }
+       $args{"time"}||=time();
        push @{$r->{"result"}},{
-               "time"=>time(),
+               "time"=>$args{"time"},
+               "exam_from"=>$exam_from,
                "ok"=>$ok,
+               "count"=>$result_count++,
                };
-       schedule $r;
 }
 
-my @R;
-my %keyword;
-$T=Tree::Binary::Search->new();
-$T->useNumericComparison();
-local *DICT;
-open DICT,$opt_dict or die "open \"$opt_dict\": $!";
-while (<DICT>) {
-       chomp;
+my $errors_wait_key_count;
+END {
+       if ($errors_wait_key_count && $opt_errors_wait_key) {
+               print STDERR "Errors occured. Press any key to continue...";
+               <STDIN>;
+       }
+}
+
+sub edict_line_parse(;$)
+{
+       my($s,$line)=@_;
+
+       $s=$_ if !defined $s;
+       $line=$. if !defined $line;
+       chomp $s;
+       local $_=$s;
+       s/;.*//;
+       return if /^\s*$/;
+       #〒 [ゆうびん] /(n) mail/postal service/
+       #Tシャツ /T-shirt/
+       m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
+                       or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
        my $r;
        my $rest;
-       (
-               $r->{"written"},
-               $r->{"phonetic"},
-               $r->{"meaning"},
-               $r->{"category"},
-               $r->{"learned"},
-               $rest,
-               )=split /\t/;
-       next if $r->{"written"} eq "";
-       die "Excessive argument: $rest" if defined $rest;
-       while ($r->{"meaning"}=~s/\s*<([-\w]+)>\s*//) {
-               my $keyword=$1;
-               $r->{"meaning_keyword"}{$keyword}=1;
-               $keyword{$keyword}=1;
+       ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
+       if (!$r->{"kana"}) {
+               $r->{"kana"}=$r->{"kanji"};
+               delete $r->{"kanji"};
+       }
+       $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e;
+       while ($rest) {
+               $rest=~s{^\s*([^/]+?)\s*/}{}
+                               or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
+               my($english)=($1);
+               push @{$r->{"english"}},$english;
+       }
+       $r->{"line"}=$line;
+       $r->{"orig"}=$s;
+       return $r;
+}
+
+sub to_chk($$)
+{
+       my($s,$type)=@_;
+
+       local $_=$s;
+       $_=lc $_;
+       s/\bto\b//g if $type eq "english";
+       s/\s//g;
+       s/[(][^)]*[)]//g;
+       return $_;
+}
+
+sub edict_read($)
+{
+       my($pathname)=@_;
+
+       local *EDICT;
+       open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
+       my %check=(
+               #"kanji"=>{},   # exists...
+               #"kana"=>{},    # exists...
+               "identify"=>{},
+               );
+       while (<EDICT>) {
+               my $r=edict_line_parse() or next;
+               push @R,$r;
+               push @{$kana_to_r{$r->{"kana"}}},$r;
+               keys(%check);
+               while (my($field,$hashref)=each(%check)) {
+                       my $val;
+                       $val=identify $r if $field eq "identify";
+                       $val||=$r->{$field};
+                       next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
+                       my $origvalref=\$hashref->{$val};
+                       warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
+                                       if $$origvalref;
+                       $$origvalref=$r;
                }
-       $r->{"meaning"}!~/[<>]/ or warn "Meaning constains invalid characters: ".$r->{"meaning"};
-       push @R,$r;
+       }
+       close EDICT or die "close \"$pathname\": $!";
+       %identify=map((identify($_)=>$_),@R);
 }
-close DICT or die "close \"$opt_dict\": $!";
 
-my %identify=map((identify($_)=>$_),@R);
+sub log_read($)
+{
+       my($pathname)=@_;
 
-local *LOG_READ;
-if (!open LOG_READ,$opt_log) {
-       warn "open \"$opt_log\": $!";
-} else {
+       local *LOG_READ;
+       if (!open LOG_READ,"<:utf8",$pathname) {
+               warn "open \"$pathname\": $!";
+               return;
+       }
        while (<LOG_READ>) {
                chomp;
                # 2002-04-25T21:17:52+0900 ok=1: identify($r)
-               my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$ok,$identify)=
-               /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) ok=(\d+): (.+)$/
+               my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
+               /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
                                or do { warn "Unrecognized line: $_\n"; next; };
                $year-=1900;
                $month--;
@@ -141,76 +277,122 @@ if (!open LOG_READ,$opt_log) {
                my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
                my $r=$identify{$identify}
                                or do { warn "Word not found from line: $_\n"; next; };
-               push @{$r->{"result"}},{
-                       "time"=>$gmtime,
-                       "ok"=>$ok,
-                       };
+               result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime;
        }
-       close LOG_READ or die "close \"$opt_log\": $!";
+       close LOG_READ or die "close \"$pathname\": $!";
 }
 
-for my $r (@R) {
-       local $opt_debug=0 if $opt_debug<2;
-       print STDERR $r->{"phonetic"}."\t".$r->{"meaning"} if $opt_debug;
-       schedule $r;
-       print STDERR "\n" if $opt_debug;
+sub wrong_read()
+{
+       while (<>) {
+               print if $opt_tee;
+               chomp;
+               my $w=edict_line_parse() or next;
+               my $r=$identify{identify $w};
+               do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
+               result $r,"kana",0,"write"=>1;
+       }
 }
 
-my %exam=(
-       "phonetic"=>"meaning",
-       "meaning"=>"phonetic",
-       );
-my @exam=keys(%exam);
-my $r_last;
-my $exam_from_last;
-for (;;) {
-       my $r=best();
-       # Do not change $exam_from if $r remained the same:
-       my $exam_from=$exam_from_last;
-       if (!$r_last || $r ne $r_last) {
-               $exam_from=$exam[int rand @exam];
+sub word_out($)
+{
+       my($s)=@_;
+
+       return $s if !ref $s;
+       return join("",map("$_/",@$s));
+}
+
+sub exam()
+{
+       my @exam=keys(%exam);
+       for (;;) {
+               reschedule() if !$opt_fast;
+               my $besthashref=best();
+               my $r=$besthashref->{"r"};
+               my $exam_from=$besthashref->{"exam_from"};
+               my $exam_to=$exam{$exam_from}{"to"};
+               print word_out($r->{$exam_from}).": ";
+               my $got=<STDIN>;
+               chomp $got;
+               my $want=$r->{$exam_to};
+               my $got_chk=to_chk($got,$exam_to);
+               my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
+               my $ok;
+               my $substr;
+               for my $want_chk (@want_chk) {
+                       next if !$want_chk;     # discard patterns like: /(P)/
+                       $ok=1 if $got_chk eq $want_chk;
+                       $substr=1 if $want_chk=~/\Q$got_chk\E/;
                }
-       my $exam_to  =$exam{$exam_from};
-       print $r->{$exam_from}.": ";
-       my $got=<STDIN>;
-       chomp $got;
-       my $want=$r->{$exam_to};
-       print "-------> $want\n";
-       (my $got_chk =lc $got )=~s/\s//g;
-       (my $want_chk=lc $want)=~s/\s//g;
-       $got_chk=~s/\bto\b//g;
-       $want_chk=~s/\bto\b//g;
-       $want_chk=~s/[(][^)]*[)]//g;
-       warn "Parenthesis ('(',')') not supported in the user input: $got\n" if $got=~/[()]/;
-       warn "Comma (',') not well supported in the user input: $got\n" if $got=~/,/;
-       my $ok;
-       $ok=1 if $got_chk eq $want_chk; # incl. commas
-       for (split /,/,$want_chk) {
-               $ok=1 if $got_chk eq $_;
-       }
-       if (!$ok) {
-               if ($exam_to eq "meaning" && $got=~/\S/ && $want=~/\Q$got\E/) {
-                       my $key;
-                       do {
-                               print STDERR "Is your answer correct? [y/n] ";
-                               sub restore
-                               {
-                                       Term::ReadKey::ReadMode(0);
+               my $aliased;
+               if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
+                               && (my $arrref=$kana_to_r{$got})) {
+                       my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
+                       my $intended;
+                       for my $kana_alias (@$arrref) {
+                               for my $kana_alias_english (@{$kana_alias->{"english"}}) {
+                                       my $chk_english=to_chk $kana_alias_english,"english" or next;
+                                       next if !$chk_english_from{$chk_english};
+                                       print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
+                                       print "OK if alias -> ".$kana_alias->{"orig"}."\n";
+                                       $ok=1;
+                                       $aliased=1;
                                }
-                               local $SIG{"__DIE__"}=\&restore;
-                               Term::ReadKey::ReadMode(4);
-                               $key=Term::ReadKey::ReadKey(0);
-                               print STDERR "\n";      # no echo
-                               restore();
-                               $ok=1 if $key eq "y";
-                               $ok=0 if $key eq "n";
-                       } while (!defined $ok);
+                       }
                }
+               print "-------------> ".word_out($want)."\n" if !$aliased;
+               if (!$ok) {
+                       if ($exam_to eq "english" && $got=~/\S/ && $substr) {
+                               my $key;
+                               do {
+                                       print STDERR "Is your answer correct? [y/n] ";
+                                       sub restore
+                                       {
+                                               Term::ReadKey::ReadMode(0);
+                                       }
+                                       local $SIG{"__DIE__"}=\&restore;
+                                       Term::ReadKey::ReadMode(4);
+                                       $key=Term::ReadKey::ReadKey(0);
+                                       print STDERR "\n";      # no echo
+                                       restore();
+                                       die if $key eq "\x03";  # ctrl-c
+                                       $ok=1 if $key eq "y";
+                                       $ok=0 if $key eq "n";
+                               } while (!defined $ok);
+                       }
+               }
+               $ok||=0;
+               if (!$aliased) {
+                       result $r,$exam_from,$ok,"write"=>1;
+                       print STDERR "result: ".($ok ? "ok" : "WRONG");
+                       schedule $r,$exam_from;
+                       print "\n";
+               }
+               print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
        }
-       $ok||=0;
-       print STDERR "result: ".($ok ? "ok" : "WRONG");
-       result($r,$ok);
-       print "\n";
-       $r_last=$r;
-       $exam_from_last=$exam_from;
 }
+
+# FIXME: balanced $exam_from:
+# my $exam_prob_sum=0;
+# $exam_prob_sum+=$_->{"prob"} for values(%exam);
+# {
+#      my $rand=int rand $exam_prob_sum;
+#      my $sum=0;
+#      keys(%exam);
+#      while (my($from,$hashref)=each(%exam)) {
+#              $sum+=$hashref->{"prob"};
+#              next if $rand>=$sum;
+#              $exam_from=$from;
+#              last;
+#      }
+#      die "INTERNAL" if !$exam_from;
+# }
+
+edict_read $opt_edict;
+
+do { wrong_read(); exit; } if $opt_wrong;
+
+log_read $opt_log;
+reschedule $opt_debug>=2;      # init
+
+exam();