X-Git-Url: https://git.jankratochvil.net/?p=nethome.git;a=blobdiff_plain;f=src%2Fjaxam;h=1c0b62caff92fd590a17d93606087f53ebc66293;hp=597b8ad521173669e00b47318697124d53cd1614;hb=2f85edc8b571e495efb641a13fe6f3ace60699bd;hpb=f45778db720e89cee74c6aaece62295b2e5c71cb diff --git a/src/jaxam b/src/jaxam index 597b8ad..1c0b62c 100755 --- a/src/jaxam +++ b/src/jaxam @@ -16,32 +16,57 @@ binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8"; our $opt_debug=1; -my $opt_edict="jaxam.edict"; +my $opt_edict="$0.edict"; my $opt_log; +my $opt_wrong; +my $opt_errors_wait_key; +my $opt_tee; my $opt_fast; +my $opt_cycle=10; $Getopt::Long::ignorecase=0; $Getopt::Long::bundling=1; die if !GetOptions( - "d|debug+" =>\$opt_debug, - "e|edict=s"=>\$opt_edict, - "l|log=s" =>\$opt_log, - "fast!" =>\$opt_fast, + "d|debug+" =>\$opt_debug, + "e|edict=s" =>\$opt_edict, + "l|log=s" =>\$opt_log, + "w|wrong" =>\$opt_wrong, # Filter the input lines. + "errors-wait-key"=>\$opt_errors_wait_key, # Only for -w|--wrong. + "tee!" =>\$opt_tee, # Output all the read lines. Only for -w|--wrong. + "fast!" =>\$opt_fast, + "cycle=s" =>\$opt_cycle, ); $opt_log||=$opt_edict.".log"; my %exam=( - "kana" =>{"prob"=>1,"to"=>"english"}, - "english"=>{"prob"=>1,"to"=>"kana"}, + "kana" =>{"to"=>"english"}, # "prob"=>1, + "english"=>{"to"=>"kana"}, # "prob"=>1, + ); +my %ok=( + 0=>-1, + 1=>+4, ); my $T; my @R; +my %identify; +my %kana_to_r; +my $result_count=0; + +sub siglog($) +{ + my($a)=@_; + + return 0 if !$a; + my $sig=($a>0)-($a<0); + return $sig*log(abs $a); +} sub schedule($$%) { my($r,$exam_from,%args)=@_; + return if $r->{"orig"}=~/\Q(laceno)\E/; my $time=$args{"time"}||time(); my $sum=0; for my $result (@{$r->{"result"}}) { @@ -56,8 +81,20 @@ sub schedule($$%) # $sum: small..soon # 1/log(2+$age): big.. big importance # 1/log(2+$age): 0..small importance - $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1); + $sum+=1/log(2+$age)*$ok{$result->{"ok"}}; + if (!$result->{"ok"}) { + my $count_diff=$result_count-$result->{"count"}; + $count_diff-=$opt_cycle; + $sum-=siglog($count_diff); + } } + my $scale=1; + if ($r->{"pri"}) { + my $abspri=$r->{"pri"}; + $abspri=1/(-$abspri) if $abspri<0; + $abspri=2 if $abspri<2; + $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1); + } my $arr=[]; if ($T->exists($sum)) { $arr=$T->select($sum)->{"arr"}; @@ -94,7 +131,11 @@ sub best() $T->delete($min); my $r=shift @$arr; $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr; - print STDERR "min=$min\n" if $opt_debug; + if ($opt_debug) { + print STDERR "min=$min"; + print STDERR " of ".(1+@$arr) if @$arr; + print STDERR "\n"; + } return $r; } @@ -110,39 +151,48 @@ sub identify($%) (!$args{"at"} ? () : '@'.$r->{"line"}); } -sub result($$$) +sub result($$$%) { - my($r,$exam_from,$ok)=@_; - - local *LOG_APPEND; - open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; - print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) - ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; - close LOG_APPEND or die "close \"$opt_log\": $!"; + my($r,$exam_from,$ok,%args)=@_; + if ($args{"write"}) { + local *LOG_APPEND; + open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; + print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) + ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; + close LOG_APPEND or die "close \"$opt_log\": $!"; + } + $args{"time"}||=time(); push @{$r->{"result"}},{ - "time"=>time(), + "time"=>$args{"time"}, "exam_from"=>$exam_from, "ok"=>$ok, + "count"=>$result_count++, }; - schedule $r,$exam_from; } -local *EDICT; -open EDICT,"<:utf8",$opt_edict or die "open \"$opt_edict\": $!"; -my %check=( - #"kanji"=>{}, # exists... - #"kana"=>{}, # exists... - "identify"=>{}, - ); -LINE: -while () { - chomp; +my $errors_wait_key_count; +END { + if ($errors_wait_key_count && $opt_errors_wait_key) { + print STDERR "Errors occured. Press any key to continue..."; + ; + } +} + +sub edict_line_parse(;$) +{ + my($s,$line)=@_; + + $s=$_ if !defined $s; + $line=$. if !defined $line; + chomp $s; + local $_=$s; s/;.*//; - next if /^\s*$/; + return if /^\s*$/; #〒 [ゆうびん] /(n) mail/postal service/ #Tシャツ /T-shirt/ - m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} or do { warn "Unparsable: $_\n"; next LINE; }; + m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} + or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; }; my $r; my $rest; ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3); @@ -150,33 +200,70 @@ while () { $r->{"kana"}=$r->{"kanji"}; delete $r->{"kanji"}; } + $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e; while ($rest) { - $rest=~s{^\s*([^/]+?)\s*/}{} or do { warn "Unparsable english: $_\n"; next LINE; }; + $rest=~s{^\s*([^/]+?)\s*/}{} + or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; }; my($english)=($1); push @{$r->{"english"}},$english; } - $r->{"line"}=$.; - push @R,$r; - keys(%check); - while (my($field,$hashref)=each(%check)) { - my $val; - $val=identify $r if $field eq "identify"; - $val||=$r->{$field}; - next if $val eq "悪い にくい"; # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net> - my $origvalref=\$hashref->{$val}; - warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1) - if $$origvalref; - $$origvalref=$r; + $r->{"line"}=$line; + $r->{"orig"}=$s; + return $r; +} + +sub to_chk($$) +{ + my($s,$type)=@_; + + local $_=$s; + $_=lc $_; + s/\bto\b//g if $type eq "english"; + s/\s//g; + s/[(][^)]*[)]//g; + return $_; +} + +sub edict_read($) +{ + my($pathname)=@_; + + local *EDICT; + open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!"; + my %check=( + #"kanji"=>{}, # exists... + #"kana"=>{}, # exists... + "identify"=>{}, + ); + while () { + my $r=edict_line_parse() or next; + push @R,$r; + push @{$kana_to_r{$r->{"kana"}}},$r; + keys(%check); + while (my($field,$hashref)=each(%check)) { + my $val; + $val=identify $r if $field eq "identify"; + $val||=$r->{$field}; + next if $val eq "悪い にくい"; # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net> + my $origvalref=\$hashref->{$val}; + warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1) + if $$origvalref; + $$origvalref=$r; + } } + close EDICT or die "close \"$pathname\": $!"; + %identify=map((identify($_)=>$_),@R); } -close EDICT or die "close \"$opt_edict\": $!"; -my %identify=map((identify($_)=>$_),@R); +sub log_read($) +{ + my($pathname)=@_; -local *LOG_READ; -if (!open LOG_READ,"<:utf8",$opt_log) { - warn "open \"$opt_log\": $!"; -} else { + local *LOG_READ; + if (!open LOG_READ,"<:utf8",$pathname) { + warn "open \"$pathname\": $!"; + return; + } while () { chomp; # 2002-04-25T21:17:52+0900 ok=1: identify($r) @@ -190,27 +277,21 @@ if (!open LOG_READ,"<:utf8",$opt_log) { my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute); my $r=$identify{$identify} or do { warn "Word not found from line: $_\n"; next; }; - push @{$r->{"result"}},{ - "time"=>$gmtime, - "exam_from"=>$exam_from, - "ok"=>$ok, - }; + result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime; } - close LOG_READ or die "close \"$opt_log\": $!"; + close LOG_READ or die "close \"$pathname\": $!"; } -reschedule $opt_debug>=2; # init - -sub to_chk($$) +sub wrong_read() { - my($s,$type)=@_; - - local $_=$s; - $_=lc $_; - s/\bto\b//g if $type eq "english"; - s/\s//g; - s/[(][^)]*[)]//g; - return $_; + while (<>) { + print if $opt_tee; + chomp; + my $w=edict_line_parse() or next; + my $r=$identify{identify $w}; + do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r; + result $r,"kana",0,"write"=>1; + } } sub word_out($) @@ -221,6 +302,76 @@ sub word_out($) return join("",map("$_/",@$s)); } +sub exam() +{ + my @exam=keys(%exam); + for (;;) { + reschedule() if !$opt_fast; + my $besthashref=best(); + my $r=$besthashref->{"r"}; + my $exam_from=$besthashref->{"exam_from"}; + my $exam_to=$exam{$exam_from}{"to"}; + print word_out($r->{$exam_from}).": "; + my $got=; + chomp $got; + my $want=$r->{$exam_to}; + my $got_chk=to_chk($got,$exam_to); + my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want)); + my $ok; + my $substr; + for my $want_chk (@want_chk) { + next if !$want_chk; # discard patterns like: /(P)/ + $ok=1 if $got_chk eq $want_chk; + $substr=1 if $want_chk=~/\Q$got_chk\E/; + } + my $aliased; + if (!$ok && $exam_from eq "english" && $exam_to eq "kana" + && (my $arrref=$kana_to_r{$got})) { + my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}}); + my $intended; + for my $kana_alias (@$arrref) { + for my $kana_alias_english (@{$kana_alias->{"english"}}) { + my $chk_english=to_chk $kana_alias_english,"english" or next; + next if !$chk_english_from{$chk_english}; + print "intended ----> ".$r->{"orig"}."\n" if !$intended++; + print "OK if alias -> ".$kana_alias->{"orig"}."\n"; + $ok=1; + $aliased=1; + } + } + } + print "-------------> ".word_out($want)."\n" if !$aliased; + if (!$ok) { + if ($exam_to eq "english" && $got=~/\S/ && $substr) { + my $key; + do { + print STDERR "Is your answer correct? [y/n] "; + sub restore + { + Term::ReadKey::ReadMode(0); + } + local $SIG{"__DIE__"}=\&restore; + Term::ReadKey::ReadMode(4); + $key=Term::ReadKey::ReadKey(0); + print STDERR "\n"; # no echo + restore(); + die if $key eq "\x03"; # ctrl-c + $ok=1 if $key eq "y"; + $ok=0 if $key eq "n"; + } while (!defined $ok); + } + } + $ok||=0; + if (!$aliased) { + result $r,$exam_from,$ok,"write"=>1; + print STDERR "result: ".($ok ? "ok" : "WRONG"); + schedule $r,$exam_from; + print "\n"; + } + print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok; + } +} + # FIXME: balanced $exam_from: # my $exam_prob_sum=0; # $exam_prob_sum+=$_->{"prob"} for values(%exam); @@ -236,50 +387,12 @@ sub word_out($) # } # die "INTERNAL" if !$exam_from; # } -my @exam=keys(%exam); -for (;;) { - reschedule() if !$opt_fast; - my $besthashref=best(); - my $r=$besthashref->{"r"}; - my $exam_from=$besthashref->{"exam_from"}; - my $exam_to=$exam{$exam_from}{"to"}; - print word_out($r->{$exam_from}).": "; - my $got=; - chomp $got; - my $want=$r->{$exam_to}; - print "-------> ".word_out($want)."\n"; - my $got_chk=to_chk($got,$exam_to); - my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want)); - my $ok; - my $substr; - for my $want_chk (@want_chk) { - next if !$want_chk; # discard patterns like: /(P)/ - $ok=1 if $got_chk eq $want_chk; - $substr=1 if $want_chk=~/\Q$got_chk\E/; - } - if (!$ok) { - if ($exam_to eq "english" && $got=~/\S/ && $substr) { - my $key; - do { - print STDERR "Is your answer correct? [y/n] "; - sub restore - { - Term::ReadKey::ReadMode(0); - } - local $SIG{"__DIE__"}=\&restore; - Term::ReadKey::ReadMode(4); - $key=Term::ReadKey::ReadKey(0); - print STDERR "\n"; # no echo - restore(); - die if $key eq "\x03"; # ctrl-c - $ok=1 if $key eq "y"; - $ok=0 if $key eq "n"; - } while (!defined $ok); - } - } - $ok||=0; - print STDERR "result: ".($ok ? "ok" : "WRONG"); - result $r,$exam_from,$ok; - print "\n"; - print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok; -} + +edict_read $opt_edict; + +do { wrong_read(); exit; } if $opt_wrong; + +log_read $opt_log; +reschedule $opt_debug>=2; # init + +exam();