From: short <> Date: Sun, 22 May 2005 09:36:41 +0000 (+0000) Subject: Resolve amiguous english->kana translations. X-Git-Url: https://git.jankratochvil.net/?p=nethome.git;a=commitdiff_plain;h=99f25354facbf5b652f4a88378d005107797d5c8 Resolve amiguous english->kana translations. Other fixes. --- diff --git a/src/jaxam b/src/jaxam index cce6e85..261428c 100755 --- a/src/jaxam +++ b/src/jaxam @@ -39,13 +39,18 @@ die if !GetOptions( $opt_log||=$opt_edict.".log"; my %exam=( - "kana" =>{"prob"=>1,"to"=>"english"}, - "english"=>{"prob"=>1,"to"=>"kana"}, + "kana" =>{"to"=>"english"}, # "prob"=>1, + "english"=>{"to"=>"kana"}, # "prob"=>1, + ); +my %ok=( + 0=>-1, + 1=>+4, ); my $T; my @R; my %identify; +my %kana_to_r; sub schedule($$%) { @@ -65,7 +70,7 @@ sub schedule($$%) # $sum: small..soon # 1/log(2+$age): big.. big importance # 1/log(2+$age): 0..small importance - $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1); + $sum+=1/log(2+$age)*$ok{$result->{"ok"}}; } my $arr=[]; if ($T->exists($sum)) { @@ -103,7 +108,11 @@ sub best() $T->delete($min); my $r=shift @$arr; $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr; - print STDERR "min=$min\n" if $opt_debug; + if ($opt_debug) { + print STDERR "min=$min"; + print STDERR " of ".(1+@$arr) if @$arr; + print STDERR "\n"; + } return $r; } @@ -119,22 +128,23 @@ sub identify($%) (!$args{"at"} ? () : '@'.$r->{"line"}); } -sub result($$$) +sub result($$$%) { - my($r,$exam_from,$ok)=@_; - - local *LOG_APPEND; - open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; - print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) - ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; - close LOG_APPEND or die "close \"$opt_log\": $!"; + my($r,$exam_from,$ok,%args)=@_; + + if ($args{"write"}) { + local *LOG_APPEND; + open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; + print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) + ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; + close LOG_APPEND or die "close \"$opt_log\": $!"; + } push @{$r->{"result"}},{ "time"=>time(), "exam_from"=>$exam_from, "ok"=>$ok, }; - schedule $r,$exam_from if $T; } my $errors_wait_key_count; @@ -151,14 +161,14 @@ sub edict_line_parse(;$) $s=$_ if !defined $s; $line=$. if !defined $s; + chomp $s; local $_=$s; - chomp; s/;.*//; return if /^\s*$/; #〒 [ゆうびん] /(n) mail/postal service/ #Tシャツ /T-shirt/ m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} - or do { warn "Unparsable: $_\n"; $errors_wait_key_count++; return; }; + or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; }; my $r; my $rest; ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3); @@ -168,14 +178,27 @@ sub edict_line_parse(;$) } while ($rest) { $rest=~s{^\s*([^/]+?)\s*/}{} - or do { warn "Unparsable english: $_\n"; $errors_wait_key_count++; return; }; + or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; }; my($english)=($1); push @{$r->{"english"}},$english; } - $r->{"line"}=$.; + $r->{"line"}=$line; + $r->{"orig"}=$s; return $r; } +sub to_chk($$) +{ + my($s,$type)=@_; + + local $_=$s; + $_=lc $_; + s/\bto\b//g if $type eq "english"; + s/\s//g; + s/[(][^)]*[)]//g; + return $_; +} + sub edict_read($) { my($pathname)=@_; @@ -187,10 +210,11 @@ sub edict_read($) #"kana"=>{}, # exists... "identify"=>{}, ); - LINE: while () { my $r=edict_line_parse() or next; + next if /\Q(laceno)\E/; push @R,$r; + push @{$kana_to_r{$r->{"kana"}}},$r; keys(%check); while (my($field,$hashref)=each(%check)) { my $val; @@ -229,11 +253,7 @@ sub log_read($) my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute); my $r=$identify{$identify} or do { warn "Word not found from line: $_\n"; next; }; - push @{$r->{"result"}},{ - "time"=>$gmtime, - "exam_from"=>$exam_from, - "ok"=>$ok, - }; + result $r,$exam_from,$ok,"write"=>0; } close LOG_READ or die "close \"$pathname\": $!"; } @@ -246,22 +266,10 @@ sub wrong_read() my $w=edict_line_parse() or next; my $r=$identify{identify $w}; do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r; - result $r,"kana",0; + result $r,"kana",0,"write"=>1; } } -sub to_chk($$) -{ - my($s,$type)=@_; - - local $_=$s; - $_=lc $_; - s/\bto\b//g if $type eq "english"; - s/\s//g; - s/[(][^)]*[)]//g; - return $_; -} - sub word_out($) { my($s)=@_; @@ -283,7 +291,6 @@ sub exam() my $got=; chomp $got; my $want=$r->{$exam_to}; - print "-------> ".word_out($want)."\n"; my $got_chk=to_chk($got,$exam_to); my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want)); my $ok; @@ -293,6 +300,23 @@ sub exam() $ok=1 if $got_chk eq $want_chk; $substr=1 if $want_chk=~/\Q$got_chk\E/; } + my $aliased; + if (!$ok && $exam_from eq "english" && $exam_to eq "kana" + && (my $arrref=$kana_to_r{$got})) { + my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}}); + my $intended; + for my $kana_alias (@$arrref) { + for my $kana_alias_english (@{$kana_alias->{"english"}}) { + my $chk_english=to_chk $kana_alias_english,"english" or next; + next if !$chk_english_from{$chk_english}; + print "intended ----> ".$r->{"orig"}."\n" if !$intended++; + print "OK if alias -> ".$kana_alias->{"orig"}."\n"; + $ok=1; + $aliased=1; + } + } + } + print "-------------> ".word_out($want)."\n" if !$aliased; if (!$ok) { if ($exam_to eq "english" && $got=~/\S/ && $substr) { my $key; @@ -314,9 +338,12 @@ sub exam() } } $ok||=0; - print STDERR "result: ".($ok ? "ok" : "WRONG"); - result $r,$exam_from,$ok; - print "\n"; + if (!$aliased) { + result $r,$exam_from,$ok,"write"=>1; + print STDERR "result: ".($ok ? "ok" : "WRONG"); + schedule $r,$exam_from; + print "\n"; + } print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok; } }