X-Git-Url: https://git.jankratochvil.net/?p=nethome.git;a=blobdiff_plain;f=src%2Fjaxam;h=1c0b62caff92fd590a17d93606087f53ebc66293;hp=596a68892c5e97d55f7ea2d70ecd511c93a205ad;hb=271bd12bf3ce2bbf0e03fa5fe2bec93840d2c247;hpb=276d4f61952f1c44755dad759581b0d67f1e903d diff --git a/src/jaxam b/src/jaxam index 596a688..1c0b62c 100755 --- a/src/jaxam +++ b/src/jaxam @@ -10,29 +10,68 @@ require Tree::Binary::Search; use Getopt::Long; require Time::Piece::ISO; require POSIX; +use utf8; +use encoding "utf8"; +binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8"; our $opt_debug=1; -my $opt_dict=$ENV{"HOME"}."/priv/japan-lang/jfe_1-9.csv"; -my $opt_log=$opt_dict.".log"; +my $opt_edict="$0.edict"; +my $opt_log; +my $opt_wrong; +my $opt_errors_wait_key; +my $opt_tee; +my $opt_fast; +my $opt_cycle=10; $Getopt::Long::ignorecase=0; $Getopt::Long::bundling=1; die if !GetOptions( - "d|debug+"=>\$opt_debug, - "c|dict=s"=>\$opt_dict, - "l|log=s" =>\$opt_log, + "d|debug+" =>\$opt_debug, + "e|edict=s" =>\$opt_edict, + "l|log=s" =>\$opt_log, + "w|wrong" =>\$opt_wrong, # Filter the input lines. + "errors-wait-key"=>\$opt_errors_wait_key, # Only for -w|--wrong. + "tee!" =>\$opt_tee, # Output all the read lines. Only for -w|--wrong. + "fast!" =>\$opt_fast, + "cycle=s" =>\$opt_cycle, ); +$opt_log||=$opt_edict.".log"; + +my %exam=( + "kana" =>{"to"=>"english"}, # "prob"=>1, + "english"=>{"to"=>"kana"}, # "prob"=>1, + ); +my %ok=( + 0=>-1, + 1=>+4, + ); my $T; +my @R; +my %identify; +my %kana_to_r; +my $result_count=0; -sub schedule($) +sub siglog($) { - my($r)=@_; + my($a)=@_; + + return 0 if !$a; + my $sig=($a>0)-($a<0); + return $sig*log(abs $a); +} +sub schedule($$%) +{ + my($r,$exam_from,%args)=@_; + + return if $r->{"orig"}=~/\Q(laceno)\E/; + my $time=$args{"time"}||time(); my $sum=0; for my $result (@{$r->{"result"}}) { - my $age=time()-$result->{"time"}; + next if $result->{"exam_from"} ne $exam_from; + my $age=$time-$result->{"time"}; $age=0 if $age<0; # $age: 0.. big importance # $age: big..small importance @@ -42,18 +81,48 @@ sub schedule($) # $sum: small..soon # 1/log(2+$age): big.. big importance # 1/log(2+$age): 0..small importance - $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1); + $sum+=1/log(2+$age)*$ok{$result->{"ok"}}; + if (!$result->{"ok"}) { + my $count_diff=$result_count-$result->{"count"}; + $count_diff-=$opt_cycle; + $sum-=siglog($count_diff); + } } + my $scale=1; + if ($r->{"pri"}) { + my $abspri=$r->{"pri"}; + $abspri=1/(-$abspri) if $abspri<0; + $abspri=2 if $abspri<2; + $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1); + } my $arr=[]; if ($T->exists($sum)) { $arr=$T->select($sum)->{"arr"}; $T->delete($sum); } - splice @$arr,int(rand(@$arr+1)),0,$r; + splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from}; $T->insert($sum=>{"key"=>$sum,"arr"=>$arr}); print STDERR "; schedule=$sum" if $opt_debug; } +sub reschedule(;$) +{ + my($opt_debug_force)=@_; + + $T=Tree::Binary::Search->new(); + $T->useNumericComparison(); + my $time=time(); + for my $r (@R) { + local $opt_debug=0 if $opt_debug<2; + local $opt_debug=$opt_debug_force if defined $opt_debug_force; + print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug; + for my $exam_from (keys(%exam)) { + schedule $r,$exam_from,"time"=>$time; + } + print STDERR "\n" if $opt_debug; + } +} + sub best() { return if $T->isEmpty(); @@ -62,77 +131,144 @@ sub best() $T->delete($min); my $r=shift @$arr; $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr; - print STDERR "min=$min\n" if $opt_debug; + if ($opt_debug) { + print STDERR "min=$min"; + print STDERR " of ".(1+@$arr) if @$arr; + print STDERR "\n"; + } return $r; } -sub identify($) +sub identify($%) { - my($r)=@_; + my($r,%args)=@_; - return join " ",map(($r->{$_}||"-"),qw( - written - phonetic - )); + return join " ", + map(($r->{$_}||()),qw( + kanji + kana + )), + (!$args{"at"} ? () : '@'.$r->{"line"}); } -sub result($$) +sub result($$$%) { - my($r,$ok)=@_; - - local *LOG_APPEND; - open LOG_APPEND,">>".$opt_log or die "append \"$opt_log\": $!"; - print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())." ok=$ok: ".identify($r)."\n"; - close LOG_APPEND or die "close \"$opt_log\": $!"; + my($r,$exam_from,$ok,%args)=@_; + if ($args{"write"}) { + local *LOG_APPEND; + open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; + print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) + ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; + close LOG_APPEND or die "close \"$opt_log\": $!"; + } + $args{"time"}||=time(); push @{$r->{"result"}},{ - "time"=>time(), + "time"=>$args{"time"}, + "exam_from"=>$exam_from, "ok"=>$ok, + "count"=>$result_count++, }; - schedule $r; } -my @R; -my %keyword; -$T=Tree::Binary::Search->new(); -$T->useNumericComparison(); -local *DICT; -open DICT,$opt_dict or die "open \"$opt_dict\": $!"; -while () { - chomp; +my $errors_wait_key_count; +END { + if ($errors_wait_key_count && $opt_errors_wait_key) { + print STDERR "Errors occured. Press any key to continue..."; + ; + } +} + +sub edict_line_parse(;$) +{ + my($s,$line)=@_; + + $s=$_ if !defined $s; + $line=$. if !defined $line; + chomp $s; + local $_=$s; + s/;.*//; + return if /^\s*$/; + #〒 [ゆうびん] /(n) mail/postal service/ + #Tシャツ /T-shirt/ + m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} + or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; }; my $r; my $rest; - ( - $r->{"written"}, - $r->{"phonetic"}, - $r->{"meaning"}, - $r->{"category"}, - $r->{"learned"}, - $rest, - )=split /\t/; - next if $r->{"written"} eq ""; - die "Excessive argument: $rest" if defined $rest; - while ($r->{"meaning"}=~s/\s*<([-\w]+)>\s*//) { - my $keyword=$1; - $r->{"meaning_keyword"}{$keyword}=1; - $keyword{$keyword}=1; + ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3); + if (!$r->{"kana"}) { + $r->{"kana"}=$r->{"kanji"}; + delete $r->{"kanji"}; + } + $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e; + while ($rest) { + $rest=~s{^\s*([^/]+?)\s*/}{} + or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; }; + my($english)=($1); + push @{$r->{"english"}},$english; + } + $r->{"line"}=$line; + $r->{"orig"}=$s; + return $r; +} + +sub to_chk($$) +{ + my($s,$type)=@_; + + local $_=$s; + $_=lc $_; + s/\bto\b//g if $type eq "english"; + s/\s//g; + s/[(][^)]*[)]//g; + return $_; +} + +sub edict_read($) +{ + my($pathname)=@_; + + local *EDICT; + open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!"; + my %check=( + #"kanji"=>{}, # exists... + #"kana"=>{}, # exists... + "identify"=>{}, + ); + while () { + my $r=edict_line_parse() or next; + push @R,$r; + push @{$kana_to_r{$r->{"kana"}}},$r; + keys(%check); + while (my($field,$hashref)=each(%check)) { + my $val; + $val=identify $r if $field eq "identify"; + $val||=$r->{$field}; + next if $val eq "悪い にくい"; # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net> + my $origvalref=\$hashref->{$val}; + warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1) + if $$origvalref; + $$origvalref=$r; } - $r->{"meaning"}!~/[<>]/ or warn "Meaning constains invalid characters: ".$r->{"meaning"}; - push @R,$r; + } + close EDICT or die "close \"$pathname\": $!"; + %identify=map((identify($_)=>$_),@R); } -close DICT or die "close \"$opt_dict\": $!"; -my %identify=map((identify($_)=>$_),@R); +sub log_read($) +{ + my($pathname)=@_; -local *LOG_READ; -if (!open LOG_READ,$opt_log) { - warn "open \"$opt_log\": $!"; -} else { + local *LOG_READ; + if (!open LOG_READ,"<:utf8",$pathname) { + warn "open \"$pathname\": $!"; + return; + } while () { chomp; # 2002-04-25T21:17:52+0900 ok=1: identify($r) - my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$ok,$identify)= - /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) ok=(\d+): (.+)$/ + my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)= + /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/ or do { warn "Unrecognized line: $_\n"; next; }; $year-=1900; $month--; @@ -141,76 +277,122 @@ if (!open LOG_READ,$opt_log) { my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute); my $r=$identify{$identify} or do { warn "Word not found from line: $_\n"; next; }; - push @{$r->{"result"}},{ - "time"=>$gmtime, - "ok"=>$ok, - }; + result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime; } - close LOG_READ or die "close \"$opt_log\": $!"; + close LOG_READ or die "close \"$pathname\": $!"; } -for my $r (@R) { - local $opt_debug=0 if $opt_debug<2; - print STDERR $r->{"phonetic"}."\t".$r->{"meaning"} if $opt_debug; - schedule $r; - print STDERR "\n" if $opt_debug; +sub wrong_read() +{ + while (<>) { + print if $opt_tee; + chomp; + my $w=edict_line_parse() or next; + my $r=$identify{identify $w}; + do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r; + result $r,"kana",0,"write"=>1; + } } -my %exam=( - "phonetic"=>"meaning", - "meaning"=>"phonetic", - ); -my @exam=keys(%exam); -my $r_last; -my $exam_from_last; -for (;;) { - my $r=best(); - # Do not change $exam_from if $r remained the same: - my $exam_from=$exam_from_last; - if (!$r_last || $r ne $r_last) { - $exam_from=$exam[int rand @exam]; +sub word_out($) +{ + my($s)=@_; + + return $s if !ref $s; + return join("",map("$_/",@$s)); +} + +sub exam() +{ + my @exam=keys(%exam); + for (;;) { + reschedule() if !$opt_fast; + my $besthashref=best(); + my $r=$besthashref->{"r"}; + my $exam_from=$besthashref->{"exam_from"}; + my $exam_to=$exam{$exam_from}{"to"}; + print word_out($r->{$exam_from}).": "; + my $got=; + chomp $got; + my $want=$r->{$exam_to}; + my $got_chk=to_chk($got,$exam_to); + my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want)); + my $ok; + my $substr; + for my $want_chk (@want_chk) { + next if !$want_chk; # discard patterns like: /(P)/ + $ok=1 if $got_chk eq $want_chk; + $substr=1 if $want_chk=~/\Q$got_chk\E/; } - my $exam_to =$exam{$exam_from}; - print $r->{$exam_from}.": "; - my $got=; - chomp $got; - my $want=$r->{$exam_to}; - print "-------> $want\n"; - (my $got_chk =lc $got )=~s/\s//g; - (my $want_chk=lc $want)=~s/\s//g; - $got_chk=~s/\bto\b//g; - $want_chk=~s/\bto\b//g; - $want_chk=~s/[(][^)]*[)]//g; - warn "Parenthesis ('(',')') not supported in the user input: $got\n" if $got=~/[()]/; - warn "Comma (',') not well supported in the user input: $got\n" if $got=~/,/; - my $ok; - $ok=1 if $got_chk eq $want_chk; # incl. commas - for (split /,/,$want_chk) { - $ok=1 if $got_chk eq $_; - } - if (!$ok) { - if ($exam_to eq "meaning" && $got=~/\S/ && $want=~/\Q$got\E/) { - my $key; - do { - print STDERR "Is your answer correct? [y/n] "; - sub restore - { - Term::ReadKey::ReadMode(0); + my $aliased; + if (!$ok && $exam_from eq "english" && $exam_to eq "kana" + && (my $arrref=$kana_to_r{$got})) { + my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}}); + my $intended; + for my $kana_alias (@$arrref) { + for my $kana_alias_english (@{$kana_alias->{"english"}}) { + my $chk_english=to_chk $kana_alias_english,"english" or next; + next if !$chk_english_from{$chk_english}; + print "intended ----> ".$r->{"orig"}."\n" if !$intended++; + print "OK if alias -> ".$kana_alias->{"orig"}."\n"; + $ok=1; + $aliased=1; } - local $SIG{"__DIE__"}=\&restore; - Term::ReadKey::ReadMode(4); - $key=Term::ReadKey::ReadKey(0); - print STDERR "\n"; # no echo - restore(); - $ok=1 if $key eq "y"; - $ok=0 if $key eq "n"; - } while (!defined $ok); + } } + print "-------------> ".word_out($want)."\n" if !$aliased; + if (!$ok) { + if ($exam_to eq "english" && $got=~/\S/ && $substr) { + my $key; + do { + print STDERR "Is your answer correct? [y/n] "; + sub restore + { + Term::ReadKey::ReadMode(0); + } + local $SIG{"__DIE__"}=\&restore; + Term::ReadKey::ReadMode(4); + $key=Term::ReadKey::ReadKey(0); + print STDERR "\n"; # no echo + restore(); + die if $key eq "\x03"; # ctrl-c + $ok=1 if $key eq "y"; + $ok=0 if $key eq "n"; + } while (!defined $ok); + } + } + $ok||=0; + if (!$aliased) { + result $r,$exam_from,$ok,"write"=>1; + print STDERR "result: ".($ok ? "ok" : "WRONG"); + schedule $r,$exam_from; + print "\n"; + } + print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok; } - $ok||=0; - print STDERR "result: ".($ok ? "ok" : "WRONG"); - result($r,$ok); - print "\n"; - $r_last=$r; - $exam_from_last=$exam_from; } + +# FIXME: balanced $exam_from: +# my $exam_prob_sum=0; +# $exam_prob_sum+=$_->{"prob"} for values(%exam); +# { +# my $rand=int rand $exam_prob_sum; +# my $sum=0; +# keys(%exam); +# while (my($from,$hashref)=each(%exam)) { +# $sum+=$hashref->{"prob"}; +# next if $rand>=$sum; +# $exam_from=$from; +# last; +# } +# die "INTERNAL" if !$exam_from; +# } + +edict_read $opt_edict; + +do { wrong_read(); exit; } if $opt_wrong; + +log_read $opt_log; +reschedule $opt_debug>=2; # init + +exam();