#! /usr/bin/perl # # $Id$ use strict; use warnings; require Term::ReadKey; require Tree::Binary::Search; use Getopt::Long; require Time::Piece::ISO; require POSIX; use utf8; use encoding "utf8"; binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8"; our $opt_debug=1; my $opt_edict="$0.edict"; my $opt_log; my $opt_wrong; my $opt_errors_wait_key; my $opt_tee; my $opt_fast; my $opt_cycle=10; $Getopt::Long::ignorecase=0; $Getopt::Long::bundling=1; die if !GetOptions( "d|debug+" =>\$opt_debug, "e|edict=s" =>\$opt_edict, "l|log=s" =>\$opt_log, "w|wrong" =>\$opt_wrong, # Filter the input lines. "errors-wait-key"=>\$opt_errors_wait_key, # Only for -w|--wrong. "tee!" =>\$opt_tee, # Output all the read lines. Only for -w|--wrong. "fast!" =>\$opt_fast, "cycle=s" =>\$opt_cycle, ); $opt_log||=$opt_edict.".log"; my %exam=( "kana" =>{"to"=>"english"}, # "prob"=>1, "english"=>{"to"=>"kana"}, # "prob"=>1, ); my %ok=( 0=>-1, 1=>+4, ); my $T; my @R; my %identify; my %kana_to_r; my $result_count=0; sub siglog($) { my($a)=@_; return 0 if !$a; my $sig=($a>0)-($a<0); return $sig*log(abs $a); } sub schedule($$%) { my($r,$exam_from,%args)=@_; return if $r->{"orig"}=~/\Q(laceno)\E/; my $time=$args{"time"}||time(); my $sum=0; for my $result (@{$r->{"result"}}) { next if $result->{"exam_from"} ne $exam_from; my $age=$time-$result->{"time"}; $age=0 if $age<0; # $age: 0.. big importance # $age: big..small importance # $ok: 0..mistake # $ok: 1..correct # $sum: big..far # $sum: small..soon # 1/log(2+$age): big.. big importance # 1/log(2+$age): 0..small importance $sum+=1/log(2+$age)*$ok{$result->{"ok"}}; if (!$result->{"ok"}) { my $count_diff=$result_count-$result->{"count"}; $count_diff-=$opt_cycle; $sum-=siglog($count_diff); } } my $scale=1; if ($r->{"pri"}) { my $abspri=$r->{"pri"}; $abspri=1/(-$abspri) if $abspri<0; $abspri=2 if $abspri<2; $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1); } my $arr=[]; if ($T->exists($sum)) { $arr=$T->select($sum)->{"arr"}; $T->delete($sum); } splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from}; $T->insert($sum=>{"key"=>$sum,"arr"=>$arr}); print STDERR "; schedule=$sum" if $opt_debug; } sub reschedule(;$) { my($opt_debug_force)=@_; $T=Tree::Binary::Search->new(); $T->useNumericComparison(); my $time=time(); for my $r (@R) { local $opt_debug=0 if $opt_debug<2; local $opt_debug=$opt_debug_force if defined $opt_debug_force; print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug; for my $exam_from (keys(%exam)) { schedule $r,$exam_from,"time"=>$time; } print STDERR "\n" if $opt_debug; } } sub best() { return if $T->isEmpty(); my $min=$T->min()->{"key"}; my $arr=$T->select($min)->{"arr"}; $T->delete($min); my $r=shift @$arr; $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr; if ($opt_debug) { print STDERR "min=$min"; print STDERR " of ".(1+@$arr) if @$arr; print STDERR "\n"; } return $r; } sub identify($%) { my($r,%args)=@_; return join " ", map(($r->{$_}||()),qw( kanji kana )), (!$args{"at"} ? () : '@'.$r->{"line"}); } sub result($$$%) { my($r,$exam_from,$ok,%args)=@_; if ($args{"write"}) { local *LOG_APPEND; open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; close LOG_APPEND or die "close \"$opt_log\": $!"; } $args{"time"}||=time(); push @{$r->{"result"}},{ "time"=>$args{"time"}, "exam_from"=>$exam_from, "ok"=>$ok, "count"=>$result_count++, }; } my $errors_wait_key_count; END { if ($errors_wait_key_count && $opt_errors_wait_key) { print STDERR "Errors occured. Press any key to continue..."; ; } } sub edict_line_parse(;$) { my($s,$line)=@_; $s=$_ if !defined $s; $line=$. if !defined $line; chomp $s; local $_=$s; s/;.*//; return if /^\s*$/; #〒 [ゆうびん] /(n) mail/postal service/ #Tシャツ /T-shirt/ m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; }; my $r; my $rest; ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3); if (!$r->{"kana"}) { $r->{"kana"}=$r->{"kanji"}; delete $r->{"kanji"}; } $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e; while ($rest) { $rest=~s{^\s*([^/]+?)\s*/}{} or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; }; my($english)=($1); push @{$r->{"english"}},$english; } $r->{"line"}=$line; $r->{"orig"}=$s; return $r; } sub to_chk($$) { my($s,$type)=@_; local $_=$s; $_=lc $_; s/\bto\b//g if $type eq "english"; s/\s//g; s/[(][^)]*[)]//g; return $_; } sub edict_read($) { my($pathname)=@_; local *EDICT; open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!"; my %check=( #"kanji"=>{}, # exists... #"kana"=>{}, # exists... "identify"=>{}, ); while () { my $r=edict_line_parse() or next; push @R,$r; push @{$kana_to_r{$r->{"kana"}}},$r; keys(%check); while (my($field,$hashref)=each(%check)) { my $val; $val=identify $r if $field eq "identify"; $val||=$r->{$field}; next if $val eq "悪い にくい"; # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net> my $origvalref=\$hashref->{$val}; warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1) if $$origvalref; $$origvalref=$r; } } close EDICT or die "close \"$pathname\": $!"; %identify=map((identify($_)=>$_),@R); } sub log_read($) { my($pathname)=@_; local *LOG_READ; if (!open LOG_READ,"<:utf8",$pathname) { warn "open \"$pathname\": $!"; return; } while () { chomp; # 2002-04-25T21:17:52+0900 ok=1: identify($r) my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)= /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/ or do { warn "Unrecognized line: $_\n"; next; }; $year-=1900; $month--; my $localtime=POSIX::mktime($second,$minute,$hour,$day,$month,$year); $localtime or do { warn "Unparsable time at line: $_\n"; next; }; my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute); my $r=$identify{$identify} or do { warn "Word not found from line: $_\n"; next; }; result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime; } close LOG_READ or die "close \"$pathname\": $!"; } sub wrong_read() { while (<>) { print if $opt_tee; chomp; my $w=edict_line_parse() or next; my $r=$identify{identify $w}; do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r; result $r,"kana",0,"write"=>1; } } sub word_out($) { my($s)=@_; return $s if !ref $s; return join("",map("$_/",@$s)); } sub exam() { my @exam=keys(%exam); for (;;) { reschedule() if !$opt_fast; my $besthashref=best(); my $r=$besthashref->{"r"}; my $exam_from=$besthashref->{"exam_from"}; my $exam_to=$exam{$exam_from}{"to"}; print word_out($r->{$exam_from}).": "; my $got=; chomp $got; my $want=$r->{$exam_to}; my $got_chk=to_chk($got,$exam_to); my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want)); my $ok; my $substr; for my $want_chk (@want_chk) { next if !$want_chk; # discard patterns like: /(P)/ $ok=1 if $got_chk eq $want_chk; $substr=1 if $want_chk=~/\Q$got_chk\E/; } my $aliased; if (!$ok && $exam_from eq "english" && $exam_to eq "kana" && (my $arrref=$kana_to_r{$got})) { my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}}); my $intended; for my $kana_alias (@$arrref) { for my $kana_alias_english (@{$kana_alias->{"english"}}) { my $chk_english=to_chk $kana_alias_english,"english" or next; next if !$chk_english_from{$chk_english}; print "intended ----> ".$r->{"orig"}."\n" if !$intended++; print "OK if alias -> ".$kana_alias->{"orig"}."\n"; $ok=1; $aliased=1; } } } print "-------------> ".word_out($want)."\n" if !$aliased; if (!$ok) { if ($exam_to eq "english" && $got=~/\S/ && $substr) { my $key; do { print STDERR "Is your answer correct? [y/n] "; sub restore { Term::ReadKey::ReadMode(0); } local $SIG{"__DIE__"}=\&restore; Term::ReadKey::ReadMode(4); $key=Term::ReadKey::ReadKey(0); print STDERR "\n"; # no echo restore(); die if $key eq "\x03"; # ctrl-c $ok=1 if $key eq "y"; $ok=0 if $key eq "n"; } while (!defined $ok); } } $ok||=0; if (!$aliased) { result $r,$exam_from,$ok,"write"=>1; print STDERR "result: ".($ok ? "ok" : "WRONG"); schedule $r,$exam_from; print "\n"; } print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok; } } # FIXME: balanced $exam_from: # my $exam_prob_sum=0; # $exam_prob_sum+=$_->{"prob"} for values(%exam); # { # my $rand=int rand $exam_prob_sum; # my $sum=0; # keys(%exam); # while (my($from,$hashref)=each(%exam)) { # $sum+=$hashref->{"prob"}; # next if $rand>=$sum; # $exam_from=$from; # last; # } # die "INTERNAL" if !$exam_from; # } edict_read $opt_edict; do { wrong_read(); exit; } if $opt_wrong; log_read $opt_log; reschedule $opt_debug>=2; # init exam();