From 79c2dad59b18722ba9f368763d520d0303d37fc6 Mon Sep 17 00:00:00 2001 From: short <> Date: Sun, 22 May 2005 03:03:24 +0000 Subject: [PATCH] Moved from WaKan to edict. Probability choosing now separates each $exam_from of a word. Many bugfixes. --- src/jaxam | 231 ++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 150 insertions(+), 81 deletions(-) diff --git a/src/jaxam b/src/jaxam index 596a688..362cecd 100755 --- a/src/jaxam +++ b/src/jaxam @@ -10,29 +10,43 @@ require Tree::Binary::Search; use Getopt::Long; require Time::Piece::ISO; require POSIX; +use utf8; +use encoding "utf8"; +binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8"; our $opt_debug=1; -my $opt_dict=$ENV{"HOME"}."/priv/japan-lang/jfe_1-9.csv"; -my $opt_log=$opt_dict.".log"; +my $opt_edict="jaxam.edict"; +my $opt_log; +my $opt_fast; $Getopt::Long::ignorecase=0; $Getopt::Long::bundling=1; die if !GetOptions( - "d|debug+"=>\$opt_debug, - "c|dict=s"=>\$opt_dict, - "l|log=s" =>\$opt_log, + "d|debug+" =>\$opt_debug, + "e|edict=s"=>\$opt_edict, + "l|log=s" =>\$opt_log, + "fast!" =>\$opt_fast, ); +$opt_log||=$opt_edict.".log"; + +my %exam=( + "kana" =>{"prob"=>1,"to"=>"english"}, + "english"=>{"prob"=>1,"to"=>"kana"}, + ); my $T; +my @R; -sub schedule($) +sub schedule($$%) { - my($r)=@_; + my($r,$exam_from,%args)=@_; + my $time=$args{"time"}||time(); my $sum=0; for my $result (@{$r->{"result"}}) { - my $age=time()-$result->{"time"}; + next if $result->{"exam_from"} ne $exam_from; + my $age=$time-$result->{"time"}; $age=0 if $age<0; # $age: 0.. big importance # $age: big..small importance @@ -49,11 +63,29 @@ sub schedule($) $arr=$T->select($sum)->{"arr"}; $T->delete($sum); } - splice @$arr,int(rand(@$arr+1)),0,$r; + splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from}; $T->insert($sum=>{"key"=>$sum,"arr"=>$arr}); print STDERR "; schedule=$sum" if $opt_debug; } +sub reschedule(;$) +{ + my($opt_debug_force)=@_; + + $T=Tree::Binary::Search->new(); + $T->useNumericComparison(); + my $time=time(); + for my $r (@R) { + local $opt_debug=0 if $opt_debug<2; + local $opt_debug=$opt_debug_force if defined $opt_debug_force; + print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug; + for my $exam_from (keys(%exam)) { + schedule $r,$exam_from,"time"=>$time; + } + print STDERR "\n" if $opt_debug; + } +} + sub best() { return if $T->isEmpty(); @@ -66,73 +98,90 @@ sub best() return $r; } -sub identify($) +sub identify($%) { - my($r)=@_; + my($r,%args)=@_; - return join " ",map(($r->{$_}||"-"),qw( - written - phonetic - )); + return join " ", + map(($r->{$_}||()),qw( + kanji + kana + )), + (!$args{"at"} ? () : '@'.$r->{"line"}); } -sub result($$) +sub result($$$) { - my($r,$ok)=@_; + my($r,$exam_from,$ok)=@_; local *LOG_APPEND; - open LOG_APPEND,">>".$opt_log or die "append \"$opt_log\": $!"; - print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())." ok=$ok: ".identify($r)."\n"; + open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!"; + print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime()) + ." exam_from=$exam_from ok=$ok: ".identify($r)."\n"; close LOG_APPEND or die "close \"$opt_log\": $!"; push @{$r->{"result"}},{ "time"=>time(), + "exam_from"=>$exam_from, "ok"=>$ok, }; - schedule $r; + schedule $r,$exam_from; } -my @R; -my %keyword; -$T=Tree::Binary::Search->new(); -$T->useNumericComparison(); -local *DICT; -open DICT,$opt_dict or die "open \"$opt_dict\": $!"; -while () { +local *EDICT; +open EDICT,"<:utf8",$opt_edict or die "open \"$opt_edict\": $!"; +my %check=( + #"kanji"=>{}, # exists... + #"kana"=>{}, # exists... + "identify"=>{}, + ); +LINE: +while () { chomp; + next if /^\t [^\t]*$/; + next if /^\s*$/; + #〒 [ゆうびん] /(n) mail/postal service/ + #Tシャツ /T-shirt/ + m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} or do { warn "Unparsable: $_\n"; next LINE; }; my $r; my $rest; - ( - $r->{"written"}, - $r->{"phonetic"}, - $r->{"meaning"}, - $r->{"category"}, - $r->{"learned"}, - $rest, - )=split /\t/; - next if $r->{"written"} eq ""; - die "Excessive argument: $rest" if defined $rest; - while ($r->{"meaning"}=~s/\s*<([-\w]+)>\s*//) { - my $keyword=$1; - $r->{"meaning_keyword"}{$keyword}=1; - $keyword{$keyword}=1; - } - $r->{"meaning"}!~/[<>]/ or warn "Meaning constains invalid characters: ".$r->{"meaning"}; + ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3); + if (!$r->{"kana"}) { + $r->{"kana"}=$r->{"kanji"}; + delete $r->{"kanji"}; + } + while ($rest) { + $rest=~s{^\s*([^/]+?)\s*/}{} or do { warn "Unparsable english: $_\n"; next LINE; }; + my($english)=($1); + push @{$r->{"english"}},$english; + } + $r->{"line"}=$.; push @R,$r; + keys(%check); + while (my($field,$hashref)=each(%check)) { + my $val; + $val=identify $r if $field eq "identify"; + $val||=$r->{$field}; + next if $val eq "悪い にくい"; # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net> + my $origvalref=\$hashref->{$val}; + warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1) + if $$origvalref; + $$origvalref=$r; + } } -close DICT or die "close \"$opt_dict\": $!"; +close EDICT or die "close \"$opt_edict\": $!"; my %identify=map((identify($_)=>$_),@R); local *LOG_READ; -if (!open LOG_READ,$opt_log) { +if (!open LOG_READ,"<:utf8",$opt_log) { warn "open \"$opt_log\": $!"; } else { while () { chomp; # 2002-04-25T21:17:52+0900 ok=1: identify($r) - my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$ok,$identify)= - /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) ok=(\d+): (.+)$/ + my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)= + /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/ or do { warn "Unrecognized line: $_\n"; next; }; $year-=1900; $month--; @@ -143,53 +192,73 @@ if (!open LOG_READ,$opt_log) { or do { warn "Word not found from line: $_\n"; next; }; push @{$r->{"result"}},{ "time"=>$gmtime, + "exam_from"=>$exam_from, "ok"=>$ok, }; } close LOG_READ or die "close \"$opt_log\": $!"; } -for my $r (@R) { - local $opt_debug=0 if $opt_debug<2; - print STDERR $r->{"phonetic"}."\t".$r->{"meaning"} if $opt_debug; - schedule $r; - print STDERR "\n" if $opt_debug; +reschedule $opt_debug>=2; # init + +sub to_chk($$) +{ + my($s,$type)=@_; + + local $_=$s; + $_=lc $_; + s/\bto\b//g if $type eq "english"; + s/\s//g; + s/[(][^)]*[)]//g; + return $_; } -my %exam=( - "phonetic"=>"meaning", - "meaning"=>"phonetic", - ); +sub word_out($) +{ + my($s)=@_; + + return $s if !ref $s; + return join("",map("$_/",@$s)); +} + +# FIXME: balanced $exam_from: +# my $exam_prob_sum=0; +# $exam_prob_sum+=$_->{"prob"} for values(%exam); +# { +# my $rand=int rand $exam_prob_sum; +# my $sum=0; +# keys(%exam); +# while (my($from,$hashref)=each(%exam)) { +# $sum+=$hashref->{"prob"}; +# next if $rand>=$sum; +# $exam_from=$from; +# last; +# } +# die "INTERNAL" if !$exam_from; +# } my @exam=keys(%exam); -my $r_last; -my $exam_from_last; for (;;) { - my $r=best(); - # Do not change $exam_from if $r remained the same: - my $exam_from=$exam_from_last; - if (!$r_last || $r ne $r_last) { - $exam_from=$exam[int rand @exam]; - } - my $exam_to =$exam{$exam_from}; - print $r->{$exam_from}.": "; + reschedule() if !$opt_fast; + my $besthashref=best(); + my $r=$besthashref->{"r"}; + my $exam_from=$besthashref->{"exam_from"}; + my $exam_to=$exam{$exam_from}{"to"}; + print word_out($r->{$exam_from}).": "; my $got=; chomp $got; my $want=$r->{$exam_to}; - print "-------> $want\n"; - (my $got_chk =lc $got )=~s/\s//g; - (my $want_chk=lc $want)=~s/\s//g; - $got_chk=~s/\bto\b//g; - $want_chk=~s/\bto\b//g; - $want_chk=~s/[(][^)]*[)]//g; - warn "Parenthesis ('(',')') not supported in the user input: $got\n" if $got=~/[()]/; - warn "Comma (',') not well supported in the user input: $got\n" if $got=~/,/; + print "-------> ".word_out($want)."\n"; + my $got_chk=to_chk($got,$exam_to); + my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want)); my $ok; - $ok=1 if $got_chk eq $want_chk; # incl. commas - for (split /,/,$want_chk) { - $ok=1 if $got_chk eq $_; + my $substr; + for my $want_chk (@want_chk) { + next if !$want_chk; # discard patterns like: /(P)/ + $ok=1 if $got_chk eq $want_chk; + $substr=1 if $want_chk=~/\Q$got_chk\E/; } if (!$ok) { - if ($exam_to eq "meaning" && $got=~/\S/ && $want=~/\Q$got\E/) { + if ($exam_to eq "english" && $got=~/\S/ && $substr) { my $key; do { print STDERR "Is your answer correct? [y/n] "; @@ -202,6 +271,7 @@ for (;;) { $key=Term::ReadKey::ReadKey(0); print STDERR "\n"; # no echo restore(); + die if $key eq "\x03"; # ctrl-c $ok=1 if $key eq "y"; $ok=0 if $key eq "n"; } while (!defined $ok); @@ -209,8 +279,7 @@ for (;;) { } $ok||=0; print STDERR "result: ".($ok ? "ok" : "WRONG"); - result($r,$ok); + result $r,$exam_from,$ok; print "\n"; - $r_last=$r; - $exam_from_last=$exam_from; + print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok; } -- 1.8.3.1