use Getopt::Long;
require Time::Piece::ISO;
require POSIX;
+use utf8;
+use encoding "utf8";
+binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8";
our $opt_debug=1;
-my $opt_dict=$ENV{"HOME"}."/priv/japan-lang/jfe_1-9.csv";
-my $opt_log=$opt_dict.".log";
+my $opt_edict="$0.edict";
+my $opt_log;
+my $opt_wrong;
+my $opt_errors_wait_key;
+my $opt_tee;
+my $opt_fast;
+my $opt_cycle=10;
$Getopt::Long::ignorecase=0;
$Getopt::Long::bundling=1;
die if !GetOptions(
- "d|debug+"=>\$opt_debug,
- "c|dict=s"=>\$opt_dict,
- "l|log=s" =>\$opt_log,
+ "d|debug+" =>\$opt_debug,
+ "e|edict=s" =>\$opt_edict,
+ "l|log=s" =>\$opt_log,
+ "w|wrong" =>\$opt_wrong, # Filter the input lines.
+ "errors-wait-key"=>\$opt_errors_wait_key, # Only for -w|--wrong.
+ "tee!" =>\$opt_tee, # Output all the read lines. Only for -w|--wrong.
+ "fast!" =>\$opt_fast,
+ "cycle=s" =>\$opt_cycle,
);
+$opt_log||=$opt_edict.".log";
+
+my %exam=(
+ "kana" =>{"to"=>"english"}, # "prob"=>1,
+ "english"=>{"to"=>"kana"}, # "prob"=>1,
+ );
+my %ok=(
+ 0=>-1,
+ 1=>+4,
+ );
my $T;
+my @R;
+my %identify;
+my %kana_to_r;
+my $result_count=0;
-sub schedule($)
+sub siglog($)
{
- my($r)=@_;
+ my($a)=@_;
+
+ return 0 if !$a;
+ my $sig=($a>0)-($a<0);
+ return $sig*log(abs $a);
+}
+sub schedule($$%)
+{
+ my($r,$exam_from,%args)=@_;
+
+ return if $r->{"orig"}=~/\Q(laceno)\E/;
+ my $time=$args{"time"}||time();
my $sum=0;
for my $result (@{$r->{"result"}}) {
- my $age=time()-$result->{"time"};
+ next if $result->{"exam_from"} ne $exam_from;
+ my $age=$time-$result->{"time"};
$age=0 if $age<0;
# $age: 0.. big importance
# $age: big..small importance
# $sum: small..soon
# 1/log(2+$age): big.. big importance
# 1/log(2+$age): 0..small importance
- $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1);
+ $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
+ if (!$result->{"ok"}) {
+ my $count_diff=$result_count-$result->{"count"};
+ $count_diff-=$opt_cycle;
+ $sum-=siglog($count_diff);
+ }
}
+ my $scale=1;
+ if ($r->{"pri"}) {
+ my $abspri=$r->{"pri"};
+ $abspri=1/(-$abspri) if $abspri<0;
+ $abspri=2 if $abspri<2;
+ $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1);
+ }
my $arr=[];
if ($T->exists($sum)) {
$arr=$T->select($sum)->{"arr"};
$T->delete($sum);
}
- splice @$arr,int(rand(@$arr+1)),0,$r;
+ splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
$T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
print STDERR "; schedule=$sum" if $opt_debug;
}
+sub reschedule(;$)
+{
+ my($opt_debug_force)=@_;
+
+ $T=Tree::Binary::Search->new();
+ $T->useNumericComparison();
+ my $time=time();
+ for my $r (@R) {
+ local $opt_debug=0 if $opt_debug<2;
+ local $opt_debug=$opt_debug_force if defined $opt_debug_force;
+ print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
+ for my $exam_from (keys(%exam)) {
+ schedule $r,$exam_from,"time"=>$time;
+ }
+ print STDERR "\n" if $opt_debug;
+ }
+}
+
sub best()
{
return if $T->isEmpty();
$T->delete($min);
my $r=shift @$arr;
$T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
- print STDERR "min=$min\n" if $opt_debug;
+ if ($opt_debug) {
+ print STDERR "min=$min";
+ print STDERR " of ".(1+@$arr) if @$arr;
+ print STDERR "\n";
+ }
return $r;
}
-sub identify($)
+sub identify($%)
{
- my($r)=@_;
+ my($r,%args)=@_;
- return join " ",map(($r->{$_}||"-"),qw(
- written
- phonetic
- ));
+ return join " ",
+ map(($r->{$_}||()),qw(
+ kanji
+ kana
+ )),
+ (!$args{"at"} ? () : '@'.$r->{"line"});
}
-sub result($$)
+sub result($$$%)
{
- my($r,$ok)=@_;
-
- local *LOG_APPEND;
- open LOG_APPEND,">>".$opt_log or die "append \"$opt_log\": $!";
- print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())." ok=$ok: ".identify($r)."\n";
- close LOG_APPEND or die "close \"$opt_log\": $!";
+ my($r,$exam_from,$ok,%args)=@_;
+ if ($args{"write"}) {
+ local *LOG_APPEND;
+ open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
+ print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
+ ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
+ close LOG_APPEND or die "close \"$opt_log\": $!";
+ }
+ $args{"time"}||=time();
push @{$r->{"result"}},{
- "time"=>time(),
+ "time"=>$args{"time"},
+ "exam_from"=>$exam_from,
"ok"=>$ok,
+ "count"=>$result_count++,
};
- schedule $r;
}
-my @R;
-my %keyword;
-$T=Tree::Binary::Search->new();
-$T->useNumericComparison();
-local *DICT;
-open DICT,$opt_dict or die "open \"$opt_dict\": $!";
-while (<DICT>) {
- chomp;
+my $errors_wait_key_count;
+END {
+ if ($errors_wait_key_count && $opt_errors_wait_key) {
+ print STDERR "Errors occured. Press any key to continue...";
+ <STDIN>;
+ }
+}
+
+sub edict_line_parse(;$)
+{
+ my($s,$line)=@_;
+
+ $s=$_ if !defined $s;
+ $line=$. if !defined $line;
+ chomp $s;
+ local $_=$s;
+ s/;.*//;
+ return if /^\s*$/;
+ #〒 [ゆうびん] /(n) mail/postal service/
+ #Tシャツ /T-shirt/
+ m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
+ or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
my $r;
my $rest;
- (
- $r->{"written"},
- $r->{"phonetic"},
- $r->{"meaning"},
- $r->{"category"},
- $r->{"learned"},
- $rest,
- )=split /\t/;
- next if $r->{"written"} eq "";
- die "Excessive argument: $rest" if defined $rest;
- while ($r->{"meaning"}=~s/\s*<([-\w]+)>\s*//) {
- my $keyword=$1;
- $r->{"meaning_keyword"}{$keyword}=1;
- $keyword{$keyword}=1;
+ ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
+ if (!$r->{"kana"}) {
+ $r->{"kana"}=$r->{"kanji"};
+ delete $r->{"kanji"};
+ }
+ $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e;
+ while ($rest) {
+ $rest=~s{^\s*([^/]+?)\s*/}{}
+ or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
+ my($english)=($1);
+ push @{$r->{"english"}},$english;
+ }
+ $r->{"line"}=$line;
+ $r->{"orig"}=$s;
+ return $r;
+}
+
+sub to_chk($$)
+{
+ my($s,$type)=@_;
+
+ local $_=$s;
+ $_=lc $_;
+ s/\bto\b//g if $type eq "english";
+ s/\s//g;
+ s/[(][^)]*[)]//g;
+ return $_;
+}
+
+sub edict_read($)
+{
+ my($pathname)=@_;
+
+ local *EDICT;
+ open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
+ my %check=(
+ #"kanji"=>{}, # exists...
+ #"kana"=>{}, # exists...
+ "identify"=>{},
+ );
+ while (<EDICT>) {
+ my $r=edict_line_parse() or next;
+ push @R,$r;
+ push @{$kana_to_r{$r->{"kana"}}},$r;
+ keys(%check);
+ while (my($field,$hashref)=each(%check)) {
+ my $val;
+ $val=identify $r if $field eq "identify";
+ $val||=$r->{$field};
+ next if $val eq "悪い にくい"; # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
+ my $origvalref=\$hashref->{$val};
+ warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
+ if $$origvalref;
+ $$origvalref=$r;
}
- $r->{"meaning"}!~/[<>]/ or warn "Meaning constains invalid characters: ".$r->{"meaning"};
- push @R,$r;
+ }
+ close EDICT or die "close \"$pathname\": $!";
+ %identify=map((identify($_)=>$_),@R);
}
-close DICT or die "close \"$opt_dict\": $!";
-my %identify=map((identify($_)=>$_),@R);
+sub log_read($)
+{
+ my($pathname)=@_;
-local *LOG_READ;
-if (!open LOG_READ,$opt_log) {
- warn "open \"$opt_log\": $!";
-} else {
+ local *LOG_READ;
+ if (!open LOG_READ,"<:utf8",$pathname) {
+ warn "open \"$pathname\": $!";
+ return;
+ }
while (<LOG_READ>) {
chomp;
# 2002-04-25T21:17:52+0900 ok=1: identify($r)
- my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$ok,$identify)=
- /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) ok=(\d+): (.+)$/
+ my($year, $month, $day, $hour, $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
+ /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
or do { warn "Unrecognized line: $_\n"; next; };
$year-=1900;
$month--;
my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
my $r=$identify{$identify}
or do { warn "Word not found from line: $_\n"; next; };
- push @{$r->{"result"}},{
- "time"=>$gmtime,
- "ok"=>$ok,
- };
+ result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime;
}
- close LOG_READ or die "close \"$opt_log\": $!";
+ close LOG_READ or die "close \"$pathname\": $!";
}
-for my $r (@R) {
- local $opt_debug=0 if $opt_debug<2;
- print STDERR $r->{"phonetic"}."\t".$r->{"meaning"} if $opt_debug;
- schedule $r;
- print STDERR "\n" if $opt_debug;
+sub wrong_read()
+{
+ while (<>) {
+ print if $opt_tee;
+ chomp;
+ my $w=edict_line_parse() or next;
+ my $r=$identify{identify $w};
+ do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
+ result $r,"kana",0,"write"=>1;
+ }
}
-my %exam=(
- "phonetic"=>"meaning",
- "meaning"=>"phonetic",
- );
-my @exam=keys(%exam);
-my $r_last;
-my $exam_from_last;
-for (;;) {
- my $r=best();
- # Do not change $exam_from if $r remained the same:
- my $exam_from=$exam_from_last;
- if (!$r_last || $r ne $r_last) {
- $exam_from=$exam[int rand @exam];
+sub word_out($)
+{
+ my($s)=@_;
+
+ return $s if !ref $s;
+ return join("",map("$_/",@$s));
+}
+
+sub exam()
+{
+ my @exam=keys(%exam);
+ for (;;) {
+ reschedule() if !$opt_fast;
+ my $besthashref=best();
+ my $r=$besthashref->{"r"};
+ my $exam_from=$besthashref->{"exam_from"};
+ my $exam_to=$exam{$exam_from}{"to"};
+ print word_out($r->{$exam_from}).": ";
+ my $got=<STDIN>;
+ chomp $got;
+ my $want=$r->{$exam_to};
+ my $got_chk=to_chk($got,$exam_to);
+ my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
+ my $ok;
+ my $substr;
+ for my $want_chk (@want_chk) {
+ next if !$want_chk; # discard patterns like: /(P)/
+ $ok=1 if $got_chk eq $want_chk;
+ $substr=1 if $want_chk=~/\Q$got_chk\E/;
}
- my $exam_to =$exam{$exam_from};
- print $r->{$exam_from}.": ";
- my $got=<STDIN>;
- chomp $got;
- my $want=$r->{$exam_to};
- print "-------> $want\n";
- (my $got_chk =lc $got )=~s/\s//g;
- (my $want_chk=lc $want)=~s/\s//g;
- $got_chk=~s/\bto\b//g;
- $want_chk=~s/\bto\b//g;
- $want_chk=~s/[(][^)]*[)]//g;
- warn "Parenthesis ('(',')') not supported in the user input: $got\n" if $got=~/[()]/;
- warn "Comma (',') not well supported in the user input: $got\n" if $got=~/,/;
- my $ok;
- $ok=1 if $got_chk eq $want_chk; # incl. commas
- for (split /,/,$want_chk) {
- $ok=1 if $got_chk eq $_;
- }
- if (!$ok) {
- if ($exam_to eq "meaning" && $got=~/\S/ && $want=~/\Q$got\E/) {
- my $key;
- do {
- print STDERR "Is your answer correct? [y/n] ";
- sub restore
- {
- Term::ReadKey::ReadMode(0);
+ my $aliased;
+ if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
+ && (my $arrref=$kana_to_r{$got})) {
+ my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
+ my $intended;
+ for my $kana_alias (@$arrref) {
+ for my $kana_alias_english (@{$kana_alias->{"english"}}) {
+ my $chk_english=to_chk $kana_alias_english,"english" or next;
+ next if !$chk_english_from{$chk_english};
+ print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
+ print "OK if alias -> ".$kana_alias->{"orig"}."\n";
+ $ok=1;
+ $aliased=1;
}
- local $SIG{"__DIE__"}=\&restore;
- Term::ReadKey::ReadMode(4);
- $key=Term::ReadKey::ReadKey(0);
- print STDERR "\n"; # no echo
- restore();
- $ok=1 if $key eq "y";
- $ok=0 if $key eq "n";
- } while (!defined $ok);
+ }
}
+ print "-------------> ".word_out($want)."\n" if !$aliased;
+ if (!$ok) {
+ if ($exam_to eq "english" && $got=~/\S/ && $substr) {
+ my $key;
+ do {
+ print STDERR "Is your answer correct? [y/n] ";
+ sub restore
+ {
+ Term::ReadKey::ReadMode(0);
+ }
+ local $SIG{"__DIE__"}=\&restore;
+ Term::ReadKey::ReadMode(4);
+ $key=Term::ReadKey::ReadKey(0);
+ print STDERR "\n"; # no echo
+ restore();
+ die if $key eq "\x03"; # ctrl-c
+ $ok=1 if $key eq "y";
+ $ok=0 if $key eq "n";
+ } while (!defined $ok);
+ }
+ }
+ $ok||=0;
+ if (!$aliased) {
+ result $r,$exam_from,$ok,"write"=>1;
+ print STDERR "result: ".($ok ? "ok" : "WRONG");
+ schedule $r,$exam_from;
+ print "\n";
+ }
+ print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
}
- $ok||=0;
- print STDERR "result: ".($ok ? "ok" : "WRONG");
- result($r,$ok);
- print "\n";
- $r_last=$r;
- $exam_from_last=$exam_from;
}
+
+# FIXME: balanced $exam_from:
+# my $exam_prob_sum=0;
+# $exam_prob_sum+=$_->{"prob"} for values(%exam);
+# {
+# my $rand=int rand $exam_prob_sum;
+# my $sum=0;
+# keys(%exam);
+# while (my($from,$hashref)=each(%exam)) {
+# $sum+=$hashref->{"prob"};
+# next if $rand>=$sum;
+# $exam_from=$from;
+# last;
+# }
+# die "INTERNAL" if !$exam_from;
+# }
+
+edict_read $opt_edict;
+
+do { wrong_read(); exit; } if $opt_wrong;
+
+log_read $opt_log;
+reschedule $opt_debug>=2; # init
+
+exam();