Moved from WaKan to edict.
authorshort <>
Sun, 22 May 2005 03:03:24 +0000 (03:03 +0000)
committershort <>
Sun, 22 May 2005 03:03:24 +0000 (03:03 +0000)
Probability choosing now separates each $exam_from of a word.
Many bugfixes.

src/jaxam

index 596a688..362cecd 100755 (executable)
--- a/src/jaxam
+++ b/src/jaxam
@@ -10,29 +10,43 @@ require Tree::Binary::Search;
 use Getopt::Long;
 require Time::Piece::ISO;
 require POSIX;
+use utf8;
+use encoding "utf8";
+binmode STDERR,":utf8";        # only STDIN and STDOUT covered by: use encoding "utf8";
 
 
 our $opt_debug=1;
-my $opt_dict=$ENV{"HOME"}."/priv/japan-lang/jfe_1-9.csv";
-my $opt_log=$opt_dict.".log";
+my $opt_edict="jaxam.edict";
+my $opt_log;
+my $opt_fast;
 
 $Getopt::Long::ignorecase=0;
 $Getopt::Long::bundling=1;
 die if !GetOptions(
-               "d|debug+"=>\$opt_debug,
-               "c|dict=s"=>\$opt_dict,
-               "l|log=s" =>\$opt_log,
+               "d|debug+" =>\$opt_debug,
+               "e|edict=s"=>\$opt_edict,
+               "l|log=s"  =>\$opt_log,
+                 "fast!"  =>\$opt_fast,
                );
+$opt_log||=$opt_edict.".log";
+
+my %exam=(
+       "kana"   =>{"prob"=>1,"to"=>"english"},
+       "english"=>{"prob"=>1,"to"=>"kana"},
+       );
 
 my $T;
+my @R;
 
-sub schedule($)
+sub schedule($$%)
 {
-       my($r)=@_;
+       my($r,$exam_from,%args)=@_;
 
+       my $time=$args{"time"}||time();
        my $sum=0;
        for my $result (@{$r->{"result"}}) {
-               my $age=time()-$result->{"time"};
+               next if $result->{"exam_from"} ne $exam_from;
+               my $age=$time-$result->{"time"};
                $age=0 if $age<0;
                # $age:   0..  big importance
                # $age: big..small importance
@@ -49,11 +63,29 @@ sub schedule($)
                $arr=$T->select($sum)->{"arr"};
                $T->delete($sum);
        }
-       splice @$arr,int(rand(@$arr+1)),0,$r;
+       splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
        $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
        print STDERR "; schedule=$sum" if $opt_debug;
 }
 
+sub reschedule(;$)
+{
+       my($opt_debug_force)=@_;
+
+       $T=Tree::Binary::Search->new();
+       $T->useNumericComparison();
+       my $time=time();
+       for my $r (@R) {
+               local $opt_debug=0 if $opt_debug<2;
+               local $opt_debug=$opt_debug_force if defined $opt_debug_force;
+               print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
+               for my $exam_from (keys(%exam)) {
+                       schedule $r,$exam_from,"time"=>$time;
+               }
+               print STDERR "\n" if $opt_debug;
+       }
+}
+
 sub best()
 {
        return if $T->isEmpty();
@@ -66,73 +98,90 @@ sub best()
        return $r;
 }
 
-sub identify($)
+sub identify($%)
 {
-       my($r)=@_;
+       my($r,%args)=@_;
 
-       return join " ",map(($r->{$_}||"-"),qw(
-                       written
-                       phonetic
-                       ));
+       return join " ",
+                       map(($r->{$_}||()),qw(
+                                       kanji
+                                       kana
+                                       )),
+                       (!$args{"at"} ? () : '@'.$r->{"line"});
 }
 
-sub result($$)
+sub result($$$)
 {
-       my($r,$ok)=@_;
+       my($r,$exam_from,$ok)=@_;
 
        local *LOG_APPEND;
-       open LOG_APPEND,">>".$opt_log or die "append \"$opt_log\": $!";
-       print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())." ok=$ok: ".identify($r)."\n";
+       open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
+       print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
+                       ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
        close LOG_APPEND or die "close \"$opt_log\": $!";
 
        push @{$r->{"result"}},{
                "time"=>time(),
+               "exam_from"=>$exam_from,
                "ok"=>$ok,
                };
-       schedule $r;
+       schedule $r,$exam_from;
 }
 
-my @R;
-my %keyword;
-$T=Tree::Binary::Search->new();
-$T->useNumericComparison();
-local *DICT;
-open DICT,$opt_dict or die "open \"$opt_dict\": $!";
-while (<DICT>) {
+local *EDICT;
+open EDICT,"<:utf8",$opt_edict or die "open \"$opt_edict\": $!";
+my %check=(
+       #"kanji"=>{},           # exists...
+       #"kana"=>{},    # exists...
+       "identify"=>{},
+       );
+LINE:
+while (<EDICT>) {
        chomp;
+       next if /^\t [^\t]*$/;
+       next if /^\s*$/;
+       #〒 [ゆうびん] /(n) mail/postal service/
+       #Tシャツ /T-shirt/
+       m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} or do { warn "Unparsable: $_\n"; next LINE; };
        my $r;
        my $rest;
-       (
-               $r->{"written"},
-               $r->{"phonetic"},
-               $r->{"meaning"},
-               $r->{"category"},
-               $r->{"learned"},
-               $rest,
-               )=split /\t/;
-       next if $r->{"written"} eq "";
-       die "Excessive argument: $rest" if defined $rest;
-       while ($r->{"meaning"}=~s/\s*<([-\w]+)>\s*//) {
-               my $keyword=$1;
-               $r->{"meaning_keyword"}{$keyword}=1;
-               $keyword{$keyword}=1;
-               }
-       $r->{"meaning"}!~/[<>]/ or warn "Meaning constains invalid characters: ".$r->{"meaning"};
+       ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
+       if (!$r->{"kana"}) {
+               $r->{"kana"}=$r->{"kanji"};
+               delete $r->{"kanji"};
+       }
+       while ($rest) {
+               $rest=~s{^\s*([^/]+?)\s*/}{} or do { warn "Unparsable english: $_\n"; next LINE; };
+               my($english)=($1);
+               push @{$r->{"english"}},$english;
+       }
+       $r->{"line"}=$.;
        push @R,$r;
+       keys(%check);
+       while (my($field,$hashref)=each(%check)) {
+               my $val;
+               $val=identify $r if $field eq "identify";
+               $val||=$r->{$field};
+               next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
+               my $origvalref=\$hashref->{$val};
+               warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
+                               if $$origvalref;
+               $$origvalref=$r;
+       }
 }
-close DICT or die "close \"$opt_dict\": $!";
+close EDICT or die "close \"$opt_edict\": $!";
 
 my %identify=map((identify($_)=>$_),@R);
 
 local *LOG_READ;
-if (!open LOG_READ,$opt_log) {
+if (!open LOG_READ,"<:utf8",$opt_log) {
        warn "open \"$opt_log\": $!";
 } else {
        while (<LOG_READ>) {
                chomp;
                # 2002-04-25T21:17:52+0900 ok=1: identify($r)
-               my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$ok,$identify)=
-               /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) ok=(\d+): (.+)$/
+               my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
+               /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
                                or do { warn "Unrecognized line: $_\n"; next; };
                $year-=1900;
                $month--;
@@ -143,53 +192,73 @@ if (!open LOG_READ,$opt_log) {
                                or do { warn "Word not found from line: $_\n"; next; };
                push @{$r->{"result"}},{
                        "time"=>$gmtime,
+                       "exam_from"=>$exam_from,
                        "ok"=>$ok,
                        };
        }
        close LOG_READ or die "close \"$opt_log\": $!";
 }
 
-for my $r (@R) {
-       local $opt_debug=0 if $opt_debug<2;
-       print STDERR $r->{"phonetic"}."\t".$r->{"meaning"} if $opt_debug;
-       schedule $r;
-       print STDERR "\n" if $opt_debug;
+reschedule $opt_debug>=2;      # init
+
+sub to_chk($$)
+{
+       my($s,$type)=@_;
+
+       local $_=$s;
+       $_=lc $_;
+       s/\bto\b//g if $type eq "english";
+       s/\s//g;
+       s/[(][^)]*[)]//g;
+       return $_;
 }
 
-my %exam=(
-       "phonetic"=>"meaning",
-       "meaning"=>"phonetic",
-       );
+sub word_out($)
+{
+       my($s)=@_;
+
+       return $s if !ref $s;
+       return join("",map("$_/",@$s));
+}
+
+# FIXME: balanced $exam_from:
+# my $exam_prob_sum=0;
+# $exam_prob_sum+=$_->{"prob"} for values(%exam);
+# {
+#      my $rand=int rand $exam_prob_sum;
+#      my $sum=0;
+#      keys(%exam);
+#      while (my($from,$hashref)=each(%exam)) {
+#              $sum+=$hashref->{"prob"};
+#              next if $rand>=$sum;
+#              $exam_from=$from;
+#              last;
+#      }
+#      die "INTERNAL" if !$exam_from;
+# }
 my @exam=keys(%exam);
-my $r_last;
-my $exam_from_last;
 for (;;) {
-       my $r=best();
-       # Do not change $exam_from if $r remained the same:
-       my $exam_from=$exam_from_last;
-       if (!$r_last || $r ne $r_last) {
-               $exam_from=$exam[int rand @exam];
-               }
-       my $exam_to  =$exam{$exam_from};
-       print $r->{$exam_from}.": ";
+       reschedule() if !$opt_fast;
+       my $besthashref=best();
+       my $r=$besthashref->{"r"};
+       my $exam_from=$besthashref->{"exam_from"};
+       my $exam_to=$exam{$exam_from}{"to"};
+       print word_out($r->{$exam_from}).": ";
        my $got=<STDIN>;
        chomp $got;
        my $want=$r->{$exam_to};
-       print "-------> $want\n";
-       (my $got_chk =lc $got )=~s/\s//g;
-       (my $want_chk=lc $want)=~s/\s//g;
-       $got_chk=~s/\bto\b//g;
-       $want_chk=~s/\bto\b//g;
-       $want_chk=~s/[(][^)]*[)]//g;
-       warn "Parenthesis ('(',')') not supported in the user input: $got\n" if $got=~/[()]/;
-       warn "Comma (',') not well supported in the user input: $got\n" if $got=~/,/;
+       print "-------> ".word_out($want)."\n";
+       my $got_chk=to_chk($got,$exam_to);
+       my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
        my $ok;
-       $ok=1 if $got_chk eq $want_chk; # incl. commas
-       for (split /,/,$want_chk) {
-               $ok=1 if $got_chk eq $_;
+       my $substr;
+       for my $want_chk (@want_chk) {
+               next if !$want_chk;     # discard patterns like: /(P)/
+               $ok=1 if $got_chk eq $want_chk;
+               $substr=1 if $want_chk=~/\Q$got_chk\E/;
        }
        if (!$ok) {
-               if ($exam_to eq "meaning" && $got=~/\S/ && $want=~/\Q$got\E/) {
+               if ($exam_to eq "english" && $got=~/\S/ && $substr) {
                        my $key;
                        do {
                                print STDERR "Is your answer correct? [y/n] ";
@@ -202,6 +271,7 @@ for (;;) {
                                $key=Term::ReadKey::ReadKey(0);
                                print STDERR "\n";      # no echo
                                restore();
+                               die if $key eq "\x03";  # ctrl-c
                                $ok=1 if $key eq "y";
                                $ok=0 if $key eq "n";
                        } while (!defined $ok);
@@ -209,8 +279,7 @@ for (;;) {
        }
        $ok||=0;
        print STDERR "result: ".($ok ? "ok" : "WRONG");
-       result($r,$ok);
+       result $r,$exam_from,$ok;
        print "\n";
-       $r_last=$r;
-       $exam_from_last=$exam_from;
+       print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
 }