Moved from WaKan to edict.
[nethome.git] / src / jaxam
1 #! /usr/bin/perl
2 #
3 # $Id$
4
5
6 use strict;
7 use warnings;
8 require Term::ReadKey;
9 require Tree::Binary::Search;
10 use Getopt::Long;
11 require Time::Piece::ISO;
12 require POSIX;
13 use utf8;
14 use encoding "utf8";
15 binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8";
16
17
18 our $opt_debug=1;
19 my $opt_edict="jaxam.edict";
20 my $opt_log;
21 my $opt_fast;
22
23 $Getopt::Long::ignorecase=0;
24 $Getopt::Long::bundling=1;
25 die if !GetOptions(
26                 "d|debug+" =>\$opt_debug,
27                 "e|edict=s"=>\$opt_edict,
28                 "l|log=s"  =>\$opt_log,
29                   "fast!"  =>\$opt_fast,
30                 );
31 $opt_log||=$opt_edict.".log";
32
33 my %exam=(
34         "kana"   =>{"prob"=>1,"to"=>"english"},
35         "english"=>{"prob"=>1,"to"=>"kana"},
36         );
37
38 my $T;
39 my @R;
40
41 sub schedule($$%)
42 {
43         my($r,$exam_from,%args)=@_;
44
45         my $time=$args{"time"}||time();
46         my $sum=0;
47         for my $result (@{$r->{"result"}}) {
48                 next if $result->{"exam_from"} ne $exam_from;
49                 my $age=$time-$result->{"time"};
50                 $age=0 if $age<0;
51                 # $age:   0..  big importance
52                 # $age: big..small importance
53                 # $ok: 0..mistake
54                 # $ok: 1..correct
55                 # $sum:   big..far
56                 # $sum: small..soon
57                 # 1/log(2+$age): big..  big importance
58                 # 1/log(2+$age):   0..small importance
59                 $sum+=1/log(2+$age)*($result->{"ok"}?+1:-1);
60                 }
61         my $arr=[];
62         if ($T->exists($sum)) {
63                 $arr=$T->select($sum)->{"arr"};
64                 $T->delete($sum);
65         }
66         splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
67         $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
68         print STDERR "; schedule=$sum" if $opt_debug;
69 }
70
71 sub reschedule(;$)
72 {
73         my($opt_debug_force)=@_;
74
75         $T=Tree::Binary::Search->new();
76         $T->useNumericComparison();
77         my $time=time();
78         for my $r (@R) {
79                 local $opt_debug=0 if $opt_debug<2;
80                 local $opt_debug=$opt_debug_force if defined $opt_debug_force;
81                 print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
82                 for my $exam_from (keys(%exam)) {
83                         schedule $r,$exam_from,"time"=>$time;
84                 }
85                 print STDERR "\n" if $opt_debug;
86         }
87 }
88
89 sub best()
90 {
91         return if $T->isEmpty();
92         my $min=$T->min()->{"key"};
93         my $arr=$T->select($min)->{"arr"};
94         $T->delete($min);
95         my $r=shift @$arr;
96         $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
97         print STDERR "min=$min\n" if $opt_debug;
98         return $r;
99 }
100
101 sub identify($%)
102 {
103         my($r,%args)=@_;
104
105         return join " ",
106                         map(($r->{$_}||()),qw(
107                                         kanji
108                                         kana
109                                         )),
110                         (!$args{"at"} ? () : '@'.$r->{"line"});
111 }
112
113 sub result($$$)
114 {
115         my($r,$exam_from,$ok)=@_;
116
117         local *LOG_APPEND;
118         open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
119         print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
120                         ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
121         close LOG_APPEND or die "close \"$opt_log\": $!";
122
123         push @{$r->{"result"}},{
124                 "time"=>time(),
125                 "exam_from"=>$exam_from,
126                 "ok"=>$ok,
127                 };
128         schedule $r,$exam_from;
129 }
130
131 local *EDICT;
132 open EDICT,"<:utf8",$opt_edict or die "open \"$opt_edict\": $!";
133 my %check=(
134         #"kanji"=>{},           # exists...
135         #"kana"=>{},    # exists...
136         "identify"=>{},
137         );
138 LINE:
139 while (<EDICT>) {
140         chomp;
141         next if /^\t [^\t]*$/;
142         next if /^\s*$/;
143         #〒 [ゆうびん] /(n) mail/postal service/
144         #Tシャツ /T-shirt/
145         m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$} or do { warn "Unparsable: $_\n"; next LINE; };
146         my $r;
147         my $rest;
148         ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
149         if (!$r->{"kana"}) {
150                 $r->{"kana"}=$r->{"kanji"};
151                 delete $r->{"kanji"};
152         }
153         while ($rest) {
154                 $rest=~s{^\s*([^/]+?)\s*/}{} or do { warn "Unparsable english: $_\n"; next LINE; };
155                 my($english)=($1);
156                 push @{$r->{"english"}},$english;
157         }
158         $r->{"line"}=$.;
159         push @R,$r;
160         keys(%check);
161         while (my($field,$hashref)=each(%check)) {
162                 my $val;
163                 $val=identify $r if $field eq "identify";
164                 $val||=$r->{$field};
165                 next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
166                 my $origvalref=\$hashref->{$val};
167                 warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
168                                 if $$origvalref;
169                 $$origvalref=$r;
170         }
171 }
172 close EDICT or die "close \"$opt_edict\": $!";
173
174 my %identify=map((identify($_)=>$_),@R);
175
176 local *LOG_READ;
177 if (!open LOG_READ,"<:utf8",$opt_log) {
178         warn "open \"$opt_log\": $!";
179 } else {
180         while (<LOG_READ>) {
181                 chomp;
182                 # 2002-04-25T21:17:52+0900 ok=1: identify($r)
183                 my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
184                 /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
185                                 or do { warn "Unrecognized line: $_\n"; next; };
186                 $year-=1900;
187                 $month--;
188                 my $localtime=POSIX::mktime($second,$minute,$hour,$day,$month,$year);
189                 $localtime or do { warn "Unparsable time at line: $_\n"; next; };
190                 my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
191                 my $r=$identify{$identify}
192                                 or do { warn "Word not found from line: $_\n"; next; };
193                 push @{$r->{"result"}},{
194                         "time"=>$gmtime,
195                         "exam_from"=>$exam_from,
196                         "ok"=>$ok,
197                         };
198         }
199         close LOG_READ or die "close \"$opt_log\": $!";
200 }
201
202 reschedule $opt_debug>=2;       # init
203
204 sub to_chk($$)
205 {
206         my($s,$type)=@_;
207
208         local $_=$s;
209         $_=lc $_;
210         s/\bto\b//g if $type eq "english";
211         s/\s//g;
212         s/[(][^)]*[)]//g;
213         return $_;
214 }
215
216 sub word_out($)
217 {
218         my($s)=@_;
219
220         return $s if !ref $s;
221         return join("",map("$_/",@$s));
222 }
223
224 # FIXME: balanced $exam_from:
225 # my $exam_prob_sum=0;
226 # $exam_prob_sum+=$_->{"prob"} for values(%exam);
227 # {
228 #       my $rand=int rand $exam_prob_sum;
229 #       my $sum=0;
230 #       keys(%exam);
231 #       while (my($from,$hashref)=each(%exam)) {
232 #               $sum+=$hashref->{"prob"};
233 #               next if $rand>=$sum;
234 #               $exam_from=$from;
235 #               last;
236 #       }
237 #       die "INTERNAL" if !$exam_from;
238 # }
239 my @exam=keys(%exam);
240 for (;;) {
241         reschedule() if !$opt_fast;
242         my $besthashref=best();
243         my $r=$besthashref->{"r"};
244         my $exam_from=$besthashref->{"exam_from"};
245         my $exam_to=$exam{$exam_from}{"to"};
246         print word_out($r->{$exam_from}).": ";
247         my $got=<STDIN>;
248         chomp $got;
249         my $want=$r->{$exam_to};
250         print "-------> ".word_out($want)."\n";
251         my $got_chk=to_chk($got,$exam_to);
252         my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
253         my $ok;
254         my $substr;
255         for my $want_chk (@want_chk) {
256                 next if !$want_chk;     # discard patterns like: /(P)/
257                 $ok=1 if $got_chk eq $want_chk;
258                 $substr=1 if $want_chk=~/\Q$got_chk\E/;
259         }
260         if (!$ok) {
261                 if ($exam_to eq "english" && $got=~/\S/ && $substr) {
262                         my $key;
263                         do {
264                                 print STDERR "Is your answer correct? [y/n] ";
265                                 sub restore
266                                 {
267                                         Term::ReadKey::ReadMode(0);
268                                 }
269                                 local $SIG{"__DIE__"}=\&restore;
270                                 Term::ReadKey::ReadMode(4);
271                                 $key=Term::ReadKey::ReadKey(0);
272                                 print STDERR "\n";      # no echo
273                                 restore();
274                                 die if $key eq "\x03";  # ctrl-c
275                                 $ok=1 if $key eq "y";
276                                 $ok=0 if $key eq "n";
277                         } while (!defined $ok);
278                 }
279         }
280         $ok||=0;
281         print STDERR "result: ".($ok ? "ok" : "WRONG");
282         result $r,$exam_from,$ok;
283         print "\n";
284         print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
285 }