Resolve amiguous english->kana translations.
[nethome.git] / src / jaxam
1 #! /usr/bin/perl
2 #
3 # $Id$
4 # for .vimrc:
5 #       noremap <C-w> V:!~/src/jaxam --wrong --tee --errors-wait-key<cr>
6
7
8 use strict;
9 use warnings;
10 require Term::ReadKey;
11 require Tree::Binary::Search;
12 use Getopt::Long;
13 require Time::Piece::ISO;
14 require POSIX;
15 use utf8;
16 use encoding "utf8";
17 binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8";
18
19
20 our $opt_debug=1;
21 my $opt_edict="$0.edict";
22 my $opt_log;
23 my $opt_wrong;
24 my $opt_errors_wait_key;
25 my $opt_tee;
26 my $opt_fast;
27
28 $Getopt::Long::ignorecase=0;
29 $Getopt::Long::bundling=1;
30 die if !GetOptions(
31                 "d|debug+"         =>\$opt_debug,
32                 "e|edict=s"        =>\$opt_edict,
33                 "l|log=s"          =>\$opt_log,
34                 "w|wrong"          =>\$opt_wrong,               # Filter the input lines.
35                   "errors-wait-key"=>\$opt_errors_wait_key,     # Only for -w|--wrong.
36                   "tee!"           =>\$opt_tee,                 # Output all the read lines. Only for -w|--wrong.
37                   "fast!"          =>\$opt_fast,
38                 );
39 $opt_log||=$opt_edict.".log";
40
41 my %exam=(
42         "kana"   =>{"to"=>"english"},   # "prob"=>1,
43         "english"=>{"to"=>"kana"},      # "prob"=>1,
44         );
45 my %ok=(
46         0=>-1,
47         1=>+4,
48         );
49
50 my $T;
51 my @R;
52 my %identify;
53 my %kana_to_r;
54
55 sub schedule($$%)
56 {
57         my($r,$exam_from,%args)=@_;
58
59         my $time=$args{"time"}||time();
60         my $sum=0;
61         for my $result (@{$r->{"result"}}) {
62                 next if $result->{"exam_from"} ne $exam_from;
63                 my $age=$time-$result->{"time"};
64                 $age=0 if $age<0;
65                 # $age:   0..  big importance
66                 # $age: big..small importance
67                 # $ok: 0..mistake
68                 # $ok: 1..correct
69                 # $sum:   big..far
70                 # $sum: small..soon
71                 # 1/log(2+$age): big..  big importance
72                 # 1/log(2+$age):   0..small importance
73                 $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
74                 }
75         my $arr=[];
76         if ($T->exists($sum)) {
77                 $arr=$T->select($sum)->{"arr"};
78                 $T->delete($sum);
79         }
80         splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
81         $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
82         print STDERR "; schedule=$sum" if $opt_debug;
83 }
84
85 sub reschedule(;$)
86 {
87         my($opt_debug_force)=@_;
88
89         $T=Tree::Binary::Search->new();
90         $T->useNumericComparison();
91         my $time=time();
92         for my $r (@R) {
93                 local $opt_debug=0 if $opt_debug<2;
94                 local $opt_debug=$opt_debug_force if defined $opt_debug_force;
95                 print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
96                 for my $exam_from (keys(%exam)) {
97                         schedule $r,$exam_from,"time"=>$time;
98                 }
99                 print STDERR "\n" if $opt_debug;
100         }
101 }
102
103 sub best()
104 {
105         return if $T->isEmpty();
106         my $min=$T->min()->{"key"};
107         my $arr=$T->select($min)->{"arr"};
108         $T->delete($min);
109         my $r=shift @$arr;
110         $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
111         if ($opt_debug) {
112                 print STDERR "min=$min";
113                 print STDERR " of ".(1+@$arr) if @$arr;
114                 print STDERR "\n";
115         }
116         return $r;
117 }
118
119 sub identify($%)
120 {
121         my($r,%args)=@_;
122
123         return join " ",
124                         map(($r->{$_}||()),qw(
125                                         kanji
126                                         kana
127                                         )),
128                         (!$args{"at"} ? () : '@'.$r->{"line"});
129 }
130
131 sub result($$$%)
132 {
133         my($r,$exam_from,$ok,%args)=@_;
134
135         if ($args{"write"}) {
136                 local *LOG_APPEND;
137                 open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
138                 print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
139                                 ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
140                 close LOG_APPEND or die "close \"$opt_log\": $!";
141         }
142
143         push @{$r->{"result"}},{
144                 "time"=>time(),
145                 "exam_from"=>$exam_from,
146                 "ok"=>$ok,
147                 };
148 }
149
150 my $errors_wait_key_count;
151 END {
152         if ($errors_wait_key_count && $opt_errors_wait_key) {
153                 print STDERR "Errors occured. Press any key to continue...";
154                 <STDIN>;
155         }
156 }
157
158 sub edict_line_parse(;$)
159 {
160         my($s,$line)=@_;
161
162         $s=$_ if !defined $s;
163         $line=$. if !defined $s;
164         chomp $s;
165         local $_=$s;
166         s/;.*//;
167         return if /^\s*$/;
168         #〒 [ゆうびん] /(n) mail/postal service/
169         #Tシャツ /T-shirt/
170         m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
171                         or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
172         my $r;
173         my $rest;
174         ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
175         if (!$r->{"kana"}) {
176                 $r->{"kana"}=$r->{"kanji"};
177                 delete $r->{"kanji"};
178         }
179         while ($rest) {
180                 $rest=~s{^\s*([^/]+?)\s*/}{}
181                                 or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
182                 my($english)=($1);
183                 push @{$r->{"english"}},$english;
184         }
185         $r->{"line"}=$line;
186         $r->{"orig"}=$s;
187         return $r;
188 }
189
190 sub to_chk($$)
191 {
192         my($s,$type)=@_;
193
194         local $_=$s;
195         $_=lc $_;
196         s/\bto\b//g if $type eq "english";
197         s/\s//g;
198         s/[(][^)]*[)]//g;
199         return $_;
200 }
201
202 sub edict_read($)
203 {
204         my($pathname)=@_;
205
206         local *EDICT;
207         open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
208         my %check=(
209                 #"kanji"=>{},   # exists...
210                 #"kana"=>{},    # exists...
211                 "identify"=>{},
212                 );
213         while (<EDICT>) {
214                 my $r=edict_line_parse() or next;
215                 next if /\Q(laceno)\E/;
216                 push @R,$r;
217                 push @{$kana_to_r{$r->{"kana"}}},$r;
218                 keys(%check);
219                 while (my($field,$hashref)=each(%check)) {
220                         my $val;
221                         $val=identify $r if $field eq "identify";
222                         $val||=$r->{$field};
223                         next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
224                         my $origvalref=\$hashref->{$val};
225                         warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
226                                         if $$origvalref;
227                         $$origvalref=$r;
228                 }
229         }
230         close EDICT or die "close \"$pathname\": $!";
231         %identify=map((identify($_)=>$_),@R);
232 }
233
234 sub log_read($)
235 {
236         my($pathname)=@_;
237
238         local *LOG_READ;
239         if (!open LOG_READ,"<:utf8",$pathname) {
240                 warn "open \"$pathname\": $!";
241                 return;
242         }
243         while (<LOG_READ>) {
244                 chomp;
245                 # 2002-04-25T21:17:52+0900 ok=1: identify($r)
246                 my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
247                 /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
248                                 or do { warn "Unrecognized line: $_\n"; next; };
249                 $year-=1900;
250                 $month--;
251                 my $localtime=POSIX::mktime($second,$minute,$hour,$day,$month,$year);
252                 $localtime or do { warn "Unparsable time at line: $_\n"; next; };
253                 my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
254                 my $r=$identify{$identify}
255                                 or do { warn "Word not found from line: $_\n"; next; };
256                 result $r,$exam_from,$ok,"write"=>0;
257         }
258         close LOG_READ or die "close \"$pathname\": $!";
259 }
260
261 sub wrong_read()
262 {
263         while (<>) {
264                 print if $opt_tee;
265                 chomp;
266                 my $w=edict_line_parse() or next;
267                 my $r=$identify{identify $w};
268                 do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
269                 result $r,"kana",0,"write"=>1;
270         }
271 }
272
273 sub word_out($)
274 {
275         my($s)=@_;
276
277         return $s if !ref $s;
278         return join("",map("$_/",@$s));
279 }
280
281 sub exam()
282 {
283         my @exam=keys(%exam);
284         for (;;) {
285                 reschedule() if !$opt_fast;
286                 my $besthashref=best();
287                 my $r=$besthashref->{"r"};
288                 my $exam_from=$besthashref->{"exam_from"};
289                 my $exam_to=$exam{$exam_from}{"to"};
290                 print word_out($r->{$exam_from}).": ";
291                 my $got=<STDIN>;
292                 chomp $got;
293                 my $want=$r->{$exam_to};
294                 my $got_chk=to_chk($got,$exam_to);
295                 my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
296                 my $ok;
297                 my $substr;
298                 for my $want_chk (@want_chk) {
299                         next if !$want_chk;     # discard patterns like: /(P)/
300                         $ok=1 if $got_chk eq $want_chk;
301                         $substr=1 if $want_chk=~/\Q$got_chk\E/;
302                 }
303                 my $aliased;
304                 if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
305                                 && (my $arrref=$kana_to_r{$got})) {
306                         my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
307                         my $intended;
308                         for my $kana_alias (@$arrref) {
309                                 for my $kana_alias_english (@{$kana_alias->{"english"}}) {
310                                         my $chk_english=to_chk $kana_alias_english,"english" or next;
311                                         next if !$chk_english_from{$chk_english};
312                                         print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
313                                         print "OK if alias -> ".$kana_alias->{"orig"}."\n";
314                                         $ok=1;
315                                         $aliased=1;
316                                 }
317                         }
318                 }
319                 print "-------------> ".word_out($want)."\n" if !$aliased;
320                 if (!$ok) {
321                         if ($exam_to eq "english" && $got=~/\S/ && $substr) {
322                                 my $key;
323                                 do {
324                                         print STDERR "Is your answer correct? [y/n] ";
325                                         sub restore
326                                         {
327                                                 Term::ReadKey::ReadMode(0);
328                                         }
329                                         local $SIG{"__DIE__"}=\&restore;
330                                         Term::ReadKey::ReadMode(4);
331                                         $key=Term::ReadKey::ReadKey(0);
332                                         print STDERR "\n";      # no echo
333                                         restore();
334                                         die if $key eq "\x03";  # ctrl-c
335                                         $ok=1 if $key eq "y";
336                                         $ok=0 if $key eq "n";
337                                 } while (!defined $ok);
338                         }
339                 }
340                 $ok||=0;
341                 if (!$aliased) {
342                         result $r,$exam_from,$ok,"write"=>1;
343                         print STDERR "result: ".($ok ? "ok" : "WRONG");
344                         schedule $r,$exam_from;
345                         print "\n";
346                 }
347                 print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
348         }
349 }
350
351 # FIXME: balanced $exam_from:
352 # my $exam_prob_sum=0;
353 # $exam_prob_sum+=$_->{"prob"} for values(%exam);
354 # {
355 #       my $rand=int rand $exam_prob_sum;
356 #       my $sum=0;
357 #       keys(%exam);
358 #       while (my($from,$hashref)=each(%exam)) {
359 #               $sum+=$hashref->{"prob"};
360 #               next if $rand>=$sum;
361 #               $exam_from=$from;
362 #               last;
363 #       }
364 #       die "INTERNAL" if !$exam_from;
365 # }
366
367 edict_read $opt_edict;
368
369 do { wrong_read(); exit; } if $opt_wrong;
370
371 log_read $opt_log;
372 reschedule $opt_debug>=2;       # init
373
374 exam();