Fixed "country" o- prefix.
[nethome.git] / src / jaxam
1 #! /usr/bin/perl
2 #
3 # $Id$
4
5
6 use strict;
7 use warnings;
8 require Term::ReadKey;
9 require Tree::Binary::Search;
10 use Getopt::Long;
11 require Time::Piece::ISO;
12 require POSIX;
13 use utf8;
14 use encoding "utf8";
15 binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8";
16
17
18 our $opt_debug=1;
19 my $opt_edict="$0.edict";
20 my $opt_log;
21 my $opt_wrong;
22 my $opt_errors_wait_key;
23 my $opt_tee;
24 my $opt_fast;
25
26 $Getopt::Long::ignorecase=0;
27 $Getopt::Long::bundling=1;
28 die if !GetOptions(
29                 "d|debug+"         =>\$opt_debug,
30                 "e|edict=s"        =>\$opt_edict,
31                 "l|log=s"          =>\$opt_log,
32                 "w|wrong"          =>\$opt_wrong,               # Filter the input lines.
33                   "errors-wait-key"=>\$opt_errors_wait_key,     # Only for -w|--wrong.
34                   "tee!"           =>\$opt_tee,                 # Output all the read lines. Only for -w|--wrong.
35                   "fast!"          =>\$opt_fast,
36                 );
37 $opt_log||=$opt_edict.".log";
38
39 my %exam=(
40         "kana"   =>{"to"=>"english"},   # "prob"=>1,
41         "english"=>{"to"=>"kana"},      # "prob"=>1,
42         );
43 my %ok=(
44         0=>-1,
45         1=>+4,
46         );
47
48 my $T;
49 my @R;
50 my %identify;
51 my %kana_to_r;
52
53 sub schedule($$%)
54 {
55         my($r,$exam_from,%args)=@_;
56
57         my $time=$args{"time"}||time();
58         my $sum=0;
59         for my $result (@{$r->{"result"}}) {
60                 next if $result->{"exam_from"} ne $exam_from;
61                 my $age=$time-$result->{"time"};
62                 $age=0 if $age<0;
63                 # $age:   0..  big importance
64                 # $age: big..small importance
65                 # $ok: 0..mistake
66                 # $ok: 1..correct
67                 # $sum:   big..far
68                 # $sum: small..soon
69                 # 1/log(2+$age): big..  big importance
70                 # 1/log(2+$age):   0..small importance
71                 $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
72                 }
73         my $arr=[];
74         if ($T->exists($sum)) {
75                 $arr=$T->select($sum)->{"arr"};
76                 $T->delete($sum);
77         }
78         splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
79         $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
80         print STDERR "; schedule=$sum" if $opt_debug;
81 }
82
83 sub reschedule(;$)
84 {
85         my($opt_debug_force)=@_;
86
87         $T=Tree::Binary::Search->new();
88         $T->useNumericComparison();
89         my $time=time();
90         for my $r (@R) {
91                 local $opt_debug=0 if $opt_debug<2;
92                 local $opt_debug=$opt_debug_force if defined $opt_debug_force;
93                 print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
94                 for my $exam_from (keys(%exam)) {
95                         schedule $r,$exam_from,"time"=>$time;
96                 }
97                 print STDERR "\n" if $opt_debug;
98         }
99 }
100
101 sub best()
102 {
103         return if $T->isEmpty();
104         my $min=$T->min()->{"key"};
105         my $arr=$T->select($min)->{"arr"};
106         $T->delete($min);
107         my $r=shift @$arr;
108         $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
109         if ($opt_debug) {
110                 print STDERR "min=$min";
111                 print STDERR " of ".(1+@$arr) if @$arr;
112                 print STDERR "\n";
113         }
114         return $r;
115 }
116
117 sub identify($%)
118 {
119         my($r,%args)=@_;
120
121         return join " ",
122                         map(($r->{$_}||()),qw(
123                                         kanji
124                                         kana
125                                         )),
126                         (!$args{"at"} ? () : '@'.$r->{"line"});
127 }
128
129 sub result($$$%)
130 {
131         my($r,$exam_from,$ok,%args)=@_;
132
133         if ($args{"write"}) {
134                 local *LOG_APPEND;
135                 open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
136                 print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
137                                 ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
138                 close LOG_APPEND or die "close \"$opt_log\": $!";
139         }
140
141         push @{$r->{"result"}},{
142                 "time"=>time(),
143                 "exam_from"=>$exam_from,
144                 "ok"=>$ok,
145                 };
146 }
147
148 my $errors_wait_key_count;
149 END {
150         if ($errors_wait_key_count && $opt_errors_wait_key) {
151                 print STDERR "Errors occured. Press any key to continue...";
152                 <STDIN>;
153         }
154 }
155
156 sub edict_line_parse(;$)
157 {
158         my($s,$line)=@_;
159
160         $s=$_ if !defined $s;
161         $line=$. if !defined $line;
162         chomp $s;
163         local $_=$s;
164         s/;.*//;
165         return if /^\s*$/;
166         #〒 [ゆうびん] /(n) mail/postal service/
167         #Tシャツ /T-shirt/
168         m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
169                         or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
170         my $r;
171         my $rest;
172         ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
173         if (!$r->{"kana"}) {
174                 $r->{"kana"}=$r->{"kanji"};
175                 delete $r->{"kanji"};
176         }
177         while ($rest) {
178                 $rest=~s{^\s*([^/]+?)\s*/}{}
179                                 or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
180                 my($english)=($1);
181                 push @{$r->{"english"}},$english;
182         }
183         $r->{"line"}=$line;
184         $r->{"orig"}=$s;
185         return $r;
186 }
187
188 sub to_chk($$)
189 {
190         my($s,$type)=@_;
191
192         local $_=$s;
193         $_=lc $_;
194         s/\bto\b//g if $type eq "english";
195         s/\s//g;
196         s/[(][^)]*[)]//g;
197         return $_;
198 }
199
200 sub edict_read($)
201 {
202         my($pathname)=@_;
203
204         local *EDICT;
205         open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
206         my %check=(
207                 #"kanji"=>{},   # exists...
208                 #"kana"=>{},    # exists...
209                 "identify"=>{},
210                 );
211         while (<EDICT>) {
212                 my $r=edict_line_parse() or next;
213                 next if /\Q(laceno)\E/;
214                 push @R,$r;
215                 push @{$kana_to_r{$r->{"kana"}}},$r;
216                 keys(%check);
217                 while (my($field,$hashref)=each(%check)) {
218                         my $val;
219                         $val=identify $r if $field eq "identify";
220                         $val||=$r->{$field};
221                         next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
222                         my $origvalref=\$hashref->{$val};
223                         warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
224                                         if $$origvalref;
225                         $$origvalref=$r;
226                 }
227         }
228         close EDICT or die "close \"$pathname\": $!";
229         %identify=map((identify($_)=>$_),@R);
230 }
231
232 sub log_read($)
233 {
234         my($pathname)=@_;
235
236         local *LOG_READ;
237         if (!open LOG_READ,"<:utf8",$pathname) {
238                 warn "open \"$pathname\": $!";
239                 return;
240         }
241         while (<LOG_READ>) {
242                 chomp;
243                 # 2002-04-25T21:17:52+0900 ok=1: identify($r)
244                 my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
245                 /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
246                                 or do { warn "Unrecognized line: $_\n"; next; };
247                 $year-=1900;
248                 $month--;
249                 my $localtime=POSIX::mktime($second,$minute,$hour,$day,$month,$year);
250                 $localtime or do { warn "Unparsable time at line: $_\n"; next; };
251                 my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
252                 my $r=$identify{$identify}
253                                 or do { warn "Word not found from line: $_\n"; next; };
254                 result $r,$exam_from,$ok,"write"=>0;
255         }
256         close LOG_READ or die "close \"$pathname\": $!";
257 }
258
259 sub wrong_read()
260 {
261         while (<>) {
262                 print if $opt_tee;
263                 chomp;
264                 my $w=edict_line_parse() or next;
265                 my $r=$identify{identify $w};
266                 do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
267                 result $r,"kana",0,"write"=>1;
268         }
269 }
270
271 sub word_out($)
272 {
273         my($s)=@_;
274
275         return $s if !ref $s;
276         return join("",map("$_/",@$s));
277 }
278
279 sub exam()
280 {
281         my @exam=keys(%exam);
282         for (;;) {
283                 reschedule() if !$opt_fast;
284                 my $besthashref=best();
285                 my $r=$besthashref->{"r"};
286                 my $exam_from=$besthashref->{"exam_from"};
287                 my $exam_to=$exam{$exam_from}{"to"};
288                 print word_out($r->{$exam_from}).": ";
289                 my $got=<STDIN>;
290                 chomp $got;
291                 my $want=$r->{$exam_to};
292                 my $got_chk=to_chk($got,$exam_to);
293                 my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
294                 my $ok;
295                 my $substr;
296                 for my $want_chk (@want_chk) {
297                         next if !$want_chk;     # discard patterns like: /(P)/
298                         $ok=1 if $got_chk eq $want_chk;
299                         $substr=1 if $want_chk=~/\Q$got_chk\E/;
300                 }
301                 my $aliased;
302                 if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
303                                 && (my $arrref=$kana_to_r{$got})) {
304                         my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
305                         my $intended;
306                         for my $kana_alias (@$arrref) {
307                                 for my $kana_alias_english (@{$kana_alias->{"english"}}) {
308                                         my $chk_english=to_chk $kana_alias_english,"english" or next;
309                                         next if !$chk_english_from{$chk_english};
310                                         print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
311                                         print "OK if alias -> ".$kana_alias->{"orig"}."\n";
312                                         $ok=1;
313                                         $aliased=1;
314                                 }
315                         }
316                 }
317                 print "-------------> ".word_out($want)."\n" if !$aliased;
318                 if (!$ok) {
319                         if ($exam_to eq "english" && $got=~/\S/ && $substr) {
320                                 my $key;
321                                 do {
322                                         print STDERR "Is your answer correct? [y/n] ";
323                                         sub restore
324                                         {
325                                                 Term::ReadKey::ReadMode(0);
326                                         }
327                                         local $SIG{"__DIE__"}=\&restore;
328                                         Term::ReadKey::ReadMode(4);
329                                         $key=Term::ReadKey::ReadKey(0);
330                                         print STDERR "\n";      # no echo
331                                         restore();
332                                         die if $key eq "\x03";  # ctrl-c
333                                         $ok=1 if $key eq "y";
334                                         $ok=0 if $key eq "n";
335                                 } while (!defined $ok);
336                         }
337                 }
338                 $ok||=0;
339                 if (!$aliased) {
340                         result $r,$exam_from,$ok,"write"=>1;
341                         print STDERR "result: ".($ok ? "ok" : "WRONG");
342                         schedule $r,$exam_from;
343                         print "\n";
344                 }
345                 print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
346         }
347 }
348
349 # FIXME: balanced $exam_from:
350 # my $exam_prob_sum=0;
351 # $exam_prob_sum+=$_->{"prob"} for values(%exam);
352 # {
353 #       my $rand=int rand $exam_prob_sum;
354 #       my $sum=0;
355 #       keys(%exam);
356 #       while (my($from,$hashref)=each(%exam)) {
357 #               $sum+=$hashref->{"prob"};
358 #               next if $rand>=$sum;
359 #               $exam_from=$from;
360 #               last;
361 #       }
362 #       die "INTERNAL" if !$exam_from;
363 # }
364
365 edict_read $opt_edict;
366
367 do { wrong_read(); exit; } if $opt_wrong;
368
369 log_read $opt_log;
370 reschedule $opt_debug>=2;       # init
371
372 exam();