Implemented "pri" priorities.
[nethome.git] / src / jaxam
1 #! /usr/bin/perl
2 #
3 # $Id$
4
5
6 use strict;
7 use warnings;
8 require Term::ReadKey;
9 require Tree::Binary::Search;
10 use Getopt::Long;
11 require Time::Piece::ISO;
12 require POSIX;
13 use utf8;
14 use encoding "utf8";
15 binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8";
16
17
18 our $opt_debug=1;
19 my $opt_edict="$0.edict";
20 my $opt_log;
21 my $opt_wrong;
22 my $opt_errors_wait_key;
23 my $opt_tee;
24 my $opt_fast;
25
26 $Getopt::Long::ignorecase=0;
27 $Getopt::Long::bundling=1;
28 die if !GetOptions(
29                 "d|debug+"         =>\$opt_debug,
30                 "e|edict=s"        =>\$opt_edict,
31                 "l|log=s"          =>\$opt_log,
32                 "w|wrong"          =>\$opt_wrong,               # Filter the input lines.
33                   "errors-wait-key"=>\$opt_errors_wait_key,     # Only for -w|--wrong.
34                   "tee!"           =>\$opt_tee,                 # Output all the read lines. Only for -w|--wrong.
35                   "fast!"          =>\$opt_fast,
36                 );
37 $opt_log||=$opt_edict.".log";
38
39 my %exam=(
40         "kana"   =>{"to"=>"english"},   # "prob"=>1,
41         "english"=>{"to"=>"kana"},      # "prob"=>1,
42         );
43 my %ok=(
44         0=>-1,
45         1=>+4,
46         );
47
48 my $T;
49 my @R;
50 my %identify;
51 my %kana_to_r;
52
53 sub schedule($$%)
54 {
55         my($r,$exam_from,%args)=@_;
56
57         return if $r->{"orig"}=~/\Q(laceno)\E/;
58         my $time=$args{"time"}||time();
59         my $sum=0;
60         for my $result (@{$r->{"result"}}) {
61                 next if $result->{"exam_from"} ne $exam_from;
62                 my $age=$time-$result->{"time"};
63                 $age=0 if $age<0;
64                 # $age:   0..  big importance
65                 # $age: big..small importance
66                 # $ok: 0..mistake
67                 # $ok: 1..correct
68                 # $sum:   big..far
69                 # $sum: small..soon
70                 # 1/log(2+$age): big..  big importance
71                 # 1/log(2+$age):   0..small importance
72                 $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
73                 }
74         my $scale=1;
75         if ($r->{"pri"}) {
76                 my $abspri=$r->{"pri"};
77                 $abspri=1/(-$abspri) if $abspri<0;
78                 $abspri=2 if $abspri<2;
79                 $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1);
80         }
81         my $arr=[];
82         if ($T->exists($sum)) {
83                 $arr=$T->select($sum)->{"arr"};
84                 $T->delete($sum);
85         }
86         splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
87         $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
88         print STDERR "; schedule=$sum" if $opt_debug;
89 }
90
91 sub reschedule(;$)
92 {
93         my($opt_debug_force)=@_;
94
95         $T=Tree::Binary::Search->new();
96         $T->useNumericComparison();
97         my $time=time();
98         for my $r (@R) {
99                 local $opt_debug=0 if $opt_debug<2;
100                 local $opt_debug=$opt_debug_force if defined $opt_debug_force;
101                 print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
102                 for my $exam_from (keys(%exam)) {
103                         schedule $r,$exam_from,"time"=>$time;
104                 }
105                 print STDERR "\n" if $opt_debug;
106         }
107 }
108
109 sub best()
110 {
111         return if $T->isEmpty();
112         my $min=$T->min()->{"key"};
113         my $arr=$T->select($min)->{"arr"};
114         $T->delete($min);
115         my $r=shift @$arr;
116         $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
117         if ($opt_debug) {
118                 print STDERR "min=$min";
119                 print STDERR " of ".(1+@$arr) if @$arr;
120                 print STDERR "\n";
121         }
122         return $r;
123 }
124
125 sub identify($%)
126 {
127         my($r,%args)=@_;
128
129         return join " ",
130                         map(($r->{$_}||()),qw(
131                                         kanji
132                                         kana
133                                         )),
134                         (!$args{"at"} ? () : '@'.$r->{"line"});
135 }
136
137 sub result($$$%)
138 {
139         my($r,$exam_from,$ok,%args)=@_;
140
141         if ($args{"write"}) {
142                 local *LOG_APPEND;
143                 open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
144                 print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
145                                 ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
146                 close LOG_APPEND or die "close \"$opt_log\": $!";
147         }
148
149         push @{$r->{"result"}},{
150                 "time"=>time(),
151                 "exam_from"=>$exam_from,
152                 "ok"=>$ok,
153                 };
154 }
155
156 my $errors_wait_key_count;
157 END {
158         if ($errors_wait_key_count && $opt_errors_wait_key) {
159                 print STDERR "Errors occured. Press any key to continue...";
160                 <STDIN>;
161         }
162 }
163
164 sub edict_line_parse(;$)
165 {
166         my($s,$line)=@_;
167
168         $s=$_ if !defined $s;
169         $line=$. if !defined $line;
170         chomp $s;
171         local $_=$s;
172         s/;.*//;
173         return if /^\s*$/;
174         #〒 [ゆうびん] /(n) mail/postal service/
175         #Tシャツ /T-shirt/
176         m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
177                         or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
178         my $r;
179         my $rest;
180         ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
181         if (!$r->{"kana"}) {
182                 $r->{"kana"}=$r->{"kanji"};
183                 delete $r->{"kanji"};
184         }
185         $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$1};
186         $r->{"pri"}=$2 if $2;
187         while ($rest) {
188                 $rest=~s{^\s*([^/]+?)\s*/}{}
189                                 or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
190                 my($english)=($1);
191                 push @{$r->{"english"}},$english;
192         }
193         $r->{"line"}=$line;
194         $r->{"orig"}=$s;
195         return $r;
196 }
197
198 sub to_chk($$)
199 {
200         my($s,$type)=@_;
201
202         local $_=$s;
203         $_=lc $_;
204         s/\bto\b//g if $type eq "english";
205         s/\s//g;
206         s/[(][^)]*[)]//g;
207         return $_;
208 }
209
210 sub edict_read($)
211 {
212         my($pathname)=@_;
213
214         local *EDICT;
215         open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
216         my %check=(
217                 #"kanji"=>{},   # exists...
218                 #"kana"=>{},    # exists...
219                 "identify"=>{},
220                 );
221         while (<EDICT>) {
222                 my $r=edict_line_parse() or next;
223                 push @R,$r;
224                 push @{$kana_to_r{$r->{"kana"}}},$r;
225                 keys(%check);
226                 while (my($field,$hashref)=each(%check)) {
227                         my $val;
228                         $val=identify $r if $field eq "identify";
229                         $val||=$r->{$field};
230                         next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
231                         my $origvalref=\$hashref->{$val};
232                         warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
233                                         if $$origvalref;
234                         $$origvalref=$r;
235                 }
236         }
237         close EDICT or die "close \"$pathname\": $!";
238         %identify=map((identify($_)=>$_),@R);
239 }
240
241 sub log_read($)
242 {
243         my($pathname)=@_;
244
245         local *LOG_READ;
246         if (!open LOG_READ,"<:utf8",$pathname) {
247                 warn "open \"$pathname\": $!";
248                 return;
249         }
250         while (<LOG_READ>) {
251                 chomp;
252                 # 2002-04-25T21:17:52+0900 ok=1: identify($r)
253                 my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
254                 /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
255                                 or do { warn "Unrecognized line: $_\n"; next; };
256                 $year-=1900;
257                 $month--;
258                 my $localtime=POSIX::mktime($second,$minute,$hour,$day,$month,$year);
259                 $localtime or do { warn "Unparsable time at line: $_\n"; next; };
260                 my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
261                 my $r=$identify{$identify}
262                                 or do { warn "Word not found from line: $_\n"; next; };
263                 result $r,$exam_from,$ok,"write"=>0;
264         }
265         close LOG_READ or die "close \"$pathname\": $!";
266 }
267
268 sub wrong_read()
269 {
270         while (<>) {
271                 print if $opt_tee;
272                 chomp;
273                 my $w=edict_line_parse() or next;
274                 my $r=$identify{identify $w};
275                 do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
276                 result $r,"kana",0,"write"=>1;
277         }
278 }
279
280 sub word_out($)
281 {
282         my($s)=@_;
283
284         return $s if !ref $s;
285         return join("",map("$_/",@$s));
286 }
287
288 sub exam()
289 {
290         my @exam=keys(%exam);
291         for (;;) {
292                 reschedule() if !$opt_fast;
293                 my $besthashref=best();
294                 my $r=$besthashref->{"r"};
295                 my $exam_from=$besthashref->{"exam_from"};
296                 my $exam_to=$exam{$exam_from}{"to"};
297                 print word_out($r->{$exam_from}).": ";
298                 my $got=<STDIN>;
299                 chomp $got;
300                 my $want=$r->{$exam_to};
301                 my $got_chk=to_chk($got,$exam_to);
302                 my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
303                 my $ok;
304                 my $substr;
305                 for my $want_chk (@want_chk) {
306                         next if !$want_chk;     # discard patterns like: /(P)/
307                         $ok=1 if $got_chk eq $want_chk;
308                         $substr=1 if $want_chk=~/\Q$got_chk\E/;
309                 }
310                 my $aliased;
311                 if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
312                                 && (my $arrref=$kana_to_r{$got})) {
313                         my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
314                         my $intended;
315                         for my $kana_alias (@$arrref) {
316                                 for my $kana_alias_english (@{$kana_alias->{"english"}}) {
317                                         my $chk_english=to_chk $kana_alias_english,"english" or next;
318                                         next if !$chk_english_from{$chk_english};
319                                         print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
320                                         print "OK if alias -> ".$kana_alias->{"orig"}."\n";
321                                         $ok=1;
322                                         $aliased=1;
323                                 }
324                         }
325                 }
326                 print "-------------> ".word_out($want)."\n" if !$aliased;
327                 if (!$ok) {
328                         if ($exam_to eq "english" && $got=~/\S/ && $substr) {
329                                 my $key;
330                                 do {
331                                         print STDERR "Is your answer correct? [y/n] ";
332                                         sub restore
333                                         {
334                                                 Term::ReadKey::ReadMode(0);
335                                         }
336                                         local $SIG{"__DIE__"}=\&restore;
337                                         Term::ReadKey::ReadMode(4);
338                                         $key=Term::ReadKey::ReadKey(0);
339                                         print STDERR "\n";      # no echo
340                                         restore();
341                                         die if $key eq "\x03";  # ctrl-c
342                                         $ok=1 if $key eq "y";
343                                         $ok=0 if $key eq "n";
344                                 } while (!defined $ok);
345                         }
346                 }
347                 $ok||=0;
348                 if (!$aliased) {
349                         result $r,$exam_from,$ok,"write"=>1;
350                         print STDERR "result: ".($ok ? "ok" : "WRONG");
351                         schedule $r,$exam_from;
352                         print "\n";
353                 }
354                 print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
355         }
356 }
357
358 # FIXME: balanced $exam_from:
359 # my $exam_prob_sum=0;
360 # $exam_prob_sum+=$_->{"prob"} for values(%exam);
361 # {
362 #       my $rand=int rand $exam_prob_sum;
363 #       my $sum=0;
364 #       keys(%exam);
365 #       while (my($from,$hashref)=each(%exam)) {
366 #               $sum+=$hashref->{"prob"};
367 #               next if $rand>=$sum;
368 #               $exam_from=$from;
369 #               last;
370 #       }
371 #       die "INTERNAL" if !$exam_from;
372 # }
373
374 edict_read $opt_edict;
375
376 do { wrong_read(); exit; } if $opt_wrong;
377
378 log_read $opt_log;
379 reschedule $opt_debug>=2;       # init
380
381 exam();