mocksetup: /unsafe+/hdd -> /quad
[nethome.git] / src / jaxam
1 #! /usr/bin/perl
2 #
3 # $Id$
4
5
6 use strict;
7 use warnings;
8 require Term::ReadKey;
9 require Tree::Binary::Search;
10 use Getopt::Long;
11 require Time::Piece::ISO;
12 require POSIX;
13 use utf8;
14 use encoding "utf8";
15 binmode STDERR,":utf8"; # only STDIN and STDOUT covered by: use encoding "utf8";
16
17
18 our $opt_debug=1;
19 my $opt_edict="$0.edict";
20 my $opt_log;
21 my $opt_wrong;
22 my $opt_errors_wait_key;
23 my $opt_tee;
24 my $opt_fast;
25 my $opt_cycle=10;
26
27 $Getopt::Long::ignorecase=0;
28 $Getopt::Long::bundling=1;
29 die if !GetOptions(
30                 "d|debug+"         =>\$opt_debug,
31                 "e|edict=s"        =>\$opt_edict,
32                 "l|log=s"          =>\$opt_log,
33                 "w|wrong"          =>\$opt_wrong,               # Filter the input lines.
34                   "errors-wait-key"=>\$opt_errors_wait_key,     # Only for -w|--wrong.
35                   "tee!"           =>\$opt_tee,                 # Output all the read lines. Only for -w|--wrong.
36                   "fast!"          =>\$opt_fast,
37                   "cycle=s"        =>\$opt_cycle,
38                 );
39 $opt_log||=$opt_edict.".log";
40
41 my %exam=(
42         "kana"   =>{"to"=>"english"},   # "prob"=>1,
43         "english"=>{"to"=>"kana"},      # "prob"=>1,
44         );
45 my %ok=(
46         0=>-1,
47         1=>+4,
48         );
49
50 my $T;
51 my @R;
52 my %identify;
53 my %kana_to_r;
54 my $result_count=0;
55
56 sub siglog($)
57 {
58         my($a)=@_;
59
60         return 0 if !$a;
61         my $sig=($a>0)-($a<0);
62         return $sig*log(abs $a);
63 }
64
65 sub schedule($$%)
66 {
67         my($r,$exam_from,%args)=@_;
68
69         return if $r->{"orig"}=~/\Q(laceno)\E/;
70         my $time=$args{"time"}||time();
71         my $sum=0;
72         for my $result (@{$r->{"result"}}) {
73                 next if $result->{"exam_from"} ne $exam_from;
74                 my $age=$time-$result->{"time"};
75                 $age=0 if $age<0;
76                 # $age:   0..  big importance
77                 # $age: big..small importance
78                 # $ok: 0..mistake
79                 # $ok: 1..correct
80                 # $sum:   big..far
81                 # $sum: small..soon
82                 # 1/log(2+$age): big..  big importance
83                 # 1/log(2+$age):   0..small importance
84                 $sum+=1/log(2+$age)*$ok{$result->{"ok"}};
85                 if (!$result->{"ok"}) {
86                         my $count_diff=$result_count-$result->{"count"};
87                         $count_diff-=$opt_cycle;
88                         $sum-=siglog($count_diff);
89                         }
90                 }
91         my $scale=1;
92         if ($r->{"pri"}) {
93                 my $abspri=$r->{"pri"};
94                 $abspri=1/(-$abspri) if $abspri<0;
95                 $abspri=2 if $abspri<2;
96                 $scale=log($abspri)*($r->{"pri"}<0 ? -1 : +1);
97         }
98         my $arr=[];
99         if ($T->exists($sum)) {
100                 $arr=$T->select($sum)->{"arr"};
101                 $T->delete($sum);
102         }
103         splice @$arr,int(rand(@$arr+1)),0,{"r"=>$r,"exam_from"=>$exam_from};
104         $T->insert($sum=>{"key"=>$sum,"arr"=>$arr});
105         print STDERR "; schedule=$sum" if $opt_debug;
106 }
107
108 sub reschedule(;$)
109 {
110         my($opt_debug_force)=@_;
111
112         $T=Tree::Binary::Search->new();
113         $T->useNumericComparison();
114         my $time=time();
115         for my $r (@R) {
116                 local $opt_debug=0 if $opt_debug<2;
117                 local $opt_debug=$opt_debug_force if defined $opt_debug_force;
118                 print STDERR $r->{"kana"}."\t".$r->{"english"} if $opt_debug;
119                 for my $exam_from (keys(%exam)) {
120                         schedule $r,$exam_from,"time"=>$time;
121                 }
122                 print STDERR "\n" if $opt_debug;
123         }
124 }
125
126 sub best()
127 {
128         return if $T->isEmpty();
129         my $min=$T->min()->{"key"};
130         my $arr=$T->select($min)->{"arr"};
131         $T->delete($min);
132         my $r=shift @$arr;
133         $T->insert($min=>{"key"=>$min,"arr"=>$arr}) if @$arr;
134         if ($opt_debug) {
135                 print STDERR "min=$min";
136                 print STDERR " of ".(1+@$arr) if @$arr;
137                 print STDERR "\n";
138         }
139         return $r;
140 }
141
142 sub identify($%)
143 {
144         my($r,%args)=@_;
145
146         return join " ",
147                         map(($r->{$_}||()),qw(
148                                         kanji
149                                         kana
150                                         )),
151                         (!$args{"at"} ? () : '@'.$r->{"line"});
152 }
153
154 sub result($$$%)
155 {
156         my($r,$exam_from,$ok,%args)=@_;
157
158         if ($args{"write"}) {
159                 local *LOG_APPEND;
160                 open LOG_APPEND,">>:utf8",$opt_log or die "append \"$opt_log\": $!";
161                 print LOG_APPEND Time::Piece::ISO::localtime().POSIX::strftime("%z",localtime())
162                                 ." exam_from=$exam_from ok=$ok: ".identify($r)."\n";
163                 close LOG_APPEND or die "close \"$opt_log\": $!";
164         }
165         $args{"time"}||=time();
166         push @{$r->{"result"}},{
167                 "time"=>$args{"time"},
168                 "exam_from"=>$exam_from,
169                 "ok"=>$ok,
170                 "count"=>$result_count++,
171                 };
172 }
173
174 my $errors_wait_key_count;
175 END {
176         if ($errors_wait_key_count && $opt_errors_wait_key) {
177                 print STDERR "Errors occured. Press any key to continue...";
178                 <STDIN>;
179         }
180 }
181
182 sub edict_line_parse(;$)
183 {
184         my($s,$line)=@_;
185
186         $s=$_ if !defined $s;
187         $line=$. if !defined $line;
188         chomp $s;
189         local $_=$s;
190         s/;.*//;
191         return if /^\s*$/;
192         #〒 [ゆうびん] /(n) mail/postal service/
193         #Tシャツ /T-shirt/
194         m{^([^[/\s]+)(?:\s+[[]([^[/\s]+)[]])?\s+/(.*)$}
195                         or do { warn "Unparsable: $s\n"; $errors_wait_key_count++; return; };
196         my $r;
197         my $rest;
198         ($r->{"kanji"},$r->{"kana"},$rest)=($1,$2,$3);
199         if (!$r->{"kana"}) {
200                 $r->{"kana"}=$r->{"kanji"};
201                 delete $r->{"kanji"};
202         }
203         $rest=~s{^(|.*/)[(]pri([+-]\d+)[)]/}{$r->{"pri"}=$2;$1;}e;
204         while ($rest) {
205                 $rest=~s{^\s*([^/]+?)\s*/}{}
206                                 or do { warn "Unparsable english: $s\n"; $errors_wait_key_count++; return; };
207                 my($english)=($1);
208                 push @{$r->{"english"}},$english;
209         }
210         $r->{"line"}=$line;
211         $r->{"orig"}=$s;
212         return $r;
213 }
214
215 sub to_chk($$)
216 {
217         my($s,$type)=@_;
218
219         local $_=$s;
220         $_=lc $_;
221         s/\bto\b//g if $type eq "english";
222         s/\s//g;
223         s/[(][^)]*[)]//g;
224         return $_;
225 }
226
227 sub edict_read($)
228 {
229         my($pathname)=@_;
230
231         local *EDICT;
232         open EDICT,"<:utf8",$pathname or die "open \"$pathname\": $!";
233         my %check=(
234                 #"kanji"=>{},   # exists...
235                 #"kana"=>{},    # exists...
236                 "identify"=>{},
237                 );
238         while (<EDICT>) {
239                 my $r=edict_line_parse() or next;
240                 push @R,$r;
241                 push @{$kana_to_r{$r->{"kana"}}},$r;
242                 keys(%check);
243                 while (my($field,$hashref)=each(%check)) {
244                         my $val;
245                         $val=identify $r if $field eq "identify";
246                         $val||=$r->{$field};
247                         next if $val eq "悪い にくい";     # fixup: <20050522015353.GA31030@kashome.dyn.jankratochvil.net>
248                         my $origvalref=\$hashref->{$val};
249                         warn "field{$field} check duplicity: ".identify($r,"at"=>1)." vs. ".identify($$origvalref,"at"=>1)
250                                         if $$origvalref;
251                         $$origvalref=$r;
252                 }
253         }
254         close EDICT or die "close \"$pathname\": $!";
255         %identify=map((identify($_)=>$_),@R);
256 }
257
258 sub log_read($)
259 {
260         my($pathname)=@_;
261
262         local *LOG_READ;
263         if (!open LOG_READ,"<:utf8",$pathname) {
264                 warn "open \"$pathname\": $!";
265                 return;
266         }
267         while (<LOG_READ>) {
268                 chomp;
269                 # 2002-04-25T21:17:52+0900 ok=1: identify($r)
270                 my($year, $month, $day,   $hour,  $minute,$second,$zonepm,$zonehour,$zoneminute,$exam_from,$ok,$identify)=
271                 /^(\d{4})-(\d{1,2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})([+-])(\d{2})(\d{2}) exam_from=(\w+) ok=(\d+): (.+)$/
272                                 or do { warn "Unrecognized line: $_\n"; next; };
273                 $year-=1900;
274                 $month--;
275                 my $localtime=POSIX::mktime($second,$minute,$hour,$day,$month,$year);
276                 $localtime or do { warn "Unparsable time at line: $_\n"; next; };
277                 my $gmtime=$localtime-($zonepm."1")*($zonehour*60+$zoneminute);
278                 my $r=$identify{$identify}
279                                 or do { warn "Word not found from line: $_\n"; next; };
280                 result $r,$exam_from,$ok,"write"=>0,"time"=>$gmtime;
281         }
282         close LOG_READ or die "close \"$pathname\": $!";
283 }
284
285 sub wrong_read()
286 {
287         while (<>) {
288                 print if $opt_tee;
289                 chomp;
290                 my $w=edict_line_parse() or next;
291                 my $r=$identify{identify $w};
292                 do { warn "'wrong word' not found for the line: $_\n"; $errors_wait_key_count++; next; } if !$r;
293                 result $r,"kana",0,"write"=>1;
294         }
295 }
296
297 sub word_out($)
298 {
299         my($s)=@_;
300
301         return $s if !ref $s;
302         return join("",map("$_/",@$s));
303 }
304
305 sub exam()
306 {
307         my @exam=keys(%exam);
308         for (;;) {
309                 reschedule() if !$opt_fast;
310                 my $besthashref=best();
311                 my $r=$besthashref->{"r"};
312                 my $exam_from=$besthashref->{"exam_from"};
313                 my $exam_to=$exam{$exam_from}{"to"};
314                 print word_out($r->{$exam_from}).": ";
315                 my $got=<STDIN>;
316                 chomp $got;
317                 my $want=$r->{$exam_to};
318                 my $got_chk=to_chk($got,$exam_to);
319                 my @want_chk=map(to_chk($_,$exam_to),map((!ref($_)?$_:@$_),$want));
320                 my $ok;
321                 my $substr;
322                 for my $want_chk (@want_chk) {
323                         next if !$want_chk;     # discard patterns like: /(P)/
324                         $ok=1 if $got_chk eq $want_chk;
325                         $substr=1 if $want_chk=~/\Q$got_chk\E/;
326                 }
327                 my $aliased;
328                 if (!$ok && $exam_from eq "english" && $exam_to eq "kana"
329                                 && (my $arrref=$kana_to_r{$got})) {
330                         my %chk_english_from=map((to_chk($_,"english")=>1),@{$r->{"english"}});
331                         my $intended;
332                         for my $kana_alias (@$arrref) {
333                                 for my $kana_alias_english (@{$kana_alias->{"english"}}) {
334                                         my $chk_english=to_chk $kana_alias_english,"english" or next;
335                                         next if !$chk_english_from{$chk_english};
336                                         print "intended ----> ".$r->{"orig"}."\n" if !$intended++;
337                                         print "OK if alias -> ".$kana_alias->{"orig"}."\n";
338                                         $ok=1;
339                                         $aliased=1;
340                                 }
341                         }
342                 }
343                 print "-------------> ".word_out($want)."\n" if !$aliased;
344                 if (!$ok) {
345                         if ($exam_to eq "english" && $got=~/\S/ && $substr) {
346                                 my $key;
347                                 do {
348                                         print STDERR "Is your answer correct? [y/n] ";
349                                         sub restore
350                                         {
351                                                 Term::ReadKey::ReadMode(0);
352                                         }
353                                         local $SIG{"__DIE__"}=\&restore;
354                                         Term::ReadKey::ReadMode(4);
355                                         $key=Term::ReadKey::ReadKey(0);
356                                         print STDERR "\n";      # no echo
357                                         restore();
358                                         die if $key eq "\x03";  # ctrl-c
359                                         $ok=1 if $key eq "y";
360                                         $ok=0 if $key eq "n";
361                                 } while (!defined $ok);
362                         }
363                 }
364                 $ok||=0;
365                 if (!$aliased) {
366                         result $r,$exam_from,$ok,"write"=>1;
367                         print STDERR "result: ".($ok ? "ok" : "WRONG");
368                         schedule $r,$exam_from;
369                         print "\n";
370                 }
371                 print STDERR join(" * ",map("WRONG",1..8))."\n" if !$ok;
372         }
373 }
374
375 # FIXME: balanced $exam_from:
376 # my $exam_prob_sum=0;
377 # $exam_prob_sum+=$_->{"prob"} for values(%exam);
378 # {
379 #       my $rand=int rand $exam_prob_sum;
380 #       my $sum=0;
381 #       keys(%exam);
382 #       while (my($from,$hashref)=each(%exam)) {
383 #               $sum+=$hashref->{"prob"};
384 #               next if $rand>=$sum;
385 #               $exam_from=$from;
386 #               last;
387 #       }
388 #       die "INTERNAL" if !$exam_from;
389 # }
390
391 edict_read $opt_edict;
392
393 do { wrong_read(); exit; } if $opt_wrong;
394
395 log_read $opt_log;
396 reschedule $opt_debug>=2;       # init
397
398 exam();