From: lace <> Date: Wed, 14 Jul 2010 11:13:17 +0000 (+0000) Subject: current X-Git-Url: https://git.jankratochvil.net/?p=nethome.git;a=commitdiff_plain;h=89ca70a15a92676350073237f862e90b567430d5;ds=inline current --- diff --git a/public_html/cgi-bin/idnes-foto b/public_html/cgi-bin/idnes-foto index dfdbd96..2cd8e73 100755 --- a/public_html/cgi-bin/idnes-foto +++ b/public_html/cgi-bin/idnes-foto @@ -18,11 +18,13 @@ else { $BASE=$ARGV[0]; } +# Somehow Perl modules started reencoding windows-1250 -> utf-8. + if ($ENV{"GATEWAY_INTERFACE"}) { my $future=HTTP::Date::time2str(2000000000); my $past =HTTP::Date::time2str(1000000000); print <<"EOH"; -Content-type: text/html; charset=windows-1250 +Content-type: text/html; charset=utf-8 Cache-Control: public Expires: $future Last-Modified: $past @@ -32,14 +34,14 @@ EOH print <<"EOH"; iDNES foto@{[ (!$BASE ? "" : ": $BASE") ]} - +
EOH -$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-\w]+&c=[-\w]+|c=\w+&r=\w+)$} or $BASE=""; +$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-_\w]+&c=[-_\w]+|c=[-_\w]+&r=[-_\w]+)$} or $BASE=""; if ($BASE) { my $pageno=1; my %seen; @@ -54,24 +56,29 @@ PAGES: for (;;) { last PAGES if $seen{$base}++; my $infourl=$BASE.'&styl=zoom&foto='.$base; my $info=LWP::Simple::get($infourl) or die $infourl; - $info=~m{"]+)" }is or die "No image found: $infourl"; my $img_src=$1; print <<"EOH";
EOH - my $text_last=keys(%seen_p_text);; + my $hit; for my $text ( + $info=~m{(.*?)}s, $info=~m{

([^<>]*)

}, $info=~m{

([^<>]*)

([^<>]*)

Autor:\s+(.*?)(?:, iDNES.cz)?

}s, ) { next if !$text; + $text=~s/^\s+//s; + $text=~s/\s+$//s; + next if !$text; + $hit++; print <<"EOH" if !$seen_p_text{$text}++;

$text

EOH } - warn "No text found: $infourl" if $text_last==keys(%seen_p_text); + warn "No text found: $infourl" if !$hit; } die $pageurl if !$did; $pageno++;