X-Git-Url: https://git.jankratochvil.net/?p=nethome.git;a=blobdiff_plain;f=public_html%2Fcgi-bin%2Fidnes-foto;h=2cd8e7325d1828b9d1b25fa9dfb0647bfd026ef4;hp=eaa15c91972731a087093e71359b4394ab9f563b;hb=89ca70a15a92676350073237f862e90b567430d5;hpb=f262b352b1da192913af430249ea764d73398667 diff --git a/public_html/cgi-bin/idnes-foto b/public_html/cgi-bin/idnes-foto index eaa15c9..2cd8e73 100755 --- a/public_html/cgi-bin/idnes-foto +++ b/public_html/cgi-bin/idnes-foto @@ -2,6 +2,7 @@ use strict; use warnings; require LWP::Simple; +require HTTP::Date; require URI::Escape; $|=1; @@ -17,23 +18,30 @@ else { $BASE=$ARGV[0]; } +# Somehow Perl modules started reencoding windows-1250 -> utf-8. + if ($ENV{"GATEWAY_INTERFACE"}) { + my $future=HTTP::Date::time2str(2000000000); + my $past =HTTP::Date::time2str(1000000000); print <<"EOH"; -Content-type: text/html; charset=windows-1250 +Content-type: text/html; charset=utf-8 +Cache-Control: public +Expires: $future +Last-Modified: $past EOH } print <<"EOH"; iDNES foto@{[ (!$BASE ? "" : ": $BASE") ]} - +
EOH -$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=\w+&c=\w+|c=\w+&r=\w+)$} or $BASE=""; +$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-_\w]+&c=[-_\w]+|c=[-_\w]+&r=[-_\w]+)$} or $BASE=""; if ($BASE) { my $pageno=1; my %seen; @@ -42,25 +50,35 @@ PAGES: for (;;) { my $pageurl=$BASE.'&strana='.$pageno; my $page=LWP::Simple::get($pageurl) or die $pageurl; my $did=0; - while ($page=~m{"]+)" }is + or die "No image found: $infourl"; + my $img_src=$1; print <<"EOH";
- + EOH - my $infourl=$BASE.'&styl=zoom&foto='.$base; - my $info=LWP::Simple::get($infourl) or die $infourl; - $info=~m{

[^<]*

} or die "No text found: $infourl"; - my $p_text=$&; - print <<"EOH" if !$seen_p_text{$p_text}++; -$p_text + my $hit; + for my $text ( + $info=~m{(.*?)}s, + $info=~m{

([^<>]*)

}, + $info=~m{

([^<>]*)

([^<>]*)

Autor:\s+(.*?)(?:, iDNES.cz)?

}s, + ) { + next if !$text; + $text=~s/^\s+//s; + $text=~s/\s+$//s; + next if !$text; + $hit++; + print <<"EOH" if !$seen_p_text{$text}++; +

$text

EOH + } + warn "No text found: $infourl" if !$hit; } die $pageurl if !$did; $pageno++;