#! /usr/bin/perl use strict; use warnings; require LWP::Simple; require HTTP::Date; require URI::Escape; $|=1; my $BASE; if (exists $ENV{"QUERY_STRING"}) { $BASE=($ENV{"QUERY_STRING"}=~m{^url=([^&]*)})[0]; $BASE=URI::Escape::uri_unescape($BASE); 0==@ARGV or die "ARGV count != 0"; $BASE||=""; } else { 1==@ARGV or die "ARGV count != 1"; $BASE=$ARGV[0]; } # Somehow Perl modules started reencoding windows-1250 -> utf-8. if ($ENV{"GATEWAY_INTERFACE"}) { my $future=HTTP::Date::time2str(2000000000); my $past =HTTP::Date::time2str(1000000000); print <<"EOH"; Content-type: text/html; charset=utf-8 Cache-Control: public Expires: $future Last-Modified: $past EOH } print <<"EOH"; iDNES foto@{[ (!$BASE ? "" : ": $BASE") ]}
EOH $BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-_\w]+&c=[-_\w]+|c=[-_\w]+&r=[-_\w]+)$} or $BASE=""; if ($BASE) { my $pageno=1; my %seen; my %seen_p_text; PAGES: for (;;) { my $pageurl=$BASE.'&strana='.$pageno; my $page=LWP::Simple::get($pageurl) or die $pageurl; my $did=0; while ($page=~m{"]+)" }is or die "No image found: $infourl"; my $img_src=$1; print <<"EOH";
EOH my $hit; for my $text ( $info=~m{(.*?)}s, $info=~m{

([^<>]*)

}, $info=~m{

([^<>]*)

([^<>]*)

Autor:\s+(.*?)(?:, iDNES.cz)?

}s, ) { next if !$text; $text=~s/^\s+//s; $text=~s/\s+$//s; next if !$text; $hit++; print <<"EOH" if !$seen_p_text{$text}++;

$text

EOH } warn "No text found: $infourl" if !$hit; } die $pageurl if !$did; $pageno++; } } print <<"EOH";

EOF

EOH