From c8f94771b5de8eba10e174a63a8d5c778dff065e Mon Sep 17 00:00:00 2001 From: lace <> Date: Wed, 14 Jul 2010 11:13:07 +0000 Subject: [PATCH] idnes-foto-old1 --- public_html/cgi-bin/idnes-foto | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/public_html/cgi-bin/idnes-foto b/public_html/cgi-bin/idnes-foto index eaa15c9..dfdbd96 100755 --- a/public_html/cgi-bin/idnes-foto +++ b/public_html/cgi-bin/idnes-foto @@ -2,6 +2,7 @@ use strict; use warnings; require LWP::Simple; +require HTTP::Date; require URI::Escape; $|=1; @@ -18,8 +19,13 @@ else { } if ($ENV{"GATEWAY_INTERFACE"}) { + my $future=HTTP::Date::time2str(2000000000); + my $past =HTTP::Date::time2str(1000000000); print <<"EOH"; Content-type: text/html; charset=windows-1250 +Cache-Control: public +Expires: $future +Last-Modified: $past EOH } @@ -33,7 +39,7 @@ print <<"EOH"; EOH -$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=\w+&c=\w+|c=\w+&r=\w+)$} or $BASE=""; +$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-\w]+&c=[-\w]+|c=\w+&r=\w+)$} or $BASE=""; if ($BASE) { my $pageno=1; my %seen; @@ -42,25 +48,30 @@ PAGES: for (;;) { my $pageurl=$BASE.'&strana='.$pageno; my $page=LWP::Simple::get($pageurl) or die $pageurl; my $did=0; - while ($page=~m{ - + EOH - my $infourl=$BASE.'&styl=zoom&foto='.$base; - my $info=LWP::Simple::get($infourl) or die $infourl; - $info=~m{

[^<]*

} or die "No text found: $infourl"; - my $p_text=$&; - print <<"EOH" if !$seen_p_text{$p_text}++; -$p_text + my $text_last=keys(%seen_p_text);; + for my $text ( + $info=~m{

([^<>]*)

}, + $info=~m{

([^<>]*)

([^<>]*)

Autor:\s+(.*?)(?:, iDNES.cz)?

}s, + ) { + next if !$text; + print <<"EOH" if !$seen_p_text{$text}++; +

$text

EOH + } + warn "No text found: $infourl" if $text_last==keys(%seen_p_text); } die $pageurl if !$did; $pageno++; -- 1.8.3.1