use strict;
use warnings;
require LWP::Simple;
+require HTTP::Date;
require URI::Escape;
$|=1;
}
if ($ENV{"GATEWAY_INTERFACE"}) {
+ my $future=HTTP::Date::time2str(2000000000); # fixed epoch 2000000000; interpolated into the Expires header printed below
+ my $past =HTTP::Date::time2str(1000000000); # fixed epoch 1000000000; interpolated into the Last-Modified header printed below
print <<"EOH";
Content-type: text/html; charset=windows-1250
+Cache-Control: public
+Expires: $future
+Last-Modified: $past
EOH
}
<input type="submit">
</form>
EOH
-$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=\w+&c=\w+|c=\w+&r=\w+)$} or $BASE="";
+$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-\w]+&c=[-\w]+|c=[-\w]+&r=[-\w]+)$} or $BASE=""; # r= and c= accepted in either order, hyphens allowed in both; any other URL clears $BASE
if ($BASE) {
my $pageno=1;
my %seen;
my $pageurl=$BASE.'&strana='.$pageno;
my $page=LWP::Simple::get($pageurl) or die $pageurl;
my $did=0;
- while ($page=~m{<img src="(http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+))(/midi)?/([-\w]+.jpg)" }gi) {
+ while ($page=~m{<img src="http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/([-.\w]+.jpg)" }gi) {
$did=1;
- my($start,$mid,$base)=($1,$2,$3);
+ my($base)=($1);
last PAGES if $seen{$base}++;
- # FIXME: Sometimes "/maxi" does not exist.
- $mid&&="/maxi";
- $base=~s/_1M.JPG$/_V.JPG/;
- my $maxi=$start.($mid||"")."/".$base;
+ my $infourl=$BASE.'&styl=zoom&foto='.$base;
+ my $info=LWP::Simple::get($infourl) or die $infourl;
+ $info=~m{<img src="(http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/[-.\w]+.jpg)" }i
+ or die "No image found: $infourl";
+ my $img_src=$1;
print <<"EOH";
<hr>
-<img src="$maxi" border="0">
+<img src="$img_src" border="0">
EOH
- my $infourl=$BASE.'&styl=zoom&foto='.$base;
- my $info=LWP::Simple::get($infourl) or die $infourl;
- $info=~m{<p>[^<]*</p>} or die "No text found: $infourl";
- my $p_text=$&;
- print <<"EOH" if !$seen_p_text{$p_text}++;
-$p_text
+ my $text_last=keys(%seen_p_text);;
+ for my $text (
+ $info=~m{<p>([^<>]*)</p>},
+ $info=~m{<div class="text"><!--google_ad_section_start--><h4>([^<>]*)</h4><p>([^<>]*)<!--google_ad_section_end--></p><p>Autor:\s+(.*?)(?:, <a target="_blank" href="http://www.idnes.cz">iDNES.cz</a>)?</p></div>}s,
+ ) {
+ next if !$text;
+ print <<"EOH" if !$seen_p_text{$text}++;
+<p>$text</p>
EOH
+ }
+ warn "No text found: $infourl" if $text_last==keys(%seen_p_text);
}
die $pageurl if !$did;
$pageno++;