dfdbd9649833afc73a3787675631b56b273568ba
[nethome.git] / public_html / cgi-bin / idnes-foto
1 #! /usr/bin/perl
2 use strict;
3 use warnings;
4 require LWP::Simple;
5 require HTTP::Date;
6 require URI::Escape;
7
8 $|=1;
9 my $BASE;
10 if (exists $ENV{"QUERY_STRING"}) {
11         $BASE=($ENV{"QUERY_STRING"}=~m{^url=([^&]*)})[0];
12         $BASE=URI::Escape::uri_unescape($BASE);
13         0==@ARGV or die "ARGV count != 0";
14         $BASE||="";
15         }
16 else {
17         1==@ARGV or die "ARGV count != 1";
18         $BASE=$ARGV[0];
19         }
20
21 if ($ENV{"GATEWAY_INTERFACE"}) {
22         my $future=HTTP::Date::time2str(2000000000);
23         my $past  =HTTP::Date::time2str(1000000000);
24         print <<"EOH";
25 Content-type: text/html; charset=windows-1250
26 Cache-Control: public
27 Expires: $future
28 Last-Modified: $past
29
30 EOH
31         }
32 print <<"EOH";
33 <html><head>
34 <title>iDNES foto@{[ (!$BASE ? "" : ": $BASE") ]}</title>
35 <meta http-equiv="Content-Type" content="text/html; charset=windows-1250">
36 </head><body>
37 <form action="idnes-foto" method="get">
38         <input type="text" name="url" size="50" value="$BASE">
39         <input type="submit">
40 </form>
41 EOH
42 $BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-\w]+&c=[-\w]+|c=\w+&r=\w+)$} or $BASE="";
43 if ($BASE) {
44         my $pageno=1;
45         my %seen;
46         my %seen_p_text;
47 PAGES:  for (;;) {
48                 my $pageurl=$BASE.'&strana='.$pageno;
49                 my $page=LWP::Simple::get($pageurl) or die $pageurl;
50                 my $did=0;
51                 while ($page=~m{<img src="http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/([-.\w]+.jpg)" }gi) {
52                         $did=1;
53                         my($base)=($1);
54                         last PAGES if $seen{$base}++;
55                         my $infourl=$BASE.'&styl=zoom&foto='.$base;
56                         my $info=LWP::Simple::get($infourl) or die $infourl;
57                         $info=~m{<img src="(http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/[-.\w]+.jpg)" }i
58                                         or die "No image found: $infourl";
59                         my $img_src=$1;
60                         print <<"EOH";
61 <hr>
62 <img src="$img_src" border="0">
63 EOH
64                         my $text_last=keys(%seen_p_text);;
65                         for my $text (
66                                         $info=~m{<p>([^<>]*)</p>},
67                                         $info=~m{<div class="text"><!--google_ad_section_start--><h4>([^<>]*)</h4><p>([^<>]*)<!--google_ad_section_end--></p><p>Autor:\s+(.*?)(?:, <a target="_blank" href="http://www.idnes.cz">iDNES.cz</a>)?</p></div>}s,
68                                         ) {
69                                 next if !$text;
70                                 print <<"EOH" if !$seen_p_text{$text}++;
71 <p>$text</p>
72 EOH
73                                 }
74                         warn "No text found: $infourl" if $text_last==keys(%seen_p_text);
75                         }
76                 die $pageurl if !$did;
77                 $pageno++;
78                 }
79         }
80 print <<"EOH";
81 <hr>
82 <p>EOF</p>
83 </body></html>
84 EOH