-git: checkout: +--ignore-other-worktrees
[nethome.git] / public_html / cgi-bin / idnes-foto
1 #! /usr/bin/perl
2 use strict;
3 use warnings;
4 require LWP::Simple;
5 require HTTP::Date;
6 require URI::Escape;
7
8 $|=1;
9 my $BASE;
10 if (exists $ENV{"QUERY_STRING"}) {
11         $BASE=($ENV{"QUERY_STRING"}=~m{^url=([^&]*)})[0];
12         $BASE=URI::Escape::uri_unescape($BASE);
13         0==@ARGV or die "ARGV count != 0";
14         $BASE||="";
15         }
16 else {
17         1==@ARGV or die "ARGV count != 1";
18         $BASE=$ARGV[0];
19         }
20
21 # Somehow Perl modules started reencoding windows-1250 -> utf-8.
22
23 if ($ENV{"GATEWAY_INTERFACE"}) {
24         my $future=HTTP::Date::time2str(2000000000);
25         my $past  =HTTP::Date::time2str(1000000000);
26         print <<"EOH";
27 Content-type: text/html; charset=utf-8
28 Cache-Control: public
29 Expires: $future
30 Last-Modified: $past
31
32 EOH
33         }
34 print <<"EOH";
35 <html><head>
36 <title>iDNES foto@{[ (!$BASE ? "" : ": $BASE") ]}</title>
37 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
38 </head><body>
39 <form action="idnes-foto" method="get">
40         <input type="text" name="url" size="50" value="$BASE">
41         <input type="submit">
42 </form>
43 EOH
44 $BASE=~m{^http://\w+\Q.idnes.cz/foto.asp\Ex?[?](?:r=[-_\w]+&c=[-_\w]+|c=[-_\w]+&r=[-_\w]+)$} or $BASE="";
45
46  
47 my $ua=LWP::UserAgent->new();
48 $ua->env_proxy();
49 $ua->requests_redirectable([]);
50
51 #sub webget($)
52 #{
53 #  my($url)=@_;
54 #
55 #  my $resp=$ua->get($url);
56 #  if ($resp->is_redirect()) {
57 #    my $relurl=$resp->header("Location");
58 #    $relurl=~m{^foto.*[?]} or die;
59 #    $url=~s{[^/]*$}{};
60 #    $url.=$relurl;
61 #    warn "Redirect to: $url\n";
62 #    $resp=$ua->get($url);
63 #  }
64 #  $resp->is_success() or die $url;
65 #  return $resp->decoded_content();
66 #}
67
68 if ($BASE) {
69         my $pageno=1;
70         my %seen;
71         my %seen_p_text;
72 PAGES:  for (;;) {
73                 my $pageurl=$BASE.'&strana='.$pageno;
74                 my $page=LWP::Simple::get($pageurl) or die $pageurl;
75                 my $did=0;
76                 my $aspx=0;
77                 while ($page=~m{<a href="([?]c=[^<>"]*&amp;foto=[^<>"&]*(?:&amp;inframe=1)?)"(?: target="foto-iframe")?>}gi) {
78                         my $relurl=$1;
79                         $relurl=~s{&amp;}{&}g;
80                         my $frameurl=$BASE;
81                         $frameurl=~s{[?].*}{};
82                         $frameurl.=$relurl;
83                         my $frame=LWP::Simple::get($frameurl) or die $frameurl;
84                         if ($frame=~m{<iframe name="foto-iframe" id="foto-iframe" [^<>]* src="([?]c=[^<>"]*&amp;inframe=1&amp;[^<>"]*foto=[^<>"]*)">}) {
85                                 $relurl=$1;
86                                 $relurl=~s{&amp;}{&}g;
87                                 $frameurl=~s{[?].*}{};
88                                 $frameurl.=$relurl;
89                                 $frame=LWP::Simple::get($frameurl) or die $frameurl;
90                         }
91                         if ($frame!~m{<img\s+id="fotka"\s+alt="([^"]*)"\s+src="(http://\w+\Q.idnes.cz/\E[\d/]+)/(?:gal|cl6h|cl6)/([^<>"]+)" }is) {
92 die $frame;
93                                 last if !$did;
94                                 die "No aspx image found: $frameurl";
95                         }
96                         $did=1;
97                         $aspx=1;
98                         my $text=$1;
99                         my $img_src="$2/org/$3";
100                         if (!LWP::Simple::head($img_src)) {
101                                 $img_src="$2/maxi/$3";
102                                 die $img_src if !LWP::Simple::head($img_src);
103                         }
104                         print <<"EOH";
105 <hr>
106 <img src="$img_src" border="0">
107 EOH
108                         print <<"EOH" if !$seen_p_text{$text}++;
109 <p>$text</p>
110 EOH
111                 }
112                 while (!$aspx && $page=~m{<img src="http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/([-.\w]+.jpg)" }gi) {
113                         $did=1;
114                         my($base)=($1);
115                         last PAGES if $seen{$base}++;
116                         my $infourl=$BASE.'&styl=zoom&foto='.$base;
117                         my $info=LWP::Simple::get($infourl) or die $infourl;
118                         $info=~m{<img\s+id="fotka"\s+src="([^<>"]+)" }is
119                                         or die "No image found: $infourl";
120                         my $img_src=$1;
121                         print <<"EOH";
122 <hr>
123 <img src="$img_src" border="0">
124 EOH
125                         my $hit;
126                         for my $text (
127                                         $info=~m{<span>(.*?)</span>}s,
128                                         $info=~m{<p>([^<>]*)</p>},
129                                         $info=~m{<div class="text"><!--google_ad_section_start--><h4>([^<>]*)</h4><p>([^<>]*)<!--google_ad_section_end--></p><p>Autor:\s+(.*?)(?:, <a target="_blank" href="http://www.idnes.cz">iDNES.cz</a>)?</p></div>}s,
130                                         ) {
131                                 next if !$text;
132                                 $text=~s/^\s+//s;
133                                 $text=~s/\s+$//s;
134                                 next if !$text;
135                                 $hit++;
136                                 print <<"EOH" if !$seen_p_text{$text}++;
137 <p>$text</p>
138 EOH
139                                 }
140                         warn "No text found: $infourl" if !$hit;
141                         }
142                 die $pageurl if !$did;
143                 last if $aspx;
144                 $pageno++;
145                 }
146         }
147 print <<"EOH";
148 <hr>
149 <p>EOF</p>
150 </body></html>
151 EOH