-git: checkout: +--ignore-other-worktrees
[nethome.git] / public_html / cgi-bin / idnes-foto
index 2cd8e73..6ead9ce 100755 (executable)
@@ -41,7 +41,30 @@ print <<"EOH";
        <input type="submit">
 </form>
 EOH
-$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp?\E(?:r=[-_\w]+&c=[-_\w]+|c=[-_\w]+&r=[-_\w]+)$} or $BASE="";
+$BASE=~m{^http://\w+\Q.idnes.cz/foto.asp\Ex?[?](?:r=[-_\w]+&c=[-_\w]+|c=[-_\w]+&r=[-_\w]+)$} or $BASE="";
+
+my $ua=LWP::UserAgent->new();
+$ua->env_proxy();
+$ua->requests_redirectable([]);
+
+#sub webget($)
+#{
+#  my($url)=@_;
+#
+#  my $resp=$ua->get($url);
+#  if ($resp->is_redirect()) {
+#    my $relurl=$resp->header("Location");
+#    $relurl=~m{^foto.*[?]} or die;
+#    $url=~s{[^/]*$}{};
+#    $url.=$relurl;
+#    warn "Redirect to: $url\n";
+#    $resp=$ua->get($url);
+#  }
+#  $resp->is_success() or die $url;
+#  return $resp->decoded_content();
+#}
+
 if ($BASE) {
        my $pageno=1;
        my %seen;
@@ -50,7 +73,43 @@ PAGES:       for (;;) {
                my $pageurl=$BASE.'&strana='.$pageno;
                my $page=LWP::Simple::get($pageurl) or die $pageurl;
                my $did=0;
-               while ($page=~m{<img src="http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/([-.\w]+.jpg)" }gi) {
+               my $aspx=0;
+               while ($page=~m{<a href="([?]c=[^<>"]*&amp;foto=[^<>"&]*(?:&amp;inframe=1)?)"(?: target="foto-iframe")?>}gi) {
+                       my $relurl=$1;
+                       $relurl=~s{&amp;}{&}g;
+                       my $frameurl=$BASE;
+                       $frameurl=~s{[?].*}{};
+                       $frameurl.=$relurl;
+                       my $frame=LWP::Simple::get($frameurl) or die $frameurl;
+                       if ($frame=~m{<iframe name="foto-iframe" id="foto-iframe" [^<>]* src="([?]c=[^<>"]*&amp;inframe=1&amp;[^<>"]*foto=[^<>"]*)">}) {
+                               $relurl=$1;
+                               $relurl=~s{&amp;}{&}g;
+                               $frameurl=~s{[?].*}{};
+                               $frameurl.=$relurl;
+                               $frame=LWP::Simple::get($frameurl) or die $frameurl;
+                       }
+                       if ($frame!~m{<img\s+id="fotka"\s+alt="([^"]*)"\s+src="(http://\w+\Q.idnes.cz/\E[\d/]+)/(?:gal|cl6h|cl6)/([^<>"]+)" }is) {
+die $frame;
+                               last if !$did;
+                               die "No aspx image found: $frameurl";
+                       }
+                       $did=1;
+                       $aspx=1;
+                       my $text=$1;
+                       my $img_src="$2/org/$3";
+                       if (!LWP::Simple::head($img_src)) {
+                               $img_src="$2/maxi/$3";
+                               die $img_src if !LWP::Simple::head($img_src);
+                       }
+                       print <<"EOH";
+<hr>
+<img src="$img_src" border="0">
+EOH
+                       print <<"EOH" if !$seen_p_text{$text}++;
+<p>$text</p>
+EOH
+               }
+               while (!$aspx && $page=~m{<img src="http://(?:\Qi.idnes.cz\E/\d{2}/\d{3}|\Qimgs.idnes.cz\E/\w+)(?:/\w+)?/([-.\w]+.jpg)" }gi) {
                        $did=1;
                        my($base)=($1);
                        last PAGES if $seen{$base}++;
@@ -81,6 +140,7 @@ EOH
                        warn "No text found: $infourl" if !$hit;
                        }
                die $pageurl if !$did;
+               last if $aspx;
                $pageno++;
                }
        }