Move 'captivemodid' library to libcaptive.
[captive.git] / src / libcaptive / captivemodid / captivemodid-list.pl
1 #! /usr/bin/perl
2 #
3 # $Id$
4 # Extract the list of files to download from Microsoft.
5 # Copyright (C) 2005 Jan Kratochvil <project-captive@jankratochvil.net>
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20
21 use strict;
22 use warnings;
23
24 require LWP::Simple;
25 use URI::Escape;
26
27
# Decode the "&amp;" entities of a scraped HTML attribute value in place.
#   $text_ref - reference to the string to rewrite.
# "&amp;" is the only entity supported: the call dies when the string holds
# any "&" which does not start an "&amp;" sequence.
sub unamp($)
{
	my $text_ref=shift;

	if (${$text_ref}=~/&(?!amp;)/) { die "non-amp amp: ${$text_ref}"; }
	${$text_ref}=~s/&amp;/&/g;
}
35
# Stage 1: walk the paginated search-result index and collect the product
# page URLs (relative to $BASE) into @found.
# Every assumption about the scraped HTML is verified; any mismatch aborts
# the run with a message naming the failed check, so that a layout change
# of the site cannot silently produce an incomplete list.

# The validation code is mandatory; refuse to start any download without it.
my $VALIDATION_CODE=$ARGV[0] or die "ARGV[0]=VALIDATION_CODE";
my $BASE='http://www.microsoft.com/downloads/';
# Number of results requested per index page (the "nr=" query parameter);
# every page except the last one must list exactly this many results.
my $PAGE_SIZE=50;
my $next="results.aspx?freetext=&productID=4C937A02-BAE0-4317-A1A9-0C56CD979D05&categoryId=7&period=&sortCriteria=date&nr=$PAGE_SIZE&DisplayLang=en&type=a";
my $found_total;
my @found;
my $found_processed=0;

print STDERR "Downloading index: ";
while ($next) {
	my $index_url=$BASE.$next;
	my $page=LWP::Simple::get($index_url) or die "Failed to download index page: $index_url";
	my($page_total,$page_first,$page_last)=($page=~m{<b>(\d+)</b> results found; results <b>(\d+)-(\d+)</b> shown.})
			or die "Result counters not found in index page: $index_url";
	die "No results reported by index page: $index_url" if !$page_total;
	# The total must stay the same across all the pages of the index.
	$found_total=$page_total if !$found_total;
	$found_total==$page_total or die "found_total($found_total)!=page_total($page_total)";
	# Each page must continue exactly where the previous one ended.
	$found_processed+1==$page_first
			or die "(found_processed+1)(".($found_processed+1).")!=page_first($page_first)";
	$page_last>=$page_first or die "page_last($page_last)<page_first($page_first)";
	if ($page_last<$found_total) {
		# Not the last page: it must be full and it must link the next one.
		$page_last==$page_first+$PAGE_SIZE-1
				or die "page_last($page_last)!=page_first($page_first)+PAGE_SIZE($PAGE_SIZE)-1";
		($next)=($page=~m{<a href="([^"]*)">Next\s+&gt;</a>}) or die "\"Next\" link not found in index page: $index_url";
		unamp(\$next);
		$next=~/^info.aspx[?]/ or die "Unexpected \"Next\" link: $next";
		}
	else {
		$page_last==$found_total or die "page_last($page_last)!=found_total($found_total)";
		$next=undef();
		}

	# One paragraph per result; the "Genuine Windows download" icon link is optional.
	while ($page=~m{<p><a href="([^"]*)">[^<]*</a>(?:&nbsp;<a href=[^>]*><img[^>]*\balt="Genuine Windows download"[^>]*></a>)?</p>}g) {
		my $product_url=$1;
		$found_processed++;
		unamp(\$product_url);
		push @found,$product_url;
		}
	print STDERR ".";
	# The number of results parsed so far must agree with the page's own counter.
	$found_processed==$page_last or die "found_processed(".($found_processed).")!=page_last($page_last)";
	}
print STDERR " found: ".(0+@found)."\n";
73
# Stage 2: visit every product page from @found - once per language offered
# by its "displaylang" selector - and collect the file URLs into @download.
# Progress markers written to STDERR:
#   "+" a product page was fetched
#   "." a page with a single window.open() download link
#   "_" a page with a "multiFileList" table of links
#   "!" a page with the "regsysNotRegistered" block; nothing is collected
my @download;
# Upper bound on chained "Object moved" pages followed for a single URL, so
# that a redirection cycle aborts the run instead of looping forever.
my $MAX_MOVED_HOPS=16;
print STDERR "Downloading product pages: ";
for my $found (@found) {
	my $product_page=LWP::Simple::get($BASE.$found) or die "Failed to download product page: $BASE$found";
	my($lang)=($product_page=~m{<select name="displaylang" [^>]*>((?:<option [^>]*>[^<>]*</option>)+)</select>});
	# <option value="en" selected>English</option><option value="fr">French</option></select>
	print STDERR "+";
	my @stage2;
	if (!$lang) {
		# No language selector: the page itself is the only variant.
		@stage2=$found;
		}
	else {
		# Consume the <option> elements one by one; only the value is needed.
		while ($lang=~s{<option value="([^"]*)"(?: selected)?>([^<>]*)</option>}{}) {
			my $short=$1;
			# The parameter may be URI-escaped inside a redirector link;
			# the hex digits of such an escape are case-insensitive.
			(my $found_lang=$found)=~s/((?:\b|%26)DisplayLang(?:=|%3[dD]))en\b/$1$short/
					or die "DisplayLang=en not found in: $found";
			push @stage2,$found_lang;
			}
		# Anything left over means the selector was not parsed completely.
		die "Unparsed language options left: $lang" if $lang;
		die "No language variants found for: $found" if !@stage2;
		}
	for my $stage2 (@stage2) {
		# Append the validation code using the same (possibly escaped)
		# delimiter the URL already uses for its DisplayLang parameter.
		my $delim='&';
		$delim='%26' if $stage2=~/%26DisplayLang/i;
		$stage2.="${delim}Hash=$VALIDATION_CODE";
		$stage2=$BASE.$stage2;
		my $page=LWP::Simple::get($stage2) or die "Failed to download: $stage2";
		# Follow the redirections which are delivered as an HTML body.
		my $hops=0;
		while ($page=~m{<h2>Object moved to <a href="([^"]*)">}) {
			++$hops<=$MAX_MOVED_HOPS or die "Too many \"Object moved\" redirections: $stage2";
			$stage2=$1;
			unamp(\$stage2);
			$page=LWP::Simple::get($stage2) or die "Failed to download: $stage2";
			}
		# At most one window.open() download link is expected per page.
		my $single;
		while ($page=~m{window.open(?:.)'(http://download.microsoft.com/download/[^']*)',null,}g) {
			die "Multiple window.open() downloads in: $stage2" if $single;
			$single=$1;
			}
		if ($single) {
			push @download,$single;
			print STDERR ".";
			}
		# NOTE(review): without /s this only matches a table placed on a single line - confirm.
		elsif ($page=~m{<table id="multiFileList"(.*?)</table>}) {
			my $files=$1;
			my $download_orig=@download;
			while ($files=~m{<a href="([^"]*)">[^<>]*</a>}g) {
				my $url=$1;
				unamp(\$url);
				push @download,$url;
				}
			die "No links found in multiFileList of: $stage2" if $download_orig==@download;
			print STDERR "_";
			}
		elsif ($page=~m{<div id="regsysNotRegistered">}) {
			print STDERR "!";
			}
		else {
			die "Unrecognized page: $stage2";
			}
		}
	}
# Stage 3: normalize, deduplicate and print the collected download URLs.

# Entries still wrapped as "info.aspx?...&u=<escaped URL>" are replaced by
# the unescaped value of their trailing "u" parameter.  The hex digit of the
# escaped colon is accepted in either case ("%3a" or "%3A").
for (@download) {
	next if !/^info.aspx[?].*&u=(http%3[aA][^&]*)$/;
	$_=uri_unescape($1);
	}
# Drop the duplicates; the result is sorted as strings.
my %download_seen=map(($_=>1),@download);
@download=sort keys(%download_seen);
print STDERR " found downloads: ".(0+@download)."\n";
# The list itself goes to STDOUT, one URL per line.
print "$_\n" for @download;