4 # Extract the list of files to download from Microsoft.
5 # Copyright (C) 2005 Jan Kratochvil <project-captive@jankratochvil.net>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Sanity check on the string that $ref points to: abort if it contains an
# "&" that is not immediately followed by "amp;" (negative lookahead).
# NOTE(review): the enclosing sub is on lines not shown here; presumably the
# caller goes on to decode "&amp;" entities -- confirm against the full file.
32 die "non-amp amp: $$ref" if $$ref=~/&(?!amp;)/;
# Phase 1: fetch the search-result index page by page and validate the
# paging counters reported by each page.  Progress goes to STDERR.
# NOTE(review): the loop header, several closing braces and the declarations
# of $found_total and @found are on lines not shown in this excerpt.
36 print STDERR "Downloading index: ";
# Running count of result entries handled so far; cross-checked below against
# the first/last entry numbers that each page reports.
39 my $found_processed=0;
# Relative URL of the next index page to fetch, initialised to the first
# results page.  The query asks for nr=50 entries per page, which is the page
# size the full-page check below relies on.
40 my $next='results.aspx?freetext=&productID=4C937A02-BAE0-4317-A1A9-0C56CD979D05&categoryId=7&period=&sortCriteria=date&nr=50&DisplayLang=en&type=a';
# Prefix prepended to every relative URL before it is fetched.
41 my $BASE='http://www.microsoft.com/downloads/';
# The first command-line argument is mandatory; it is later appended to the
# per-language URLs as the "Hash" query parameter.
42 my $VALIDATION_CODE=$ARGV[0] or die "ARGV[0]=VALIDATION_CODE";
# Fetch the current index page; die if nothing (a false value) came back.
45 my $page=LWP::Simple::get($BASE.$next) or die;
# Parse the "<b>N</b> results found; results <b>A-B</b> shown." banner into
# the total count and the first/last entry numbers of this page.
# NOTE(review): the final "." of the pattern is unescaped, so it matches any
# character.
46 my($page_total,$page_first,$page_last)=($page=~m{<b>(\d+)</b> results found; results <b>(\d+)-(\d+)</b> shown.}) or die;
# Remember the total from the first page seen; every later page must report
# the very same total.
48 $found_total=$page_total if !$found_total;
49 $found_total==$page_total or die;
# The page must start exactly one past the last entry already processed.
50 $found_processed+1==$page_first or die "(found_processed+1)(".($found_processed+1)."!=page_first($page_first)";
51 $page_last>=$page_first or die;
# Not the final page yet: it must be a full page of 50 entries and must carry
# a "Next >" link, whose href becomes the next URL to fetch.
52 if ($page_last<$found_total) {
53 $page_last==$page_first+50-1 or die;
54 ($next)=($page=~m{<a href="([^"]*)">Next\s+></a>}) or die;
# The link to follow has to start with "info.aspx?".
# NOTE(review): the "." in this pattern is unescaped (matches any character);
# any processing of $next between the two statements is on a line not shown.
56 $next=~/^info.aspx[?]/ or die;
# Presumably the else branch (its header is on lines not shown): the final
# page must end exactly at the reported total.
59 $page_last==$found_total or die;
# Walk over every result entry of the page: a <p> holding a link, optionally
# followed by a linked "Genuine Windows download" image.  $1 is the entry's
# href.  The loop body is on lines not shown; presumably it stores the href in
# @found and increments $found_processed -- confirm against the full file.
63 while ($page=~m{<p><a href="([^"]*)">[^<]*</a>(?: <a href=[^>]*><img[^>]*\balt="Genuine Windows download"[^>]*></a>)?</p>}g) {
# After a page has been scanned, the processed count must equal the last entry
# number that the page claimed to show.
70 $found_processed==$page_last or die "found_processed(".($found_processed).")!=page_last($page_last)";
# Report how many entries were collected in @found.
72 print STDERR " found: ".(0+@found)."\n";
# Phase 2: fetch the page of every collected entry and derive one URL per
# language offered by that page.  Results accumulate in @stage2 (declared on
# a line not shown in this excerpt).
75 print STDERR "Downloading product pages: ";
76 for my $found (@found) {
# Fetch the entry's page; die if nothing (a false value) came back.
77 my $page=LWP::Simple::get($BASE.$found) or die;
# Capture the run of <option> elements inside the "displaylang" <select>.
# NOTE(review): there is no "or die" here, so $lang is undef when the page has
# no such <select>; any handling of that case is on lines not shown.
78 my($lang)=($page=~m{<select name="displaylang" [^>]*>((?:<option [^>]*>[^<>]*</option>)+)</select>});
# Example of the markup being parsed:
79 # <option value="en" selected>English</option><option value="fr">French</option></select>
# Consume the options one at a time: each iteration removes the first
# remaining <option> from $lang, so the loop ends once none is left.
86 while ($lang=~s{<option value="([^"]*)"(?: selected)?>([^<>]*)</option>}{}) {
# $short is the option's value attribute, $long its visible label.
87 my($short,$long)=($1,$2);
# Copy the entry URL and replace the "en" value of its DisplayLang parameter
# (written plainly or URL-encoded as %26 ... %3d) with this option's value;
# die with the URL if no such parameter was found.
88 (my $found_lang=$found)=~s/((?:\b|%26)DisplayLang(?:=|%3d))en\b/$1$short/ or die $found;
89 push @stage2,$found_lang;
# Phase 3: fetch every per-language URL with the validation code attached and
# scrape the resulting page for download links, which accumulate in @download
# (declared on a line not shown in this excerpt).
# Note that the foreach variable aliases the array elements, so the
# assignments to $stage2 below rewrite the entries of @stage2 in place.
94 for my $stage2 (@stage2) {
# $delim is declared on a line not shown.  When the URL carries DisplayLang as
# a URL-encoded parameter (case-insensitive match), the URL-encoded "&" is
# used as the separator for the appended parameter as well.
96 $delim='%26' if $stage2=~/%26DisplayLang/i;
# Append the validation code as the "Hash" parameter and make the URL absolute.
97 $stage2.="${delim}Hash=$VALIDATION_CODE";
98 $stage2=$BASE.$stage2;
# Fetch the page; die with the URL if nothing (a false value) came back.
99 my $page=LWP::Simple::get($stage2) or die $stage2;
# Keep following "Object moved to <a href=...>" pages until a real page is
# returned.  Presumably the lines not shown set $stage2 from the captured
# href before the re-fetch -- confirm against the full file.
100 while ($page=~m{<h2>Object moved to <a href="([^"]*)">}) {
103 $page=LWP::Simple::get($stage2) or die $stage2;
# Case 1: links passed to window.open(...,null,...) that point below
# http://download.microsoft.com/download/.  "(?:.)" matches any single
# character where the opening parenthesis of the call is expected.
# NOTE(review): the "if" that the "elsif" branches below belong to, and the
# assignment of $download, are on lines not shown.
106 while ($page=~m{window.open(?:.)'(http://download.microsoft.com/download/[^']*)',null,}g) {
111 push @download,$download;
# Case 2: the page lists several files in a table with id "multiFileList".
# NOTE(review): "(.*?)" is used without /s, so it cannot span a newline.
114 elsif ($page=~m{<table id="multiFileList"(.*?)</table>}) {
# Remember how many downloads were known before scanning the table, so an
# empty scan can be detected afterwards.
116 my $download_orig=@download;
# Visit every link in $files (assigned on a line not shown; presumably the
# table contents captured above).
117 while ($files=~m{<a href="([^"]*)">[^<>]*</a>}g) {
# The table must have contributed at least one new download.
122 die if $download_orig==@download;
# Case 3: the page contains the "regsysNotRegistered" <div>; its handling is
# on lines not shown.
125 elsif ($page=~m{<div id="regsysNotRegistered">}) {
# Skip the current $_ unless it is an "info.aspx?" link that ends with a
# "u=" parameter holding a URL-encoded http: address, which is captured.
# The loop that sets $_ is on lines not shown.
134 next if !/^info.aspx[?].*&u=(http%3a[^&]*)$/;
# Remove duplicates by using the URLs as hash keys, then sort them.
137 @download=sort keys(%{{ map(($_=>1),@download) }});
# Report the count on STDERR and print the URLs, one per line, on STDOUT.
138 print STDERR " found downloads: ".(0+@download)."\n";
139 print "$_\n" for @download;