From: lace <> Date: Mon, 19 Dec 2005 07:30:46 +0000 (+0000) Subject: +Microsoft files downloader/extractor/.captivemodid.xml builders. X-Git-Tag: bp_HEAD~53 X-Git-Url: http://git.jankratochvil.net/?a=commitdiff_plain;h=9ee0cebcce71f1abb729a0a1f0f1157cedb1518c;hp=e8c5249a31c2ce2578e39ff3d1fb3a903ea95d17;p=captive.git +Microsoft files downloader/extractor/.captivemodid.xml builders. --- diff --git a/src/install/acquire/Makefile.am b/src/install/acquire/Makefile.am index 7f97717..d55c791 100644 --- a/src/install/acquire/Makefile.am +++ b/src/install/acquire/Makefile.am @@ -1,6 +1,6 @@ # $Id$ # automake source for drivers acquiring installation utility Makefile -# Copyright (C) 2003 Jan Kratochvil +# Copyright (C) 2003-2005 Jan Kratochvil # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -67,7 +67,9 @@ CLEANFILES+=gnome_vfs_read_entire_file.c EXTRA_DIST+= \ $(GLADE_IN) \ - cabextract/.vimrc + cabextract/.vimrc \ + captivemodid-list.pl \ + captivemodid-print.pl captive_install_acquire-ui-gnome-interface.$(OBJEXT): ui-gnome-callbacks.h diff --git a/src/install/acquire/captivemodid-list.pl b/src/install/acquire/captivemodid-list.pl new file mode 100755 index 0000000..2a1f22b --- /dev/null +++ b/src/install/acquire/captivemodid-list.pl @@ -0,0 +1,139 @@ +#! /usr/bin/perl +# +# $Id$ +# Extract the list of files to download from Microsoft. +# Copyright (C) 2005 Jan Kratochvil +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; exactly version 2 of June 1991 is required +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +use strict; +use warnings; + +require LWP::Simple; +use URI::Escape; + + +sub unamp($) +{ +my($ref)=@_; + + die "non-amp amp: $$ref" if $$ref=~/&(?!amp;)/; + $$ref=~s/&/&/go; +} + +print STDERR "Downloading index: "; +my $found_total; +my @found; +my $found_processed=0; +my $next='results.aspx?freetext=&productID=4C937A02-BAE0-4317-A1A9-0C56CD979D05&categoryId=7&period=&sortCriteria=date&nr=50&DisplayLang=en&type=a'; +my $BASE='http://www.microsoft.com/downloads/'; +my $VALIDATION_CODE=$ARGV[0] or die "ARGV[0]=VALIDATION_CODE"; + +while ($next) { + my $page=LWP::Simple::get($BASE.$next) or die; + my($page_total,$page_first,$page_last)=($page=~m{(\d+) results found; results (\d+)-(\d+) shown.}) or die; + die if !$page_total; + $found_total=$page_total if !$found_total; + $found_total==$page_total or die; + $found_processed+1==$page_first or die "(found_processed+1)(".($found_processed+1)."!=page_first($page_first)"; + $page_last>=$page_first or die; + if ($page_last<$found_total) { + $page_last==$page_first+50-1 or die; + ($next)=($page=~m{Next\s+>}) or die; + unamp \$next; + $next=~/^info.aspx[?]/ or die; + } + else { + $page_last==$found_total or die; + $next=undef(); + } + + while ($page=~m{

[^<]*(?: ]*>]*\balt="Genuine Windows download"[^>]*>)?

}g) { + my($url)=($1); + $found_processed++; + unamp \$url; + push @found,$url; + } + print STDERR "."; + $found_processed==$page_last or die "found_processed(".($found_processed).")!=page_last($page_last)"; + } +print STDERR " found: ".(0+@found)."\n"; + +my @download; +print STDERR "Downloading product pages: "; +for my $found (@found) { + my $page=LWP::Simple::get($BASE.$found) or die; + my($lang)=($page=~m{}); + # + print STDERR "+"; + my @stage2; + if (!$lang) { + @stage2=$found; + } + else { + while ($lang=~s{}{}) { + my($short,$long)=($1,$2); + (my $found_lang=$found)=~s/((?:\b|%26)DisplayLang(?:=|%3d))en\b/$1$short/ or die $found; + push @stage2,$found_lang; + } + !$lang or die; + die if !@stage2; + } + for my $stage2 (@stage2) { + my $delim='&'; + $delim='%26' if $stage2=~/%26DisplayLang/i; + $stage2.="${delim}Hash=$VALIDATION_CODE"; + $stage2=$BASE.$stage2; + my $page=LWP::Simple::get($stage2) or die $stage2; + while ($page=~m{

Object moved to }) { + $stage2=$1; + unamp \$stage2; + $page=LWP::Simple::get($stage2) or die $stage2; + } + my $download; + while ($page=~m{window.open(?:.)'(http://download.microsoft.com/download/[^']*)',null,}g) { + die if $download; + $download=$1; + } + if ($download) { + push @download,$download; + print STDERR "."; + } + elsif ($page=~m{}) { + my $files=$1; + my $download_orig=@download; + while ($files=~m{[^<>]*}g) { + my $url=$1; + unamp \$url; + push @download,$url; + } + die if $download_orig==@download; + print STDERR "_"; + } + elsif ($page=~m{
}) { + print STDERR "!"; + } + else { + die $stage2; + } + } + } +for (@download) { + next if !/^info.aspx[?].*&u=(http%3a[^&]*)$/; + $_=uri_unescape $1; + } +@download=sort keys(%{{ map(($_=>1),@download) }}); +print STDERR " found downloads: ".(0+@download)."\n"; +print "$_\n" for @download; diff --git a/src/install/acquire/captivemodid-print.pl b/src/install/acquire/captivemodid-print.pl new file mode 100755 index 0000000..3b3988e --- /dev/null +++ b/src/install/acquire/captivemodid-print.pl @@ -0,0 +1,461 @@ +#! /usr/bin/perl +# +# $Id$ +# Scan downloaded Microsoft files to build the .captivemodid.xml file core. +# Copyright (C) 2005 Jan Kratochvil +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; exactly version 2 of June 1991 is required +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +use strict; +use warnings; + +use Cwd; +use Carp qw(confess cluck); +use Digest::MD5 qw(md5_hex); +use File::Remove qw(remove); +use Getopt::Long; +use Data::Dumper; + + +my $tmp="/tmp/captivemodid"; +my $out; + + +my $opt_quiet; +die if !GetOptions( + "q|quiet+",\$opt_quiet, + ); + + +our $dirnew=$ARGV[0]; +do { $dirnew="."; warn "Defaulting ARGV[0] to: $dirnew"; } if !$dirnew; +our $out=$ARGV[1]; +warn "No output md5 directory, not copying." if !$out; + + +my %type=( + "ntoskrnl.exe"=>"Kernel", + "ntkrnlpa.exe"=>"Kernel PA", + "ntkrnlmp.exe"=>"Kernel SMP", + "ntkrpamp.exe"=>"Kernel SMP PA", + "cdfs.sys" =>"CD-ROM/iso-9660", + "fastfat.sys" =>"FastFAT/vfat", + "ntfs.sys" =>"NTFS", + ); + +my %lang=( + "ar"=>"Arabic", + "hk"=>"Chinese (Hong Kong SAR)", + "cn"=>"Chinese (Simplified)", + "tw"=>"Chinese (Traditional)", + "cs"=>"Czech", + "da"=>"Danish", + "nl"=>"Dutch", + "en"=>"English", + "fi"=>"Finnish", + "fr"=>"French", + "de"=>"German", + "el"=>"Greek", + "he"=>"Hebrew", + "hu"=>"Hungarian", + "it"=>"Italian", + "ja"=>"Japanese", + "ko"=>"Korean", + "no"=>"Norwegian", + "pl"=>"Polish", + "br"=>"Portugese (Brazil)", + "pt"=>"Portugese (Portugal)", + "ru"=>"Russian", + "es"=>"Spanish", + "sv"=>"Swedish", + "tr"=>"Turkish", + "ara"=>"Arabic", + "chh"=>"Chinese (Hong Kong SAR)", + "chs"=>"Chinese (Simplified)", + "cht"=>"Chinese (Traditional)", + "csy"=>"Czech", + "dan"=>"Danish", + "nld"=>"Dutch", + "enu"=>"English", + "fin"=>"Finnish", + "fra"=>"French", + "deu"=>"German", + "ell"=>"Greek", + "heb"=>"Hebrew", + "hun"=>"Hungarian", + "ita"=>"Italian", + "jpn"=>"Japanese", + "kor"=>"Korean", + "nor"=>"Norwegian", + "plk"=>"Polish", + "ptb"=>"Portugese (Brazil)", + "ptg"=>"Portugese (Portugal)", + "rus"=>"Russian", + "esn"=>"Spanish", + "sve"=>"Swedish", + "trk"=>"Turkish", + ); + + +sub name_to_lang($) +{ +my($name)=@_; + + $name=~tr/-/_/; + $name=lc $name; + my $r; + while (my($subst,$long)=each(%lang)) { + next if $name!~/[^a-z]$subst[^a-z]/; +# FIXME + cluck "$name: $subst" if $r; + $r=$long; + } +# FIXME + cluck $name if !$r; + return $r; +} + +my $ver_prefix="5.1.2600."; +my @stack_out; +my $STDERR_needs_eol; +my %md5sum_printed_global; + +sub stack0_flush() +{ + return if $opt_quiet; + # FIXME: Proper nesting, not just these 2 levels: + my %md5sum_printed_local; + while (my $stack=shift @stack_out) { + for my $stacki (0..$#$stack) { + my $this=$stack->[$stacki]; + next if $md5sum_printed_local{$this->{"md5"}}++; + my $dupe_global=$md5sum_printed_global{$this->{"md5"}}++; + my $print=""; + $print.=<<"HERE"; + +HERE + $print.=<<"HERE" if !$dupe_global; +{"lang"} ]} $ver_prefix@{[ $this->{"ver"} ]} @{[ $this->{"this_name"} ]}" /> +HERE + $print=~s/^/"\t" x (1+$stacki)/egm; + print STDERR "\n" if $STDERR_needs_eol; + $STDERR_needs_eol=0; + print $print; + } + } + print "\n"; +} + +our @stack; +my %md5sum_stored; +my $last_stack0_filename; + +sub check($$) +{ +my($filename_unused,$basename_orig)=@_; + + return if !$type{lc $basename_orig}; + my $final_type=$basename_orig; + my $final_name=$type{$final_type} or confess; + $final_type="ntoskrnl.exe" if $final_type=~/^nt.*[.]exe$/; + (my $basename_orig_=$basename_orig)=~s/.$/_/; + (my $basename0=$stack[0]->{"filename"})=~s{^.*/}{} or confess; + stack0_flush() if $last_stack0_filename && $last_stack0_filename ne $stack[0]->{"filename"}; + $last_stack0_filename=$stack[0]->{"filename"}; + for my $stacki (0..$#stack) { + my $this=$stack[$stacki]; + my $filename=$this->{"filename"} or confess; + my $relname=$this->{"relname"} or confess; + (my $basename=$filename)=~s{^.*/}{} or confess; + my $expanded=$stack[$#stack]->{"filename"}; + my $this_name; + my $type; + if ($stacki==$#stack-1 && $basename eq $basename_orig_) { + $this->{"type"} eq "cabinet" or confess; + $type=$this->{"type"} or confess; + $this_name="$final_name Cabinet"; + } + elsif ($stacki<$#stack) { + $type=$this->{"type"} or confess; + $this_name="Cabinet"; + } + else { + $type=$final_type or confess; + $this_name=$final_name; + } + my $length=(stat $filename)[7] or confess; + my $dist=$stack[0]->{"filename"}; + $dist=~s{^.*/}{} or confess; + my $lang=name_to_lang $basename0; + local *F; + open F,$expanded or confess; + my $buf=""; + my $buf2; + my $ver; + while (my $got=read F,$buf2,0x1000) { + confess $expanded if !defined $got; + last if !$got; + confess $expanded if $got<0; + confess $expanded if length($buf2)!=$got; + $buf.=$buf2; + $ver=$buf; + $ver=~s/\x00\x00+/!/g; + $ver=~tr/\x00//d; + $ver=~tr/!/\x00/; + last if $ver=~s/^.*\x00\Q$ver_prefix\E(\d+)[ \x00].*$/$1/s; + $ver=undef(); + $buf=substr($buf,-0x100); + } + confess $expanded if !$ver; + close F or confess; + $ver=~/^\d+$/ or confess "$expanded: $ver"; + my $pri=sprintf "510%04u00",$ver; + $pri+=90; + $pri-=10 if $lang ne "English"; + $pri-=20 if $basename_orig=~/^ntkrnlpa/; + $pri-=40 if $basename_orig=~/^ntkrnlmp/; + $pri-=60 if $basename_orig=~/^ntkrpamp/; + local *F; + open F,$filename or confess; + my $md5obj=Digest::MD5->new(); + $md5obj->addfile(*F); + close F or confess; + my $md5sum=lc $md5obj->hexdigest(); + spawn("cp -p '$filename' '$out/$md5sum'") if !$md5sum_stored{$md5sum}++ && $out && $stacki==$#stack; + + my %new=( + %$this, + "type"=>$type, + "length"=>$length, + "priority"=>$pri, + "md5"=>$md5sum, + "dist"=>$dist, + "lang"=>$lang, + "ver"=>$ver, + "this_name"=>$this_name, + ); + # Highest priority/version of all the files in the cabinet: + for (qw(ver priority)) { + next if !$this->{$_}; + $new{$_}=$this->{$_} if $new{$_}<$this->{$_}; + delete $this->{$_} if $this->{$_}<$new{$_}; + } + do { cluck Dumper($this,\%new) if $new{$_} ne $this->{$_}; } for keys(%$this); + %$this=%new; + } + push @stack_out,[@stack]; +} + +sub spawn($) +{ +my($command)=@_; + + my $code=system "($command)".' >&2'; + confess "$code: $command" if $code; +} + +sub rm_rf($) +{ +my($dir)=@_; + + return if !-e $dir; + # Do not: Can't remove directory xyzzy: Directory not empty + # -r--r--r-- xyzzy + # remove \1,$dir or confess "$dir: $!"; + spawn "rm -rf '$dir'"; +} + +sub mkdir_checked +{ +my(@dirs)=@_; + + for (@dirs) { + mkdir $_ or confess "$_: $!"; + } +} + + +our $depth=-1; + +sub prep() +{ + confess if $dirnew; + $dirnew="$tmp/$depth"; + chdir "/" or confess; + rm_rf $dirnew; + mkdir_checked $dirnew; + chdir $dirnew or confess "chdir: $dirnew"; + return $dirnew; +} + +sub relname($) +{ +my($filename)=@_; + + return $filename if $filename=~s{^\Q$tmp\E/\d+/}{}; + (my $basename=$filename)=~s{^.*/}{} or confess; + return $basename; +} + +my %unknown; +sub process(); +sub process() +{ + my $dirname=$dirnew or confess; + $dirnew=undef(); + local $depth=$depth+1; + my @stack_orig=@stack; + + local *FIND; + open FIND,"find '$dirname' -type f|sort|" or confess $dirname; + local $_; + while () { + next if $_ eq ""; + if (!$depth) { + print STDERR "."; + $STDERR_needs_eol=1; + } + chomp; + my $filename=$_; + $filename=~m{^/} or confess $filename; + (my $basename=$filename)=~s{^.*/}{} or confess; + -r $filename or do { cluck $filename; system("ls -l '$filename'"); }; + my $this={ + "filename"=>$filename, + "relname"=>relname($filename), + }; + @stack=(@stack_orig,$this); + + check $filename,$basename; + + local *MIME; + open MIME,"file -b -i '$filename'|" or confess; + my $mime=do { local $/; or confess; }; + close MIME or confess; + chomp $mime; + next if $mime=~m{^text/}; + next if $mime=~m{^image/}; + next if $mime=~m{^audio/}; + next if $mime eq 'application/x-empty'; + next if $mime eq 'application/msaccess'; + + local *ID; + open ID,"file -b '$filename'|" or confess; + my $id=do { local $/; or confess; }; + close ID or confess; + chomp $id; + next if $id eq 'Microsoft Office Document'; + next if $id=~/^PDF document, version 1[.]\d+$/; + next if $id eq 'Rich Text Format data, version 1,'; + next if $id=~/^x86 boot sector, /; + next if $id eq 'data'; + next if $id eq 'MZ executable for MS-DOS'; + next if $id eq 'XML document text'; + next if $id eq 'MS Windows HtmlHelp Data'; + next if $id eq 'MPEG sequence'; + next if $id=~/^MSVC program database ver /; + next if $id eq 'Lotus 1-2-3'; + next if $id eq 'MS Windows Help Data'; + next if $id=~/^Macromedia Flash data, /; + next if $id eq 'Microsoft ASF'; # .wmv + next if $id eq 'GLF_BINARY_LSB_FIRST'; # .hlp + next if $id=~/^Sendmail frozen configuration /; + next if $id eq 'Assembler source'; + next if $id=~/^DBase 3 data file/; + next if $id=~/^Macintosh HFS Extended version /; + next if $id=~/^MPEG ADTS, /; # .txt?,.hlp? + next if $id eq 'MS-DOS executable (COM)'; + next if $id eq 'NE executable for MS Windows 3.x (driver)'; + next if $id eq 'TrueType font data'; + next if $id eq 'Windows NT registry file'; + next if $id eq 'lif file'; # .sig? + next if $id=~/^Infocom game data /; # ._p? + next if $id=~/^Sun disk label /; # .dl_? + next if $id=~/^compiled Java class data, /; + next if $id=~/^Minix filesystem/; # .dl_?,_p? + next if $id eq 'DBase 3 index file'; # .nls? + next if $id=~/^Macintosh MFS data /; # .cp_? + next if $id eq 'ACB archive data'; # .cat? + next if $id eq 'COM executable for MS-DOS'; # .edb? + next if $id=~/^Bio-Rad .PIC Image File/; # ._p? + next if $id eq 'SysEx File - Gulbransen'; # .edb? + + # Do not: + #next if $id=~/^PE executable for MS Windows .* 32-bit$/; # TODO + + if ($id eq 'PE executable for MS Windows (GUI) Intel 80386 32-bit, InnoSetup self-extracting archive') { + # FIXME: http://innounp.sourceforge.net/ + next; + } + if (0 + || $id=~/^Microsoft Cabinet archive data, \d+ byte(?:s)?, \d+ file(?:s)?$/ + || $id eq 'PE executable for MS Windows (GUI) Intel 80386 32-bit, MS CAB-Installer self-extracting archive' + # Catch all unidentified flying cabinets: + || $id=~/^PE executable for MS Windows .* 32-bit$/ + ) { + $this->{"type"}="cabinet"; + prep(); + spawn q{cabextract -q '}.$filename.q{' 2>&1|grep -v ': \(}.join(q{\|}, + q{WARNING; possible [0-9]* extra bytes at end of file.}, + q{no valid cabinets found}, + q{checksum error}, + q{error in CAB data format}, + ).q{\)$';true}; + process(); + next; + } + # non-"32-bit" (amd64 in fact) + next if $id=~/^PE executable for MS Windows/; + if (0 + || $id eq 'PE executable for MS Windows (GUI) Intel 80386 32-bit, ZIP self-extracting archive (WinZip)' + || $id=~/^Zip archive data, / + ) { + prep(); + spawn "unzip -q '$filename'"; + process(); + next; + } + if ($id=~m{^gzip compressed data, was "\S+", from Win/32}) { + prep(); + spawn "gzip -d <'$filename' >'$basename'"; + process(); + next; + } + if ($id eq 'tar archive') { + prep(); + spawn "tar xf '$filename'"; + process(); + next; + } + warn "$filename: $mime - $id" if !$unknown{$mime,$id}++; + } + close FIND or confess; + $dirnew=undef(); +} + + +$dirnew=getcwd()."/".$dirnew if $dirnew!~m{^/}; +$out =getcwd()."/".$out if $out && $out!~m{^/}; +rm_rf $tmp; +mkdir_checked $tmp; +if ($out) { + rm_rf $out; + mkdir_checked $out; + } +process; +stack0_flush() if $last_stack0_filename; +print STDERR "\n" if $STDERR_needs_eol; +$STDERR_needs_eol=0;