4 # Scan downloaded Microsoft files to build the .captivemodid.xml file core.
5 # Copyright (C) 2005 Jan Kratochvil <project-captive@jankratochvil.net>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 use Carp qw(confess cluck);
26 use Digest::MD5 qw(md5_hex);
27 use File::Remove qw(remove);
32 my $tmp="/tmp/captivemodid";
38 "q|quiet+",\$opt_quiet,
43 do { $dirnew="."; warn "Defaulting ARGV[0] to: $dirnew"; } if !$dirnew;
45 warn "No output md5 directory, not copying." if !$out;
49 "ntoskrnl.exe"=>"Kernel",
50 "ntkrnlpa.exe"=>"Kernel PA",
51 "ntkrnlmp.exe"=>"Kernel SMP",
52 "ntkrpamp.exe"=>"Kernel SMP PA",
53 "cdfs.sys" =>"CD-ROM/iso-9660",
54 "fastfat.sys" =>"FastFAT/vfat",
60 "hk"=>"Chinese (Hong Kong SAR)",
61 "cn"=>"Chinese (Simplified)",
62 "tw"=>"Chinese (Traditional)",
78 "br"=>"Portugese (Brazil)",
79 "pt"=>"Portugese (Portugal)",
85 "chh"=>"Chinese (Hong Kong SAR)",
86 "chs"=>"Chinese (Simplified)",
87 "cht"=>"Chinese (Traditional)",
103 "ptb"=>"Portugese (Brazil)",
104 "ptg"=>"Portugese (Portugal)",
119 while (my($subst,$long)=each(%lang)) {
120 next if $name!~/[^a-z]$subst[^a-z]/;
122 cluck "$name: $subst" if $r;
130 my @checked_build=qw(
141 for my $subst (@checked_build) {
142 return 1 if $name=~/[^a-z]$subst[^a-z]/;
147 my $ver_prefix="5.1.2600.";
149 my $STDERR_needs_eol;
150 my %md5sum_printed_global;
154 return if $opt_quiet;
155 # FIXME: Proper nesting, not just these 2 levels:
156 my %md5sum_printed_local;
157 while (my $stack=shift @stack_out) {
158 for my $stacki (0..$#$stack) {
159 my $this=$stack->[$stacki];
160 next if $md5sum_printed_local{$this->{"md5"}}++;
161 my $dupe_global=$md5sum_printed_global{$this->{"md5"}}++;
164 <!-- @{[ $this->{"relname"} ]} -->
166 $print.=<<"HERE" if !$dupe_global;
167 <module type="@{[ $this->{"type"} ]}" length="@{[ $this->{"length"} ]}" priority="@{[ $this->{"priority"} ]}" md5="@{[ $this->{"md5"} ]}"
168 id="@{[ $this->{"dist"} ]} @{[ !$this->{"is_debug"} ? "" : "Checked Build "]}@{[ $this->{"lang"} ]} $ver_prefix@{[ $this->{"ver"} ]} @{[ $this->{"this_name"} ]}" />
170 $print=~s/^/"\t" x (1+$stacki)/egm;
171 print STDERR "\n" if $STDERR_needs_eol;
181 my $last_stack0_filename;
185 my($filename_unused,$basename_orig)=@_;
187 return if !$type{lc $basename_orig};
188 my $final_type=$basename_orig;
189 my $final_name=$type{$final_type} or confess;
190 $final_type="ntoskrnl.exe" if $final_type=~/^nt.*[.]exe$/;
191 (my $basename_orig_=$basename_orig)=~s/.$/_/;
192 (my $basename0=$stack[0]->{"filename"})=~s{^.*/}{} or confess;
193 stack0_flush() if $last_stack0_filename && $last_stack0_filename ne $stack[0]->{"filename"};
194 $last_stack0_filename=$stack[0]->{"filename"};
195 for my $stacki (0..$#stack) {
196 my $this=$stack[$stacki];
197 my $filename=$this->{"filename"} or confess;
198 my $relname=$this->{"relname"} or confess;
199 (my $basename=$filename)=~s{^.*/}{} or confess;
200 my $expanded=$stack[$#stack]->{"filename"};
203 if ($stacki==$#stack-1 && $basename eq $basename_orig_) {
204 $this->{"type"} eq "cabinet" or confess;
205 $type=$this->{"type"} or confess;
206 $this_name="$final_name Cabinet";
208 elsif ($stacki<$#stack) {
209 $type=$this->{"type"} or confess;
210 $this_name="Cabinet";
213 $type=$final_type or confess;
214 $this_name=$final_name;
216 my $length=(stat $filename)[7] or confess;
217 my $dist=$stack[0]->{"filename"};
218 $dist=~s{^.*/}{} or confess;
219 my $lang=name_to_lang $basename0;
221 open F,$expanded or confess;
225 while (my $got=read F,$buf2,0x1000) {
226 confess $expanded if !defined $got;
228 confess $expanded if $got<0;
229 confess $expanded if length($buf2)!=$got;
232 $ver=~s/\x00\x00+/!/g;
235 last if $ver=~s/^.*\x00\Q$ver_prefix\E(\d+)[ \x00].*$/$1/s;
237 $buf=substr($buf,-0x100);
239 confess $expanded if !$ver;
241 $ver=~/^\d+$/ or confess "$expanded: $ver";
242 my $pri=sprintf "510%04u00",$ver;
244 $pri-=10 if $lang ne "English";
245 $pri-=20 if $basename_orig=~/^ntkrnlpa/;
246 $pri-=40 if $basename_orig=~/^ntkrnlmp/;
247 $pri-=60 if $basename_orig=~/^ntkrpamp/;
248 $pri+=5000000 if my $is_debug=name_is_debug $basename0;
250 open F,$filename or confess;
251 my $md5obj=Digest::MD5->new();
252 $md5obj->addfile(*F);
254 my $md5sum=lc $md5obj->hexdigest();
255 spawn("cp -p '$filename' '$out/$md5sum'") if !$md5sum_stored{$md5sum}++ && $out && $stacki==$#stack;
266 "this_name"=>$this_name,
267 "is_debug"=>$is_debug,
269 # Highest priority/version of all the files in the cabinet:
270 for (qw(ver priority)) {
271 next if !$this->{$_};
272 $new{$_}=$this->{$_} if $new{$_}<$this->{$_};
273 delete $this->{$_} if $this->{$_}<$new{$_};
275 do { cluck Dumper($this,\%new) if $new{$_} ne $this->{$_}; } for keys(%$this);
278 push @stack_out,[@stack];
285 my $code=system "($command)".' >&2';
286 confess "$code: $command" if $code;
294 # Do not: Can't remove directory xyzzy: Directory not empty
296 # remove \1,$dir or confess "$dir: $!";
297 spawn "rm -rf '$dir'";
305 mkdir $_ or confess "$_: $!";
315 $dirnew="$tmp/$depth";
316 chdir "/" or confess;
318 mkdir_checked $dirnew;
319 chdir $dirnew or confess "chdir: $dirnew";
327 return $filename if $filename=~s{^\Q$tmp\E/\d+/}{};
328 (my $basename=$filename)=~s{^.*/}{} or confess;
336 my $dirname=$dirnew or confess;
338 local $depth=$depth+1;
339 my @stack_orig=@stack;
342 open FIND,"find '$dirname' -type f|sort|" or confess $dirname;
352 $filename=~m{^/} or confess $filename;
353 (my $basename=$filename)=~s{^.*/}{} or confess;
354 -r $filename or do { cluck $filename; system("ls -l '$filename'"); };
356 "filename"=>$filename,
357 "relname"=>relname($filename),
359 @stack=(@stack_orig,$this);
361 check $filename,$basename;
364 open MIME,"file -b -i '$filename'|" or confess;
365 my $mime=do { local $/; <MIME> or confess; };
366 close MIME or confess;
368 next if $mime=~m{^text/};
369 next if $mime=~m{^image/};
370 next if $mime=~m{^audio/};
371 next if $mime eq 'application/x-empty';
372 next if $mime eq 'application/msaccess';
375 open ID,"file -b '$filename'|" or confess;
376 my $id=do { local $/; <ID> or confess; };
379 next if $id eq 'Microsoft Office Document';
380 next if $id=~/^PDF document, version 1[.]\d+$/;
381 next if $id eq 'Rich Text Format data, version 1,';
382 next if $id=~/^x86 boot sector, /;
383 next if $id eq 'data';
384 next if $id eq 'MZ executable for MS-DOS';
385 next if $id eq 'XML document text';
386 next if $id eq 'MS Windows HtmlHelp Data';
387 next if $id eq 'MPEG sequence';
388 next if $id=~/^MSVC program database ver /;
389 next if $id eq 'Lotus 1-2-3';
390 next if $id eq 'MS Windows Help Data';
391 next if $id=~/^Macromedia Flash data, /;
392 next if $id eq 'Microsoft ASF'; # .wmv
393 next if $id eq 'GLF_BINARY_LSB_FIRST'; # .hlp
394 next if $id=~/^Sendmail frozen configuration /;
395 next if $id eq 'Assembler source';
396 next if $id=~/^DBase 3 data file/;
397 next if $id=~/^Macintosh HFS Extended version /;
398 next if $id=~/^MPEG ADTS, /; # .txt?,.hlp?
399 next if $id eq 'MS-DOS executable (COM)';
400 next if $id eq 'NE executable for MS Windows 3.x (driver)';
401 next if $id eq 'TrueType font data';
402 next if $id eq 'Windows NT registry file';
403 next if $id eq 'lif file'; # .sig?
404 next if $id=~/^Infocom game data /; # ._p?
405 next if $id=~/^Sun disk label /; # .dl_?
406 next if $id=~/^compiled Java class data, /;
407 next if $id=~/^Minix filesystem/; # .dl_?,_p?
408 next if $id eq 'DBase 3 index file'; # .nls?
409 next if $id=~/^Macintosh MFS data /; # .cp_?
410 next if $id eq 'ACB archive data'; # .cat?
411 next if $id eq 'COM executable for MS-DOS'; # .edb?
412 next if $id=~/^Bio-Rad .PIC Image File/; # ._p?
413 next if $id eq 'SysEx File - Gulbransen'; # .edb?
416 #next if $id=~/^PE executable for MS Windows .* 32-bit$/; # TODO
418 if ($id eq 'PE executable for MS Windows (GUI) Intel 80386 32-bit, InnoSetup self-extracting archive') {
419 # FIXME: http://innounp.sourceforge.net/
423 || $id=~/^Microsoft Cabinet archive data, \d+ byte(?:s)?, \d+ file(?:s)?$/
424 || $id eq 'PE executable for MS Windows (GUI) Intel 80386 32-bit, MS CAB-Installer self-extracting archive'
425 # Catch all unidentified flying cabinets:
426 || $id=~/^PE executable for MS Windows .* 32-bit$/
428 $this->{"type"}="cabinet";
430 spawn q{cabextract -q '}.$filename.q{' 2>&1|grep -v ': \(}.join(q{\|},
431 q{WARNING; possible [0-9]* extra bytes at end of file.},
432 q{no valid cabinets found},
434 q{error in CAB data format},
439 # non-"32-bit" (amd64 in fact)
440 next if $id=~/^PE executable for MS Windows/;
442 || $id eq 'PE executable for MS Windows (GUI) Intel 80386 32-bit, ZIP self-extracting archive (WinZip)'
443 || $id=~/^Zip archive data, /
446 spawn "unzip -q '$filename'";
450 if ($id=~m{^gzip compressed data, was "\S+", from Win/32}) {
452 spawn "gzip -d <'$filename' >'$basename'";
456 if ($id eq 'tar archive') {
458 spawn "tar xf '$filename'";
462 warn "$filename: $mime - $id" if !$unknown{$mime,$id}++;
464 close FIND or confess;
469 $dirnew=getcwd()."/".$dirnew if $dirnew!~m{^/};
470 $out =getcwd()."/".$out if $out && $out!~m{^/};
478 stack0_flush() if $last_stack0_filename;
479 print STDERR "\n" if $STDERR_needs_eol;