1 #!/bin/perl -- # -*- perl -*-
2 eval 'exec /bin/perl -S $0 "$*"'
5 # FixHTML v1.02 2001/01/07
7 # Copyright (C) 1994, 1995, 1996 by Victor Parada (vparada@inf.utfsm.cl)
8 # Copyright (C) 1997, ..., 2001 by Jan Kratochvil <short@ucw.cz>
10 # Fix broken paths in downloaded HTML trees.
12 # Syntax: fixhtml path host;
14 # Please read this "License Agreement and Lack of Warranty":
15 # - The author of this program is Jan Kratochvil <short@k332.feld.cvut.cz>.
16 # - This program is "Freeware", not "Public Domain".
17 # - This program must be distributed for free, and cannot be included in
18 # commercial packages without prior written permisson from the autor.
19 # - This program cannot be distributed if modified in any way.
20 # - This program can be used by anyone if the copyright and this notice
21 # remains intact in every file.
22 # - If you modify this program, please e-mail patches to the the author.
23 # - This is a Beta version of the program. You have been warned!
24 # - This program is provided ``AS IS'', without any warranty.
25 # - This program can cause huge file transfers and all the related effects.
26 # - This program can fill data disks without notice.
27 # - Neither the author nor UTFSM are responsible for the use of this program.
28 # - Bug reports, comments, questions and suggestions are welcome! But
29 # please check first that you have the latest version!
30 # - The latest version of this program is available at:
33 # If you use this program, please send e-mail to the author.
34 # He will try to notify you of any updates made to it.
44 # Configurable defaults
46 $tmpfile="W$$.tmp"; # temp.
49 dump QUICKSTART if $ARGV[0] eq "--dump";
52 # Process options in command line
54 while ($_=$ARGV[0],/^-/) {
56 /h|\?/ && ($help=1, next);
57 die "$0: Invalid option $_\n";
62 $path=shift || die "Path not specified\n";
63 $host=shift || die "Host not specified\n";
65 chdir $path || die "Cannot find specified path $path!\n";
67 open(FEED,"find -type f|") or die "No files found";
69 FLSKIP: while (<FEED>) {
73 open(I,$curfile) or die "Cannot open $curfile!\n";
75 last IDENT if (m/[^ \t\r\n]/);
78 if ($line!~/^[ -~\r]*$/ || $line!~/\<\s*[!\w]+/i) { #.*\>
85 open(F,">".$tmpfile) or die "Cannot create $tmpfile!\n";
92 while ($line=~/\<([^\<\>]*)([\<\>])(.*)$/s) {
94 $line=($2 eq "<"?"<":"").$3;
97 $fulltag=$1; if ($fulltag=~/^!/)
100 $fulltag=$1; $origtag=$fulltag;
101 $fulltag=~tr/\n\t\r/ /;
102 ($tag,%args)=&split_tag($fulltag);
103 for (sort keys %args) {
105 if (defined($za=rindex($z,"/")))
106 { $zb=substr($z,0,$za+1); $zc=substr($z,$za+1); }
107 else { $zb=""; $zc=$z }
108 ($zc,$zd)=$zc=~m!^([^\?\#]*)(.*)$!;
109 ($h,$n,$p,$f,$e)=(($y=~/^\w+:/)?
110 &split_url($x):("localhost",$port,$zb,$zc,$zd));
111 #print "par=\"$x\", h=\"$h\", n=\"$n\", p=\"$p\", f=\"$f\", e=\"$e\",
112 # host=\"$host\", port=\"$port\"\n";
113 $minone=($e=~s/\?/%3F/g);
114 if (($h eq "localhost" || $h eq $host) && $n eq $port) {
115 @new=split('/',$pth);
117 splice(@nwa,0,1) if (@nwa[0] eq "");
118 splice(@new,0,1) if (@new[0] eq "");
120 if (substr($p,0,1) eq "/") {
123 foreach $dir (@nwa) {
126 elsif ($dir ne "." && $dir ne "")
129 while (defined(@new[0]) && @new[0] eq @nwb[0]) {
133 $newone=join('/',(map("..",@nwb),@new,$f)).$e;
134 if ($newone ne $args{$_}) {
140 #print "minone=".$minone."\n";
143 for (sort keys %args)
144 { print F " ".$_.($args{$_} eq ""?"":"=\"$args{$_}\""); }
150 print F ">" if ($endoftag eq ">");
154 print F "\n" if ($line!~/\n$/);
157 rename($tmpfile,$curfile) || die "Cannot rename $tmpfile to $curfile ($!)!\n";
158 #&set_date($file,$update);
166 local($tagname,$etc,$var,$quote,$arg,$value,@tagargs);
168 ($tagname,$etc)=$fulltag=~/\s*(\S+)\s*(.*)$/;
169 while ($etc!~/^\s*$/) {
170 ($arg,$var,$value,$etc)=$etc=~/\s*([^=\s]+)\s*(=)?\s*(\S+)?(.*)$/;
171 ($value,$etc)=((undef),$value.$etc) unless $var=~/^=$/;
172 ($quote)=$value=~/^([\"\'])/;
174 if ($value!~/.$quote$/) {
175 ($_,$etc)=$etc=~/^([^$quote]*)$quote?(.*)$/;
178 $value=~s/$quote\s*(.*)\s*$quote$/$1/;
182 push(@tagargs,$arg,$value);
184 $tagname="\U$tagname\E";
190 local($host,$port,$path,$file,$extra,$v);
191 ($url,$extra)=$url=~m!^([^\?\#]*)(.*)$!;
192 $v=$url=~m!^http:(//([^:/]*)(:(\d*))?)?((/([^/]+/)*)?([^/]*))?$!;
194 ($host,$port,$path,$file)=(($2?$2:"localhost"),($4?$4:80),$6,$8);
195 ($host,$port,$path,$file,$extra);
201 FixHTML v1.02 2001/01/07 (C) 2001 by Jan Kratochvil <short\@ucw.cz>
203 Fix broken paths in downloaded HTML trees.