+#!/bin/perl -- # -*- perl -*-
+eval 'exec /bin/perl -S $0 "$*"'
+ if undef;
+#
+# FixHTML v1.02 2001/01/07
+#
+# Copyright (C) 1994, 1995, 1996 by Victor Parada (vparada@inf.utfsm.cl)
+# Copyright (C) 1997, ..., 2001 by Jan Kratochvil <short@ucw.cz>
+#
+# Fix broken paths in downloaded HTML trees.
+#
+# Syntax: fixhtml path host;
+#
+# Please read this "License Agreement and Lack of Warranty":
+# - The author of this program is Jan Kratochvil <short@k332.feld.cvut.cz>.
+# - This program is "Freeware", not "Public Domain".
+# - This program must be distributed for free, and cannot be included in
+# commercial packages without prior written permisson from the autor.
+# - This program cannot be distributed if modified in any way.
+# - This program can be used by anyone if the copyright and this notice
+# remains intact in every file.
+# - If you modify this program, please e-mail patches to the the author.
+# - This is a Beta version of the program. You have been warned!
+# - This program is provided ``AS IS'', without any warranty.
+# - This program can cause huge file transfers and all the related effects.
+# - This program can fill data disks without notice.
+# - Neither the author nor UTFSM are responsible for the use of this program.
+# - Bug reports, comments, questions and suggestions are welcome! But
+# please check first that you have the latest version!
+# - The latest version of this program is available at:
+# <URL:--->
+#
+# If you use this program, please send e-mail to the author.
+# He will try to notify you of any updates made to it.
+#
+
+# Setup perl defaults
+#
+$fixhtml=$0;
+$0=~s![^/]*/!!g;
+$|=1;
+$[=0;
+
+# Configurable defaults
+#
+$tmpfile="W$$.tmp"; # temp.
+$port=80;
+
+dump QUICKSTART if $ARGV[0] eq "--dump";
+QUICKSTART:
+
+# Process options in command line
+#
+while ($_=$ARGV[0],/^-/) {
+ shift;
+ /h|\?/ && ($help=1, next);
+ die "$0: Invalid option $_\n";
+}
+
+exit &about if $help;
+
+$path=shift || die "Path not specified\n";
+$host=shift || die "Host not specified\n";
+
+chdir $path || die "Cannot find specified path $path!\n";
+
+open(FEED,"find -type f|") or die "No files found";
+
+FLSKIP: while (<FEED>) {
+ s/\n//g;
+ $curfile=$_;
+ print "$curfile - ";
+ open(I,$curfile) or die "Cannot open $curfile!\n";
+ IDENT: while (<I>) {
+ last IDENT if (m/[^ \t\r\n]/);
+ }
+ $line=$_;
+ if ($line!~/^[ -~\r]*$/ || $line!~/\<\s*[!\w]+/i) { #.*\>
+ close(I);
+ print "not HTML\n";
+ next FLSKIP;
+ }
+ print "fixing ... ";
+ seek I,0,SEEK_SET;
+ open(F,">".$tmpfile) or die "Cannot create $tmpfile!\n";
+ $line="";
+ $pth=$curfile;
+ $pth=~s/^.\///;
+ $pth=~s![^/]*$!!;
+ while (<I>) {
+ $line.=$_;
+ while ($line=~/\<([^\<\>]*)([\<\>])(.*)$/s) {
+#print "TAG=$1\n";
+ $line=($2 eq "<"?"<":"").$3;
+ $endoftag=$2;
+ print F $`."<";
+ $fulltag=$1; if ($fulltag=~/^!/)
+ { print F $fulltag; }
+ else {
+ $fulltag=$1; $origtag=$fulltag;
+ $fulltag=~tr/\n\t\r/ /;
+ ($tag,%args)=&split_tag($fulltag);
+ for (sort keys %args) {
+ $x=$y=$z=$args{$_};
+ if (defined($za=rindex($z,"/")))
+ { $zb=substr($z,0,$za+1); $zc=substr($z,$za+1); }
+ else { $zb=""; $zc=$z }
+ ($zc,$zd)=$zc=~m!^([^\?\#]*)(.*)$!;
+ ($h,$n,$p,$f,$e)=(($y=~/^\w+:/)?
+ &split_url($x):("localhost",$port,$zb,$zc,$zd));
+#print "par=\"$x\", h=\"$h\", n=\"$n\", p=\"$p\", f=\"$f\", e=\"$e\",
+# host=\"$host\", port=\"$port\"\n";
+ $minone=($e=~s/\?/%3F/g);
+ if (($h eq "localhost" || $h eq $host) && $n eq $port) {
+ @new=split('/',$pth);
+ @nwa=split('/',$p);
+ splice(@nwa,0,1) if (@nwa[0] eq "");
+ splice(@new,0,1) if (@new[0] eq "");
+ @nwb=@new;
+ if (substr($p,0,1) eq "/") {
+ @new=();
+ }
+ foreach $dir (@nwa) {
+ if ($dir eq "..")
+ { pop(@new); }
+ elsif ($dir ne "." && $dir ne "")
+ { push(@new,$dir); }
+ }
+ while (defined(@new[0]) && @new[0] eq @nwb[0]) {
+ splice(@new,0,1);
+ splice(@nwb,0,1);
+ }
+ $newone=join('/',(map("..",@nwb),@new,$f)).$e;
+ if ($newone ne $args{$_}) {
+ $args{$_}=$newone;
+ $minone=1;
+ }
+ }
+ }
+#print "minone=".$minone."\n";
+ if ($minone) {
+ print F $tag;
+ for (sort keys %args)
+ { print F " ".$_.($args{$_} eq ""?"":"=\"$args{$_}\""); }
+ }
+ else {
+ print F $origtag
+ }
+ }
+ print F ">" if ($endoftag eq ">");
+ }
+ }
+ print F $line;
+ print F "\n" if ($line!~/\n$/);
+ close(I);
+ close(F);
+ rename($tmpfile,$curfile) || die "Cannot rename $tmpfile to $curfile ($!)!\n";
+#&set_date($file,$update);
+ print "done\n";
+ }
+close(FEED);
+exit;
+
+sub split_tag {
+ local($fulltag)=@_;
+ local($tagname,$etc,$var,$quote,$arg,$value,@tagargs);
+ undef $etc;
+ ($tagname,$etc)=$fulltag=~/\s*(\S+)\s*(.*)$/;
+ while ($etc!~/^\s*$/) {
+ ($arg,$var,$value,$etc)=$etc=~/\s*([^=\s]+)\s*(=)?\s*(\S+)?(.*)$/;
+ ($value,$etc)=((undef),$value.$etc) unless $var=~/^=$/;
+ ($quote)=$value=~/^([\"\'])/;
+ if ($quote) {
+ if ($value!~/.$quote$/) {
+ ($_,$etc)=$etc=~/^([^$quote]*)$quote?(.*)$/;
+ $value.=$_.$quote;
+ }
+ $value=~s/$quote\s*(.*)\s*$quote$/$1/;
+ $value=~s/\s*$//;
+ }
+ $arg="\U$arg\E";
+ push(@tagargs,$arg,$value);
+ }
+ $tagname="\U$tagname\E";
+ ($tagname,@tagargs);
+}
+
+sub split_url {
+ local($url)=@_;
+ local($host,$port,$path,$file,$extra,$v);
+ ($url,$extra)=$url=~m!^([^\?\#]*)(.*)$!;
+ $v=$url=~m!^http:(//([^:/]*)(:(\d*))?)?((/([^/]+/)*)?([^/]*))?$!;
+ return () unless $v;
+ ($host,$port,$path,$file)=(($2?$2:"localhost"),($4?$4:80),$6,$8);
+ ($host,$port,$path,$file,$extra);
+}
+
+sub about {
+ print <<EOH;
+
+FixHTML v1.02 2001/01/07 (C) 2001 by Jan Kratochvil <short\@ucw.cz>
+
+Fix broken paths in downloaded HTML trees.
+
+Usage: $0 path host
+
+EOH
+ undef;
+}
+
+__END__