#! /usr/bin/perl -- # -*- perl -*- eval 'exec /bin/perl -S $0 "$*"' if undef; # # FixHTML v1.02 2001/01/07 # # Copyright (C) 1994, 1995, 1996 by Victor Parada (vparada@inf.utfsm.cl) # Copyright (C) 1997, ..., 2001 by Jan Kratochvil # # Fix broken paths in downloaded HTML trees. # # Syntax: fixhtml path host; # # Please read this "License Agreement and Lack of Warranty": # - The author of this program is Jan Kratochvil . # - This program is "Freeware", not "Public Domain". # - This program must be distributed for free, and cannot be included in # commercial packages without prior written permisson from the autor. # - This program cannot be distributed if modified in any way. # - This program can be used by anyone if the copyright and this notice # remains intact in every file. # - If you modify this program, please e-mail patches to the the author. # - This is a Beta version of the program. You have been warned! # - This program is provided ``AS IS'', without any warranty. # - This program can cause huge file transfers and all the related effects. # - This program can fill data disks without notice. # - Neither the author nor UTFSM are responsible for the use of this program. # - Bug reports, comments, questions and suggestions are welcome! But # please check first that you have the latest version! # - The latest version of this program is available at: # # # If you use this program, please send e-mail to the author. # He will try to notify you of any updates made to it. # # Setup perl defaults # $fixhtml=$0; $0=~s![^/]*/!!g; $|=1; $[=0; # Configurable defaults # $tmpfile="W$$.tmp"; # temp. $port=80; dump QUICKSTART if $ARGV[0] eq "--dump"; QUICKSTART: # Process options in command line # while ($_=$ARGV[0],/^-/) { shift; /h|\?/ && ($help=1, next); die "$0: Invalid option $_\n"; } exit &about if $help; $path=shift || die "Path not specified\n"; $host=shift || die "Host not specified\n"; chdir $path || die "Cannot find specified path $path!\n"; open(FEED,"find -type f|") or die "No files found"; FLSKIP: while () { s/\n//g; $curfile=$_; print "$curfile - "; open(I,$curfile) or die "Cannot open $curfile!\n"; IDENT: while () { last IDENT if (m/[^ \t\r\n]/); } $line=$_; if ($line!~/^[ -~\r]*$/ || $line!~/\<\s*[!\w]+/i) { #.*\> close(I); print "not HTML\n"; next FLSKIP; } print "fixing ... "; seek I,0,SEEK_SET; open(F,">".$tmpfile) or die "Cannot create $tmpfile!\n"; $line=""; $pth=$curfile; $pth=~s/^.\///; $pth=~s![^/]*$!!; while () { $line.=$_; while ($line=~/\<([^\<\>]*)([\<\>])(.*)$/s) { #print "TAG=$1\n"; $line=($2 eq "<"?"<":"").$3; $endoftag=$2; print F $`."<"; $fulltag=$1; if ($fulltag=~/^!/) { print F $fulltag; } else { $fulltag=$1; $origtag=$fulltag; $fulltag=~tr/\n\t\r/ /; ($tag,%args)=&split_tag($fulltag); for (sort keys %args) { $x=$y=$z=$args{$_}; if (defined($za=rindex($z,"/"))) { $zb=substr($z,0,$za+1); $zc=substr($z,$za+1); } else { $zb=""; $zc=$z } ($zc,$zd)=$zc=~m!^([^\?\#]*)(.*)$!; ($h,$n,$p,$f,$e)=(($y=~/^\w+:/)? &split_url($x):("localhost",$port,$zb,$zc,$zd)); #print "par=\"$x\", h=\"$h\", n=\"$n\", p=\"$p\", f=\"$f\", e=\"$e\", # host=\"$host\", port=\"$port\"\n"; $minone||=($e=~s/\?/%3F/g); if (($h eq "localhost" || $h eq $host) && $n eq $port) { @new=split('/',$pth); @nwa=split('/',$p); splice(@nwa,0,1) if (@nwa[0] eq ""); splice(@new,0,1) if (@new[0] eq ""); @nwb=@new; if (substr($p,0,1) eq "/") { @new=(); } foreach $dir (@nwa) { if ($dir eq "..") { pop(@new); } elsif ($dir ne "." && $dir ne "") { push(@new,$dir); } } while (defined(@new[0]) && @new[0] eq @nwb[0]) { splice(@new,0,1); splice(@nwb,0,1); } $newone=join('/',(map("..",@nwb),@new,$f)).$e; if ($newone ne $args{$_}) { $args{$_}=$newone; $minone=1; } } } #print "minone=".$minone."\n"; if ($minone) { print F $tag; for (sort keys %args) { print F " ".$_.($args{$_} eq ""?"":"=\"$args{$_}\""); } } else { print F $origtag } } print F ">" if ($endoftag eq ">"); } } print F $line; print F "\n" if ($line!~/\n$/); close(I); close(F); rename($tmpfile,$curfile) || die "Cannot rename $tmpfile to $curfile ($!)!\n"; #&set_date($file,$update); print "done\n"; } close(FEED); exit; sub split_tag { local($fulltag)=@_; local($tagname,$etc,$var,$quote,$arg,$value,@tagargs); undef $etc; ($tagname,$etc)=$fulltag=~/\s*(\S+)\s*(.*)$/; while ($etc!~/^\s*$/) { ($arg,$var,$value,$etc)=$etc=~/\s*([^=\s]+)\s*(=)?\s*(\S+)?(.*)$/; ($value,$etc)=((undef),$value.$etc) unless $var=~/^=$/; ($quote)=$value=~/^([\"\'])/; if ($quote) { if ($value!~/.$quote$/) { ($_,$etc)=$etc=~/^([^$quote]*)$quote?(.*)$/; $value.=$_.$quote; } $value=~s/$quote\s*(.*)\s*$quote$/$1/; $value=~s/\s*$//; } $arg="\U$arg\E"; push(@tagargs,$arg,$value); } $tagname="\U$tagname\E"; ($tagname,@tagargs); } sub split_url { local($url)=@_; local($host,$port,$path,$file,$extra,$v); ($url,$extra)=$url=~m!^([^\?\#]*)(.*)$!; $v=$url=~m!^http:(//([^:/]*)(:(\d*))?)?((/([^/]+/)*)?([^/]*))?$!; return () unless $v; ($host,$port,$path,$file)=(($2?$2:"localhost"),($4?$4:80),$6,$8); ($host,$port,$path,$file,$extra); } sub about { print < Fix broken paths in downloaded HTML trees. Usage: $0 path host EOH undef; } __END__