From a48c0fc6077a2d7cd968804a6409d40c86449ca5 Mon Sep 17 00:00:00 2001 From: lace <> Date: Tue, 1 Jan 2008 11:43:08 +0000 Subject: [PATCH] Original buggy version. --- bin/fixhtml | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100755 bin/fixhtml diff --git a/bin/fixhtml b/bin/fixhtml new file mode 100755 index 0000000..3d8acba --- /dev/null +++ b/bin/fixhtml @@ -0,0 +1,211 @@ +#!/bin/perl -- # -*- perl -*- +eval 'exec /bin/perl -S $0 "$*"' + if undef; +# +# FixHTML v1.02 2001/01/07 +# +# Copyright (C) 1994, 1995, 1996 by Victor Parada (vparada@inf.utfsm.cl) +# Copyright (C) 1997, ..., 2001 by Jan Kratochvil +# +# Fix broken paths in downloaded HTML trees. +# +# Syntax: fixhtml path host; +# +# Please read this "License Agreement and Lack of Warranty": +# - The author of this program is Jan Kratochvil . +# - This program is "Freeware", not "Public Domain". +# - This program must be distributed for free, and cannot be included in +# commercial packages without prior written permisson from the autor. +# - This program cannot be distributed if modified in any way. +# - This program can be used by anyone if the copyright and this notice +# remains intact in every file. +# - If you modify this program, please e-mail patches to the the author. +# - This is a Beta version of the program. You have been warned! +# - This program is provided ``AS IS'', without any warranty. +# - This program can cause huge file transfers and all the related effects. +# - This program can fill data disks without notice. +# - Neither the author nor UTFSM are responsible for the use of this program. +# - Bug reports, comments, questions and suggestions are welcome! But +# please check first that you have the latest version! +# - The latest version of this program is available at: +# +# +# If you use this program, please send e-mail to the author. +# He will try to notify you of any updates made to it. +# + +# Setup perl defaults +# +$fixhtml=$0; +$0=~s![^/]*/!!g; +$|=1; +$[=0; + +# Configurable defaults +# +$tmpfile="W$$.tmp"; # temp. +$port=80; + +dump QUICKSTART if $ARGV[0] eq "--dump"; +QUICKSTART: + +# Process options in command line +# +while ($_=$ARGV[0],/^-/) { + shift; + /h|\?/ && ($help=1, next); + die "$0: Invalid option $_\n"; +} + +exit &about if $help; + +$path=shift || die "Path not specified\n"; +$host=shift || die "Host not specified\n"; + +chdir $path || die "Cannot find specified path $path!\n"; + +open(FEED,"find -type f|") or die "No files found"; + +FLSKIP: while () { + s/\n//g; + $curfile=$_; + print "$curfile - "; + open(I,$curfile) or die "Cannot open $curfile!\n"; + IDENT: while () { + last IDENT if (m/[^ \t\r\n]/); + } + $line=$_; + if ($line!~/^[ -~\r]*$/ || $line!~/\<\s*[!\w]+/i) { #.*\> + close(I); + print "not HTML\n"; + next FLSKIP; + } + print "fixing ... "; + seek I,0,SEEK_SET; + open(F,">".$tmpfile) or die "Cannot create $tmpfile!\n"; + $line=""; + $pth=$curfile; + $pth=~s/^.\///; + $pth=~s![^/]*$!!; + while () { + $line.=$_; + while ($line=~/\<([^\<\>]*)([\<\>])(.*)$/s) { +#print "TAG=$1\n"; + $line=($2 eq "<"?"<":"").$3; + $endoftag=$2; + print F $`."<"; + $fulltag=$1; if ($fulltag=~/^!/) + { print F $fulltag; } + else { + $fulltag=$1; $origtag=$fulltag; + $fulltag=~tr/\n\t\r/ /; + ($tag,%args)=&split_tag($fulltag); + for (sort keys %args) { + $x=$y=$z=$args{$_}; + if (defined($za=rindex($z,"/"))) + { $zb=substr($z,0,$za+1); $zc=substr($z,$za+1); } + else { $zb=""; $zc=$z } + ($zc,$zd)=$zc=~m!^([^\?\#]*)(.*)$!; + ($h,$n,$p,$f,$e)=(($y=~/^\w+:/)? + &split_url($x):("localhost",$port,$zb,$zc,$zd)); +#print "par=\"$x\", h=\"$h\", n=\"$n\", p=\"$p\", f=\"$f\", e=\"$e\", +# host=\"$host\", port=\"$port\"\n"; + $minone=($e=~s/\?/%3F/g); + if (($h eq "localhost" || $h eq $host) && $n eq $port) { + @new=split('/',$pth); + @nwa=split('/',$p); + splice(@nwa,0,1) if (@nwa[0] eq ""); + splice(@new,0,1) if (@new[0] eq ""); + @nwb=@new; + if (substr($p,0,1) eq "/") { + @new=(); + } + foreach $dir (@nwa) { + if ($dir eq "..") + { pop(@new); } + elsif ($dir ne "." && $dir ne "") + { push(@new,$dir); } + } + while (defined(@new[0]) && @new[0] eq @nwb[0]) { + splice(@new,0,1); + splice(@nwb,0,1); + } + $newone=join('/',(map("..",@nwb),@new,$f)).$e; + if ($newone ne $args{$_}) { + $args{$_}=$newone; + $minone=1; + } + } + } +#print "minone=".$minone."\n"; + if ($minone) { + print F $tag; + for (sort keys %args) + { print F " ".$_.($args{$_} eq ""?"":"=\"$args{$_}\""); } + } + else { + print F $origtag + } + } + print F ">" if ($endoftag eq ">"); + } + } + print F $line; + print F "\n" if ($line!~/\n$/); + close(I); + close(F); + rename($tmpfile,$curfile) || die "Cannot rename $tmpfile to $curfile ($!)!\n"; +#&set_date($file,$update); + print "done\n"; + } +close(FEED); +exit; + +sub split_tag { + local($fulltag)=@_; + local($tagname,$etc,$var,$quote,$arg,$value,@tagargs); + undef $etc; + ($tagname,$etc)=$fulltag=~/\s*(\S+)\s*(.*)$/; + while ($etc!~/^\s*$/) { + ($arg,$var,$value,$etc)=$etc=~/\s*([^=\s]+)\s*(=)?\s*(\S+)?(.*)$/; + ($value,$etc)=((undef),$value.$etc) unless $var=~/^=$/; + ($quote)=$value=~/^([\"\'])/; + if ($quote) { + if ($value!~/.$quote$/) { + ($_,$etc)=$etc=~/^([^$quote]*)$quote?(.*)$/; + $value.=$_.$quote; + } + $value=~s/$quote\s*(.*)\s*$quote$/$1/; + $value=~s/\s*$//; + } + $arg="\U$arg\E"; + push(@tagargs,$arg,$value); + } + $tagname="\U$tagname\E"; + ($tagname,@tagargs); +} + +sub split_url { + local($url)=@_; + local($host,$port,$path,$file,$extra,$v); + ($url,$extra)=$url=~m!^([^\?\#]*)(.*)$!; + $v=$url=~m!^http:(//([^:/]*)(:(\d*))?)?((/([^/]+/)*)?([^/]*))?$!; + return () unless $v; + ($host,$port,$path,$file)=(($2?$2:"localhost"),($4?$4:80),$6,$8); + ($host,$port,$path,$file,$extra); +} + +sub about { + print < + +Fix broken paths in downloaded HTML trees. + +Usage: $0 path host + +EOH + undef; +} + +__END__ -- 1.8.3.1