Finally drop CGI itself.
authorshort <>
Sun, 2 Oct 2005 05:44:17 +0000 (05:44 +0000)
committershort <>
Sun, 2 Oct 2005 05:44:17 +0000 (05:44 +0000)
Web.pm

diff --git a/Web.pm b/Web.pm
index 2bf90bb..1a3f9e0 100644 (file)
--- a/Web.pm
+++ b/Web.pm
@@ -99,7 +99,6 @@ BEGIN
 }
 
 use WebConfig; # see also below: Wuse 'WebConfig';
-require CGI;
 require Image::Size;   # for &imgsize
 use File::Basename;    # &basename
 use Carp qw(cluck confess);
@@ -127,6 +126,7 @@ use Apache2::Connection;
 require MIME::Base64;
 use Apache2::ServerUtil;
 require MIME::Types;
+require MIME::Parser;
 
 
 #our $W;
@@ -160,10 +160,6 @@ my($class,%args)=@_;
        do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__";
        exit_hook_start();
 
-       # See: &escapeHTML
-       do { cluck "charset==$_, expecting ISO-8859-1" if $_ ne "ISO-8859-1"; } for CGI::charset();
-       CGI::charset("utf-8");
-
        do { $W->{$_}=0  if !defined $W->{$_}; } for "detect_ent";
        do { $W->{$_}=0  if !defined $W->{$_}; } for "detect_js";
        do { $W->{$_}=1  if !defined $W->{$_}; } for "have_css";        # AFAIK it does not hurt anyone.
@@ -194,35 +190,6 @@ my($class,%args)=@_;
        select *STDOUT;
        $|=1;
 
-       $W->{"QUERY_STRING"}=$W->{"r"}->args() || "";
-       if ($W->{"detect_ent"}) {
-                        if ($W->{"QUERY_STRING"}=~/[&]amp;have_ent/)
-                       { $W->{"have_ent"}=0; }
-               elsif ($W->{"QUERY_STRING"}=~    /[&]have_ent/)
-                       { $W->{"have_ent"}=1; }
-               else
-                       { delete $W->{"have_ent"}; }
-               if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") {
-                       $W->{"head"}.='<meta http-equiv="Refresh" content="0; URL='
-                                       .escapeHTML("http://".$W->{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0]
-                                                       ."?".($W->{"QUERY_STRING"} || "detect_ent_glue=1").'&have_ent=detect')
-                                       .'" />'."\n";
-                       }
-               }
-       $W->{"QUERY_STRING"}=~s/([&])amp;/$1/g;
-       $W->{"r"}->args($W->{"QUERY_STRING"});
-       # Workaround: &CGI::Vars behaves weird if strings passed both as POST data and in: $QUERY_STRING
-       do { $W->{"r"}->args(""); delete $ENV{"QUERY_STRING"}; } if $W->{"r"}->method() eq "POST";
-       # Do not: $W->{"r"}->args()
-       # as it parses only QUERY_STRING (not POST data).
-       $W->{"args_orig_array"}=[ CGI->new($W->{"r"})->Vars() ];
-       $W->{"args"}={ @{$W->{"args_orig_array"}} };
-       for my $name (keys(%{$W->{"args"}})) {
-               my @vals=split /\x00/,$W->{"args"}{$name};
-               next if @vals<=1;
-               $W->{"args"}{$name}=[@vals];
-               }
-
        $W->{"headers_in"}=$W->{"r"}->headers_in();
        Wrequire 'My::Hash::Merge';
        $W->{"headers_in"}=My::Hash::Merge->new(
@@ -256,6 +223,32 @@ my($class,%args)=@_;
                $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
                }
 
+       {
+               local $_=$W->{"r"}->args() || "";
+               if ($W->{"detect_ent"}) {
+                                if (/[&]amp;have_ent/)
+                               { $W->{"have_ent"}=0; }
+                       elsif (    /[&]have_ent/)
+                               { $W->{"have_ent"}=1; }
+                       else
+                               { delete $W->{"have_ent"}; }
+                       if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") {
+                               $W->{"head"}.='<meta http-equiv="Refresh" content="0; URL='
+                                               .escapeHTML("http://".$W->{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0]
+                                                               ."?".($_ || "detect_ent_glue=1").'&have_ent=detect')
+                                               .'" />'."\n";
+                               }
+                       }
+               s/([&])amp;/$1/g;
+               $W->{"r"}->args($_);
+               }
+
+       $W->{"args"}=URI->new("?".$W->{"r"}->args())->query_form_hash();
+       $W->merge_post_args() if $W->{"r"}->method() eq "POST";
+       # Prepare '$args' first to (FIXME: Why?) prevent: Not a reference
+       my $args=$W->{"args"};
+       $W->{"args_orig"}=Storable::dclone($args);
+
        $W->{"browser"}=sub {
                # Lazy-evaluation, we may not need the "User-Agent" header at all.
                return our $r||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
@@ -285,6 +278,132 @@ my($class,%args)=@_;
        return $W;
 }
 
+# Merges the POST body arguments into the already parsed query string
+# arguments in $W->{"args"}.  A name seen more than once ends up as an array
+# reference holding all of its values in the order they were seen.
+# Input: $class is used to call &read_post_args, the target is the global $W.
+# Returns: nothing.
+sub merge_post_args($)
+{
+my($class)=@_;
+
+       # &read_post_args returns a flat (name,value,name,value,...) list.
+       my @pairs=$class->read_post_args();
+       for (my $i=0;$i<@pairs;$i+=2) {
+               my($name,$data)=@pairs[$i,$i+1];
+               my $slot=\$W->{"args"}{$name};
+               # First occurrence of this name.
+               if (!defined $$slot) {
+                       $$slot=$data;
+                       next;
+                       }
+               # Second occurrence: promote the plain scalar to an array.
+               if (!ref $$slot) {
+                       $$slot=[$$slot,$data];
+                       next;
+                       }
+               # Any further occurrence.
+               if ("ARRAY" eq ref $$slot) {
+                       push @$$slot,$data;
+                       next;
+                       }
+               cluck "Ignoring POST argument \"$name\", orig is weird:\n",Dumper($$slot);
+               }
+       return;
+}
+
+# Do not: use CGI;
+# as CGI's parsing of POST vs. QUERY_STRING data, multiple-valued keys etc.
+# is too convoluted and causes weird problems, especially together with mod_perl.
+# Reads the POST body with the reader matching the request "Content-type".
+# Returns: whatever the chosen reader returns; nothing (after a warning) for
+# a content type without a reader.
+sub read_post_args($)
+{
+my($class)=@_;
+
+       # Content type => name of the method parsing such a body.
+       my %reader_for=(
+                       "multipart/form-data"=>"read_multipart_form_data",
+                       "application/x-www-form-urlencoded"=>"read_application_x_www_form_urlencoded",
+                       );
+
+       # Keep $_ localized for the whole call, including the reader itself.
+       local $_=$class->http_headers_in_for("Content-type")->content_type();
+       if (my $reader=$reader_for{$_}) {
+               return $class->$reader();
+               }
+       cluck "Unknown POST data body, ignored: $_";
+       return;
+}
+
+# Reads the whole request body from $W->{"r"} and parses it the same way
+# as a query string.
+# Returns: what URI's &query_form returns for that body.
+sub read_application_x_www_form_urlencoded($)
+{
+my($class)=@_;
+
+       my @chunks;
+       while (1) {
+               my $buf;
+               my $len=$W->{"r"}->read($buf,0x1000);
+               # Do not: cluck "Error reading POST data: $!" if !defined $len;
+               # as it should be done using: APR::Error exceptions
+               last unless $len;
+               push @chunks,$buf;
+               }
+       my $uri=URI->new("?".join("",@chunks));
+       return $uri->query_form();
+}
+
+# Parses a "multipart/form-data" body of the request $W->{"r"} by MIME::Parser.
+# The "Content-type" request header is prepended to the body stream through
+# the My::Web::ReadMerged tied handle so the parser gets the header line
+# followed by an empty line and then the body.
+# Input: $class is not used, the request is taken from the global $W.
+# Returns: flat list of (part name => part body) pairs.
+sub read_multipart_form_data($)
+{
+my($class)=@_;
+
+       my $parser=MIME::Parser->new();
+       # The files created under this directory are removed by the purge() below.
+       $parser->output_under("/tmp");
+
+       local *R_FH;
+       tie *R_FH,$W->{"r"};
+       local *FH;
+       tie *FH,"My::Web::ReadMerged",
+                       join("",map(($_.": ".$W->{"headers_in"}{$_}."\n"),qw(
+                                       Content-type
+                                       )))."\n",
+                       \*R_FH;
+       my $body=$parser->parse(\*FH);
+       cluck "No multipart POST request body?" if !$body->is_multipart();
+
+       # Collect the values first, the files must still exist while reading them.
+       my @r=map((
+                       $_->head()->mime_attr("content-disposition.name")
+                       =>
+                       join("",@{$_->body()})
+                       ),$body->parts());
+       # Remove everything the parser has written to the disk for this request.
+       $parser->filer()->purge();
+       return @r;
+
+       # TODO: Globalize, make it IO::* compatible, split to the merging part + IO::Scalar.
+       package My::Web::ReadMerged;
+
+       # BEGIN is required here: this place follows the 'return' above, plain
+       # statements would never run and @ISA would stay empty.
+       BEGIN {
+               require Tie::Handle;
+               require Exporter;
+               our @ISA=qw(Tie::Handle Exporter);
+               }
+       use Carp qw(cluck confess);
+
+       # Returns: next line (terminated by $/) of the prepended "data" followed
+       # by the content read from "fh_orig"; undef when everything was returned.
+       sub READLINE($)
+       {
+       my($self)=@_;
+
+               confess "Slurp not yet implemented" if !defined $/;
+               # Apache2::RequestIO does not support 'READLINE'!
+               for (;;) {
+                       if (defined $self->{"data"} && $self->{"data"}=~s{^.*\Q$/\E}{}) {
+                               $self->{"offset"}+=length $&;
+                               return $&;
+                               }
+                       my $fh_orig=$self->{"fh_orig"};
+                       if (!$fh_orig) {
+                               # The source is exhausted: flush the unterminated rest.
+                               my $r=$self->{"data"};
+                               delete $self->{"data"};
+                               # Report the end by undef, never by an empty "line".
+                               return undef if !defined $r || $r eq "";
+                               $self->{"offset"}+=length $r;
+                               return $r;
+                               }
+                       my $got=read $fh_orig,my($buf),0x1000;
+                       cluck "Error reading POST data: $!" if !defined $got;
+                       delete $self->{"fh_orig"} if !$got;
+                       cluck "INTERNAL: fh_orig should not exist here" if !defined $self->{"data"};
+                       # $buf may be left undefined by a failed read.
+                       $self->{"data"}.=$buf if $got;
+                       }
+       }
+
+       # Returns: count of the characters returned by &READLINE so far.
+       sub TELL($)
+       {
+       my($self)=@_;
+
+               return $self->{"offset"};
+       }
+
+       # Input: $data is the text returned first, $fh_orig is the handle read after it.
+       sub TIEHANDLE($$$)
+       {
+       my($class,$data,$fh_orig)=@_;
+
+               my $self=bless {},$class;
+               $self->{"data"}=$data;
+               $self->{"offset"}=0;
+               $self->{"fh_orig"}=$fh_orig;
+               return $self;
+       }
+}
+
 sub cleanup($)
 {
 my($apache_request)=@_;
@@ -351,28 +470,24 @@ sub request_check(;$)
 my($self)=@_;
 
        # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
-       # CGI requires valid "r": check it beforehand here.
        confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); };
        # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
        # as it is valid at least while preparing arguments to call: &project::Lib::init
 }
 
+# Do not: use CGI;
+# as it carries too much backward compatibility regarding charset encodings etc.
+# and the resulting code is too dense while adding no functionality for current content.
 sub escapeHTML($)
 {
 my($text)=@_;
 
-       # Prevent &CGI::escapeHTML breaking utf-8 strings like: \xC4\x9B eq \x{11B}
-       # Prevent case if we run under mod_perl but still just initializing:
-       request_check() if $ENV{"MOD_PERL"};
-       # Generally we are initialized from &init but we may be used without it without mod_perl
-       # and in such case check the change on all non-first invocations.
-       our $init;
-       if (!$ENV{"MOD_PERL"} && $init++) {
-               do { cluck "charset==$_" if $_ ne "utf-8"; } for CGI::charset();
-               }
-       CGI::charset("utf-8");
-
-       return CGI::escapeHTML($text);
+       local $_=$text;
+       s{&}{&amp;}gso;
+       s{<}{&lt;}gso;
+       s{>}{&gt;}gso;
+       s{"}{&quot;}gso;
+       return $_;
 }
 
 # /home/user/www/webdir
@@ -438,6 +553,7 @@ my($in,%args)=@_;
        my $uri=in_to_uri_abs($in);
        if (uri_is_local($uri)) {
                # Prefer the $uri values over "args_persistent" values.
+               # &query_form_hash comes from: URI::QueryParam
                $uri->query_form_hash({
                                map({
                                        my $key=$_;
@@ -800,6 +916,15 @@ my(%args)=@_;
        return [ map(($args{$_}),@fields) ];
 }
 
+# Input: $self is not used; @headers are the names of the request headers to pick.
+# Returns: 'HTTP::Headers' instance holding only the @headers entries
+# of $W->{"headers_in"}.
+sub http_headers_in_for($@)
+{
+my($self,@headers)=@_;
+
+       # Limit these entries to generate proper 'Vary' header.
+       my @pairs;
+       push @pairs,$_,$W->{"headers_in"}{$_} for @headers;
+       return HTTP::Headers->new(@pairs);
+}
+
 # Input: $self is required!
 # Input: Put the fallback variant as the first one.
 # Returns: always only scalar!
@@ -807,13 +932,6 @@ sub Negotiate_choose($$)
 {
 my($self,$variants)=@_;
 
-       # Limit these entries to generate proper 'Vary' header.
-       my %hash=(map(($_=>$W->{"headers_in"}{$_}),qw(
-                       Accept
-                       Accept-Charset
-                       Accept-Encoding
-                       Accept-Language
-                       )));
        my $best=HTTP::Negotiate::choose($variants,
                        # Do not: $W->{"r"}
                        # to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84.
@@ -821,7 +939,12 @@ my($self,$variants)=@_;
                        # to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84.
                        # Do not: HTTP::Headers->new($W->{"r"}->headers_in());
                        # to prevent empty result or even: Odd number of elements in anonymous hash
-                       HTTP::Headers->new(%hash));
+                       $self->http_headers_in_for(qw(
+                                       Accept
+                                       Accept-Charset
+                                       Accept-Encoding
+                                       Accept-Language
+                                       )));
        $best||=$variants->[0][0];      # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed?
        return $best;
 }
@@ -985,8 +1108,9 @@ sub cache_start()
                # &Wrequire it here even if it will not be later used; to be stable!
                Wrequire 'My::Hash::RestrictTo';
                my %uri_args_hash=(
+                       "method"=>$W->{"r"}->method(),
                        "uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(),
-                       "args"=>$W->{"args_orig_array"},
+                       "args"=>$W->{"args_orig"},
                        );
                $W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze(\%uri_args_hash); };
                last if !(my $headers_in_keys_arrayref=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}});