Fixed package dependencies (missing the web request main package).
[MyWeb.git] / Web.pm
diff --git a/Web.pm b/Web.pm
index 2a9406d..2b08495 100644 (file)
--- a/Web.pm
+++ b/Web.pm
@@ -24,7 +24,7 @@ use strict;
 use warnings;
 
 use Exporter;
-sub Wrequire($);
+sub Wrequire($%);
 sub Wuse($@);
 our $W;
 our @EXPORT=qw(
@@ -37,6 +37,7 @@ our @EXPORT=qw(
                $W
                &input_hidden_persistents
                &escapeHTML
+               &form_method
                );
 our @ISA=qw(Tie::Handle Exporter);
 
@@ -48,9 +49,10 @@ BEGIN
        use Carp qw(cluck confess);
        $W->{"__My::Web_init"}=1;
 
-       sub Wrequire ($)
+       # $args{"first"}=1
+       sub Wrequire ($%)
        {
-       my($file)=@_;
+       my($file,%args)=@_;
 
 #              print STDERR "Wrequire $file\n";
                $file=~s#/#::#g;
@@ -66,11 +68,15 @@ BEGIN
                $callers{$selfpkg}=1;
                for my $target ($class,__PACKAGE__) {
                        for my $caller (keys(%callers)) {
-                               next if $caller eq $target;
                                next if $packages_used_hash{$caller}{$target}++;
                                cluck "Appending to the '_done' package list: caller=$caller,target=$target"
                                                if $packages_used_hash{$caller}{"_done"};
-                               push @{$packages_used_array{$caller}},$target;
+                               if ($args{"first"}) {
+                                       unshift @{$packages_used_array{$caller}},$target;
+                                       }
+                               else {
+                                       push @{$packages_used_array{$caller}},$target;
+                                       }
                                }
                        }
                eval { CORE::require "$file"; } or confess $@;
@@ -99,7 +105,6 @@ BEGIN
 }
 
 use WebConfig; # see also below: Wuse 'WebConfig';
-require CGI;
 require Image::Size;   # for &imgsize
 use File::Basename;    # &basename
 use Carp qw(cluck confess);
@@ -122,11 +127,16 @@ require Digest::MD5;
 require Data::Compare;
 use Data::Dumper;
 require Encode;
+use Apache2::RequestUtil;
 use Apache2::Filter;
 use Apache2::Connection;
 require MIME::Base64;
 use Apache2::ServerUtil;
 require MIME::Types;
+require MIME::Parser;
+use Apache2::RequestRec;
+use Apache2::RequestIO;
+use Apache2::Response;
 
 
 #our $W;
@@ -159,10 +169,8 @@ my($class,%args)=@_;
        # $Apache2::Registry::curstash is no longer supported.
        do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__";
        exit_hook_start();
-
-       # See: &escapeHTML
-       do { cluck "charset==$_, expecting ISO-8859-1" if $_ ne "ISO-8859-1"; } for CGI::charset();
-       CGI::charset("utf-8");
+       # Package dependencies tracking only:
+       Wrequire $W->{"__PACKAGE__"},"first"=>1;
 
        do { $W->{$_}=0  if !defined $W->{$_}; } for "detect_ent";
        do { $W->{$_}=0  if !defined $W->{$_}; } for "detect_js";
@@ -194,35 +202,6 @@ my($class,%args)=@_;
        select *STDOUT;
        $|=1;
 
-       $W->{"QUERY_STRING"}=$W->{"r"}->args() || "";
-       if ($W->{"detect_ent"}) {
-                        if ($W->{"QUERY_STRING"}=~/[&]amp;have_ent/)
-                       { $W->{"have_ent"}=0; }
-               elsif ($W->{"QUERY_STRING"}=~    /[&]have_ent/)
-                       { $W->{"have_ent"}=1; }
-               else
-                       { delete $W->{"have_ent"}; }
-               if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") {
-                       $W->{"head"}.='<meta http-equiv="Refresh" content="0; URL='
-                                       .escapeHTML("http://".$W->{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0]
-                                                       ."?".($W->{"QUERY_STRING"} || "detect_ent_glue=1").'&have_ent=detect')
-                                       .'" />'."\n";
-                       }
-               }
-       $W->{"QUERY_STRING"}=~s/([&])amp;/$1/g;
-       $W->{"r"}->args($W->{"QUERY_STRING"});
-       # Workaround: &CGI::Vars behaves weird if strings passed both as POST data and in: $QUERY_STRING
-       do { $W->{"r"}->args(""); delete $ENV{"QUERY_STRING"}; } if $W->{"r"}->method() eq "POST";
-       # Do not: $W->{"r"}->args()
-       # as it parses only QUERY_STRING (not POST data).
-       $W->{"args_orig_array"}=[ CGI->new($W->{"r"})->Vars() ];
-       $W->{"args"}={ @{$W->{"args_orig_array"}} };
-       for my $name (keys(%{$W->{"args"}})) {
-               my @vals=split /\x00/,$W->{"args"}{$name};
-               next if @vals<=1;
-               $W->{"args"}{$name}=[@vals];
-               }
-
        $W->{"headers_in"}=$W->{"r"}->headers_in();
        Wrequire 'My::Hash::Merge';
        $W->{"headers_in"}=My::Hash::Merge->new(
@@ -250,12 +229,40 @@ my($class,%args)=@_;
                        $$_=0 if !defined $$_;
                        }
                }
+       # Used only if: $W->{"http_safe"}
+       # but requiring it only then would cause, for a request with a different method(): Appending to the '_done' package list
+       Wrequire 'My::Hash::RecordKeys';
        if ($W->{"http_safe"}) {
-               Wrequire 'My::Hash::RecordKeys';
                $W->{"headers_in_RecordKeys"}=My::Hash::RecordKeys->new($W->{"headers_in"});
                $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
                }
 
+       {
+               local $_=$W->{"r"}->args() || "";
+               if ($W->{"detect_ent"}) {
+                                if (/[&]amp;have_ent/)
+                               { $W->{"have_ent"}=0; }
+                       elsif (    /[&]have_ent/)
+                               { $W->{"have_ent"}=1; }
+                       else
+                               { delete $W->{"have_ent"}; }
+                       if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") {
+                               $W->{"head"}.='<meta http-equiv="Refresh" content="0; URL='
+                                               .escapeHTML("http://".$W->{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0]
+                                                               ."?".($_ || "detect_ent_glue=1").'&have_ent=detect')
+                                               .'" />'."\n";
+                               }
+                       }
+               s/([&])amp;/$1/g;
+               $W->{"r"}->args($_);
+               }
+
+       $W->{"args"}=URI->new("?".$W->{"r"}->args())->query_form_hash();
+       $W->merge_post_args() if $W->{"r"}->method() eq "POST";
+       # Prepare '$args' first to (FIXME: Why?) prevent: Not a reference
+       my $args=$W->{"args"};
+       $W->{"args_orig"}=Storable::dclone($args);
+
        $W->{"browser"}=sub {
                # Lazy-evaluation, we may not need the "User-Agent" header at all.
                return our $r||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
@@ -272,15 +279,157 @@ my($class,%args)=@_;
        if ($W->{"detect_js"} && !$W->{"have_js"}) {
                # Do not: <script />
                # as at least Lynx inhibits any further HTML output.
-               $W->{"head"}.='<script type="text/javascript" src="'.uri_escaped(path_web('/My/HaveJS.pm')).'"></script>'."\n";
+               # Do not: text/javascript
+               # as it does not appear to be registered, at least according to: MIME::Types $VERSION 1.15
+               # "application/javascript" so far standardized till 2005-12-08 by:
+               #       http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt
+               $W->{"head"}.='<script type="application/javascript" src="'.uri_escaped(path_web('/My/HaveJS.pm')).'"></script>'."\n";
                }
 
-       do { _args_check(%$_) if $_; } for ($W->{"args_check"});
-
+       # Required by &_args_check below.
        $W->{"_init_done"}=1;
+
+       do { _args_check(%$_) if $_; } for $W->{"args_check"};
+
        return $W;
 }
 
+sub form_method($)
+{
+my($method)=@_;
+       # Returns the <form> attributes text for lowercase "get"/"post"; clucks and returns "" for anything else.
+       my $charsets=q{accept-charset="us-ascii utf-8"};        # identical for both methods
+       return(($method eq "post" ? q{enctype="application/x-www-form-urlencoded" } : "").$charsets) if $method eq "post" || $method eq "get";
+       cluck "Undefined method: $method";
+       return ""
+}
+
+sub merge_post_args($)
+{
+my($class)=@_;
+
+       # Fold the flat (name,value,...) list of &read_post_args into $W->{"args"}; a repeated name becomes an array reference.
+       my @pairs=$class->read_post_args();
+       while (my($key,$value)=splice @pairs,0,2) {
+               my $slot=\$W->{"args"}{$key};
+               my $kind=ref $$slot;    # "" for a plain scalar (or for not yet set)
+               if    (!defined $$slot)  { $$slot=$value; }
+               elsif ($kind eq "")      { $$slot=[$$slot,$value]; }
+               elsif ($kind eq "ARRAY") { push @$$slot,$value; }
+               else {
+                       cluck "Ignoring POST argument \"$key\", orig is weird:\n",Dumper($$slot);
+                       }
+               }
+       return;
+}
+
+# Returns the POST body as a flat (name,value,...) list, the reader being chosen by the request "Content-type".
+# Do not: use CGI;
+# as its parsing of POST vs. QUERY_STRING data, multiple-valued keys etc. is too dense and causes weird problems, together with mod_perl etc.
+sub read_post_args($)
+{
+my($class)=@_;
+
+       local $_=$class->http_headers_in_for("Content-type")->content_type();   # compared below against bare media type names
+       return $class->read_multipart_form_data() if $_ eq "multipart/form-data";
+       return $class->read_application_x_www_form_urlencoded() if $_ eq "application/x-www-form-urlencoded";
+       cluck "Unknown POST data body, ignored: $_";
+       return; # unsupported body type: contributes no arguments
+}
+
+sub read_application_x_www_form_urlencoded($)
+{
+my($class)=@_;
+
+       # Collect the whole request body in chunks of 0x1000 bytes; a false read() result ends the loop.
+       # Do not: cluck "Error reading POST data: $!" if !defined $got;
+       # as it should be done using: APR::Error exceptions
+       my @chunks;
+       while ($W->{"r"}->read(my($chunk),0x1000)) {
+               push @chunks,$chunk;
+               }
+       # The body gets parsed exactly like a query string; the result is a flat (name,value,...) list.
+       return URI->new("?".join("",@chunks))->query_form();
+}
+
+sub read_multipart_form_data($)
+{
+my($class)=@_;
+       # Returns the flat (name,body,...) list of the parts found in the "multipart/form-data" request body.
+       my $parser=MIME::Parser->new();
+       # Keep the parsed parts in core: the files formerly written under "/tmp" were never unlink()ed.
+       $parser->output_to_core(1);
+
+       local *R_FH;
+       tie *R_FH,$W->{"r"};
+       local *FH;
+       tie *FH,"My::Web::ReadMerged",  # the parser reads a synthesized MIME header first, the request body after it
+                       join("",map(($_.": ".$W->{"headers_in"}{$_}."\n"),qw(
+                                       Content-type
+                                       )))."\n",
+                       \*R_FH;
+       my $body=$parser->parse(\*FH);
+       cluck "No multipart POST request body?" if !$body->is_multipart();
+
+       return map((
+                       $_->head()->mime_attr("content-disposition.name")
+                       =>
+                       join("",@{$_->body()})
+                       ),$body->parts());
+
+       # TODO: Globalize, make it IO::* compatible, split to the merging part + IO::Scalar.
+       package My::Web::ReadMerged;
+
+       # Must be BEGIN: run-time statements placed after the 'return' above
+       # would never get executed, leaving @ISA empty.
+       BEGIN { require Tie::Handle; require Exporter; our @ISA=qw(Tie::Handle Exporter); }
+       use Carp qw(cluck confess);
+
+       sub READLINE($)
+       {
+       my($self)=@_;
+
+               confess "Slurp not yet implemented" if !defined $/;
+               # Apache2::RequestIO does not support 'READLINE'!
+               for (;;) {
+                       if (defined $self->{"data"} && $self->{"data"}=~s{^(.*\Q$/\E)}{}) {
+                               $self->{"offset"}+=length(my $line=$1); # capture instead of $&
+                               return $line;
+                               }
+                       my $fh_orig=$self->{"fh_orig"};
+                       if (!$fh_orig) {
+                               my $r=delete $self->{"data"};
+                               return undef if !defined $r || $r eq "";        # end of data: never hand out a bogus empty line
+                               $self->{"offset"}+=length $r;
+                               return $r;      # last line, lacking the terminator
+                               }
+                       my $got=read $fh_orig,my($buf),0x1000;
+                       cluck "Error reading POST data: $!" if !defined $got;
+                       delete $self->{"fh_orig"} if !$got;     # EOF or error: serve just the buffered rest from now on
+                       cluck "INTERNAL: fh_orig should not exist here" if !defined $self->{"data"};
+                       $self->{"data"}.=$buf if $got;  # $buf may be undefined after a failed read
+                       }
+       }
+
+       sub TELL($)
+       {
+       my($self)=@_;
+
+               return $self->{"offset"};       # total length of everything READLINE has handed out so far
+       }
+
+       sub TIEHANDLE($$$)
+       {
+       my($class,$data,$fh_orig)=@_;   # $data: text served first; $fh_orig: handle read after $data is used up
+
+               my $self=bless {},$class;
+               $self->{"data"}=$data;
+               $self->{"offset"}=0;
+               $self->{"fh_orig"}=$fh_orig;
+               return $self;
+       }
+}
+
 sub cleanup($)
 {
 my($apache_request)=@_;
@@ -347,28 +496,24 @@ sub request_check(;$)
 my($self)=@_;
 
        # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
-       # CGI requires valid "r": check it beforehand here.
        confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); };
        # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
        # as it is valid at least while preparing arguments to call: &project::Lib::init
 }
 
+# Returns $text with the characters & < > " replaced by their HTML entities.
+# Do not: use CGI;
+# as it is too backward compatible regarding the charset encodings etc.; its code is too dense with no additional functionality for the recent content.
 sub escapeHTML($)
 {
 my($text)=@_;
 
-       # Prevent &CGI::escapeHTML breaking utf-8 strings like: \xC4\x9B eq \x{11B}
-       # Prevent case if we run under mod_perl but still just initializing:
-       request_check() if $ENV{"MOD_PERL"};
-       # Generally we are initialized from &init but we may be used without it without mod_perl
-       # and in such case check the change on all non-first invocations.
-       our $init;
-       if (!$ENV{"MOD_PERL"} && $init++) {
-               do { cluck "charset==$_" if $_ ne "utf-8"; } for CGI::charset();
-               }
-       CGI::charset("utf-8");
-
-       return CGI::escapeHTML($text);
+       my $html=$text;
+       $html=~s{&}{&amp;}g;    # Must stay first: the replacements below emit '&' themselves.
+       $html=~s{<}{&lt;}g;
+       $html=~s{>}{&gt;}g;
+       $html=~s{"}{&quot;}g;
+       return $html;
 }
 
 # /home/user/www/webdir
@@ -434,6 +579,7 @@ my($in,%args)=@_;
        my $uri=in_to_uri_abs($in);
        if (uri_is_local($uri)) {
                # Prefer the $uri values over "args_persistent" values.
+               # &query_form_hash comes from: URI::QueryParam
                $uri->query_form_hash({
                                map({
                                        my $key=$_;
@@ -513,6 +659,7 @@ my($msg)=@_;
        if (!$W->{"heading_done"}) {
                $W->{"indexme"}=0;      # For the case no heading was sent yet.
                $W->{"header_only"}=0;  # assurance for &heading
+               $W->{"content_type"}="text/html";       # Force HTML and avoid strictly checked XHTML.
                My::Web->heading();
                }
        Wprint "\n".vskip("3ex")."<hr /><h1 class=\"error\">FATAL ERROR: $msg!</h1>\n"
@@ -796,6 +943,15 @@ my(%args)=@_;
        return [ map(($args{$_}),@fields) ];
 }
 
+# Returns: 'HTTP::Headers' instance filled from just the named entries of $W->{"headers_in"}.
+sub http_headers_in_for($@)
+{
+my($self,@headers)=@_;
+
+       # Read only the requested entries; limiting them is what generates the proper 'Vary' header.
+       return HTTP::Headers->new(map { ($_=>$W->{"headers_in"}{$_}) } @headers);
+}
+
 # Input: $self is required!
 # Input: Put the fallback variant as the first one.
 # Returns: always only scalar!
@@ -803,13 +959,6 @@ sub Negotiate_choose($$)
 {
 my($self,$variants)=@_;
 
-       # Limit these entries to generate proper 'Vary' header.
-       my %hash=(map(($_=>$W->{"headers_in"}{$_}),qw(
-                       Accept
-                       Accept-Charset
-                       Accept-Encoding
-                       Accept-Language
-                       )));
        my $best=HTTP::Negotiate::choose($variants,
                        # Do not: $W->{"r"}
                        # to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84.
@@ -817,7 +966,12 @@ my($self,$variants)=@_;
                        # to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84.
                        # Do not: HTTP::Headers->new($W->{"r"}->headers_in());
                        # to prevent empty result or even: Odd number of elements in anonymous hash
-                       HTTP::Headers->new(%hash));
+                       $self->http_headers_in_for(qw(
+                                       Accept
+                                       Accept-Charset
+                                       Accept-Encoding
+                                       Accept-Language
+                                       )));
        $best||=$variants->[0][0];      # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed?
        return $best;
 }
@@ -972,17 +1126,20 @@ my($f)=@_;
 
 sub cache_start()
 {
+       # Used only if: !$W->{"http_safe"}
+       # but requiring it only then would cause, for a request with a different method(): Appending to the '_done' package list
+       # &Wrequire it here even if it will not be later used; to be stable!
+       Wrequire 'My::Hash::RestrictTo';
        if (!$W->{"http_safe"}) {
                __PACKAGE__->_no_cache();
                return;
                }
 
        {
-               # &Wrequire it here even if it will not be later used; to be stable!
-               Wrequire 'My::Hash::RestrictTo';
                my %uri_args_hash=(
+                       "method"=>$W->{"r"}->method(),
                        "uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(),
-                       "args"=>$W->{"args_orig_array"},
+                       "args"=>$W->{"args_orig"},
                        );
                $W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze(\%uri_args_hash); };
                last if !(my $headers_in_keys_arrayref=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}});
@@ -1110,7 +1267,11 @@ my($class)=@_;
 
        if (!$W->{"header_only"}) {
                header("Content-Style-Type"=>"text/css");
-               header("Content-Script-Type"=>"text/javascript");
+               # Do not: text/javascript
+               # as it does not appear to be registered, at least according to: MIME::Types $VERSION 1.15
+               # "application/javascript" so far standardized till 2005-12-08 by:
+               #       http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt
+               header("Content-Script-Type"=>"application/javascript");
                # $W->{"r"}->content_languages() ?
                do { header("Content-Language"=>$_) if $_; } for $W->{"language"};
                }
@@ -1126,33 +1287,39 @@ my($class)=@_;
        # of at least
        #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
        # http://validator.w3.org/ does not send ANY "Accept" headers!
-       $W->{"content_type"}="application/xhtml+xml" if !defined $W->{"content_type"}
-                       && !$W->{"headers_in"}{"Accept"}
-                       && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i;
-       defined($W->{"content_type"}) or $W->{"content_type"}=$class->Negotiate_choose([
-                       # Put the fallback variant as the first one.
-                       # Rate both variants the same to prefer "text/html" for undecided clients.
-                       # At least
-                       #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
-                       # prefers "application/xhtml+xml" over "text/html" itself:
-                       #   text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
-                       negotiate_variant(
-                                       "id"=>"text/html",
-                                       "content-type"=>"text/html",
-                                       "qs"=>0.6,
-                                       (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
-                                       "lang"=>$W->{"language"},
-                                       ),
-                       negotiate_variant(
-                                       "id"=>"application/xhtml+xml",
-                                       "content-type"=>"application/xhtml+xml",
-                                       "qs"=>0.6,
-                                       (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
-                                       "lang"=>$W->{"language"},
-                                       ),
-                       # application/xml ?
-                       # text/xml ?
-                       ]);
+       if (!defined $W->{"content_type"}) {
+               # Be _stable_ for "headers_in".
+               my $accept=$W->{"headers_in"}{"Accept"};
+               my $user_agent=$W->{"headers_in"}{"User-Agent"}||"";
+               $W->{"content_type"}="application/xhtml+xml"
+                               if !$accept && $user_agent=~m{^W3C_Validator/}i;
+               # Be _stable_:
+               my $negotiated=$class->Negotiate_choose([
+                               # Put the fallback variant as the first one.
+                               # Rate both variants the same to prefer "text/html" for undecided clients.
+                               # At least
+                               #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
+                               # prefers "application/xhtml+xml" over "text/html" itself:
+                               #   text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
+                               negotiate_variant(
+                                               "id"=>"text/html",
+                                               "content-type"=>"text/html",
+                                               "qs"=>0.6,
+                                               (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
+                                               "lang"=>$W->{"language"},
+                                               ),
+                               negotiate_variant(
+                                               "id"=>"application/xhtml+xml",
+                                               "content-type"=>"application/xhtml+xml",
+                                               "qs"=>0.6,
+                                               (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
+                                               "lang"=>$W->{"language"},
+                                               ),
+                               # application/xml ?
+                               # text/xml ?
+                               ]);
+               $W->{"content_type"}=$negotiated if !defined $W->{"content_type"};
+               }
        # mod_perl doc: If you set this header via the headers_out table directly, it
        #               will be ignored by Apache. So do not do that.
        my $type;
@@ -1196,8 +1363,12 @@ HERE
                if ($W->{"css_inherit"}) {
                        # Do not: <script />
                        # as at least Lynx inhibits any further HTML output.
+                       # Do not: text/javascript
+                       # as it does not appear to be registered, at least according to: MIME::Types $VERSION 1.15
+                       # "application/javascript" so far standardized till 2005-12-08 by:
+                       #       http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt
                        Wprint <<"HERE";
-<script type="text/javascript" src="@{[ uri_escaped(path_web('/My/css_inherit.js')) ]}"></script>
+<script type="application/javascript" src="@{[ uri_escaped(path_web('/My/css_inherit.js')) ]}"></script>
 HERE
                        }
                }