X-Git-Url: http://git.jankratochvil.net/?a=blobdiff_plain;f=Web.pm;h=1a3f9e0a072f9d1f3f7d85b4d091bb9e9e67f0dc;hb=6c5cf4306de5fcb4958519e0f73cac52fea32753;hp=8244e59b7f34863866dec19a1a6511effcd159c9;hpb=312aff9cc8388402560b84502cb7dd09b035f3f9;p=MyWeb.git diff --git a/Web.pm b/Web.pm index 8244e59..1a3f9e0 100644 --- a/Web.pm +++ b/Web.pm @@ -99,7 +99,6 @@ BEGIN } use WebConfig; # see also below: Wuse 'WebConfig'; -require CGI; require Image::Size; # for &imgsize use File::Basename; # &basename use Carp qw(cluck confess); @@ -127,6 +126,7 @@ use Apache2::Connection; require MIME::Base64; use Apache2::ServerUtil; require MIME::Types; +require MIME::Parser; #our $W; @@ -160,10 +160,6 @@ my($class,%args)=@_; do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__"; exit_hook_start(); - # See: &escapeHTML - do { cluck "charset==$_, expecting ISO-8859-1" if $_ ne "ISO-8859-1"; } for CGI::charset(); - CGI::charset("utf-8"); - do { $W->{$_}=0 if !defined $W->{$_}; } for "detect_ent"; do { $W->{$_}=0 if !defined $W->{$_}; } for "detect_js"; do { $W->{$_}=1 if !defined $W->{$_}; } for "have_css"; # AFAIK it does not hurt anyone. @@ -194,35 +190,6 @@ my($class,%args)=@_; select *STDOUT; $|=1; - $W->{"QUERY_STRING"}=$W->{"r"}->args() || ""; - if ($W->{"detect_ent"}) { - if ($W->{"QUERY_STRING"}=~/[&]amp;have_ent/) - { $W->{"have_ent"}=0; } - elsif ($W->{"QUERY_STRING"}=~ /[&]have_ent/) - { $W->{"have_ent"}=1; } - else - { delete $W->{"have_ent"}; } - if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") { - $W->{"head"}.='{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0] - ."?".($W->{"QUERY_STRING"} || "detect_ent_glue=1").'&have_ent=detect') - .'" />'."\n"; - } - } - $W->{"QUERY_STRING"}=~s/([&])amp;/$1/g; - $W->{"r"}->args($W->{"QUERY_STRING"}); - # Workaround: &CGI::Vars behaves weird if strings passed both as POST data and in: $QUERY_STRING - do { $W->{"r"}->args(""); delete $ENV{"QUERY_STRING"}; } if $W->{"r"}->method() eq "POST"; - # Do not: $W->{"r"}->args() - # as it parses only QUERY_STRING (not POST data). - $W->{"args_orig_array"}=[ CGI->new($W->{"r"})->Vars() ]; - $W->{"args"}={ @{$W->{"args_orig_array"}} }; - for my $name (keys(%{$W->{"args"}})) { - my @vals=split /\x00/,$W->{"args"}{$name}; - next if @vals<=1; - $W->{"args"}{$name}=[@vals]; - } - $W->{"headers_in"}=$W->{"r"}->headers_in(); Wrequire 'My::Hash::Merge'; $W->{"headers_in"}=My::Hash::Merge->new( @@ -256,6 +223,32 @@ my($class,%args)=@_; $W->{"headers_in"}=$W->{"headers_in_RecordKeys"}; } + { + local $_=$W->{"r"}->args() || ""; + if ($W->{"detect_ent"}) { + if (/[&]amp;have_ent/) + { $W->{"have_ent"}=0; } + elsif ( /[&]have_ent/) + { $W->{"have_ent"}=1; } + else + { delete $W->{"have_ent"}; } + if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") { + $W->{"head"}.='{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0] + ."?".($_ || "detect_ent_glue=1").'&have_ent=detect') + .'" />'."\n"; + } + } + s/([&])amp;/$1/g; + $W->{"r"}->args($_); + } + + $W->{"args"}=URI->new("?".$W->{"r"}->args())->query_form_hash(); + $W->merge_post_args() if $W->{"r"}->method() eq "POST"; + # Prepare '$args' first to (FIXME: Why?) prevent: Not a reference + my $args=$W->{"args"}; + $W->{"args_orig"}=Storable::dclone($args); + $W->{"browser"}=sub { # Lazy-evaluation, we may not need the "User-Agent" header at all. return our $r||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"}); @@ -272,7 +265,11 @@ my($class,%args)=@_; if ($W->{"detect_js"} && !$W->{"have_js"}) { # Do not: '."\n"; + # Do not: text/javascript + # as it does not look as registered, at least according to: MIME::Types $VERSION 1.15 + # "application/javascript" so far standardized till 2005-12-08 by: + # http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt + $W->{"head"}.=''."\n"; } do { _args_check(%$_) if $_; } for ($W->{"args_check"}); @@ -281,6 +278,132 @@ my($class,%args)=@_; return $W; } +sub merge_post_args($) +{ +my($class)=@_; + + my @post_args=$class->read_post_args(); + while (@post_args) { + my $name=shift @post_args; + my $data=shift @post_args; + my $ref=\$W->{"args"}{$name}; + if (!defined $$ref) { $$ref=$data; } + elsif (!ref $$ref) { $$ref=[$$ref,$data]; } + elsif ("ARRAY" eq ref $$ref) { push @$$ref,$data; } + else { + cluck "Ignoring POST argument \"$name\", orig is weird:\n",Dumper($$ref); + } + } + return; +} + +# Do not: use CGI; +# as CGI parsing of POST vs. QUERY_STRING data, multiple-valued keys etc. +# is too dense and causes weird problems, together with mod_perl etc. +sub read_post_args($) +{ +my($class)=@_; + + local $_=$class->http_headers_in_for("Content-type")->content_type(); + return $class->read_multipart_form_data() if $_ eq "multipart/form-data"; + return $class->read_application_x_www_form_urlencoded() if $_ eq "application/x-www-form-urlencoded"; + cluck "Unknown POST data body, ignored: $_"; + return; +} + +sub read_application_x_www_form_urlencoded($) +{ +my($class)=@_; + + my $body=""; + for (;;) { + my $got=$W->{"r"}->read(my($buf),0x1000); + # Do not: cluck "Error reading POST data: $!" if !defined $got; + # as it should be done using: APR::Error exceptions + last if !$got; + $body.=$buf; + } + return URI->new("?".$body)->query_form(); +} + +sub read_multipart_form_data($) +{ +my($class)=@_; + + my $parser=MIME::Parser->new(); + # FIXME: No unlink()s done! + $parser->output_under("/tmp"); + + local *R_FH; + tie *R_FH,$W->{"r"}; + local *FH; + tie *FH,"My::Web::ReadMerged", + join("",map(($_.": ".$W->{"headers_in"}{$_}."\n"),qw( + Content-type + )))."\n", + \*R_FH; + my $body=$parser->parse(\*FH); + cluck "No multipart POST request body?" if !$body->is_multipart(); + + return map(( + $_->head()->mime_attr("content-disposition.name") + => + join("",@{$_->body()}) + ),$body->parts()); + + # TODO: Globalize, make it IO::* compatible, split to the merging part + IO::Scalar. + package My::Web::ReadMerged; + + require Tie::Handle; + require Exporter; + our @ISA=qw(Tie::Handle Exporter); + use Carp qw(cluck confess); + + sub READLINE($) + { + my($self)=@_; + + confess "Slurp not yet implemented" if !defined $/; + # Apache2::RequestIO does not support 'READLINE'! + for (;;) { + if (defined $self->{"data"} && $self->{"data"}=~s{^.*\Q$/\E}{}) { + $self->{"offset"}+=length $&; + return $&; + } + my $fh_orig=$self->{"fh_orig"}; + if (!$fh_orig) { + my $r=$self->{"data"}; + delete $self->{"data"}; + $self->{"offset"}+=length $r if defined $r; + return $r; + } + my $got=read $fh_orig,my($buf),0x1000; + cluck "Error reading POST data: $!" if !defined $got; + delete $self->{"fh_orig"} if !$got; + cluck "INTERNAL: fh_orig should not exist here" if !defined $self->{"data"}; + $self->{"data"}.=$buf; + } + } + + sub TELL($) + { + my($self)=@_; + + return $self->{"offset"}; + } + + sub TIEHANDLE($$$) + { + my($class,$data,$fh_orig)=@_; + + my $self=bless {},$class; + $self->{"data"}=$data; + $self->{"offset"}=0; + $self->{"fh_orig"}=$fh_orig; + return $self; + } +} + sub cleanup($) { my($apache_request)=@_; @@ -306,7 +429,7 @@ sub exit_hook } sub exit_hook_start { - cluck "exit_hook_start() twice?" if defined $exit_orig; + do { cluck "exit_hook_start() twice?"; return; } if defined $exit_orig; $exit_orig=\&CORE::GLOBAL::exit; # Prevent: Subroutine CORE::GLOBAL::exit redefined no warnings 'redefine'; @@ -316,7 +439,8 @@ sub exit_hook_stop { do { cluck "exit_hook_stop() without exit_hook_start()?"; return; } if \&exit_hook ne \&CORE::GLOBAL::exit; - cluck "INTERNAL: exit_orig uninitialized" if !$exit_orig; + do { cluck "INTERNAL: exit_orig uninitialized"; return; } + if !$exit_orig; # Prevent: Subroutine CORE::GLOBAL::exit redefined no warnings 'redefine'; *CORE::GLOBAL::exit=$exit_orig; @@ -346,28 +470,24 @@ sub request_check(;$) my($self)=@_; # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ... - # CGI requires valid "r": check it beforehand here. confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); }; # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"}; # as it is valid at least while preparing arguments to call: &project::Lib::init } +# Do not: use CGI; +# as it is too much backward compatible regarding the charset encodings etc. +# and the resulting code is too dense with no additional functionality for the recent content. sub escapeHTML($) { my($text)=@_; - # Prevent &CGI::escapeHTML breaking utf-8 strings like: \xC4\x9B eq \x{11B} - # Prevent case if we run under mod_perl but still just initializing: - request_check() if $ENV{"MOD_PERL"}; - # Generally we are initialized from &init but we may be used without it without mod_perl - # and in such case check the change on all non-first invocations. - our $init; - if (!$ENV{"MOD_PERL"} && $init++) { - do { cluck "charset==$_" if $_ ne "utf-8"; } for CGI::charset(); - } - CGI::charset("utf-8"); - - return CGI::escapeHTML($text); + local $_=$text; + s{&}{&}gso; + s{<}{<}gso; + s{>}{>}gso; + s{"}{"}gso; + return $_; } # /home/user/www/webdir @@ -433,6 +553,7 @@ my($in,%args)=@_; my $uri=in_to_uri_abs($in); if (uri_is_local($uri)) { # Prefer the $uri values over "args_persistent" values. + # &query_form_hash comes from: URI::QueryParam $uri->query_form_hash({ map({ my $key=$_; @@ -795,6 +916,15 @@ my(%args)=@_; return [ map(($args{$_}),@fields) ]; } +# Returns: 'HTTP::Headers' instance. +sub http_headers_in_for($@) +{ +my($self,@headers)=@_; + + # Limit these entries to generate proper 'Vary' header. + return HTTP::Headers->new(map(($_=>$W->{"headers_in"}{$_}),@headers)); +} + # Input: $self is required! # Input: Put the fallback variant as the first one. # Returns: always only scalar! @@ -802,13 +932,6 @@ sub Negotiate_choose($$) { my($self,$variants)=@_; - # Limit these entries to generate proper 'Vary' header. - my %hash=(map(($_=>$W->{"headers_in"}{$_}),qw( - Accept - Accept-Charset - Accept-Encoding - Accept-Language - ))); my $best=HTTP::Negotiate::choose($variants, # Do not: $W->{"r"} # to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84. @@ -816,7 +939,12 @@ my($self,$variants)=@_; # to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84. # Do not: HTTP::Headers->new($W->{"r"}->headers_in()); # to prevent empty result or even: Odd number of elements in anonymous hash - HTTP::Headers->new(%hash)); + $self->http_headers_in_for(qw( + Accept + Accept-Charset + Accept-Encoding + Accept-Language + ))); $best||=$variants->[0][0]; # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed? return $best; } @@ -980,8 +1108,9 @@ sub cache_start() # &Wrequire it here even if it will not be later used; to be stable! Wrequire 'My::Hash::RestrictTo'; my %uri_args_hash=( + "method"=>$W->{"r"}->method(), "uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(), - "args"=>$W->{"args_orig_array"}, + "args"=>$W->{"args_orig"}, ); $W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze(\%uri_args_hash); }; last if !(my $headers_in_keys_arrayref=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}}); @@ -1046,6 +1175,9 @@ sub cache_finish() # as we may have just gave 304 and 'exit;' without starting the caching. return if !$W->{"cache_active"}; + # Headers may not be complete in this case; not sure, just trying. + return if $W->{"r"}->connection()->aborted(); + # Fill-in/check: %uri_args_frozen_to_headers_in_keys my $headers_in_keys_stored_arrayref_ref=\$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}}; my @headers_in_keys=tied(%{$W->{"headers_in_RecordKeys"}})->accessed(); @@ -1106,7 +1238,11 @@ my($class)=@_; if (!$W->{"header_only"}) { header("Content-Style-Type"=>"text/css"); - header("Content-Script-Type"=>"text/javascript"); + # Do not: text/javascript + # as it does not look as registered, at least according to: MIME::Types $VERSION 1.15 + # "application/javascript" so far standardized till 2005-12-08 by: + # http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt + header("Content-Script-Type"=>"application/javascript"); # $W->{"r"}->content_languages() ? do { header("Content-Language"=>$_) if $_; } for $W->{"language"}; } @@ -1122,33 +1258,39 @@ my($class)=@_; # of at least # Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217 # http://validator.w3.org/ does not send ANY "Accept" headers! - $W->{"content_type"}="application/xhtml+xml" if !defined $W->{"content_type"} - && !$W->{"headers_in"}{"Accept"} - && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i; - defined($W->{"content_type"}) or $W->{"content_type"}=$class->Negotiate_choose([ - # Put the fallback variant as the first one. - # Rate both variants the same to prefer "text/html" for undecided clients. - # At least - # Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217 - # prefers "application/xhtml+xml" over "text/html" itself: - # text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 - negotiate_variant( - "id"=>"text/html", - "content-type"=>"text/html", - "qs"=>0.6, - (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}), - "lang"=>$W->{"language"}, - ), - negotiate_variant( - "id"=>"application/xhtml+xml", - "content-type"=>"application/xhtml+xml", - "qs"=>0.6, - (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}), - "lang"=>$W->{"language"}, - ), - # application/xml ? - # text/xml ? - ]); + if (!defined $W->{"content_type"}) { + # Be _stable_ for "headers_in". + my $accept=$W->{"headers_in"}{"Accept"}; + my $user_agent=$W->{"headers_in"}{"User-Agent"}||""; + $W->{"content_type"}="application/xhtml+xml" + if !$accept && $user_agent=~m{^W3C_Validator/}i; + # Be _stable_: + my $negotiated=$class->Negotiate_choose([ + # Put the fallback variant as the first one. + # Rate both variants the same to prefer "text/html" for undecided clients. + # At least + # Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217 + # prefers "application/xhtml+xml" over "text/html" itself: + # text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 + negotiate_variant( + "id"=>"text/html", + "content-type"=>"text/html", + "qs"=>0.6, + (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}), + "lang"=>$W->{"language"}, + ), + negotiate_variant( + "id"=>"application/xhtml+xml", + "content-type"=>"application/xhtml+xml", + "qs"=>0.6, + (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}), + "lang"=>$W->{"language"}, + ), + # application/xml ? + # text/xml ? + ]); + $W->{"content_type"}=$negotiated if !defined $W->{"content_type"}; + } # mod_perl doc: If you set this header via the headers_out table directly, it # will be ignored by Apache. So do not do that. my $type; @@ -1192,8 +1334,12 @@ HERE if ($W->{"css_inherit"}) { # Do not: + HERE } }