X-Git-Url: http://git.jankratochvil.net/?a=blobdiff_plain;f=Web.pm;h=9aafbab363e40f6f7458855838f59fc76f152a61;hb=5d886f3c5fbabcadbaa646db7f6c636dbebec218;hp=f9d6d62507e56413e29d635847c39f5ade3d33eb;hpb=cd853c0664010679a7015ba21831d56802be8455;p=MyWeb.git diff --git a/Web.pm b/Web.pm index f9d6d62..9aafbab 100644 --- a/Web.pm +++ b/Web.pm @@ -31,7 +31,7 @@ our @EXPORT=qw( &Wrequire &Wuse &path_web &path_abs_disk &uri_escaped - &a_href &a_href_cz + &a_href &a_href_cc &vskip &img ¢erimg &rightimg $W @@ -68,6 +68,8 @@ BEGIN for my $caller (keys(%callers)) { next if $caller eq $target; next if $packages_used_hash{$caller}{$target}++; + cluck "Appending to the '_done' package list: caller=$caller,target=$target" + if $packages_used_hash{$caller}{"_done"}; push @{$packages_used_array{$caller}},$target; } } @@ -121,32 +123,14 @@ require Data::Compare; use Data::Dumper; require Encode; use Apache2::Filter; +use Apache2::Connection; +require MIME::Base64; +use Apache2::ServerUtil; +require MIME::Types; #our $W; -sub cleanup($) -{ -my($apache_request)=@_; - - $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1; - cache_finish(); - # Sanity protection. - $W=undef(); - return OK; -} - -sub request_check(;$) -{ -my($self)=@_; - - # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ... - # CGI requires valid "r": check it beforehand here. - confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); }; - # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"}; - # as it is valid at least while preparing arguments to call: &project::Lib::init -} - sub init ($%) { my($class,%args)=@_; @@ -159,15 +143,20 @@ my($class,%args)=@_; Wuse 'WebConfig'; Wrequire 'My::Hash'; - $W=My::Hash->new({ + # $W={} can get somehow created very easily. + cluck "W not empty:\n".Dumper($W) if keys(%$W); + $W=My::Hash->new({},"My::Hash::Sub","My::Hash::Push"); + bless $W,$class; + %$W=( "__PACKAGE__"=>scalar(caller()), %WebConfig, %args, # override %WebConfig settings - },"My::Hash::Sub","My::Hash::Push"); + ); # {"__PACKAGE__"} is mandatory for mod_perl-2.0; # $Apache2::Registry::curstash is no longer supported. do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__"; + exit_hook_start(); # See: &escapeHTML do { cluck "charset==$_, expecting ISO-8859-1" if $_ ne "ISO-8859-1"; } for CGI::charset(); @@ -237,16 +226,18 @@ my($class,%args)=@_; $W->{"headers_in"}=My::Hash::Merge->new( $W->{"headers_in"}, My::Hash::Sub->new({ - "_get_remote_host"=>sub { return $W->{"r"}->get_remote_host(); }, + "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); }, }), ); + Wrequire 'My::Hash::Readonly'; $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"}); if ($W->{"r"}->method() eq "GET" || $W->{"r"}->method() eq "HEAD") { for (\$W->{"http_safe"}) { - # Extend the current ETag system instead if you would need it: - cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?" - if defined($$_) && !$$_; + # Do not: # Extend the current ETag system instead if you would need it: + # cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?" + # if defined($$_) && !$$_; + # as sometimes it just does not make sense to cache it. $$_=1 if !defined $$_; } } @@ -263,20 +254,69 @@ my($class,%args)=@_; $W->{"headers_in"}=$W->{"headers_in_RecordKeys"}; } - $W->{"browser"}=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"}); + $W->{"browser"}=sub { + # Lazy-evaluation, we may not need the "User-Agent" header at all. + return our $r||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"}); + }; if (!defined $W->{"have_style"}) { - $W->{"have_style"}=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0); + $W->{"have_style"}=sub { + # Lazy-evaluation, we may not need the "User-Agent" header at all. + return our $r||=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0); + }; } $W->{"have_js"}=($W->{"args"}{"have_js"} ? 1 : 0); if ($W->{"detect_js"} && !$W->{"have_js"}) { - $W->{"head"}.=''."\n"; + $W->{"head"}.=''."\n"; } do { _args_check(%$_) if $_; } for ($W->{"args_check"}); - return bless $W,$class; + $W->{"_init_done"}=1; + return $W; +} + +sub cleanup($) +{ +my($apache_request)=@_; + + cluck "CORE::GLOBAL::exit hook not ran" if !$W->{"_exit_done"}; + cluck "packages not finalized" if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}; + cache_finish(); + # Sanity protection. + $W=undef(); + exit_hook_stop(); + return OK; +} + +# PerlResponseHandler is RUN_FIRST and &ModPerl::Util::exit returns OK, so no (sane) go. +# PerlLogHandler is already too late to be able to produce any output. +my $exit_orig; +sub exit_hook +{ + cluck "Missing ->init while in exit_hook()" if !$W->{"_init_done"}; + # &footer will call us recursively! + footer() if !$W->{"_exit_done"}++; + return &{$exit_orig}(@_); +} +sub exit_hook_start +{ + cluck "exit_hook_start() twice?" if defined $exit_orig; + $exit_orig=\&CORE::GLOBAL::exit; + # Prevent: Subroutine CORE::GLOBAL::exit redefined + no warnings 'redefine'; + *CORE::GLOBAL::exit=\&exit_hook; +} +sub exit_hook_stop +{ + do { cluck "exit_hook_stop() without exit_hook_start()?"; return; } + if \&exit_hook ne \&CORE::GLOBAL::exit; + cluck "INTERNAL: exit_orig uninitialized" if !$exit_orig; + # Prevent: Subroutine CORE::GLOBAL::exit redefined + no warnings 'redefine'; + *CORE::GLOBAL::exit=$exit_orig; + $exit_orig=undef(); } # Be aware other parts of code (non-My::Web) will NOT use this function! @@ -292,10 +332,22 @@ my($text,%args)=@_; delete $args{"undef"}; cluck join(" ","Invalid arguments:",keys(%args)) if keys(%args); return if !defined $text; - cluck "utf-8 untested" if Encode::is_utf8($text); + # Do not: cluck "utf-8 untested" if Encode::is_utf8($text); + # as it is valid here. $W->{"r"}->puts($text); } +sub request_check(;$) +{ +my($self)=@_; + + # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ... + # CGI requires valid "r": check it beforehand here. + confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); }; + # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"}; + # as it is valid at least while preparing arguments to call: &project::Lib::init +} + sub escapeHTML($) { my($text)=@_; @@ -314,24 +366,6 @@ my($text)=@_; return CGI::escapeHTML($text); } -# local *FH; -# tie *FH,ref($W),$W; -sub TIEHANDLE($) -{ -my($class,$W)=@_; - - my $self={}; - $self->{"W"}=$W or confess "Missing W"; - return bless $self,$class; -} - -sub WRITE -{ -my($self,$scalar,$length,$offset)=@_; - - Wprint substr($scalar,0,$length); -} - # /home/user/www/webdir sub dir_top_abs_disk() { @@ -479,21 +513,37 @@ my($msg)=@_; Wprint "\n".vskip("3ex")."

FATAL ERROR: $msg!

\n" ."

You can report this problem's details to" ." ".a_href("mailto:".$W->{"admin_mail"},"admin of this website").".

\n"; - footer(); + exit; } -sub footer (;$) +sub footer_packages_used_comments() { - exit 1 if $W->{"footer_passed"}++; # deadlock prevention: - - Wprint vskip if $W->{"footer_delimit"}; + my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}}; + for my $package (@$packages_used) { + my $cvs_id=(eval('$'.$package."::CVS_ID") +# || $package # debug + ); + Wprint ''."\n" if $cvs_id; + } +} - do { Wprint $_ if $_; } for $W->{"footing_delimit"}; +sub footer() +{ + cluck 'Explicit &footer call is deprecated, !_exit_dne' if !$W->{"_exit_done"}; + exit if $W->{"footer_done"}++; # deadlock prevention: + if ($W->{"header_only"}) { + $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1; + exit; + } + Wprint vskip if $W->{"footer_delimit"}; + &{$_}() for reverse @{$W->{"footing_delimit_sub_push"}}; Wprint "
\n" if $W->{"footer"}; - my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}}; + # Never update the package list while we examine it! + $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1; + my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}}; if ($W->{"footer_ids"}) { Wprint '

'; Wprint join("
\n",map({ my $package=$_; @@ -532,17 +582,12 @@ sub footer (;$) Wprint "

\n"; } - for my $package (@$packages_used) { - my $cvs_id=(eval('$'.$package."::CVS_ID") -# || $package # debug - ); - Wprint ''."\n" if $cvs_id; - } + packages_used_comments(); do { Wprint $_ if $_; } for $W->{"footing"}; Wprint "\n"; - exit 0; + exit; } # Existing entries are overwritten. @@ -595,7 +640,7 @@ my($uri)=@_; return $uri if defined $W->{"have_ent"} && !$W->{"have_ent"}; # non-ent client return $urient if $W->{"have_ent"}; # ent client # Unknown client, &escapeHTML should not be needed here: - return escapeHTML(path_web('/Redirect.pm?location='.uri_escape($uri->abs(unparsed_uri())))); + return escapeHTML(path_web('/My/Redirect.pm?location='.uri_escape($uri->abs(unparsed_uri())))); } our $a_href_inhibited; @@ -659,6 +704,7 @@ my($self,$url,$status)=@_; $W->{"r"}->status($status); $W->{"r"}->headers_out()->{"Location"}=$url; $W->{"header_only"}=1; + $W->{"content_type"}=0; My::Web->heading(); exit; die "NOTREACHED"; @@ -671,22 +717,29 @@ sub remote_ip () # As 'Apache2::ForwardedFor' takes the first of $ENV{"HTTP_X_FORWARDED_FOR"} # while the contents is '127.0.0.1, 213.220.195.171' if client has its own proxy. # We must take the last item ourselves. - my $r=$W->{"headers_in"}{"X-Forwarded-For"} || $W->{"headers_in"}{"_get_remote_host"}; - $r=~s/^.*,\s*//; + # Be VERY sure you always retrieve all the headers unconditionally to hit: My::Hash::RecordKeys + my $x_forwarded_for=$W->{"headers_in"}{"X-Forwarded-For"}; + $x_forwarded_for=~s/^.*,\s*// if $x_forwarded_for; + my $remote_ip=$W->{"headers_in"}{"_remote_ip"}; + my $r; + $r||=$x_forwarded_for; + $r||=$remote_ip; return $r; } -sub is_cz () -{ - return 0 if !$have_Geo_IP; - return "CZ" eq Geo::IP->new()->country_code_by_addr(remote_ip()); -} - -sub a_href_cz ($$;%) +# $url={"JP"=>"http://specific",...}; +# $url={""=>"http://default",...}; +sub a_href_cc($$;%) { my($url,$contents,%args)=@_; - return a_href $url,$contents,%args if is_cz(); + # A bit ineffective but we must process all the possibilities to get stable 'headers_in' hits! + my %map=map(($_=>a_href($url->{$_},$contents,%args)),keys(%$url)); + my $cc; + $cc||=Geo::IP->new()->country_code_by_addr(remote_ip()) if $have_Geo_IP; + $cc||=""; + my $r=$map{$cc}; + return $r if $r; return $contents; } @@ -944,7 +997,8 @@ sub cache_start() } last if OK==$status; $W->{"r"}->status($status); - exit 0; + $W->{"header_only"}=1; # Inhibit &footer output. + exit; die "NOTREACHED"; } @@ -977,7 +1031,7 @@ sub cache_finish_last_modified() sub cache_finish() { # Do not: return if !$W->{"uri_args_frozen"}; - # as we may have just gave 304 and 'exit 0;' without starting the caching. + # as we may have just gave 304 and 'exit;' without starting the caching. return if !$W->{"cache_active"}; # Fill-in/check: %uri_args_frozen_to_headers_in_keys @@ -996,7 +1050,13 @@ sub cache_finish() # Prepare 'headers_out' for the future reusal: my %headers_out; - $headers_out{"Content-MD5"}=$W->{"digest-md5"}->b64digest(); + # Do not: $W->{"digest-md5"}->b64digest(); + # as it will not provide the trailing filling '='s. + # RFC 1864 is not clear if they should be there but its sample provides them. + # Do not try to provide canonical "\r\n" form of newlines as is said by RFC 1864. + # RFC 2068 (HTTP/1.1) section 14.16 says the newlines should NOT be converted for HTTP. + # ',""' to avoid breaking the headers by its default "\n". + $headers_out{"Content-MD5"}=MIME::Base64::encode_base64($W->{"digest-md5"}->digest(),""); # In fact we could also use MD5 for ETag as if we know ETag we also know MD5. # But this way we do not need to calculate MD5 and we still can provide such ETag. So. # $W->{"r"}->set_etag() ? @@ -1010,7 +1070,7 @@ sub cache_finish() delete $Vary{$_}; } %Vary=("*"=>1) if $Vary{"*"}; - $headers_out{"Vary"}=join(", ",sort keys(%Vary)); + $headers_out{"Vary"}=join(", ",sort keys(%Vary)) if keys(%Vary); # $W->{"r"}->set_last_modified() ? $headers_out{"Last-Modified"}=cache_finish_last_modified(); @@ -1041,19 +1101,18 @@ my($class)=@_; # TODO: Support also: private header("Cache-Control"=>"public"); # HTTP/1.1 - # $ENV{"CLIENT_CHARSET"} ignored (mod_czech support dropped!) - my $client_charset=$W->{"force_charset"} || "us-ascii"; + # Use $W->{"charset"}=0 to disable charset. + $W->{"charset"}="us-ascii" if !defined $W->{"charset"} && !defined($W->{"content_type"}) || $W->{"content_type"}; # Workaround bug # https://bugzilla.mozilla.org/show_bug.cgi?id=120556 # of at least # Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217 - my $mime; # http://validator.w3.org/ does not send ANY "Accept" headers! - $mime||="application/xhtml+xml" if 1 + $W->{"content_type"}="application/xhtml+xml" if !defined $W->{"content_type"} && !$W->{"headers_in"}{"Accept"} && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i; - $mime||=$class->Negotiate_choose([ + defined($W->{"content_type"}) or $W->{"content_type"}=$class->Negotiate_choose([ # Put the fallback variant as the first one. # Rate both variants the same to prefer "text/html" for undecided clients. # At least @@ -1064,14 +1123,14 @@ my($class)=@_; "id"=>"text/html", "content-type"=>"text/html", "qs"=>0.6, - "charset"=>$client_charset, + (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}), "lang"=>$W->{"language"}, ), negotiate_variant( "id"=>"application/xhtml+xml", "content-type"=>"application/xhtml+xml", "qs"=>0.6, - "charset"=>$client_charset, + (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}), "lang"=>$W->{"language"}, ), # application/xml ? @@ -1079,16 +1138,28 @@ my($class)=@_; ]); # mod_perl doc: If you set this header via the headers_out table directly, it # will be ignored by Apache. So do not do that. - $W->{"r"}->content_type("$mime; charset=$client_charset"); + my $type; + if ($W->{"content_type"}) { + $type=MIME::Types->new()->type($W->{"content_type"}); + cluck "MIME::Types type '".$W->{"content_type"}."' not known" if !$type; + } + cluck "charset='".$W->{"charset"}."' does not match content-type='".$W->{"content_type"}."'" + if ($W->{"charset"} ? 1 : 0) != (!$type ? 0 : $type->isAscii()); + $W->{"r"}->content_type($W->{"content_type"}.(!$W->{"charset"} ? "" : "; charset=".$W->{"charset"})) + if $W->{"content_type"}; cache_start(); - return if $W->{"header_only"}; # We still can append headers before we put out some text. # FIXME: It is not clean to still append them without overwriting. - return if $W->{"heading_done"}++; + return if $W->{"heading_done"}; + Wprint '{"charset"}.'"?>'."\n" + if (!$W->{"header_only"} || $W->{"header_only"} eq "xml") && (0 + || $W->{"content_type"}=~m{^application/\w+[+]xml$} + || $W->{"content_type"} eq "text/vnd.wap.wml"); + return if $W->{"header_only"}; + # Split 'heading_done' for the proper handling of: /project/Rel.pm + $W->{"heading_done"}++; - Wprint ''."\n" if $mime=~m{^application/\w+[+]xml$}; - return if $W->{"xml_header_only"}; Wprint ''."\n"; Wprint ''."\n"; my $title=$W->{"title_prefix"}.join("",map({ ': '.$_; } ($W->{"title"} || ()))); @@ -1100,7 +1171,7 @@ my($class)=@_; Wprint "$title\n"; if ($W->{"have_css"}) { # Everything can get overriden later. - for my $css ("/My/Web.css",map((!$_ ? () : ("ARRAY" ne ref($_) ? $_ : @$_)),$W->{"css_push"})) { + for my $css ("/My/Web.css",@{$W->{"css_push"}}) { Wprint <<"HERE"; HERE