Many fixes, incompatible changes, improvements...
[MyWeb.git] / Web.pm
diff --git a/Web.pm b/Web.pm
index 5c2dd07..ad685e6 100644 (file)
--- a/Web.pm
+++ b/Web.pm
@@ -31,7 +31,7 @@ our @EXPORT=qw(
                &Wrequire &Wuse
                &path_web &path_abs_disk
                &uri_escaped
-               &a_href &a_href_cz
+               &a_href &a_href_cc
                &vskip
                &img &centerimg &rightimg
                $W
@@ -68,6 +68,8 @@ BEGIN
                        for my $caller (keys(%callers)) {
                                next if $caller eq $target;
                                next if $packages_used_hash{$caller}{$target}++;
+                               cluck "Appending to the '_done' package list: caller=$caller,target=$target"
+                                               if $packages_used_hash{$caller}{"_done"};
                                push @{$packages_used_array{$caller}},$target;
                                }
                        }
@@ -122,32 +124,13 @@ use Data::Dumper;
 require Encode;
 use Apache2::Filter;
 use Apache2::Connection;
+require MIME::Base64;
+use Apache2::ServerUtil;
+require MIME::Types;
 
 
 #our $W;
 
-sub cleanup($)
-{
-my($apache_request)=@_;
-
-       $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
-       cache_finish();
-       # Sanity protection.
-       $W=undef();
-       return OK;
-}
-
-sub request_check(;$)
-{
-my($self)=@_;
-
-       # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
-       # CGI requires valid "r": check it beforehand here.
-       confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); };
-       # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
-       # as it is valid at least while preparing arguments to call: &project::Lib::init
-}
-
 sub init ($%)
 {
 my($class,%args)=@_;
@@ -160,15 +143,20 @@ my($class,%args)=@_;
        Wuse 'WebConfig';
        Wrequire 'My::Hash';
 
-       $W=My::Hash->new({
+       # An empty $W={} can somehow get created very easily.
+       cluck "W not empty:\n".Dumper($W) if keys(%$W);
+       $W=bless My::Hash->new({}),$class;
+       $W=My::Hash->new($W,"My::Hash::Sub","My::Hash::Push");
+       %$W=(
                "__PACKAGE__"=>scalar(caller()),
                %WebConfig,
                %args,  # override %WebConfig settings
-               },"My::Hash::Sub","My::Hash::Push");
+               );
 
        # {"__PACKAGE__"} is mandatory for mod_perl-2.0;
        # $Apache2::Registry::curstash is no longer supported.
        do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__";
+       exit_hook_start();
 
        # See: &escapeHTML
        do { cluck "charset==$_, expecting ISO-8859-1" if $_ ne "ISO-8859-1"; } for CGI::charset();
@@ -241,13 +229,15 @@ my($class,%args)=@_;
                                "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); },
                                }),
                        );
+       Wrequire 'My::Hash::Readonly';
        $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"});
        
        if ($W->{"r"}->method() eq "GET" || $W->{"r"}->method() eq "HEAD") {
                for (\$W->{"http_safe"}) {
-                       # Extend the current ETag system instead if you would need it:
-                       cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?"
-                                       if defined($$_) && !$$_;
+                       # Do not: # Extend the current ETag system instead if you would need it:
+                       #         cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?"
+                       #                       if defined($$_) && !$$_;
+                       # as sometimes it just does not make sense to cache it.
                        $$_=1 if !defined $$_;
                        }
                }
@@ -264,7 +254,10 @@ my($class,%args)=@_;
                $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
                }
 
-       $W->{"browser"}=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
+       $W->{"browser"}=sub {
+               # Lazy-evaluation, we may not need the "User-Agent" header at all.
+               return our $r||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
+               };
 
        if (!defined $W->{"have_style"}) {
                $W->{"have_style"}=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0);
@@ -272,12 +265,55 @@ my($class,%args)=@_;
 
        $W->{"have_js"}=($W->{"args"}{"have_js"} ? 1 : 0);
        if ($W->{"detect_js"} && !$W->{"have_js"}) {
-               $W->{"head"}.='<script type="text/javascript" src="'.uri_escaped(path_web('/have_js.pm')).'"></script>'."\n";
+               $W->{"head"}.='<script type="text/javascript" src="'.uri_escaped(path_web('/My/HaveJS.pm')).'"></script>'."\n";
                }
 
        do { _args_check(%$_) if $_; } for ($W->{"args_check"});
 
-       return bless $W,$class;
+       $W->{"_init_done"}=1;
+       return $W;
+}
+
+sub cleanup($)
+{
+my($apache_request)=@_;
+
+       cluck "CORE::GLOBAL::exit hook not ran" if !$W->{"_exit_done"};
+       cluck "packages not finalized" if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"};
+       cache_finish();
+       # Sanity protection.
+       $W=undef();
+       exit_hook_stop();
+       return OK;
+}
+
+# PerlResponseHandler is RUN_FIRST and &ModPerl::Util::exit returns OK, so no (sane) go.
+# PerlLogHandler is already too late to be able to produce any output.
+my $exit_orig;
+sub exit_hook
+{
+       cluck "Missing ->init while in exit_hook()" if !$W->{"_init_done"};
+       # &footer will call us recursively!
+       footer() if !$W->{"_exit_done"}++;
+       return &{$exit_orig}(@_);
+}
+sub exit_hook_start
+{
+       cluck "exit_hook_start() twice?" if defined $exit_orig;
+       $exit_orig=\&CORE::GLOBAL::exit;
+       # Prevent: Subroutine CORE::GLOBAL::exit redefined
+       no warnings 'redefine';
+       *CORE::GLOBAL::exit=\&exit_hook;
+}
+sub exit_hook_stop
+{
+       do { cluck "exit_hook_stop() without exit_hook_start()?"; return; }
+                       if \&exit_hook ne \&CORE::GLOBAL::exit;
+       cluck "INTERNAL: exit_orig uninitialized" if !$exit_orig;
+       # Prevent: Subroutine CORE::GLOBAL::exit redefined
+       no warnings 'redefine';
+       *CORE::GLOBAL::exit=$exit_orig;
+       $exit_orig=undef();
 }
 
 # Be aware other parts of code (non-My::Web) will NOT use this function!
@@ -293,10 +329,22 @@ my($text,%args)=@_;
        delete $args{"undef"};
        cluck join(" ","Invalid arguments:",keys(%args)) if keys(%args);
        return if !defined $text;
-       cluck "utf-8 untested" if Encode::is_utf8($text);
+       # Do not: cluck "utf-8 untested" if Encode::is_utf8($text);
+       # as it is valid here.
        $W->{"r"}->puts($text);
 }
 
+sub request_check(;$)
+{
+my($self)=@_;
+
+       # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
+       # CGI requires valid "r": check it beforehand here.
+       confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); };
+       # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
+       # as it is valid at least while preparing arguments to call: &project::Lib::init
+}
+
 sub escapeHTML($)
 {
 my($text)=@_;
@@ -315,24 +363,6 @@ my($text)=@_;
        return CGI::escapeHTML($text);
 }
 
-# local *FH;
-# tie *FH,ref($W),$W;
-sub TIEHANDLE($)
-{
-my($class,$W)=@_;
-
-       my $self={};
-       $self->{"W"}=$W or confess "Missing W";
-       return bless $self,$class;
-}
-
-sub WRITE
-{
-my($self,$scalar,$length,$offset)=@_;
-
-       Wprint substr($scalar,0,$length);
-}
-
 # /home/user/www/webdir
 sub dir_top_abs_disk()
 {
@@ -480,21 +510,26 @@ my($msg)=@_;
        Wprint "\n".vskip("3ex")."<hr /><h1 class=\"error\">FATAL ERROR: $msg!</h1>\n"
                        ."<p>You can report this problem's details to"
                        ." ".a_href("mailto:".$W->{"admin_mail"},"admin of this website").".</p>\n";
-       footer();
+       exit;
 }
 
-sub footer (;$)
+sub footer()
 {
-       exit 1 if $W->{"footer_passed"}++;      # deadlock prevention:
+       cluck 'Explicit &footer call is deprecated, !_exit_dne' if !$W->{"_exit_done"};
+       exit if $W->{"footer_done"}++;  # deadlock prevention:
+       if ($W->{"header_only"}) {
+               $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
+               exit;
+               }
 
        Wprint vskip if $W->{"footer_delimit"};
-
-       do { Wprint $_ if $_; } for $W->{"footing_delimit"};
-
+       &{$_}() for reverse @{$W->{"footing_delimit_sub_push"}};
        Wprint "<hr />\n" if $W->{"footer"};
 
-       my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}};
+       # Never update the package list while we examine it!
+       $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
 
+       my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}};
        if ($W->{"footer_ids"}) {
                Wprint '<p class="cvs-id">';
                Wprint join("<br />\n",map({ my $package=$_;
@@ -543,7 +578,7 @@ sub footer (;$)
        do { Wprint $_ if $_; } for $W->{"footing"};
 
        Wprint "</body></html>\n";
-       exit 0;
+       exit;
 }
 
 # Existing entries are overwritten.
@@ -596,7 +631,7 @@ my($uri)=@_;
        return $uri    if defined $W->{"have_ent"} && !$W->{"have_ent"};        # non-ent client
        return $urient if $W->{"have_ent"};     # ent client
        # Unknown client, &escapeHTML should not be needed here:
-       return escapeHTML(path_web('/Redirect.pm?location='.uri_escape($uri->abs(unparsed_uri()))));
+       return escapeHTML(path_web('/My/Redirect.pm?location='.uri_escape($uri->abs(unparsed_uri()))));
 }
 
 our $a_href_inhibited;
@@ -660,6 +695,7 @@ my($self,$url,$status)=@_;
        $W->{"r"}->status($status);
        $W->{"r"}->headers_out()->{"Location"}=$url;
        $W->{"header_only"}=1;
+       $W->{"content_type"}=0;
        My::Web->heading();
        exit;
        die "NOTREACHED";
@@ -672,22 +708,29 @@ sub remote_ip ()
        # As 'Apache2::ForwardedFor' takes the first of $ENV{"HTTP_X_FORWARDED_FOR"}
        # while the contents is '127.0.0.1, 213.220.195.171' if client has its own proxy.
        # We must take the last item ourselves.
-       my $r=$W->{"headers_in"}{"X-Forwarded-For"} || $W->{"headers_in"}{"_remote_ip"};
-       $r=~s/^.*,\s*//;
+       # Be VERY sure you always retrieve all the headers unconditionally to hit: My::Hash::RecordKeys
+       my $x_forwarded_for=$W->{"headers_in"}{"X-Forwarded-For"};
+       $x_forwarded_for=~s/^.*,\s*// if $x_forwarded_for;
+       my $remote_ip=$W->{"headers_in"}{"_remote_ip"};
+       my $r;
+       $r||=$x_forwarded_for;
+       $r||=$remote_ip;
        return $r;
 }
 
-sub is_cz ()
-{
-       return 0 if !$have_Geo_IP;
-       return "CZ" eq Geo::IP->new()->country_code_by_addr(remote_ip());
-}
-
-sub a_href_cz ($$;%)
+# $url={"JP"=>"http://specific",...};
+# $url={""=>"http://default",...};
+sub a_href_cc($$;%)
 {
 my($url,$contents,%args)=@_;
 
-       return a_href $url,$contents,%args if is_cz();
+       # A bit inefficient, but we must process all the possibilities to get stable 'headers_in' hits!
+       my %map=map(($_=>a_href($url->{$_},$contents,%args)),keys(%$url));
+       my $cc;
+       $cc||=Geo::IP->new()->country_code_by_addr(remote_ip()) if $have_Geo_IP;
+       $cc||="";
+       my $r=$map{$cc};
+       return $r if $r;
        return $contents;
 }
 
@@ -945,7 +988,8 @@ sub cache_start()
                        }
                last if OK==$status;
                $W->{"r"}->status($status);
-               exit 0;
+               $W->{"header_only"}=1;  # Inhibit &footer output.
+               exit;
                die "NOTREACHED";
                }
 
@@ -978,7 +1022,7 @@ sub cache_finish_last_modified()
 sub cache_finish()
 {
        # Do not: return if !$W->{"uri_args_frozen"};
-       # as we may have just gave 304 and 'exit 0;' without starting the caching.
+       # as we may have just given 304 and called 'exit;' without starting the caching.
        return if !$W->{"cache_active"};
 
        # Fill-in/check: %uri_args_frozen_to_headers_in_keys
@@ -997,7 +1041,13 @@ sub cache_finish()
 
        # Prepare 'headers_out' for the future reusal:
        my %headers_out;
-       $headers_out{"Content-MD5"}=$W->{"digest-md5"}->b64digest();
+       # Do not: $W->{"digest-md5"}->b64digest();
+       # as it will not provide the trailing padding '='s.
+       # RFC 1864 is not clear if they should be there but its sample provides them.
+       # Do not try to provide canonical "\r\n" form of newlines as is said by RFC 1864.
+       # RFC 2068 (HTTP/1.1) section 14.16 says the newlines should NOT be converted for HTTP.
+       # Pass "" as the EOL argument (',""') so the default trailing "\n" does not break the headers.
+       $headers_out{"Content-MD5"}=MIME::Base64::encode_base64($W->{"digest-md5"}->digest(),"");
        # In fact we could also use MD5 for ETag as if we know ETag we also know MD5.
        # But this way we do not need to calculate MD5 and we still can provide such ETag. So.
        # $W->{"r"}->set_etag() ?
@@ -1011,7 +1061,7 @@ sub cache_finish()
                delete $Vary{$_};
                }
        %Vary=("*"=>1) if $Vary{"*"};
-       $headers_out{"Vary"}=join(", ",sort keys(%Vary));
+       $headers_out{"Vary"}=join(", ",sort keys(%Vary)) if keys(%Vary);
        # $W->{"r"}->set_last_modified() ?
        $headers_out{"Last-Modified"}=cache_finish_last_modified();
 
@@ -1042,19 +1092,18 @@ my($class)=@_;
        # TODO: Support also: private
        header("Cache-Control"=>"public");      # HTTP/1.1
 
-       # $ENV{"CLIENT_CHARSET"} ignored (mod_czech support dropped!)
-       my $client_charset=$W->{"force_charset"} || "us-ascii";
+       # Use $W->{"charset"}=0 to disable charset.
+       $W->{"charset"}="us-ascii" if !defined $W->{"charset"} && !defined($W->{"content_type"}) || $W->{"content_type"};
 
        # Workaround bug
        #   https://bugzilla.mozilla.org/show_bug.cgi?id=120556
        # of at least
        #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
-       my $mime;
        # http://validator.w3.org/ does not send ANY "Accept" headers!
-       $mime||="application/xhtml+xml" if 1
+       $W->{"content_type"}="application/xhtml+xml" if !defined $W->{"content_type"}
                        && !$W->{"headers_in"}{"Accept"}
                        && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i;
-       $mime||=$class->Negotiate_choose([
+       defined($W->{"content_type"}) or $W->{"content_type"}=$class->Negotiate_choose([
                        # Put the fallback variant as the first one.
                        # Rate both variants the same to prefer "text/html" for undecided clients.
                        # At least
@@ -1065,14 +1114,14 @@ my($class)=@_;
                                        "id"=>"text/html",
                                        "content-type"=>"text/html",
                                        "qs"=>0.6,
-                                       "charset"=>$client_charset,
+                                       (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
                                        "lang"=>$W->{"language"},
                                        ),
                        negotiate_variant(
                                        "id"=>"application/xhtml+xml",
                                        "content-type"=>"application/xhtml+xml",
                                        "qs"=>0.6,
-                                       "charset"=>$client_charset,
+                                       (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
                                        "lang"=>$W->{"language"},
                                        ),
                        # application/xml ?
@@ -1080,16 +1129,26 @@ my($class)=@_;
                        ]);
        # mod_perl doc: If you set this header via the headers_out table directly, it
        #               will be ignored by Apache. So do not do that.
-       $W->{"r"}->content_type("$mime; charset=$client_charset");
+       my $type;
+       if ($W->{"content_type"}) {
+               $type=MIME::Types->new()->type($W->{"content_type"});
+               cluck "MIME::Types type '".$W->{"content_type"}."' not known" if !$type;
+               }
+       cluck "charset='".$W->{"charset"}."' does not match content-type='".$W->{"content_type"}."'"
+                       if ($W->{"charset"} ? 1 : 0) != (!$type ? 0 : $type->isAscii());
+       $W->{"r"}->content_type($W->{"content_type"}.(!$W->{"charset"} ? "" : "; charset=".$W->{"charset"}))
+                       if $W->{"content_type"};
 
        cache_start();
-       return if $W->{"header_only"};
        # We still can append headers before we put out some text.
        # FIXME: It is not clean to still append them without overwriting.
-       return if $W->{"heading_done"}++;
+       return if $W->{"heading_done"};
+       Wprint '<?xml version="1.0" encoding="'.$W->{"charset"}.'"?>'."\n"
+                       if (!$W->{"header_only"} || $W->{"header_only"} eq "xml") && $W->{"content_type"}=~m{^application/\w+[+]xml$};
+       return if $W->{"header_only"};
+       # Split 'heading_done' for the proper handling of: /project/Rel.pm
+       $W->{"heading_done"}++;
 
-       Wprint '<?xml version="1.0" encoding="'.$client_charset.'"?>'."\n" if $mime=~m{^application/\w+[+]xml$};
-       return if $W->{"xml_header_only"};
        Wprint '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'."\n";
        Wprint '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="'.$W->{"language"}.'">'."\n";
        my $title=$W->{"title_prefix"}.join("",map({ ': '.$_; } ($W->{"title"} || ())));
@@ -1101,7 +1160,7 @@ my($class)=@_;
        Wprint "<title>$title</title>\n";
        if ($W->{"have_css"}) {
                # Everything can get overriden later.
-               for my $css ("/My/Web.css",map((!$_ ? () : ("ARRAY" ne ref($_) ? $_ : @$_)),$W->{"css_push"})) {
+               for my $css ("/My/Web.css",@{$W->{"css_push"}}) {
                        Wprint <<"HERE";
 <link rel="stylesheet" type="text/css" href="@{[ uri_escaped(path_web $css) ]}" />
 HERE