Some fortification of misplaced inits/shutdowns.
[MyWeb.git] / Web.pm
diff --git a/Web.pm b/Web.pm
index 6b76695..c5c1ae9 100644 (file)
--- a/Web.pm
+++ b/Web.pm
@@ -68,6 +68,8 @@ BEGIN
                        for my $caller (keys(%callers)) {
                                next if $caller eq $target;
                                next if $packages_used_hash{$caller}{$target}++;
+                               cluck "Appending to the '_done' package list: caller=$caller,target=$target"
+                                               if $packages_used_hash{$caller}{"_done"};
                                push @{$packages_used_array{$caller}},$target;
                                }
                        }
@@ -122,32 +124,13 @@ use Data::Dumper;
 require Encode;
 use Apache2::Filter;
 use Apache2::Connection;
+require MIME::Base64;
+use Apache2::ServerUtil;
+require MIME::Types;
 
 
 #our $W;
 
-sub cleanup($)
-{
-my($apache_request)=@_;
-
-       $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
-       cache_finish();
-       # Sanity protection.
-       $W=undef();
-       return OK;
-}
-
-sub request_check(;$)
-{
-my($self)=@_;
-
-       # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
-       # CGI requires valid "r": check it beforehand here.
-       confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); };
-       # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
-       # as it is valid at least while preparing arguments to call: &project::Lib::init
-}
-
 sub init ($%)
 {
 my($class,%args)=@_;
@@ -160,15 +143,22 @@ my($class,%args)=@_;
        Wuse 'WebConfig';
        Wrequire 'My::Hash';
 
-       $W=My::Hash->new({
+       # $W={} can get somehow created very easily.
+       # Do not: cluck "W not empty:\n".Dumper($W) if keys(%$W);
+       # to prevent (of $W->{"headers_in"}): TODO: Enumeration may not be expected.
+       cluck "W not empty; __PACKAGE__ was: ".$W->{"__PACKAGE__"} if keys(%$W);
+       $W=My::Hash->new({},"My::Hash::Sub","My::Hash::Push");
+       bless $W,$class;
+       %$W=(
                "__PACKAGE__"=>scalar(caller()),
                %WebConfig,
                %args,  # override %WebConfig settings
-               },"My::Hash::Sub","My::Hash::Push");
+               );
 
        # {"__PACKAGE__"} is mandatory for mod_perl-2.0;
        # $Apache2::Registry::curstash is no longer supported.
        do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__";
+       exit_hook_start();
 
        # See: &escapeHTML
        do { cluck "charset==$_, expecting ISO-8859-1" if $_ ne "ISO-8859-1"; } for CGI::charset();
@@ -241,13 +231,15 @@ my($class,%args)=@_;
                                "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); },
                                }),
                        );
+       Wrequire 'My::Hash::Readonly';
        $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"});
        
        if ($W->{"r"}->method() eq "GET" || $W->{"r"}->method() eq "HEAD") {
                for (\$W->{"http_safe"}) {
-                       # Extend the current ETag system instead if you would need it:
-                       cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?"
-                                       if defined($$_) && !$$_;
+                       # Do not: # Extend the current ETag system instead if you would need it:
+                       #         cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?"
+                       #                       if defined($$_) && !$$_;
+                       # as sometimes it just does not make sense to cache it.
                        $$_=1 if !defined $$_;
                        }
                }
@@ -264,20 +256,72 @@ my($class,%args)=@_;
                $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
                }
 
-       $W->{"browser"}=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
+       $W->{"browser"}=sub {
+               # Lazy-evaluation, we may not need the "User-Agent" header at all; memoized per request in $W, not in a shared 'our' global.
+               return $W->{"_browser_cached"}||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
+               };
 
        if (!defined $W->{"have_style"}) {
-               $W->{"have_style"}=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0);
+               $W->{"have_style"}=sub {
+                       # Lazy-evaluation as above; it has its own per-request $W slot so it can never pick up the cached browser object.
+                       return $W->{"_have_style_cached"}||=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0);
+                       };
                }
 
        $W->{"have_js"}=($W->{"args"}{"have_js"} ? 1 : 0);
        if ($W->{"detect_js"} && !$W->{"have_js"}) {
+               # Do not: <script />
+               # as at least Lynx inhibits any further HTML output.
                $W->{"head"}.='<script type="text/javascript" src="'.uri_escaped(path_web('/My/HaveJS.pm')).'"></script>'."\n";
                }
 
        do { _args_check(%$_) if $_; } for ($W->{"args_check"});
 
-       return bless $W,$class;
+       $W->{"_init_done"}=1;
+       return $W;
+}
+
+sub cleanup($)
+{
+my($apache_request)=@_;
+
+       cluck "CORE::GLOBAL::exit hook not ran" if !$W->{"_exit_done"};
+       cluck "packages not finalized" if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"};
+       cache_finish();
+       # Sanity protection.
+       $W=undef();
+       exit_hook_stop();
+       return OK;
+}
+
+# PerlResponseHandler is RUN_FIRST and &ModPerl::Util::exit returns OK, so no (sane) go.
+# PerlLogHandler is already too late to be able to produce any output.
+my $exit_orig;
+sub exit_hook
+{
+       cluck "Missing ->init while in exit_hook()" if !$W->{"_init_done"};
+       # &footer will call us recursively!
+       footer() if !$W->{"_exit_done"}++;
+       return &{$exit_orig}(@_);
+}
+sub exit_hook_start
+{
+       do { cluck "exit_hook_start() twice?"; return; } if defined $exit_orig;
+       $exit_orig=\&CORE::GLOBAL::exit;
+       # Prevent: Subroutine CORE::GLOBAL::exit redefined
+       no warnings 'redefine';
+       *CORE::GLOBAL::exit=\&exit_hook;
+}
+sub exit_hook_stop
+{
+       do { cluck "exit_hook_stop() without exit_hook_start()?"; return; }
+                       if \&exit_hook ne \&CORE::GLOBAL::exit;
+       do { cluck "INTERNAL: exit_orig uninitialized"; return; }
+                       if !$exit_orig;
+       # Prevent: Subroutine CORE::GLOBAL::exit redefined
+       no warnings 'redefine';
+       *CORE::GLOBAL::exit=$exit_orig;
+       $exit_orig=undef();
 }
 
 # Be aware other parts of code (non-My::Web) will NOT use this function!
@@ -293,10 +337,22 @@ my($text,%args)=@_;
        delete $args{"undef"};
        cluck join(" ","Invalid arguments:",keys(%args)) if keys(%args);
        return if !defined $text;
-       cluck "utf-8 untested" if Encode::is_utf8($text);
+       # Do not: cluck "utf-8 untested" if Encode::is_utf8($text);
+       # as it is valid here.
        $W->{"r"}->puts($text);
 }
 
+sub request_check(;$)
+{
+my($self)=@_;
+
+       # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
+       # CGI requires valid "r": check it beforehand here.
+       confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); };
+       # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
+       # as it is valid at least while preparing arguments to call: &project::Lib::init
+}
+
 sub escapeHTML($)
 {
 my($text)=@_;
@@ -462,21 +518,38 @@ my($msg)=@_;
        Wprint "\n".vskip("3ex")."<hr /><h1 class=\"error\">FATAL ERROR: $msg!</h1>\n"
                        ."<p>You can report this problem's details to"
                        ." ".a_href("mailto:".$W->{"admin_mail"},"admin of this website").".</p>\n";
-       footer();
+       exit;
 }
 
-sub footer (;$)
+sub footer_packages_used_comments()
 {
-       exit 1 if $W->{"footer_passed"}++;      # deadlock prevention:
-
-       Wprint vskip if $W->{"footer_delimit"};
+       my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}};
+       for my $package (@$packages_used) {
+               my $cvs_id=(eval('$'.$package."::CVS_ID")
+#                              || $package     # debug
+                               );
+               Wprint '<!-- '.$package.' - $'.$cvs_id.'$ -->'."\n" if $cvs_id;
+               }
+}
 
-       do { Wprint $_ if $_; } for $W->{"footing_delimit"};
+sub footer()
+{
+       cluck 'Explicit &footer call is deprecated, !_exit_dne' if !$W->{"_exit_done"};
+       exit if $W->{"footer_done"}++;  # deadlock prevention:
+       &{$_}() for reverse @{$W->{"footer_sub_push"}};
+       if ($W->{"header_only"}) {
+               $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
+               exit;
+               }
 
+       Wprint vskip if $W->{"footer_delimit"};
+       &{$_}() for reverse @{$W->{"footing_delimit_sub_push"}};
        Wprint "<hr />\n" if $W->{"footer"};
 
-       my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}};
+       # Never update the package list while we examine it!
+       $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
 
+       my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}};
        if ($W->{"footer_ids"}) {
                Wprint '<p class="cvs-id">';
                Wprint join("<br />\n",map({ my $package=$_;
@@ -502,11 +575,17 @@ sub footer (;$)
                                                cluck "Class file $file not found; tried: ".join(" ",@tried) if !$ext;
                                                }
                                        $file.=$ext;
+                                       my $viewcvs;
+                                       if ((my $file_cvs=$file)=~s{^My/}{}) {
+                                               $viewcvs=$W->{"viewcvs_My"}.$file_cvs;
+                                               }
+                                       else {
+                                               $viewcvs=$W->{"viewcvs"}.$file;
+                                               }
                                        $cvs_id_split[2]=""
-                                                       .a_href((map({ my $s=$_; $s=~s#/viewcvs/#$&~checkout~/#; $s; } $W->{"viewcvs"}))[0]."$file?rev=".$cvs_id_split[2],
+                                                       .a_href((map({ my $s=$_; $s=~s#/viewcvs/#$&~checkout~/#; $s; } $viewcvs))[0]."?rev=".$cvs_id_split[2],
                                                                        $cvs_id_split[2]);
-                                       $cvs_id_split[1]=a_href($W->{"viewcvs"}.$file,
-                                                       ($package!~/^Apache2::/ ? $package : $cvs_id_split[1]));
+                                       $cvs_id_split[1]=a_href($viewcvs,($package!~/^Apache2::/ ? $package : $cvs_id_split[1]));
                                        $cvs_id_split[5]=&{$W->{"cvs_id_author_sub"}}($cvs_id_split[5]);
                                        }
                                join " ",@cvs_id_split;
@@ -515,17 +594,12 @@ sub footer (;$)
                Wprint "</p>\n";
                }
 
-       for my $package (@$packages_used) {
-               my $cvs_id=(eval('$'.$package."::CVS_ID")
-#                              || $package     # debug
-                               );
-               Wprint '<!-- '.$package.' - $'.$cvs_id.'$ -->'."\n" if $cvs_id;
-               }
+       footer_packages_used_comments();
 
        do { Wprint $_ if $_; } for $W->{"footing"};
 
        Wprint "</body></html>\n";
-       exit 0;
+       exit;
 }
 
 # Existing entries are overwritten.
@@ -642,6 +716,8 @@ my($self,$url,$status)=@_;
        $W->{"r"}->status($status);
        $W->{"r"}->headers_out()->{"Location"}=$url;
        $W->{"header_only"}=1;
+       $W->{"content_type"}=0;
+       $W->{"charset"}=0;
        My::Web->heading();
        exit;
        die "NOTREACHED";
@@ -934,7 +1010,8 @@ sub cache_start()
                        }
                last if OK==$status;
                $W->{"r"}->status($status);
-               exit 0;
+               $W->{"header_only"}=1;  # Inhibit &footer output.
+               exit;
                die "NOTREACHED";
                }
 
@@ -967,7 +1044,7 @@ sub cache_finish_last_modified()
 sub cache_finish()
 {
        # Do not: return if !$W->{"uri_args_frozen"};
-       # as we may have just gave 304 and 'exit 0;' without starting the caching.
+       # as we may have just given 304 and 'exit;' without starting the caching.
        return if !$W->{"cache_active"};
 
        # Fill-in/check: %uri_args_frozen_to_headers_in_keys
@@ -986,7 +1063,13 @@ sub cache_finish()
 
        # Prepare 'headers_out' for the future reusal:
        my %headers_out;
-       $headers_out{"Content-MD5"}=$W->{"digest-md5"}->b64digest();
+       # Do not: $W->{"digest-md5"}->b64digest();
+       # as it will not provide the trailing '=' padding characters.
+       # RFC 1864 is not clear if they should be there but its sample provides them.
+       # Do not try to provide canonical "\r\n" form of newlines as is said by RFC 1864.
+       # RFC 2068 (HTTP/1.1) section 14.16 says the newlines should NOT be converted for HTTP.
+       # Pass "" as the second (end-of-line) argument to avoid breaking the headers by the default "\n".
+       $headers_out{"Content-MD5"}=MIME::Base64::encode_base64($W->{"digest-md5"}->digest(),"");
        # In fact we could also use MD5 for ETag as if we know ETag we also know MD5.
        # But this way we do not need to calculate MD5 and we still can provide such ETag. So.
        # $W->{"r"}->set_etag() ?
@@ -1000,7 +1083,7 @@ sub cache_finish()
                delete $Vary{$_};
                }
        %Vary=("*"=>1) if $Vary{"*"};
-       $headers_out{"Vary"}=join(", ",sort keys(%Vary));
+       $headers_out{"Vary"}=join(", ",sort keys(%Vary)) if keys(%Vary);
        # $W->{"r"}->set_last_modified() ?
        $headers_out{"Last-Modified"}=cache_finish_last_modified();
 
@@ -1031,19 +1114,19 @@ my($class)=@_;
        # TODO: Support also: private
        header("Cache-Control"=>"public");      # HTTP/1.1
 
-       # $ENV{"CLIENT_CHARSET"} ignored (mod_czech support dropped!)
-       my $client_charset=$W->{"force_charset"} || "us-ascii";
+       # Use $W->{"charset"}=0 to disable charset.
+       $W->{"charset"}="us-ascii"
+                       if !defined $W->{"charset"} && (!defined($W->{"content_type"}) || $W->{"content_type"});
 
        # Workaround bug
        #   https://bugzilla.mozilla.org/show_bug.cgi?id=120556
        # of at least
        #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
-       my $mime;
        # http://validator.w3.org/ does not send ANY "Accept" headers!
-       $mime||="application/xhtml+xml" if 1
+       $W->{"content_type"}="application/xhtml+xml" if !defined $W->{"content_type"}
                        && !$W->{"headers_in"}{"Accept"}
                        && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i;
-       $mime||=$class->Negotiate_choose([
+       defined($W->{"content_type"}) or $W->{"content_type"}=$class->Negotiate_choose([
                        # Put the fallback variant as the first one.
                        # Rate both variants the same to prefer "text/html" for undecided clients.
                        # At least
@@ -1054,14 +1137,14 @@ my($class)=@_;
                                        "id"=>"text/html",
                                        "content-type"=>"text/html",
                                        "qs"=>0.6,
-                                       "charset"=>$client_charset,
+                                       (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
                                        "lang"=>$W->{"language"},
                                        ),
                        negotiate_variant(
                                        "id"=>"application/xhtml+xml",
                                        "content-type"=>"application/xhtml+xml",
                                        "qs"=>0.6,
-                                       "charset"=>$client_charset,
+                                       (!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
                                        "lang"=>$W->{"language"},
                                        ),
                        # application/xml ?
@@ -1069,16 +1152,28 @@ my($class)=@_;
                        ]);
        # mod_perl doc: If you set this header via the headers_out table directly, it
        #               will be ignored by Apache. So do not do that.
-       $W->{"r"}->content_type("$mime; charset=$client_charset");
+       my $type;
+       if ($W->{"content_type"}) {
+               $type=MIME::Types->new()->type($W->{"content_type"});
+               cluck "MIME::Types type '".$W->{"content_type"}."' not known" if !$type;
+               }
+       cluck "charset='".$W->{"charset"}."' does not match content-type='".$W->{"content_type"}."'"
+                       if ($W->{"charset"} ? 1 : 0) != (!$type ? 0 : $type->isAscii());
+       $W->{"r"}->content_type($W->{"content_type"}.(!$W->{"charset"} ? "" : "; charset=".$W->{"charset"}))
+                       if $W->{"content_type"};
 
        cache_start();
-       return if $W->{"header_only"};
        # We still can append headers before we put out some text.
        # FIXME: It is not clean to still append them without overwriting.
-       return if $W->{"heading_done"}++;
+       return if $W->{"heading_done"};
+       Wprint '<?xml version="1.0" encoding="'.$W->{"charset"}.'"?>'."\n"
+                       if (!$W->{"header_only"} || $W->{"header_only"} eq "xml") && (0
+                                       || $W->{"content_type"}=~m{^application/\w+[+]xml$}
+                                       || $W->{"content_type"} eq "text/vnd.wap.wml");
+       return if $W->{"header_only"};
+       # Split 'heading_done' for the proper handling of: /project/Rel.pm
+       $W->{"heading_done"}++;
 
-       Wprint '<?xml version="1.0" encoding="'.$client_charset.'"?>'."\n" if $mime=~m{^application/\w+[+]xml$};
-       return if $W->{"xml_header_only"};
        Wprint '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'."\n";
        Wprint '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="'.$W->{"language"}.'">'."\n";
        my $title=$W->{"title_prefix"}.join("",map({ ': '.$_; } ($W->{"title"} || ())));
@@ -1090,14 +1185,16 @@ my($class)=@_;
        Wprint "<title>$title</title>\n";
        if ($W->{"have_css"}) {
                # Everything can get overriden later.
-               for my $css ("/My/Web.css",map((!$_ ? () : ("ARRAY" ne ref($_) ? $_ : @$_)),$W->{"css_push"})) {
+               for my $css ("/My/Web.css",@{$W->{"css_push"}}) {
                        Wprint <<"HERE";
 <link rel="stylesheet" type="text/css" href="@{[ uri_escaped(path_web $css) ]}" />
 HERE
                        }
                if ($W->{"css_inherit"}) {
+                       # Do not: <script />
+                       # as at least Lynx inhibits any further HTML output.
                        Wprint <<"HERE";
-<script type="text/javascript" src="@{[ uri_escaped(path_web('/My/css_inherit.js')) ]}" />
+<script type="text/javascript" src="@{[ uri_escaped(path_web('/My/css_inherit.js')) ]}"></script>
 HERE
                        }
                }