&Wrequire &Wuse
&path_web &path_abs_disk
&uri_escaped
- &a_href &a_href_cz
+ &a_href &a_href_cc
&vskip
&img &centerimg &rightimg
$W
use URI::Escape;
require HTTP::BrowserDetect;
require HTTP::Negotiate;
-my $have_Geo_IP; BEGIN { $have_Geo_IP=eval { require Geo::IP; 1; }; }
+our $have_Geo_IP; BEGIN { $have_Geo_IP=eval { require Geo::IP; 1; }; }
# Do not: use ModPerl::Util qw(exit);
# to prevent in mod_perl2: "exit" is not exported by the ModPerl::Util module
# I do not know why.
use POSIX qw(strftime);
use Tie::Handle;
-use Apache2::Const qw(HTTP_MOVED_TEMPORARILY OK);
+use Apache2::Const qw(HTTP_MOVED_TEMPORARILY OK HTTP_OK);
use URI;
use URI::QueryParam;
use Cwd;
require HTTP::Date;
+require Storable;
+require Digest::MD5;
+require Data::Compare;
+use Data::Dumper;
+require Encode;
+use Apache2::Filter;
+use Apache2::Connection;
#our $W;
- # $W->{"title"}
- # $W->{"head"}
- # $W->{"force_charset"}
- # $W->{"heading_done"}
- # $W->{"footer_passed"}
- # %{$W->{"headers"}}
- # %{$W->{"headers_lc"}} # maps lc($headers_key)=>$headers_key
- # %{$W->{"args"}}
sub cleanup($)
{
my($apache_request)=@_;
$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
+ cache_finish();
# Sanity protection.
$W=undef();
return OK;
do { $W->{"r"}->args(""); delete $ENV{"QUERY_STRING"}; } if $W->{"r"}->method() eq "POST";
# Do not: $W->{"r"}->args()
# as it parses only QUERY_STRING (not POST data).
- $W->{"args"}={ CGI->new($W->{"r"})->Vars() };
+ $W->{"args_orig_array"}=[ CGI->new($W->{"r"})->Vars() ];
+ $W->{"args"}={ @{$W->{"args_orig_array"}} };
for my $name (keys(%{$W->{"args"}})) {
my @vals=split /\x00/,$W->{"args"}{$name};
next if @vals<=1;
$W->{"args"}{$name}=[@vals];
}
- do { $W->{$_}=$W->{"r"}->headers_in()->{"Accept"} if !defined $W->{$_}; } for ("accept");
- do { $W->{$_}=$W->{"r"}->headers_in()->{"User-Agent"}||"" if !defined $W->{$_}; } for ("user_agent");
+ $W->{"headers_in"}=$W->{"r"}->headers_in();
+ Wrequire 'My::Hash::Merge';
+ $W->{"headers_in"}=My::Hash::Merge->new(
+ $W->{"headers_in"},
+ My::Hash::Sub->new({
+ "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); },
+ }),
+ );
+ $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"});
+
+ if ($W->{"r"}->method() eq "GET" || $W->{"r"}->method() eq "HEAD") {
+ for (\$W->{"http_safe"}) {
+ # Extend the current ETag system instead if you would need it:
+ cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?"
+ if defined($$_) && !$$_;
+ $$_=1 if !defined $$_;
+ }
+ }
+ else {
+ for (\$W->{"http_safe"}) {
+ cluck "Undefined HTTP-Safe-ty for method \"".$W->{"r"}->method()."\"!"
+ if !defined($$_);
+ $$_=0 if !defined $$_;
+ }
+ }
+ if ($W->{"http_safe"}) {
+ Wrequire 'My::Hash::RecordKeys';
+ $W->{"headers_in_RecordKeys"}=My::Hash::RecordKeys->new($W->{"headers_in"});
+ $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
+ }
- $W->{"browser"}=HTTP::BrowserDetect->new($W->{"user_agent"});
+ $W->{"browser"}=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});
if (!defined $W->{"have_style"}) {
$W->{"have_style"}=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0);
return bless $W,$class;
}
-# Although we have &tie-d *STDOUT we try to not to be dependent on it in My::Web itself.
+# Be aware other parts of code (non-My::Web) will NOT use this function!
# Do not: Wprint $W->{"heading"},"undef"=>1;
# as we would need to undef() it to turn it off and it would get defaulted in such case.
# Do not: exists $W->{"heading"}
cluck "undef Wprint" if !defined $text && !$args{"undef"};
delete $args{"undef"};
cluck join(" ","Invalid arguments:",keys(%args)) if keys(%args);
- $W->{"r"}->puts($text) if defined $text;
+ return if !defined $text;
+ cluck "utf-8 untested" if Encode::is_utf8($text);
+ $W->{"r"}->puts($text);
}
sub escapeHTML($)
return CGI::escapeHTML($text);
}
-# local *FH;
-# tie *FH,ref($W),$W;
-sub TIEHANDLE($)
-{
-my($class,$W)=@_;
-
- my $self={};
- $self->{"W"}=$W or confess "Missing W";
- return bless $self,$class;
-}
-
-sub WRITE
-{
-my($self,$scalar,$length,$offset)=@_;
-
- Wprint substr($scalar,0,$length);
-}
-
# /home/user/www/webdir
sub dir_top_abs_disk()
{
return $uri->rel(unparsed_uri());
}
-my %path_abs_disk_for_package; # $path_abs_disk_for_package{$W->{"__PACKAGE__"}}{$path_abs_disk}=1;
-
# Remember $path_abs_disk as a file belonging to the current request.
# The patched line stores the set inside the $W hash under "path_abs_disk_register"
# (read back by cache_finish_last_modified()) instead of the former
# %path_abs_disk_for_package that was keyed by $W->{"__PACKAGE__"}.
sub path_abs_disk_register($)
{
my($path_abs_disk)=@_;
- $path_abs_disk_for_package{$W->{"__PACKAGE__"}}{$path_abs_disk}=1;
+ $W->{"path_abs_disk_register"}{$path_abs_disk}=1;
}
# $args{"uri_as_in"}=1 to permit passing URI objects as: $in
exit 0;
}
-sub header (%)
+# Set outgoing HTTP response headers from the given key=>value pairs. Existing entries are overwritten.
+sub header(%)
{
my(%pairs)=@_;
while (my($key,$val)=each(%pairs)) {
do { cluck "Headers already sent"; next; } if $W->{"heading_done"}; # warn and skip the pair once $W->{"heading_done"} is set
- for ($W->{"headers_lc"}{lc $key} || ()) {
- delete $W->{"headers"}{$_};
- }
- $W->{"headers_lc"}{lc $key}=$key;
- $W->{"headers"}{$key}=$val;
+ $W->{"r"}->headers_out()->set($key,$val); # written directly to the headers_out table of the request
}
}
# As 'Apache2::ForwardedFor' takes the first of $ENV{"HTTP_X_FORWARDED_FOR"}
# while the content is '127.0.0.1, 213.220.195.171' if the client has its own proxy,
# we must take the last item ourselves.
- my $r=$W->{"r"}->headers_in()->{"X-Forwarded-For"} || $W->{"r"}->get_remote_host();
- $r=~s/^.*,\s*//;
+ # Be VERY sure you always retrieve all the headers unconditionally to hit: My::Hash::RecordKeys
+ my $x_forwarded_for=$W->{"headers_in"}{"X-Forwarded-For"};
+ $x_forwarded_for=~s/^.*,\s*// if $x_forwarded_for;
+ my $remote_ip=$W->{"headers_in"}{"_remote_ip"};
+ my $r;
+ $r||=$x_forwarded_for;
+ $r||=$remote_ip;
return $r;
}
-sub is_cz ()
-{
- return 0 if !$have_Geo_IP;
- return "CZ" eq Geo::IP->new()->country_code_by_addr(remote_ip());
-}
-
-sub a_href_cz ($$;%)
+# $url={"JP"=>"http://specific",...}; hash reference keyed by country code, selecting the link target
+# $url={""=>"http://default",...}; the "" key is the one used when no country code could be determined
+sub a_href_cc($$;%) # link $contents via a_href() to the URL matching the country code of the client
{
my($url,$contents,%args)=@_;
- return a_href $url,$contents,%args if is_cz();
- return $contents;
+ my $cc;
+ $cc||=Geo::IP->new()->country_code_by_addr(remote_ip()) if $have_Geo_IP; # looked up only if Geo::IP could be loaded
+ $cc||="";
+ $url=$url->{$cc}; # from here on $url is the selected URL (or undef), no longer the hash reference
+ return $contents if !$url; # no URL for this country code: return the contents unlinked
+ return a_href $url,$contents,%args;
}
sub make ($)
{
my($self,$variants)=@_;
+ # Limit these entries to generate proper 'Vary' header.
+ my %hash=(map(($_=>$W->{"headers_in"}{$_}),qw(
+ Accept
+ Accept-Charset
+ Accept-Encoding
+ Accept-Language
+ )));
my $best=HTTP::Negotiate::choose($variants,
# Do not: $W->{"r"}
# to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84.
# to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84.
# Do not: HTTP::Headers->new($W->{"r"}->headers_in());
# to prevent empty result or even: Odd number of elements in anonymous hash
- HTTP::Headers->new(%{$W->{"r"}->headers_in()}));
+ HTTP::Headers->new(%hash));
$best||=$variants->[0][0]; # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed?
return $best;
}
return $F;
}
-sub no_cache($)
+sub _no_cache($) # send "Vary: *"; called as a class method by cache_start() when the request is not HTTP-safe
{
my($self)=@_;
header("Vary"=>"*"); # content may be based on unpredictable sources
}
-sub last_modified($)
+sub headers_in_filtered(@) # restrict the request headers to the named keys
{
-my($self)=@_;
+my(@keys)=@_;
+ # Returns a flat (key=>value,...) list read from $W->{"headers_in"}, in the order of @keys.
+ return map(($_=>$W->{"headers_in"}{$_}),@keys);
+}
+
+our %uri_args_frozen_to_headers_in_keys;
+our %uri_args_headers_in_frozen_to_headers_out;
+
+sub uri_args_headers_in_frozen_get($) # build the key used for %uri_args_headers_in_frozen_to_headers_out
+{
+my($headers_in_keys_arrayref)=@_;
+ # $headers_in_keys_arrayref: names of the request headers whose values become part of the key.
+ my %uri_args_headers_in_hash=(
+ "uri_args_frozen"=>$W->{"uri_args_frozen"},
+ "headers_in"=>{ headers_in_filtered(@$headers_in_keys_arrayref) },
+ );
+ return do { local $Storable::canonical=1; Storable::freeze(\%uri_args_headers_in_hash); }; # canonical: hash keys are stored sorted, so equal data freezes to an equal string
+}
+
+sub cache_output_filter($) # output filter installed by cache_start() through add_output_filter()
+{
+my($f)=@_;
+ # Pass the output through unchanged, reading up to 0x400 bytes at a time, while feeding it to the MD5 digest.
+ while ($f->read(my $text,0x400)) {
+ cluck "utf-8 untested" if Encode::is_utf8($text); # Possible here at all?
+ $f->print($text);
+ $W->{"digest-md5"}->add($text); # cache_finish() turns this digest into the "Content-MD5" value
+ }
+ return OK;
+}
- return if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"};
- our %path_abs_disk_registered;
- if (!$path_abs_disk_registered{$W->{"__PACKAGE__"}}++) {
- for my $package_orig (@{$packages_used_array{$W->{"__PACKAGE__"}}}) {
- local $_=$package_orig.".pm";
- s{::}{/}g;
- path_abs_disk "/$_","register"=>1;
+sub cache_start() # called from heading(): either answer from the recorded headers or start recording this response
+{
+ if (!$W->{"http_safe"}) {
+ __PACKAGE__->_no_cache();
+ return;
+ }
+ # Bare block: every "last" below leaves it and continues with installing the digest output filter.
+ {
+ # &Wrequire it here even if it will not be later used; to be stable!
+ Wrequire 'My::Hash::RestrictTo';
+ my %uri_args_hash=(
+ "uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(),
+ "args"=>$W->{"args_orig_array"},
+ );
+ $W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze(\%uri_args_hash); };
+ last if !(my $headers_in_keys_arrayref=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}}); # no header-key list recorded for this URI+arguments yet
+ # Second lookup level: the recorded header keys select which request header values extend the key.
+ # Protection to be sure we are stable:
+ $W->{"headers_in"}=My::Hash::RestrictTo->new($W->{"headers_in"},@$headers_in_keys_arrayref);
+ # NOTE(review): My::Hash::RestrictTo presumably rejects access to any other header key - confirm in that module.
+ $W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get($headers_in_keys_arrayref);
+ last if !(my $headers_out_hashref=$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}}); # no response headers recorded for this key yet
+ header(%$headers_out_hashref); # replay the response headers stored by cache_finish()
+ my $status;
+ {
+ # &meets_conditions will always deny the attempt if !2xx status().
+ # At least ap_read_request() sets: r->status=HTTP_REQUEST_TIME_OUT; /* Until we get a request */
+ my $status_old=$W->{"r"}->status();
+ $W->{"r"}->status(HTTP_OK);
+ # Update httpd's 'r->mtime' as the header "Last-Modified" is just not enough for ap_meets_conditions():
+ # &update_mtime() argument is really in _secs_, not in _msecs_ as the docs claim.
+ # Be aware '*1000000' would overflow Perl integer anyway.
+ # &set_last_modified would also override the "Last-Modified" headers_out!
+ # &mtime may exist but somehow does not work.
+ $W->{"r"}->update_mtime(HTTP::Date::str2time($headers_out_hashref->{"Last-Modified"}));
+ $status=$W->{"r"}->meets_conditions();
+ $W->{"r"}->status($status_old);
}
+ last if OK==$status; # OK: the full response still has to be generated
+ $W->{"r"}->status($status); # otherwise answer with the returned status alone
+ exit 0;
+ die "NOTREACHED";
+ }
+ # Not answered from the recorded headers: hash the output so cache_finish() can record them.
+ $W->{"digest-md5"}=Digest::MD5->new();
+ $W->{"cache_active"}=1;
+ $W->{"r"}->add_output_filter(\&cache_output_filter);
+}
+
+sub cache_finish_last_modified() # returns the "Last-Modified" value: HTTP date of the newest registered file
+{
+ cluck "Not yet done now? W __PACKAGE__: ".$W->{"__PACKAGE__"}
+ if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"};
+ for my $package_orig (@{$packages_used_array{$W->{"__PACKAGE__"}}}) { # every package recorded for the current $W->{"__PACKAGE__"}
+ local $_=$package_orig.".pm";
+ s{::}{/}g;
+ path_abs_disk "/$_","register"=>1; # presumably ends up in path_abs_disk_register() for that .pm file - verify in path_abs_disk()
}
my $mtime_newest;
- for my $path_abs_disk (keys(%{$path_abs_disk_for_package{$W->{"__PACKAGE__"}}})) {
-###print STDERR "CHECK:$path_abs_disk\n";
+ for my $path_abs_disk (keys(%{$W->{"path_abs_disk_register"}})) {
my $mtime=(stat $path_abs_disk)[9]; # element 9 of stat() is the modification time
do { cluck "No mtime for: $path_abs_disk"; next; } if !$mtime;
$mtime_newest=$mtime if !$mtime_newest || $mtime_newest<$mtime;
}
cluck "No mtime_newest found for the current W __PACKAGE__: ".$W->{"__PACKAGE__"}
if !$mtime_newest;
- # "Vary" header is REQUIRED in this case:
- header("Last-Modified"=>HTTP::Date::time2str($mtime_newest));
- return 1;
+ return HTTP::Date::time2str($mtime_newest); # NOTE(review): with no mtime found this presumably yields the current time - confirm against HTTP::Date
+}
+
+
+sub cache_finish() # called from cleanup(): store the header keys and response headers of this request for later reuse
+{
+ # Do not: return if !$W->{"uri_args_frozen"};
+ # as we may have just given 304 and 'exit 0;' without starting the caching.
+ return if !$W->{"cache_active"};
+ # "cache_active" is set only at the end of cache_start(), together with installing the digest output filter.
+ # Fill-in/check: %uri_args_frozen_to_headers_in_keys
+ my $headers_in_keys_stored_arrayref_ref=\$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}};
+ my @headers_in_keys=tied(%{$W->{"headers_in_RecordKeys"}})->accessed(); # presumably the header names read during this request - see My::Hash::RecordKeys
+ if (!$$headers_in_keys_stored_arrayref_ref
+ || !Data::Compare::Compare(\@headers_in_keys,$$headers_in_keys_stored_arrayref_ref)) {
+ cluck "Non-matching generated 'headers_in_keys' per 'uri_args_frozen' key:\n"
+ .Dumper(\@headers_in_keys,$$headers_in_keys_stored_arrayref_ref)
+ if $$headers_in_keys_stored_arrayref_ref;
+ # Build or possibly prevent such further warn dupes:
+ $$headers_in_keys_stored_arrayref_ref=\@headers_in_keys;
+ # Build or regenerate as obsoleted now:
+ $W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get(\@headers_in_keys);
+ }
+ # The values below are only stored for later requests; cache_start() replays them through header().
+ # Prepare 'headers_out' for the future reuse:
+ my %headers_out;
+ $headers_out{"Content-MD5"}=$W->{"digest-md5"}->b64digest();
+ # In fact we could also use MD5 for ETag as if we know ETag we also know MD5.
+ # But this way we do not need to calculate MD5 and we still can provide such ETag. So.
+ # $W->{"r"}->set_etag() ?
+ $headers_out{"ETag"}='"'.Digest::MD5::md5_base64($W->{"uri_args_headers_in_frozen"}).'"';
+ # $W->{"r"}->set_content_length() ?
+ $headers_out{"Content-Length"}=$W->{"r"}->bytes_sent(); # NOTE(review): assumes bytes_sent() is already final at this point - confirm
+ my %Vary=map(($_=>1),(@headers_in_keys));
+ for (keys(%Vary)) {
+ next if !/^_/; # keys starting with "_" (such as "_remote_ip") are synthetic, not real request headers
+ $Vary{"*"}=1;
+ delete $Vary{$_};
+ }
+ %Vary=("*"=>1) if $Vary{"*"}; # "*" replaces all the other field names
+ $headers_out{"Vary"}=join(", ",sort keys(%Vary));
+ # $W->{"r"}->set_last_modified() ?
+ $headers_out{"Last-Modified"}=cache_finish_last_modified();
+ # A differing stored entry is reported by cluck and then replaced by the fresh one.
+ # Fill-in/check: %uri_args_headers_in_frozen_to_headers_out
+ my $headers_out_stored_hashref_ref=\$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}};
+ if (!$$headers_out_stored_hashref_ref
+ || !Data::Compare::Compare(\%headers_out,$$headers_out_stored_hashref_ref)) {
+ cluck "Non-matching generated 'headers_out' per 'uri_args_headers_in_frozen' key:\n"
+ .Dumper(\%headers_out,$$headers_out_stored_hashref_ref)
+ if $$headers_out_stored_hashref_ref;
+ # Build or possibly prevent such further warn dupes:
+ $$headers_out_stored_hashref_ref=\%headers_out;
+ }
+
+###print STDERR Dumper(\%uri_args_frozen_to_headers_in_keys,\%uri_args_headers_in_frozen_to_headers_out);
}
sub heading()
{
my($class)=@_;
+ if (!$W->{"header_only"}) {
+ header("Content-Style-Type"=>"text/css");
+ header("Content-Script-Type"=>"text/javascript");
+ # $W->{"r"}->content_languages() ?
+ do { header("Content-Language"=>$_) if $_; } for $W->{"language"};
+ }
+ # TODO: Support also: private
+ header("Cache-Control"=>"public"); # HTTP/1.1
+
# $ENV{"CLIENT_CHARSET"} ignored (mod_czech support dropped!)
my $client_charset=$W->{"force_charset"} || "us-ascii";
- header("Content-Style-Type"=>"text/css");
- header("Content-Script-Type"=>"text/javascript");
- do { header("Content-Language"=>$_) if $_; } for $W->{"language"};
- $class->last_modified() if !$W->{"no_cache"};
- $class->no_cache() if $W->{"no_cache"};
-
- while (my($key,$val)=each(%{$W->{"headers"}})) {
- $W->{"r"}->headers_out()->{$key}=$val;
- }
- exit if $W->{"r"}->header_only();
- return if $W->{"header_only"};
- # We still can append headers before we put out some text.
- # FIXME: It is not clean to still append them without overwriting.
- return if $W->{"heading_done"}++;
# Workaround bug
# https://bugzilla.mozilla.org/show_bug.cgi?id=120556
# Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
my $mime;
# http://validator.w3.org/ does not send ANY "Accept" headers!
- $mime||="application/xhtml+xml" if !$W->{"accept"} && $W->{"user_agent"}=~m{^W3C_Validator/}i;
+ $mime||="application/xhtml+xml" if 1
+ && !$W->{"headers_in"}{"Accept"}
+ && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i;
$mime||=$class->Negotiate_choose([
# Put the fallback variant as the first one.
# Rate both variants the same to prefer "text/html" for undecided clients.
# application/xml ?
# text/xml ?
]);
+ # mod_perl doc: If you set this header via the headers_out table directly, it
+ # will be ignored by Apache. So do not do that.
$W->{"r"}->content_type("$mime; charset=$client_charset");
+
+ cache_start();
+ return if $W->{"header_only"};
+ # We still can append headers before we put out some text.
+ # FIXME: It is not clean to still append them without overwriting.
+ return if $W->{"heading_done"}++;
+
Wprint '<?xml version="1.0" encoding="'.$client_charset.'"?>'."\n" if $mime=~m{^application/\w+[+]xml$};
return if $W->{"xml_header_only"};
Wprint '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'."\n";