# $Id$
# Common functions for HTML/XHTML output generation
# Copyright (C) 2003-2005 Jan Kratochvil
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; exactly version 2 of June 1991 is required
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


package My::Web;
require 5.6.0;    # at least 'use warnings;' but we need some 5.6.0+ modules anyway
our $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };
our $CVS_ID=q$Id$;
use strict;
use warnings;

use Exporter;
sub Wrequire($%);
sub Wuse($@);
our $W;
# Every name listed here must exist in this package; '&centerimg' had been
# damaged to a non-identifier token, which silently dropped it from the exports.
our @EXPORT=qw(
    &Wrequire &Wuse
    &path_web &path_abs_disk
    &uri_escaped
    &a_href &a_href_cc &text_cc
    &vskip
    &img &centerimg &rightimg &leftimg
    $W
    &input_hidden_persistents
    &escapeHTML
    &form_method
);
our @ISA=qw(Tie::Handle Exporter);

# Dependency bookkeeping filled by &Wrequire:
#   $packages_used_hash{$caller}{$target}  - use counter of $target by $caller
#   $packages_used_hash{$caller}{"_done"}  - set by &footer; no appends expected afterwards
#   $packages_used_array{$caller}          - ordered list of the targets of $caller
my %packages_used_hash;    # $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
my %packages_used_array;

BEGIN {
    use Carp qw(cluck confess);
    $W->{"__My::Web_init"}=1;

    # Load $file (given either as "Some::Class" or "Some/Class.pm") and record
    # it as a dependency of every package found on the current call stack.
    # $args{"first"}=1 puts the package to the front of the dependency list.
    # Returns 1; confesses if the underlying 'require' fails.
    sub Wrequire ($%) {
        my($file,%args)=@_;

        # print STDERR "Wrequire $file\n";
        # Normalize to both forms: $class is "A::B", $file is "A/B.pm".
        $file=~s#/#::#g;
        $file=~s/[.]pm$//;
        my $class=$file;
        $file=~s#::#/#g;
        $file.=".pm";

        # Collect the packages of all the callers; we are always one of them.
        my %callers;
        for (my $depth=0;defined caller($depth);$depth++) {
            $callers{caller($depth)}=1;
        }
        my $selfpkg=__PACKAGE__;
        $callers{$selfpkg}=1;

        for my $target ($class,__PACKAGE__) {
            for my $caller (keys(%callers)) {
                next if $packages_used_hash{$caller}{$target}++;
                cluck "Appending to the '_done' package list: caller=$caller,target=$target"
                    if $packages_used_hash{$caller}{"_done"};
                if ($args{"first"}) {
                    unshift @{$packages_used_array{$caller}},$target;
                }
                else {
                    push @{$packages_used_array{$caller}},$target;
                }
            }
        }

        eval { CORE::require "$file"; } or confess $@;
        1;    # Otherwise 'require' would already fail above.
    }

    # 'use'-like wrapper: &Wrequire the module and import @list from it into
    # the package of our caller.  Returns 1.
    sub Wuse ($@) {
        my($file,@list)=@_;

        # print STDERR "Wuse $file\n";
        Wrequire $file;
        # Call ->import on the class name even if a "Some/Class.pm" path was
        # passed; &Wrequire accepts both forms so we must do the same here.
        (my $class=$file)=~s#/#::#g;
        $class=~s/[.]pm$//;
        local $Exporter::ExportLevel=$Exporter::ExportLevel+1;
        $class->import(@list);
        1;
    }

    # Our own 'use My::Web;' hook: track the dependency, then let Exporter
    # export into the package of the real caller.
    sub import {
        my($class,@rest)=@_;

        local $Exporter::ExportLevel=$Exporter::ExportLevel+1;
        Wrequire("$class");
        return $class->SUPER::import(@rest);
    }
}

use WebConfig;    # see also below: Wuse 'WebConfig';
require Image::Size;    # for &imgsize
use File::Basename;    # &basename
use Carp qw(cluck confess);
use URI::Escape;
require HTTP::BrowserDetect;
require HTTP::Negotiate;
our $have_Geo_IP;
BEGIN { $have_Geo_IP=eval { require Geo::IP; 1; }; }
# Do not: use ModPerl::Util qw(exit);
# to prevent in mod_perl2: "exit" is not exported by the ModPerl::Util module
# I do not know why.
use POSIX qw(strftime);
use Tie::Handle;
use Apache2::Const qw(HTTP_MOVED_TEMPORARILY OK HTTP_OK);
use URI;
use URI::QueryParam;
use Cwd;
require HTTP::Date;
require Storable;
require Digest::MD5;
require Data::Compare;
use Data::Dumper;
require Encode;
use Apache2::RequestUtil;
use Apache2::Filter;
use Apache2::Connection;
require MIME::Base64;
use Apache2::ServerUtil;
require MIME::Types;
require MIME::Parser;
use Apache2::RequestRec;
use Apache2::RequestIO;
use Apache2::Response;


#our $W;


sub init ($%)
{
my($class,%args)=@_;

    print STDERR "$class->init ".Apache2::RequestUtil->request()->unparsed_uri()."\n";

    # We need to track package dependencies, so we need to call it from &init.
    # We cannot do it in BEGIN { } block
    # as it would not be tracked for each of the toplevel users later.
    Wuse 'WebConfig';
    Wrequire 'My::Hash';

    # $W={} can get somehow created very easily.
    # Do not: cluck "W not empty:\n".Dumper($W) if keys(%$W);
    # to prevent (of $W->{"headers_in"}): TODO: Enumeration may not be expected.
cluck "W not empty; __PACKAGE__ was: ".$W->{"__PACKAGE__"} if keys(%$W); $W=My::Hash->new({},"My::Hash::Sub","My::Hash::Push"); bless $W,$class; %$W=( "__PACKAGE__"=>scalar(caller()), %WebConfig, %args, # override %WebConfig settings ); # {"__PACKAGE__"} is mandatory for mod_perl-2.0; # $Apache2::Registry::curstash is no longer supported. do { cluck "No $_" if !$W->{$_}; } for "__PACKAGE__"; exit_hook_start(); # Package dependencies tracking only: Wrequire $W->{"__PACKAGE__"},"first"=>1; do { $W->{$_}=0 if !defined $W->{$_}; } for "detect_ent"; do { $W->{$_}=0 if !defined $W->{$_}; } for "detect_js"; do { $W->{$_}=1 if !defined $W->{$_}; } for "have_css"; # AFAIK it does not hurt anyone. do { $W->{$_}=0 if !defined $W->{$_}; } for "css_inherit"; do { $W->{$_}=1 if !defined $W->{$_}; } for "footer"; do { $W->{$_}=1 if !defined $W->{$_}; } for "footer_delimit"; do { $W->{$_}=1 if !defined $W->{$_}; } for "footer_ids"; do { $W->{$_}=1 if !defined $W->{$_}; } for "indexme"; do { $W->{$_}="" if !defined $W->{$_}; } for "body_attr"; do { $W->{$_}="en-US" if !defined $W->{$_}; } for "language"; my $footer_any=0; for (qw(footer_ids)) { $W->{$_}=0 if !$W->{"footer"}; $footer_any=1 if $W->{$_}; } $W->{"footer"}=0 if !$footer_any; $W->{"footer_delimit"}=0 if !$W->{"footer"}; $W->{"r"}=Apache2::RequestUtil->request(); $W->{"r"}->push_handlers("PerlCleanupHandler"=>\&cleanup); $W->{"web_hostname"}||=$W->{"r"}->hostname(); tie *STDOUT,$W->{"r"}; select *STDOUT; $|=1; $W->{"headers_in"}=$W->{"r"}->headers_in(); Wrequire 'My::Hash::Merge'; $W->{"headers_in"}=My::Hash::Merge->new( $W->{"headers_in"}, My::Hash::Sub->new({ "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); }, }), ); Wrequire 'My::Hash::Readonly'; $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"}); if ($W->{"r"}->method() eq "GET" || $W->{"r"}->method() eq "HEAD") { for (\$W->{"http_safe"}) { # Do not: # Extend the current ETag system instead if you would need it: # cluck "Explicitely NOT 
HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?" # if defined($$_) && !$$_; # as sometimes it just does not make sense to cache it. $$_=1 if !defined $$_; } } else { for (\$W->{"http_safe"}) { cluck "Undefined HTTP-Safe-ty for method \"".$W->{"r"}->method()."\"!" if !defined($$_); $$_=0 if !defined $$_; } } # Used only if: $W->{"http_safe"} # but we would cause on different method(): Appending to the '_done' package list Wrequire 'My::Hash::RecordKeys'; if ($W->{"http_safe"}) { $W->{"headers_in_RecordKeys"}=My::Hash::RecordKeys->new($W->{"headers_in"}); $W->{"headers_in"}=$W->{"headers_in_RecordKeys"}; } { local $_=$W->{"r"}->args() || ""; if ($W->{"detect_ent"}) { if (/[&]amp;have_ent/) { $W->{"have_ent"}=0; } elsif ( /[&]have_ent/) { $W->{"have_ent"}=1; } else { delete $W->{"have_ent"}; } if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") { $W->{"head_push"}='{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0] ."?".($_ || "detect_ent_glue=1").'&have_ent=detect') .'" />'."\n"; } } s/([&])amp;/$1/g; $W->{"r"}->args($_); } $W->{"args"}=URI->new("?".$W->{"r"}->args())->query_form_hash(); $W->merge_post_args() if $W->{"r"}->method() eq "POST"; # Prepare '$args' first to (FIXME: Why?) prevent: Not a reference my $args=$W->{"args"}; $W->{"args_orig"}=Storable::dclone($args); $W->{"browser"}=sub { # Lazy-evaluation, we may not need the "User-Agent" header at all. return our $r||=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"}); }; if (!defined $W->{"have_style"}) { $W->{"have_style"}=sub { # Lazy-evaluation, we may not need the "User-Agent" header at all. return our $r||=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0); }; } $W->{"have_js"}=($W->{"args"}{"have_js"} ? 1 : 0); if ($W->{"detect_js"} && !$W->{"have_js"}) { # Do not: '."\n"; } # Required by &_args_check below. 
$W->{"_init_done"}=1; do { _args_check(%$_) if $_; } for $W->{"args_check"}; return $W; } sub form_method($) { my($method)=@_; my $r=q{method="}.$method.q{"}; return $r." ".q{enctype="application/x-www-form-urlencoded" accept-charset="us-ascii utf-8"} if $method eq "post"; return $r." ". q{accept-charset="us-ascii utf-8"} if $method eq "get"; cluck "Undefined method: $method"; return $r; } sub merge_post_args($) { my($class)=@_; my @post_args=$class->read_post_args(); while (@post_args) { my $name=shift @post_args; my $data=shift @post_args; my $ref=\$W->{"args"}{$name}; if (!defined $$ref) { $$ref=$data; } elsif (!ref $$ref) { $$ref=[$$ref,$data]; } elsif ("ARRAY" eq ref $$ref) { push @$$ref,$data; } else { cluck "Ignoring POST argument \"$name\", orig is weird:\n",Dumper($$ref); } } return; } # Do not: use CGI; # as CGI parsing of POST vs. QUERY_STRING data, multiple-valued keys etc. # is too dense and causes weird problems, together with mod_perl etc. sub read_post_args($) { my($class)=@_; local $_=$class->http_headers_in_for("Content-type")->content_type(); return $class->read_multipart_form_data() if $_ eq "multipart/form-data"; return $class->read_application_x_www_form_urlencoded() if $_ eq "application/x-www-form-urlencoded"; cluck "Unknown POST data body, ignored: $_"; return; } sub read_application_x_www_form_urlencoded($) { my($class)=@_; my $body=""; for (;;) { my $got=$W->{"r"}->read(my($buf),0x1000); # Do not: cluck "Error reading POST data: $!" if !defined $got; # as it should be done using: APR::Error exceptions last if !$got; $body.=$buf; } return URI->new("?".$body)->query_form(); } sub read_multipart_form_data($) { my($class)=@_; my $parser=MIME::Parser->new(); # FIXME: No unlink()s done! $parser->output_under("/tmp"); local *R_FH; tie *R_FH,$W->{"r"}; local *FH; tie *FH,"My::Web::ReadMerged", join("",map(($_.": ".$W->{"headers_in"}{$_}."\n"),qw( Content-type )))."\n", \*R_FH; my $body=$parser->parse(\*FH); cluck "No multipart POST request body?" 
if !$body->is_multipart(); return map(( $_->head()->mime_attr("content-disposition.name") => join("",@{$_->body()}) ),$body->parts()); # TODO: Globalize, make it IO::* compatible, split to the merging part + IO::Scalar. package My::Web::ReadMerged; require Tie::Handle; require Exporter; our @ISA=qw(Tie::Handle Exporter); use Carp qw(cluck confess); sub READLINE($) { my($self)=@_; confess "Slurp not yet implemented" if !defined $/; # Apache2::RequestIO does not support 'READLINE'! for (;;) { if (defined $self->{"data"} && $self->{"data"}=~s{^.*\Q$/\E}{}) { $self->{"offset"}+=length $&; return $&; } my $fh_orig=$self->{"fh_orig"}; if (!$fh_orig) { my $r=$self->{"data"}; delete $self->{"data"}; $self->{"offset"}+=length $r if defined $r; return $r; } my $got=read $fh_orig,my($buf),0x1000; cluck "Error reading POST data: $!" if !defined $got; delete $self->{"fh_orig"} if !$got; cluck "INTERNAL: fh_orig should not exist here" if !defined $self->{"data"}; $self->{"data"}.=$buf; } } sub TELL($) { my($self)=@_; return $self->{"offset"}; } sub TIEHANDLE($$$) { my($class,$data,$fh_orig)=@_; my $self=bless {},$class; $self->{"data"}=$data; $self->{"offset"}=0; $self->{"fh_orig"}=$fh_orig; return $self; } } sub cleanup($) { my($apache_request)=@_; cluck "CORE::GLOBAL::exit hook not ran" if !$W->{"_exit_done"}; cluck "packages not finalized" if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}; cache_finish(); # Sanity protection. $W=undef(); exit_hook_stop(); return OK; } # PerlResponseHandler is RUN_FIRST and &ModPerl::Util::exit returns OK, so no (sane) go. # PerlLogHandler is already too late to be able to produce any output. my $exit_orig; sub exit_hook { cluck "Missing ->init while in exit_hook()" if !$W->{"_init_done"}; # &footer will call us recursively! 
footer() if !$W->{"_exit_done"}++; return &{$exit_orig}(@_); } sub exit_hook_start { do { cluck "exit_hook_start() twice?"; return; } if defined $exit_orig; $exit_orig=\&CORE::GLOBAL::exit; # Prevent: Subroutine CORE::GLOBAL::exit redefined no warnings 'redefine'; *CORE::GLOBAL::exit=\&exit_hook; } sub exit_hook_stop { do { cluck "exit_hook_stop() without exit_hook_start()?"; return; } if \&exit_hook ne \&CORE::GLOBAL::exit; do { cluck "INTERNAL: exit_orig uninitialized"; return; } if !$exit_orig; # Prevent: Subroutine CORE::GLOBAL::exit redefined no warnings 'redefine'; *CORE::GLOBAL::exit=$exit_orig; $exit_orig=undef(); } # Be aware other parts of code (non-My::Web) will NOT use this function! # Do not: Wprint $W->{"heading"},"undef"=>1; # as we would need to undef() it to turn it off and it would get defaulted in such case. # Do not: exists $W->{"heading"} # as we use a lot of 'for $W->{"heading"}' which instantiates it with the value: undef() sub Wprint($%) { my($text,%args)=@_; cluck "undef Wprint" if !defined $text && !$args{"undef"}; delete $args{"undef"}; cluck join(" ","Invalid arguments:",keys(%args)) if keys(%args); return if !defined $text; # Do not: cluck "utf-8 untested" if Encode::is_utf8($text); # as it is valid here. $W->{"r"}->puts($text); } sub request_check(;$) { my($self)=@_; # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ... confess "Calling sensitive dynamic code from a static code" if !eval { Apache2::RequestUtil->request(); }; # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"}; # as it is valid at least while preparing arguments to call: &project::Lib::init } # Do not: use CGI; # as it is too much backward compatible regarding the charset encodings etc. # and the resulting code is too dense with no additional functionality for the recent content. 
# Escape the characters special to HTML/XHTML text and attribute values:
#   &  <  >  "
# '&' must be processed first so that the entities produced for the other
# characters do not get escaped twice.
# Returns the escaped copy; the argument is left untouched.
sub escapeHTML($) {
    my($text)=@_;

    my $html=$text;
    $html=~s{&}{&amp;}g;
    $html=~s{<}{&lt;}g;
    $html=~s{>}{&gt;}g;
    $html=~s{"}{&quot;}g;
    return $html;
}

# /home/user/www/webdir
# Top directory of the web tree on disk, derived (and cached) from the
# %INC location of this very package.  Returns nothing on failure.
sub dir_top_abs_disk() {
    our $dir_top_abs_disk;
    if (!$dir_top_abs_disk) {
        my $selfpkg_relpath=__PACKAGE__;
        $selfpkg_relpath=~s{::}{/}g;
        $selfpkg_relpath.=".pm";
        my $selfpkg_abspath=$INC{$selfpkg_relpath} or do {
            cluck "Unable to find self package $selfpkg_relpath";
            return;
        };
        $selfpkg_abspath=~s{/*\Q$selfpkg_relpath\E$}{} or do {
            cluck "Unable to strip myself \"$selfpkg_relpath\" from the abspath: $selfpkg_abspath";
            return;
        };
        cluck "INC{myself} is relative?: $selfpkg_abspath" if $selfpkg_abspath!~m{^/};
        $dir_top_abs_disk=$selfpkg_abspath;
    }
    return $dir_top_abs_disk;
}

# Absolute URI object of the current request, cached in $W->{"unparsed_uri"}.
sub unparsed_uri() {
    request_check();
    if (!$W->{"unparsed_uri"}) {
        # Do not: $W->{"r"}
        # as we may be called before &init from: &My::Project::init
        my $r=Apache2::RequestUtil->request();
        cluck "Calling ".'&unparsed_uri'." from a static code, going to fail" if !$r;
        my $uri_string=$r->unparsed_uri() or cluck "Valid 'r' missing unparsed_uri()?";
        my $uri=URI->new_abs($uri_string,"http://".$W->{"web_hostname"}."/");
        $W->{"unparsed_uri"}=$uri;
    }
    return $W->{"unparsed_uri"};
}

# Resolve $in (string or URI object) against the current request URI.
# Returns a canonical absolute URI object.
sub in_to_uri_abs($) {
    my($in)=@_;

    # Otherwise we may have been already processed and thus legally relativized.
    # FIXME data: Currently disabled, all the data are too violating such rule.
    if (0 && !ref $in) {
        my $uri_check=URI->new($in);
        $uri_check->scheme() || $in=~m{^\Q./\E} || $in=~m{^/}
            or cluck "Use './' or '/' prefix for all the local references: $in";
    }
    my $uri=URI->new_abs($in,unparsed_uri());
    $uri=$uri->canonical();
    return $uri;
}

# $args{"uri_as_in"}=1 to permit passing URI objects as: $in
# $args{"abs"}=0 || 1;    # overrides: $W->{"args"}{"Wabs"}
sub path_web($%)
{
my($in,%args)=@_;

    cluck if !$args{"uri_as_in"} && ref $in;
    my $uri=in_to_uri_abs($in);
    if (uri_is_local($uri)) {
        # Prefer the $uri values over "args_persistent" values.
# &query_form_hash comes from: URI::QueryParam $uri->query_form_hash({ map({ my $key=$_; my $val=$W->{"args"}{$key}; (!defined $val ? () : ($key=>$val)); } keys(%{$W->{"args_persistent"}})), %{$uri->query_form_hash()}, }); } my $abs; do { $abs=$_ if defined; } for $W->{"args"}{"Wabs"},$args{"abs"}; return $uri->abs(unparsed_uri()) if $abs; return $uri->rel(unparsed_uri()); } sub path_abs_disk_register($) { my($path_abs_disk)=@_; $W->{"path_abs_disk_register"}{$path_abs_disk}=1; } # $args{"uri_as_in"}=1 to permit passing URI objects as: $in sub path_abs_disk($%) { my($in,%args)=@_; cluck if !$args{"uri_as_in"} && ref $in; my $uri=in_to_uri_abs($in); cluck if !uri_is_local($uri); my $path=$uri->path(); cluck "URI compatibility: ->path() not w/leading slash of URI \"$uri\"; path: $path" if $path!~m{^/}; my $r=dir_top_abs_disk().$path; path_abs_disk_register $r if !defined $args{"register"} || $args{"register"}; return $r; } sub fatal (;$); sub _args_check (%) { my(%tmpl)=@_; while (my($name,$regex)=each(%tmpl)) { my $name_html="Parameter ".escapeHTML($name).""; $W->{"args"}{$name}="" if !defined $W->{"args"}{$name}; $W->{"args"}{$name}=[ $W->{"args"}{$name} ] if !ref $W->{"args"}{$name} && ref $regex; fatal "$name_html passed as multivar although singlevar expected" if ref $W->{"args"}{$name} && !ref $regex; $regex=$regex->[0] if ref $regex; for my $val (!ref $W->{"args"}{$name} ? $W->{"args"}{$name} : @{$W->{"args"}{$name}}) { $val="" if !defined $val; fatal "$name_html ".escapeHTML($val)."" ." does not match the required regex ".escapeHTML($regex)." " if $regex ne "" && $val!~/$regex/; } } } sub vskip (;$) { my($height)=@_; return ' 

'."\n"; } sub fatal (;$) { my($msg)=@_; $msg="UNKNOWN" if !$msg; cluck "FATAL: $msg"; # Do not send it unconditionally. # The intial duplicated '{"heading_done"}=0 if $W->{"header_only"}; # Do not send it unconditionally. # Prevents warn: Headers already sent if (!$W->{"heading_done"}) { $W->{"indexme"}=0; # For the case no heading was sent yet. $W->{"header_only"}=0; # assurance for &heading $W->{"content_type"}="text/html"; # Force HTML and avoid strictly checked XHTML. My::Web->heading(); } Wprint "\n".vskip("3ex")."

FATAL ERROR: $msg!

\n" ."

You can report this problem's details to" ." ".a_href("mailto:".$W->{"admin_mail"},"admin of this website").".

\n"; exit; } sub footer_packages_used_comments() { my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}}; for my $package (@$packages_used) { my $cvs_id=(eval('$'.$package."::CVS_ID") # || $package # debug ); Wprint ''."\n" if $cvs_id; } } sub footer() { cluck 'Explicit &footer call is deprecated, !_exit_dne' if !$W->{"_exit_done"}; exit if $W->{"footer_done"}++; # deadlock prevention: &{$_}() for reverse @{$W->{"footer_sub_push"}}; if ($W->{"header_only"}) { $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1; exit; } Wprint vskip if $W->{"footer_delimit"}; &{$_}() for reverse @{$W->{"footing_delimit_sub_push"}}; Wprint "
\n" if $W->{"footer"}; # Never update the package list while we examine it! $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1; # Dump the whole packages stack or just the primary one? #my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}}; my $packages_used=[$W->{"__PACKAGE__"}]; if ($W->{"footer_ids"}) { Wprint '

'; Wprint join("
\n",map({ my $package=$_; my $cvs_id=(eval('$'.$package."::CVS_ID") # || $package # debug ); if (!$cvs_id) { (); } else { $cvs_id='$'.$cvs_id.'$'; # Eaten by 'q' operator. my @cvs_id_split=split / +/,$cvs_id; if (@cvs_id_split==8) { my $file=$package; $file=~s#::#/#g; my $ext; my @tried; for (qw(.pm)) { $ext=$_; my $path_abs_disk=path_abs_disk("/$file$ext"); push @tried,$path_abs_disk; last if -r $path_abs_disk; cluck "Class file $file not found; tried: ".join(" ",@tried) if !$ext; } $file.=$ext; my $viewcvs; if ((my $file_cvs=$file)=~s{^My/}{}) { $viewcvs=$W->{"viewcvs_My"}.$file_cvs; } else { $viewcvs=$W->{"viewcvs"}.$file; } $cvs_id_split[2]="" .a_href((map({ my $s=$_; $s=~s#/viewcvs/#$&~checkout~/#; $s; } $viewcvs))[0]."?rev=".$cvs_id_split[2], $cvs_id_split[2]); $cvs_id_split[1]=a_href($viewcvs,($package!~/^Apache2::/ ? $package : $cvs_id_split[1])); $cvs_id_split[5]=&{$W->{"cvs_id_author_sub"}}($cvs_id_split[5]); } join " ",@cvs_id_split; } } @$packages_used)); Wprint "

\n"; } footer_packages_used_comments(); do { Wprint $_ if $_; } for $W->{"footing"}; Wprint "\n"; exit; } # Existing entries are overwritten. sub header(%) { my(%pairs)=@_; while (my($key,$val)=each(%pairs)) { do { cluck "Headers already sent"; next; } if $W->{"heading_done"}; $W->{"r"}->headers_out()->set($key,$val); } } sub size_display ($) { my($size)=@_; if ($size<4096) {} elsif ($size<1024*1024) { $size=sprintf "%.1fK",$size/1024; } else { $size=sprintf "%.1fM",$size/1024/1024; } $size.="B"; return $size; } sub uri_is_local($) { my($in)=@_; my $uri_rel=in_to_uri_abs($in)->rel(unparsed_uri()); # Do not: defined $uri_rel->("userinfo"|"host"|"port")(); # as they fail to be called for schemes not supporting them. return 0 if $uri_rel->scheme(); return 0 if $uri_rel->authority(); return 1; } # &path_web still may be required for &uri_escaped ! sub uri_escaped($) { my($uri)=@_; cluck if !ref $uri; my $urient=escapeHTML($uri); return $uri if $uri eq $urient; request_check(); return $urient if uri_is_local $uri; return $uri if defined $W->{"have_ent"} && !$W->{"have_ent"}; # non-ent client return $urient if $W->{"have_ent"}; # ent client # Unknown client, &escapeHTML should not be needed here: return escapeHTML(path_web('/My/Redirect.pm?location='.uri_escape($uri->abs(unparsed_uri())))); } our $a_href_inhibited; sub a_href($;$%) { my($in,$contents,%args)=@_; request_check(); do { $$_=1 if !defined $$_; } for (\$args{"size"}); if (!defined $contents) { $contents=$in; $contents=File::Basename::basename($contents) if $args{"basename"}; $contents=~s/^mailto:([-.\w]+(?:@|\Q(at)\E)[-.\w]+)$/$1/; $contents=escapeHTML($contents); } $contents=~s#]*>##gi; $contents=~s###gi; return $contents if $a_href_inhibited; my $path_web=path_web $in,%args; my $r=""; $r.=''; my $size_in=$in; do { $size_in=$_ if $_ && !/^\d+$/; } for $args{"size"}; if ($args{"size"} && uri_is_local($size_in) && (($args{"size"} && $args{"size"}=~/^\d+$/ && $args{"size"}>=2) || 
$size_in=~/[.](?:gz|Z|rpm|zip|deb|lha)/)) { # Downloadable? my $path_abs_disk=path_abs_disk $size_in,%args; cluck "File not readable: $path_abs_disk" if !-r $path_abs_disk; $r.=' ('.size_display((stat($path_abs_disk))[7]).')'; } return $r; } sub a_href_inhibit($$;@) { my($self,$sub,@sub_args)=@_; local $a_href_inhibited=1; return &{$sub}(@sub_args); } sub input_hidden_persistents() { request_check(); return join("",map({ my $key=$_; my $val=$W->{"args"}{$key}; (!defined $val ? () : ''."\n"); } (keys(%{$W->{"args_persistent"}})))); } sub http_moved($$;$) { my($self,$url,$status)=@_; $url=path_web($url,"abs"=>1); $status||=HTTP_MOVED_TEMPORARILY; $W->{"r"}->status($status); $W->{"r"}->headers_out()->{"Location"}=$url; $W->{"header_only"}=1; $W->{"content_type"}=0; $W->{"charset"}=0; My::Web->heading(); exit; die "NOTREACHED"; } sub remote_ip () { # Do not: PerlModule Apache2::ForwardedFor # PerlPostReadRequestHandler Apache2::ForwardedFor # As 'Apache2::ForwardedFor' takes the first of $ENV{"HTTP_X_FORWARDED_FOR"} # while the contents is '127.0.0.1, 213.220.195.171' if client has its own proxy. # We must take the last item ourselves. # Be VERY sure you always retrieve all the headers unconditionally to hit: My::Hash::RecordKeys my $x_forwarded_for=$W->{"headers_in"}{"X-Forwarded-For"}; $x_forwarded_for=~s/^.*,\s*// if $x_forwarded_for; my $remote_ip=$W->{"headers_in"}{"_remote_ip"}; my $r; $r||=$x_forwarded_for; $r||=$remote_ip; return $r; } sub _cc() { my $r; $r||=Geo::IP->new()->country_code_by_addr(remote_ip()) if $have_Geo_IP; $r||=""; return $r; } # $url={"JP"=>"http://specific",...}; # $url={""=>"http://default",...}; sub a_href_cc($$;%) { my($url,$contents,%args)=@_; # A bit ineffective but we must process all the possibilities to get stable 'headers_in' hits! 
my %map=map(($_=>a_href($url->{$_},$contents,%args)),keys(%$url)); my $r; $r||=$map{_cc()}; $r||=$map{""}; return $r if $r; return $contents; } # $tree={"JP"=>"specific",...}; # $tree={""=>"default",...}; sub text_cc($) { my($tree)=@_; cluck if !$tree->{""}; my $r; $r||=$tree->{_cc()}; $r||=$tree->{""}; return $r; } sub make ($) { my($cmd)=@_; # FIXME: &alarm, --timeout is now infinite. # FIXME: Try to remove bash(1). # FIXME: Use: @PATH_FLOCK@ # Do not: dir_top_abs_disk(), # to prevent: flock: cannot open lock file /home/lace/www/www.jankratochvil.net: Is a directory my @argv=('flock',dir_top_abs_disk()."/WebConfig.pm",'bash','-c',$cmd.' >&2'); print STDERR join(" ","SPAWN:",@argv)."\n"; system @argv; } sub make_file($$) { my($self,$file)=@_; cluck "Pathname not absolute: $file" if $file!~m{^/}; return if -f $file; # TODO: Somehow quickly check dependencies? return make('make -s --no-print-directory' .' -C '."'".File::Basename::dirname($file)."' '".File::Basename::basename($file)."'"); } sub img_size ($$) { my($width,$height)=@_; cluck if !defined $width || !defined $height; return ($W->{"have_style"} ? "style=\"border:0;width:${width}px;height:${height}px\"" : "border=\"0\"") ." width=\"$width\" height=\"$height\""; } sub negotiate_variant (%) { my(%args)=@_; my @fields=("id","qs","content-type","encoding","charset","lang","size"); return [ map(($args{$_}),@fields) ]; } # Returns: 'HTTP::Headers' instance. sub http_headers_in_for($@) { my($self,@headers)=@_; # Limit these entries to generate proper 'Vary' header. return HTTP::Headers->new(map(($_=>$W->{"headers_in"}{$_}),@headers)); } # Input: $self is required! # Input: Put the fallback variant as the first one. # Returns: always only scalar! sub Negotiate_choose($$) { my($self,$variants)=@_; my $best=HTTP::Negotiate::choose($variants, # Do not: $W->{"r"} # to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84. 
# Do not: $W->{"r"}->headers_in() # to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84. # Do not: HTTP::Headers->new($W->{"r"}->headers_in()); # to prevent empty result or even: Odd number of elements in anonymous hash $self->http_headers_in_for(qw( Accept Accept-Charset Accept-Encoding Accept-Language ))); $best||=$variants->[0][0]; # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed? return $best; } # FIXME: Use for "content-type": MIME::Types my @img_variants=( { "id"=>"svg","qs"=>0.9,"content-type"=>"image/svg+xml" }, { "id"=>"png","qs"=>0.8,"content-type"=>"image/png" }, { "id"=>"gif","qs"=>0.7,"content-type"=>"image/gif" }, # Do not prefer these to avoid fedding them for '*/*' browsers. { "id"=>"dia","qs"=>0.6,"content-type"=>"application/x-dia-diagram" }, { "id"=>"fig","qs"=>0.6,"content-type"=>"image/x-xfig" }, { "id"=>"fig","qs"=>0.6,"content-type"=>"application/x-xfig" }, { "id"=>"sxd","qs"=>0.6,"content-type"=>"application/vnd.sun.xml.draw" }, { "id"=>"sxi","qs"=>0.6,"content-type"=>"application/vnd.sun.xml.impress" }, ); # Unsupported by Image::Size 2.992: sxd sxi dia # Supported by Image::Size 2.992: fig png gif jpeg # Expensive by Image::Size 2.992: svg my %img_map=( "svg" =>[qw(png gif)], # svg "png" =>[qw(png gif)], "jpeg"=>[qw(jpeg)], "dia" =>[qw(png gif)], # svg "fig" =>[qw(fig png gif)], "sxd" =>[qw(png gif)], "sxi" =>[qw(png gif)], ); # Returns: ($path_web,$path_abs_disk) # URI path segments support ignored here. Where it is used? 
# (';' path segment options)
#
# Resolve an image reference to its web path and its on-disk path.
# Arguments:
#   $in   - image reference; a warning is clucked unless uri_is_local($in).
#   %args - passed through to path_abs_disk() and path_web().
# If %img_map lists more than one extension for the file's original extension,
# every variant file is registered, built through make_file() and offered to
# Negotiate_choose(); the chosen extension then replaces the original one in the
# URI path.
# Returns: two-element list (path_web(...), path_abs_disk(...)).
sub _img_src($%) {
	my($in,%args)=@_;

	cluck if !uri_is_local $in;
	my $uri=in_to_uri_abs $in;
	# "register"=>0: this lookup only probes the original file.
	my $path_abs_disk=path_abs_disk $uri,%args,"uri_as_in"=>1,"register"=>0;
	cluck "Not exists image path_abs_disk: $path_abs_disk" if !-r $path_abs_disk;
	# Split "<base>.<ext>" at the last dot.
	my($path_abs_disk_base,$ext_orig)=($path_abs_disk=~/^(.*)[.](\w+)$/) or cluck;
	my $map_arrayref=$img_map{$ext_orig} or cluck;
	my $ext=$ext_orig;
	if (1!=@$map_arrayref) {
		my @nego_variants;
		for my $ext (@$map_arrayref) {
			my $path_abs_disk_variant=$path_abs_disk_base.".".$ext;
			path_abs_disk_register($path_abs_disk_variant);
			__PACKAGE__->make_file($path_abs_disk_variant);
			for my $var (@img_variants) {
				next if $var->{"id"} ne $ext;
				push @nego_variants,negotiate_variant(
						%$var,
						# stat field 7 is the file size in bytes.
						"size"=>(stat $path_abs_disk_variant)[7],
						);
			}
		}
		$ext=__PACKAGE__->Negotiate_choose(\@nego_variants);
	}
	# Swap the original extension for the chosen one inside the URI path.
	my $uri_path=$uri->path();
	$uri_path=~s/\Q.$ext_orig\E$/.$ext/ or cluck;
	$uri->path($uri_path);
	return path_web($uri,%args,"uri_as_in"=>1),path_abs_disk($uri,%args,"uri_as_in"=>1);
}

# $args{"attr"}
#
# Build the output for one image.
# Arguments:
#   $in   - image reference, resolved through _img_src().
#   $alt  - alternative text; tags are removed and the rest goes through escapeHTML().
#   %args - also passed to _img_src(); keys read here: "a_href_img", "a_href".
# Returns: $content, wrapped by a_href() when "a_href_img" or "a_href" is true
# ("a_href_img" is checked first).
# NOTE(review): $path_web, $width and $height are computed but not used in the
# visible text, and $content holds only the quoted $alt; the markup that
# presumably consumed them looks lost from this text - confirm against upstream.
sub img ($$%) {
	my($in,$alt,%args)=@_;

	request_check();
	my($path_web,$path_abs_disk)=_img_src($in,%args);
	my($width,$height)=Image::Size::imgsize($path_abs_disk);
	$alt=~s/<[^>]*>//g;
	$alt=escapeHTML($alt);
	my $content="\"$alt\"";
	# 'do {...} for EXPR' aliases the single value to $_ for the test and the call.
	do { return a_href((_img_src($_))[0],$content,"uri_as_in"=>1) if $_; } for $args{"a_href_img"};
	do { return a_href $_,$content if $_; } for $args{"a_href"};
	return $content;
}

# Build the output for one or more images.
# Accepts either a flat img() argument list or a list of array references, each
# holding one img() argument list; a flat list is wrapped into a single array
# reference first.
# Returns: the accumulated string.
# NOTE(review): the empty '' literals, and the img() call on @$_ placed after the
# loop, suggest that markup was lost and statements were displaced in this text -
# confirm against upstream before relying on it.
sub centerimg {
	my $r="";
	$r.=''."\n";
	@_=( [@_] ) if !ref $_[0];
	for (@_) {
		$r.="\t".''."\n";
	}
	# The two literals below each contain an embedded newline; keep the layout.
	$r.='
'.&{\&img}(@$_).'
'."\n";
	return $r;
}

# Optional: Provide 'text' as 1==@args_img item.
#
# Arguments: $text, then either a single ready-made item or an img() argument list.
# Returns: the heredoc text below with $text first and the image part second.
sub rightimg {
	my($text,@args_img)=@_;

	# FIXME: Workaround bug of 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)':
	#   ie() ? "1*" : "90%" ) ]}" />
	#   ie() ? "0*" : "10%" ) ]}" />
	# causes whole invisible projects in: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.10) Gecko/20050719 Galeon/1.3.21
	# NOTE(review): the heredoc body holds only the two interpolations; any
	# surrounding markup looks lost from this text - confirm against upstream.
	return <<"HERE";
@{[ $text ]}    @{[ 1==@args_img ? $args_img[0] : &{\&img}(@args_img) ]}
HERE
}

# Optional: Provide 'text' as 1==@args_img item.
#
# Arguments: $text, then either a single ready-made item or an img() argument list.
# Returns: the heredoc text below with the image part first and $text second.
sub leftimg {
	my($text,@args_img)=@_;

	# FIXME: Workaround bug of 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)':
	#   ie() ? "0*" : "10%" ) ]}" />
	#   ie() ? "1*" : "90%" ) ]}" />
	# causes whole invisible projects in: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.10) Gecko/20050719 Galeon/1.3.21
	# NOTE(review): the heredoc body holds only the two interpolations; any
	# surrounding markup looks lost from this text - confirm against upstream.
	return <<"HERE";
@{[ 1==@args_img ? $args_img[0] : &{\&img}(@args_img) ]}    @{[ $text ]}
HERE
}

# Read a file and return $F.
# Arguments: $class (unused here), $filename.
# Failures of open/close are reported with cluck (a warning), not raised.
# NOTE(review): no read from F is visible inside the do-block (only an empty
# statement remains); a readline such as '<F>' presumably stood here - confirm.
# NOTE(review): two-argument open on a bareword handle; $filename is interpreted
# for mode characters - verify that callers only pass trusted names.
sub readfile($$) {
	my($class,$filename)=@_;

	local *F;
	open F,$filename or cluck "Cannot open \"$filename\": $!";
	# $/ is undefined locally so that a read would return the whole file at once.
	my $F=do {
		local $/=undef();
		;
	};
	close F or cluck "Cannot close \"$filename\": $!";
	return $F;
}

# Emit the response headers that forbid caching of the current response.
# Arguments: $self (unused here).
sub _no_cache($) {
	my($self)=@_;

	header("Expires"=>HTTP::Date::time2str(1000000000));	# date in the past
	header("Last-Modified"=>HTTP::Date::time2str());	# always modified
	header("Cache-Control"=>join(", ",
			"no-cache",
			"no-store",
			"must-revalidate",
			"max-age=0",
			"pre-check=0",	# MSIE
			"post-check=0",	# MSIE
			));	# HTTP/1.1
	header("Pragma"=>"no-cache");	# HTTP/1.0
	header("Vary"=>"*");	# content may be based on unpredictable sources
}

# Return a flat (key => value) list of $W->{"headers_in"} restricted to @keys.
# Keys absent from "headers_in" are still returned, with whatever the lookup yields.
sub headers_in_filtered(@) {
	my(@keys)=@_;

	return map(($_=>$W->{"headers_in"}{$_}),@keys);
}

# Cache: frozen (method, uri, args) => array ref of request header names
# (written by cache_finish(), read by cache_start()).
our %uri_args_frozen_to_headers_in_keys;
# Cache: frozen (uri_args_frozen, filtered headers_in) => hash ref of response
# headers (written by cache_finish(), read by cache_start()).
our %uri_args_headers_in_frozen_to_headers_out;

# Build the key used for %uri_args_headers_in_frozen_to_headers_out.
# Argument: array ref of request header names.
# Returns: Storable::freeze() of $W->{"uri_args_frozen"} plus the named headers,
# frozen with $Storable::canonical set so that equal data yields equal strings.
sub uri_args_headers_in_frozen_get($) {
	my($headers_in_keys_arrayref)=@_;

	my %uri_args_headers_in_hash=(
			"uri_args_frozen"=>$W->{"uri_args_frozen"},
			"headers_in"=>{ headers_in_filtered(@$headers_in_keys_arrayref) },
			);
	return do { local $Storable::canonical=1; Storable::freeze(\%uri_args_headers_in_hash); };
}

# Output filter installed by cache_start(): passes the data through unchanged
# in chunks of 0x400 and feeds each chunk to $W->{"digest-md5"}.
# Argument: $f, the filter object (read()/print() are called on it).
# Returns: OK.
sub cache_output_filter($) {
	my($f)=@_;

	while ($f->read(my $text,0x400)) {
		cluck "utf-8 untested" if Encode::is_utf8($text);	# Possible here at all?
		$f->print($text);
		$W->{"digest-md5"}->add($text);
	}
	return OK;
}

# Start the caching logic for the current request.
# - If !$W->{"http_safe"}: emit the no-cache headers and return.
# - Otherwise compute $W->{"uri_args_frozen"} and, when both cache tables know the
#   request, re-send the stored response headers and let meets_conditions()
#   decide; any status other than OK is set on the request and the handler exits.
# - In the remaining cases start an MD5 digest, set $W->{"cache_active"} and
#   install cache_output_filter().
sub cache_start() {
	# Used only if: !$W->{"http_safe"}
	# but we would cause on different method(): Appending to the '_done' package list
	# &Wrequire it here even if it will not be later used; to be stable! 
Wrequire 'My::Hash::RestrictTo';

	if (!$W->{"http_safe"}) {
		__PACKAGE__->_no_cache();
		return;
	}

	# Bare block: each 'last' below leaves it and continues after its closing brace.
	{
		my %uri_args_hash=(
				"method"=>$W->{"r"}->method(),
				"uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(),
				"args"=>$W->{"args_orig"},
				);
		$W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze(\%uri_args_hash); };
		last if !(my $headers_in_keys_arrayref=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}});

		# Protection to be sure we are stable:
		$W->{"headers_in"}=My::Hash::RestrictTo->new($W->{"headers_in"},@$headers_in_keys_arrayref);

		$W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get($headers_in_keys_arrayref);
		last if !(my $headers_out_hashref=$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}});
		header(%$headers_out_hashref);
		my $status;
		{
			# &meets_conditions will always deny the attempt if !2xx status().
			# At least ap_read_request() sets: r->status=HTTP_REQUEST_TIME_OUT; /* Until we get a request */
			my $status_old=$W->{"r"}->status();
			$W->{"r"}->status(HTTP_OK);
			# Update httpd's 'r->mtime' as the header "Last-Modified" is just not enough for ap_meets_conditions():
			# &update_mtime() argument is really in _secs_, not in _msecs_ as the docs claim.
			# Be aware '*1000000' would overflow Perl integer anyway.
			# &set_last_modified would also override the "Last-Modified" headers_out!
			# &mtime may exist but somehow does not work.
			$W->{"r"}->update_mtime(HTTP::Date::str2time($headers_out_hashref->{"Last-Modified"}));
			$status=$W->{"r"}->meets_conditions();
			# Restore the status that was temporarily forced to HTTP_OK.
			$W->{"r"}->status($status_old);
		}
		last if OK==$status;
		$W->{"r"}->status($status);
		$W->{"header_only"}=1;	# Inhibit &footer output.
		exit;
		die "NOTREACHED";
	}

	$W->{"digest-md5"}=Digest::MD5->new();
	$W->{"cache_active"}=1;
	$W->{"r"}->add_output_filter(\&cache_output_filter);
}

# Compute the "Last-Modified" value for the current response.
# Registers the ".pm" path of every package listed in %packages_used_array for
# $W->{"__PACKAGE__"}, then takes the newest mtime over all keys of
# $W->{"path_abs_disk_register"}.
# Returns: HTTP::Date::time2str() of that mtime (a warning is clucked when none
# was found).
sub cache_finish_last_modified() {
	# The message literal below contains an embedded newline; keep the layout.
	cluck "Not yet done now? 
W __PACKAGE__: ".$W->{"__PACKAGE__"}
			if !$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"};
	for my $package_orig (@{$packages_used_array{$W->{"__PACKAGE__"}}}) {
		# Turn "A::B" into "A/B.pm" and register it as "/A/B.pm".
		local $_=$package_orig.".pm";
		s{::}{/}g;
		path_abs_disk "/$_","register"=>1;
	}
	my $mtime_newest;
	for my $path_abs_disk (keys(%{$W->{"path_abs_disk_register"}})) {
		# stat field 9 is the modification time.
		my $mtime=(stat $path_abs_disk)[9];
		do { cluck "No mtime for: $path_abs_disk"; next; } if !$mtime;
		$mtime_newest=$mtime if !$mtime_newest || $mtime_newest<$mtime;
	}
	cluck "No mtime_newest found for the current W __PACKAGE__: ".$W->{"__PACKAGE__"} if !$mtime_newest;
	return HTTP::Date::time2str($mtime_newest);
}

# Finish the caching logic after the response was produced.
# Does nothing unless cache_start() set $W->{"cache_active"} and the connection
# was not aborted. Otherwise it records which request headers were accessed
# (per "uri_args_frozen") and which response headers were generated (per
# "uri_args_headers_in_frozen"), warning when a stored entry differs.
sub cache_finish() {
	# Do not: return if !$W->{"uri_args_frozen"};
	# as we may have just given 304 and 'exit;' without starting the caching.
	return if !$W->{"cache_active"};

	# Headers may not be complete in this case; not sure, just trying.
	return if $W->{"r"}->connection()->aborted();

	# Fill-in/check: %uri_args_frozen_to_headers_in_keys
	# (a reference to the hash slot, so the assignment below updates the table)
	my $headers_in_keys_stored_arrayref_ref=\$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}};
	my @headers_in_keys=tied(%{$W->{"headers_in_RecordKeys"}})->accessed();
	if (!$$headers_in_keys_stored_arrayref_ref
			|| !Data::Compare::Compare(\@headers_in_keys,$$headers_in_keys_stored_arrayref_ref)) {
		cluck "Non-matching generated 'headers_in_keys' per 'uri_args_frozen' key:\n"
						.Dumper(\@headers_in_keys,$$headers_in_keys_stored_arrayref_ref)
				if $$headers_in_keys_stored_arrayref_ref;
		# Build or possibly prevent such further warn dupes:
		$$headers_in_keys_stored_arrayref_ref=\@headers_in_keys;
		# Build or regenerate as obsoleted now:
		$W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get(\@headers_in_keys);
	}

	# Prepare 'headers_out' for the future reuse:
	my %headers_out;
	# Do not: $W->{"digest-md5"}->b64digest();
	# as it will not provide the trailing filling '='s.
	# RFC 1864 is not clear if they should be there but its sample provides them. 
	# Do not try to provide canonical "\r\n" form of newlines as is said by RFC 1864.
	# RFC 2068 (HTTP/1.1) section 14.16 says the newlines should NOT be converted for HTTP.
	# ',""' to avoid breaking the headers by its default "\n".
	$headers_out{"Content-MD5"}=MIME::Base64::encode_base64($W->{"digest-md5"}->digest(),"");
	# In fact we could also use MD5 for ETag as if we know ETag we also know MD5.
	# But this way we do not need to calculate MD5 and we still can provide such ETag. So.
	# $W->{"r"}->set_etag() ?
	$headers_out{"ETag"}='"'.Digest::MD5::md5_base64($W->{"uri_args_headers_in_frozen"}).'"';
	# $W->{"r"}->set_content_length() ?
	$headers_out{"Content-Length"}=$W->{"r"}->bytes_sent();
	# "Vary" lists the accessed request headers; any key starting with "_"
	# collapses the whole list to "*".
	my %Vary=map(($_=>1),(@headers_in_keys));
	for (keys(%Vary)) {
		next if !/^_/;
		$Vary{"*"}=1;
		delete $Vary{$_};
	}
	%Vary=("*"=>1) if $Vary{"*"};
	$headers_out{"Vary"}=join(", ",sort keys(%Vary)) if keys(%Vary);
	# $W->{"r"}->set_last_modified() ?
	$headers_out{"Last-Modified"}=cache_finish_last_modified();

	# Fill-in/check: %uri_args_headers_in_frozen_to_headers_out
	my $headers_out_stored_hashref_ref=\$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}};
	if (!$$headers_out_stored_hashref_ref
			|| !Data::Compare::Compare(\%headers_out,$$headers_out_stored_hashref_ref)) {
		cluck "Non-matching generated 'headers_out' per 'uri_args_headers_in_frozen' key:\n"
						.Dumper(\%headers_out,$$headers_out_stored_hashref_ref)
				if $$headers_out_stored_hashref_ref;
		# Build or possibly prevent such further warn dupes:
		$$headers_out_stored_hashref_ref=\%headers_out;
	}

	###print STDERR Dumper(\%uri_args_frozen_to_headers_in_keys,\%uri_args_headers_in_frozen_to_headers_out);
}

# Send the response headers and the opening part of the page.
# Argument: $class, used for Negotiate_choose().
# Steps visible here: style/script/language headers (unless "header_only"),
# "Cache-Control: public", charset default, content-type negotiation when
# $W->{"content_type"} is undefined, content_type() on the request,
# cache_start(), and then - unless "heading_done" or "header_only" stop it
# earlier - the document head and the opening of the body through Wprint.
# NOTE(review): several Wprint statements below are visibly damaged (empty ''
# literals, unbalanced quotes, an empty heredoc); their markup looks lost from
# this text. They are kept verbatim - restore them from upstream, do not guess.
sub heading() {
	my($class)=@_;

	if (!$W->{"header_only"}) {
		header("Content-Style-Type"=>"text/css");
		# Do not: text/javascript
		# as it does not look as registered, at least according to: MIME::Types $VERSION 1.15
		# "application/javascript" so far standardized till 2005-12-08 by:
		# http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt
		header("Content-Script-Type"=>"application/javascript");
		# $W->{"r"}->content_languages() ?
		do { header("Content-Language"=>$_) if $_; } for $W->{"language"};
	}
	# TODO: Support also: private
	header("Cache-Control"=>"public");	# HTTP/1.1

	# Use $W->{"charset"}=0 to disable charset.
	$W->{"charset"}="us-ascii"
			if !defined $W->{"charset"} && (!defined($W->{"content_type"}) || $W->{"content_type"});

	# Workaround bug
	#   https://bugzilla.mozilla.org/show_bug.cgi?id=120556
	# of at least
	#   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
	# http://validator.w3.org/ does not send ANY "Accept" headers!
	if (!defined $W->{"content_type"}) {
		# Be _stable_ for "headers_in".
		# (both headers are read unconditionally, before any decision is made)
		my $accept=$W->{"headers_in"}{"Accept"};
		my $user_agent=$W->{"headers_in"}{"User-Agent"}||"";
		$W->{"content_type"}="application/xhtml+xml" if !$accept && $user_agent=~m{^W3C_Validator/}i;
		# Be _stable_:
		# (the negotiation runs even when the line above already chose a type;
		# its result is only used if "content_type" is still undefined)
		my $negotiated=$class->Negotiate_choose([
				# Put the fallback variant as the first one.
				# Rate both variants the same to prefer "text/html" for undecided clients.
				# At least
				#   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
				# prefers "application/xhtml+xml" over "text/html" itself:
				#   text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
				negotiate_variant(
						"id"=>"text/html",
						"content-type"=>"text/html",
						"qs"=>0.6,
						(!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
						"lang"=>$W->{"language"},
						),
				negotiate_variant(
						"id"=>"application/xhtml+xml",
						"content-type"=>"application/xhtml+xml",
						"qs"=>0.6,
						(!$W->{"charset"} ? () : "charset"=>$W->{"charset"}),
						"lang"=>$W->{"language"},
						),
				# application/xml ?
				# text/xml ?
				]);
		$W->{"content_type"}=$negotiated if !defined $W->{"content_type"};
	}

	# mod_perl doc: If you set this header via the headers_out table directly, it
	# will be ignored by Apache. So do not do that. 
	my $type;
	if ($W->{"content_type"}) {
		$type=MIME::Types->new()->type($W->{"content_type"});
		cluck "MIME::Types type '".$W->{"content_type"}."' not known" if !$type;
	}
	# Warn when "charset is set" and "type->isAscii()" disagree.
	cluck "charset='".$W->{"charset"}."' does not match content-type='".$W->{"content_type"}."'"
			if ($W->{"charset"} ? 1 : 0) != (!$type ? 0 : $type->isAscii());
	$W->{"r"}->content_type($W->{"content_type"}.(!$W->{"charset"} ? "" : "; charset=".$W->{"charset"}))
			if $W->{"content_type"};

	cache_start();

	# We still can append headers before we put out some text.
	# FIXME: It is not clean to still append them without overwriting.
	return if $W->{"heading_done"};

	# NOTE(review): damaged statement - the quotes are unbalanced in this text; kept verbatim on one line.
	Wprint '{"charset"}.'"?>'."\n" if (!$W->{"header_only"} || $W->{"header_only"} eq "xml") && (0 || $W->{"content_type"}=~m{^application/\w+[+]xml$} || $W->{"content_type"} eq "text/vnd.wap.wml");
	return if $W->{"header_only"};
	# Split 'heading_done' for the proper handling of: /project/Rel.pm
	$W->{"heading_done"}++;

	# NOTE(review): the empty '' literals below presumably carried markup - confirm.
	Wprint ''."\n";
	Wprint ''."\n";
	my $title=$W->{"title_prefix"}.join("",map({ ': '.$_; } ($W->{"title"} || ())));
	# Do not: cluck if $title=~/[<>]/;
	# as it is not solved just by: &a_href_inhibit
	# as sometimes titles use also: ...
	$title=~s#<[^>]*>##g;
	Wprint "";
	Wprint "$title\n";

	if ($W->{"have_css"}) {
		# Everything can get overridden later.
		for my $css ("/My/Web.css",@{$W->{"css_push"}}) {
			# NOTE(review): the heredoc body is empty in this text although $css is
			# not used otherwise; the line referencing it looks lost - confirm.
			Wprint <<"HERE";
HERE
		}
		if ($W->{"css_inherit"}) {
			# NOTE(review): this stores a plain string, while the loop right below
			# dereferences $W->{"js_push"} as an array reference - looks
			# inconsistent; confirm against upstream.
			$W->{"js_push"}="/My/css_inherit.js";
		}
	}
	for my $js (@{$W->{"js_push"}}) {
		# NOTE(review): the loop body looks lost from this text; only the start of
		# a comment and an orphaned heredoc terminator remain on the next line.
		# Do not: HERE
	}
	Wprint ''."\n";
	for my $head (@{$W->{"head_push"}}) {
		# A code reference is called and replaced by its result before printing.
		$head=&{$head}() if "CODE" eq ref $head;
		Wprint $head;
	}
	for my $type (qw(prev next index contents start up)) {
		do { Wprint ''."\n" if $_; } for ($W->{"rel_$type"});
	}
	# NOTE(review): damaged statement - the quotes are unbalanced in this text; kept verbatim on one line.
	Wprint "{"browser"}->netscape() && (!$W->{"browser"}->major() || $W->{"browser"}->major()<=4);
	Wprint $W->{"body_attr"};
	Wprint ">\n";

	do { Wprint $_ if $_; } for $W->{"heading"};
}

# Clear the marker that was set in the BEGIN block near the top of the file.
BEGIN {
	delete $W->{"__My::Web_init"};
}

1;