Fix possible RFC 1864 "Content-MD5" violation - trailing '='s.
[MyWeb.git] / Web.pm
1 # $Id$
2 # Common functions for HTML/XHTML output generation
3 # Copyright (C) 2003-2005 Jan Kratochvil <project-www.jankratochvil.net@jankratochvil.net>
4
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; exactly version 2 of June 1991 is required
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18
19 package My::Web;
20 require 5.6.0;  # at least 'use warnings;' but we need some 5.6.0+ modules anyway
21 our $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };
22 our $CVS_ID=q$Id$;
23 use strict;
24 use warnings;
25
26 use Exporter;
27 sub Wrequire($);
28 sub Wuse($@);
29 our $W;
30 our @EXPORT=qw(
31                 &Wrequire &Wuse
32                 &path_web &path_abs_disk
33                 &uri_escaped
34                 &a_href &a_href_cc
35                 &vskip
36                 &img &centerimg &rightimg
37                 $W
38                 &input_hidden_persistents
39                 &escapeHTML
40                 );
41 our @ISA=qw(Tie::Handle Exporter);
42
43 my %packages_used_hash; # $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
44 my %packages_used_array;
45
BEGIN
{
    use Carp qw(cluck confess);
    # Set at compile time; &init later replaces $W with the real per-request object.
    $W->{"__My::Web_init"}=1;

    # Wrequire($file): load a module at run time and record it as a dependency.
    # $file may be a path ("My/Hash.pm") or a class name ("My::Hash").
    # For every package found on the current call stack (and for My::Web itself)
    # both the loaded class and My::Web are appended - once each - to that
    # package's list in %packages_used_array.
    # Confesses when the module cannot be loaded; returns 1 otherwise.
    sub Wrequire ($)
    {
        my($file)=@_;

#       print STDERR "Wrequire $file\n";
        # Normalize to the class name first, then derive the file name from it.
        $file=~s#/#::#g;
        $file=~s/[.]pm$//;
        my $class=$file;
        $file=~s#::#/#g;
        $file.=".pm";

        my %callers;
        for (my $depth=0;defined caller($depth);$depth++) {
            $callers{caller($depth)}=1;
        }
        # Do not: $callers{__PACKAGE__}
        # as a bareword hash subscript would be taken as the literal string.
        my $selfpkg=__PACKAGE__;
        $callers{$selfpkg}=1;

        for my $target ($class,__PACKAGE__) {
            for my $caller (keys(%callers)) {
                next if $caller eq $target;
                next if $packages_used_hash{$caller}{$target}++;
                push @{$packages_used_array{$caller}},$target;
            }
        }
        eval { CORE::require "$file"; } or confess $@;
        1;      # Otherwise 'require' would already fail above.
    }

    # Wuse($file,@list): run-time equivalent of 'use $file @list;'.
    # Loads the module through &Wrequire and then imports @list into the caller.
    # Accepts the same path / class name forms as &Wrequire. Returns 1.
    sub Wuse ($@)
    {
        my($file,@list)=@_;

#       print STDERR "Wuse $file\n";
        Wrequire $file;
        # ->import must be called on the class name even if we were given "My/Hash.pm".
        my $class=$file;
        $class=~s#/#::#g;
        $class=~s/[.]pm$//;
        # One more stack frame (ours) between the importing package and Exporter.
        local $Exporter::ExportLevel=$Exporter::ExportLevel+1;
        $class->import(@list);
        1;
    }

    # Invoked by 'use My::Web;' (and by subclasses not defining their own import):
    # registers the class as a dependency through &Wrequire and then lets
    # the parent class import do the actual exporting.
    sub import
    {
        my($class,@rest)=@_;

        local $Exporter::ExportLevel=$Exporter::ExportLevel+1;
        Wrequire("$class");
        return $class->SUPER::import(@rest);
    }
}
98
99 use WebConfig;  # see also below: Wuse 'WebConfig';
100 require CGI;
101 require Image::Size;    # for &imgsize
102 use File::Basename;     # &basename
103 use Carp qw(cluck confess);
104 use URI::Escape;
105 require HTTP::BrowserDetect;
106 require HTTP::Negotiate;
107 our $have_Geo_IP; BEGIN { $have_Geo_IP=eval { require Geo::IP; 1; }; }
108 # Do not: use ModPerl::Util qw(exit);
109 # to prevent in mod_perl2: "exit" is not exported by the ModPerl::Util module
110 # I do not know why.
111 use POSIX qw(strftime);
112 use Tie::Handle;
113 use Apache2::Const qw(HTTP_MOVED_TEMPORARILY OK HTTP_OK);
114 use URI;
115 use URI::QueryParam;
116 use Cwd;
117 require HTTP::Date;
118 require Storable;
119 require Digest::MD5;
120 require Data::Compare;
121 use Data::Dumper;
122 require Encode;
123 use Apache2::Filter;
124 use Apache2::Connection;
125 require MIME::Base64;
126
127
128 #our $W;
129
# Cleanup handler registered by &init through push_handlers("PerlCleanupHandler").
# Marks the toplevel package as done, calls &cache_finish and drops the global $W.
# Returns: OK
sub cleanup($)
{
    my($apache_request)=@_;     # passed by the caller, not used here

    my $toplevel_package=$W->{"__PACKAGE__"};
    $packages_used_hash{$toplevel_package}{"_done"}=1;
    cache_finish();
    # Sanity protection: nothing may reuse the finished request's state.
    $W=undef;
    return OK;
}
140
# Confess unless a current Apache request object is available.
# Called at the start of functions that need a live request.
sub request_check(;$)
{
    my($self)=@_;

    # The lookup is wrapped in &eval to prevent its own failure:
    #   Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
    # CGI requires valid "r": check it beforehand here.
    my $request=eval { Apache2::RequestUtil->request(); };
    # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
    # as it is valid at least while preparing arguments to call: &project::Lib::init
    confess "Calling sensitive dynamic code from a static code" if !$request;
}
151
# Per-request initialization: builds the global $W from %WebConfig and %args,
# fills in defaults, hooks up the Apache request ("r"), parses the request
# arguments and headers and probes the client capabilities.
# Input: $class to bless $W into; %args override the %WebConfig settings.
# Returns: $W blessed into $class.
sub init ($%)
{
    my($class,%args)=@_;

    print STDERR "$class->init ".Apache2::RequestUtil->request()->unparsed_uri()."\n";

    # We need to track package dependencies, so we need to call it from &init.
    # We cannot do it in BEGIN { } block
    # as it would not be tracked for each of the toplevel users later.
    Wuse 'WebConfig';
    Wrequire 'My::Hash';

    $W=My::Hash->new({
        "__PACKAGE__"=>scalar(caller()),
        %WebConfig,
        %args,  # override %WebConfig settings
        },"My::Hash::Sub","My::Hash::Push");

    # {"__PACKAGE__"} is mandatory for mod_perl-2.0;
    # $Apache2::Registry::curstash is no longer supported.
    cluck "No __PACKAGE__" if !$W->{"__PACKAGE__"};

    # See: &escapeHTML
    my $charset_found=CGI::charset();
    cluck "charset==$charset_found, expecting ISO-8859-1" if $charset_found ne "ISO-8859-1";
    CGI::charset("utf-8");

    # Defaults for the settings given neither by %WebConfig nor by %args.
    my @defaults=(
        ["detect_ent"     => 0],
        ["detect_js"      => 0],
        ["have_css"       => 1],        # AFAIK it does not hurt anyone.
        ["css_inherit"    => 0],
        ["footer"         => 1],
        ["footer_delimit" => 1],
        ["footer_ids"     => 1],
        ["indexme"        => 1],
        ["head"           => ""],
        ["body_attr"      => ""],
        ["language"       => "en-US"],
        );
    for my $default (@defaults) {
        my($key,$value)=@$default;
        $W->{$key}=$value if !defined $W->{$key};
    }

    # No footer means no footer parts; no footer parts mean no footer at all.
    my $footer_any=0;
    for my $part (qw(footer_ids)) {
        $W->{$part}=0 if !$W->{"footer"};
        $footer_any=1 if $W->{$part};
    }
    $W->{"footer"}=0 if !$footer_any;
    $W->{"footer_delimit"}=0 if !$W->{"footer"};

    $W->{"r"}=Apache2::RequestUtil->request();

    $W->{"r"}->push_handlers("PerlCleanupHandler"=>\&cleanup);

    $W->{"web_hostname"}||=$W->{"r"}->hostname();

    tie *STDOUT,$W->{"r"};
    select *STDOUT;
    $|=1;

    $W->{"QUERY_STRING"}=$W->{"r"}->args() || "";
    if ($W->{"detect_ent"}) {
        # "&amp;have_ent" arriving literally means the client did not decode the entity.
        if ($W->{"QUERY_STRING"}=~/[&]amp;have_ent/) {
            $W->{"have_ent"}=0;
        }
        elsif ($W->{"QUERY_STRING"}=~/[&]have_ent/) {
            $W->{"have_ent"}=1;
        }
        else {
            delete $W->{"have_ent"};
        }
        if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") {
            # Not known yet: let the client reload the page with the probe argument appended.
            my $refresh_url="http://".$W->{"web_hostname"}."/";
            $refresh_url.=($W->{"r"}->uri()=~m#^/*(.*)$#)[0];
            $refresh_url.="?".($W->{"QUERY_STRING"} || "detect_ent_glue=1").'&have_ent=detect';
            $W->{"head"}.='<meta http-equiv="Refresh" content="0; URL='
                    .escapeHTML($refresh_url)
                    .'" />'."\n";
        }
    }
    $W->{"QUERY_STRING"}=~s/([&])amp;/$1/g;
    $W->{"r"}->args($W->{"QUERY_STRING"});
    # Workaround: &CGI::Vars behaves weird if strings passed both as POST data and in: $QUERY_STRING
    if ($W->{"r"}->method() eq "POST") {
        $W->{"r"}->args("");
        delete $ENV{"QUERY_STRING"};
    }
    # Do not: $W->{"r"}->args()
    # as it parses only QUERY_STRING (not POST data).
    $W->{"args_orig_array"}=[ CGI->new($W->{"r"})->Vars() ];
    $W->{"args"}={ @{$W->{"args_orig_array"}} };
    # Multiple values of one name come "\x00"-joined: turn them into array references.
    for my $name (keys(%{$W->{"args"}})) {
        my @vals=split /\x00/,$W->{"args"}{$name};
        $W->{"args"}{$name}=[@vals] if @vals>1;
    }

    # Request headers, extended by the pseudo-header "_remote_ip" and made read-only.
    $W->{"headers_in"}=$W->{"r"}->headers_in();
    Wrequire 'My::Hash::Merge';
    $W->{"headers_in"}=My::Hash::Merge->new(
            $W->{"headers_in"},
            My::Hash::Sub->new({
                "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); },
                }),
            );
    Wrequire 'My::Hash::Readonly';
    $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"});

    my $method=$W->{"r"}->method();
    if ($method eq "GET" || $method eq "HEAD") {
        # Do not: # Extend the current ETag system instead if you would need it:
        #         cluck "Explicitely NOT HTTP-Safe for method \"".$W->{"r"}->method()."\"?!?"
        #                       if defined($W->{"http_safe"}) && !$W->{"http_safe"};
        # as sometimes it just does not make sense to cache it.
        $W->{"http_safe"}=1 if !defined $W->{"http_safe"};
    }
    else {
        cluck "Undefined HTTP-Safe-ty for method \"".$method."\"!"
                if !defined $W->{"http_safe"};
        $W->{"http_safe"}=0 if !defined $W->{"http_safe"};
    }
    if ($W->{"http_safe"}) {
        Wrequire 'My::Hash::RecordKeys';
        $W->{"headers_in_RecordKeys"}=My::Hash::RecordKeys->new($W->{"headers_in"});
        $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
    }

    $W->{"browser"}=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});

    if (!defined $W->{"have_style"}) {
        my $browser=$W->{"browser"};
        my $styles_ok=(!$browser->netscape() || ($browser->major() && $browser->major()>4));
        $W->{"have_style"}=($styles_ok ? 1 : 0);
    }

    $W->{"have_js"}=($W->{"args"}{"have_js"} ? 1 : 0);
    if ($W->{"detect_js"} && !$W->{"have_js"}) {
        $W->{"head"}.='<script type="text/javascript" src="'.uri_escaped(path_web('/My/HaveJS.pm')).'"></script>'."\n";
    }

    my $args_check=$W->{"args_check"};
    _args_check(%$args_check) if $args_check;

    return bless $W,$class;
}
285
# Send $text to the client through the Apache request object.
# Be aware other parts of code (non-My::Web) will NOT use this function!
# $args{"undef"}=1 to silently permit undefined $text (nothing is sent then).
# Do not: Wprint $W->{"heading"},"undef"=>1;
# as we would need to undef() it to turn it off and it would get defaulted in such case.
# Do not: exists $W->{"heading"}
# as we use a lot of 'for $W->{"heading"}' which instantiates it with the value: undef()
sub Wprint($%)
{
    my($text,%args)=@_;

    my $undef_permitted=delete $args{"undef"};
    cluck "undef Wprint" if !defined $text && !$undef_permitted;
    # "undef" is the only option we know.
    if (keys(%args)) {
        cluck join(" ","Invalid arguments:",keys(%args));
    }
    return if !defined $text;
    cluck "utf-8 untested" if Encode::is_utf8($text);
    $W->{"r"}->puts($text);
}
302
# HTML-escape $text through &CGI::escapeHTML with the CGI charset forced to "utf-8"
# to prevent &CGI::escapeHTML breaking utf-8 strings like: \xC4\x9B eq \x{11B}
sub escapeHTML($)
{
    my($text)=@_;

    our $init;
    if ($ENV{"MOD_PERL"}) {
        # Prevent case if we run under mod_perl but still just initializing:
        request_check();
    }
    elsif ($init++) {
        # Generally we are initialized from &init but we may be used without it without mod_perl
        # and in such case check the change on all non-first invocations.
        my $charset_found=CGI::charset();
        cluck "charset==$charset_found" if $charset_found ne "utf-8";
    }
    CGI::charset("utf-8");

    return CGI::escapeHTML($text);
}
320
# Returns the directory this package was loaded from, with the package's own
# relative path stripped, like: /home/user/www/webdir
# The result is computed from %INC once and cached.
# Returns nothing (after a cluck) if it cannot be determined.
sub dir_top_abs_disk()
{
    our $dir_top_abs_disk;
    return $dir_top_abs_disk if $dir_top_abs_disk;

    # "My::Web" -> "My/Web.pm", the key used by %INC.
    (my $selfpkg_relpath=__PACKAGE__)=~s{::}{/}g;
    $selfpkg_relpath.=".pm";

    my $selfpkg_abspath=$INC{$selfpkg_relpath};
    if (!$selfpkg_abspath) {
        cluck "Unable to find self package $selfpkg_relpath";
        return;
    }
    unless ($selfpkg_abspath=~s{/*\Q$selfpkg_relpath\E$}{}) {
        cluck "Unable to strip myself \"$selfpkg_relpath\" from the abspath: $selfpkg_abspath";
        return;
    }
    cluck "INC{myself} is relative?: $selfpkg_abspath" if $selfpkg_abspath!~m{^/};

    $dir_top_abs_disk=$selfpkg_abspath;
    return $dir_top_abs_disk;
}
342
# Returns the absolute URI object of the current request;
# built on the first call and cached in $W->{"unparsed_uri"}.
sub unparsed_uri()
{
    request_check();
    return $W->{"unparsed_uri"} if $W->{"unparsed_uri"};

    # Do not: $W->{"r"}
    # as we may be called before &init from: &My::Project::init
    my $r=Apache2::RequestUtil->request();
    cluck 'Calling &unparsed_uri from a static code, going to fail' if !$r;
    my $uri_string=$r->unparsed_uri();
    cluck "Valid 'r' missing unparsed_uri()?" if !$uri_string;
    my $base="http://".$W->{"web_hostname"}."/";
    $W->{"unparsed_uri"}=URI->new_abs($uri_string,$base);

    return $W->{"unparsed_uri"};
}
357
# Resolve $in (string or URI object) against the current request URI.
# Returns: absolute URI object in its canonical form.
sub in_to_uri_abs($)
{
    my($in)=@_;

    # Check of the local references prefix; objects are skipped
    # as they may have been already processed and thus legally relativized.
    # FIXME data: Currently disabled, all the data are too violating such rule.
    if (0 && !ref $in) {
        my $uri_check=URI->new($in);
        cluck "Use './' or '/' prefix for all the local references: $in"
                unless $uri_check->scheme() || $in=~m{^\Q./\E} || $in=~m{^/};
    }
    my $uri_abs=URI->new_abs($in,unparsed_uri());
    return $uri_abs->canonical();
}
373
# Convert $in to an URI usable in the generated page. Local URIs additionally
# get the values of the $W->{"args_persistent"} arguments of the current request.
# $args{"uri_as_in"}=1 to permit passing URI objects as: $in
# $args{"abs"}=1 (or the request argument "Wabs") to get an absolute URI;
# otherwise the URI is made relative to the current request URI.
sub path_web($%)
{
    my($in,%args)=@_;

    cluck if !$args{"uri_as_in"} && ref $in;
    my $uri=in_to_uri_abs($in);
    if (uri_is_local($uri)) {
        my @persistent;
        for my $key (keys(%{$W->{"args_persistent"}})) {
            my $val=$W->{"args"}{$key};
            push @persistent,$key=>$val if defined $val;
        }
        # Prefer the $uri values over "args_persistent" values: they come later.
        $uri->query_form_hash({
                @persistent,
                %{$uri->query_form_hash()},
                });
    }
    if ($W->{"args"}{"Wabs"} || $args{"abs"}) {
        return $uri->abs(unparsed_uri());
    }
    return $uri->rel(unparsed_uri());
}
396
# Remember the disk pathname in: $W->{"path_abs_disk_register"}
sub path_abs_disk_register($)
{
    my $pathname=$_[0];

    $W->{"path_abs_disk_register"}{$pathname}=1;
}
403
# Map a local URI $in to the absolute pathname on the disk: &dir_top_abs_disk + URI path
# $args{"uri_as_in"}=1 to permit passing URI objects as: $in
# $args{"register"}=0 to skip &path_abs_disk_register of the result (done by default).
sub path_abs_disk($%)
{
    my($in,%args)=@_;

    cluck if !$args{"uri_as_in"} && ref $in;
    my $uri=in_to_uri_abs($in);
    cluck if !uri_is_local($uri);
    my $path=$uri->path();
    if ($path!~m{^/}) {
        cluck "URI compatibility: ->path() not w/leading slash of URI \"$uri\"; path: $path";
    }
    my $pathname=dir_top_abs_disk().$path;

    my $register=$args{"register"};
    $register=1 if !defined $register;
    path_abs_disk_register($pathname) if $register;

    return $pathname;
}
418
sub fatal (;$);

# Validate the request arguments in $W->{"args"} by the template: name=>regex
# A regex wrapped in an array reference declares a multivar; the argument is
# then always stored as an array reference. Missing arguments are set to "".
# An empty regex accepts anything. Any violation ends up in: &fatal
sub _args_check (%)
{
    my(%tmpl)=@_;

    for my $name (keys(%tmpl)) {
        my $regex=$tmpl{$name};
        my $multivar_expected=ref $regex;
        my $name_html="Parameter <span class=\"quote\">".escapeHTML($name)."</span>";

        $W->{"args"}{$name}="" if !defined $W->{"args"}{$name};
        if ($multivar_expected && !ref $W->{"args"}{$name}) {
            $W->{"args"}{$name}=[ $W->{"args"}{$name} ];
        }
        if (!$multivar_expected && ref $W->{"args"}{$name}) {
            fatal("$name_html passed as multivar although singlevar expected");
        }
        $regex=$regex->[0] if $multivar_expected;

        for my $val (!ref $W->{"args"}{$name} ? $W->{"args"}{$name} : @{$W->{"args"}{$name}}) {
            $val="" if !defined $val;
            next if $regex eq "";
            next if $val=~/$regex/;
            fatal("$name_html <span class=\"quote\">".escapeHTML($val)."</span>"
                    ." does not match the required regex <span class=\"quote\">".escapeHTML($regex)."</span> ");
        }
    }
}
440
# Returns a vertical spacer: a paragraph holding just a non-breaking space.
# $height is an optional CSS length (like "3ex") put to the paragraph style.
sub vskip (;$)
{
    my($height)=@_;

    my $style="";
    $style=' style="height: '.$height.';"' if defined $height;
    return '<p'.$style.'>&nbsp;</p>'."\n";
}
447
# Report a fatal error: log it by cluck, print the error to the page
# (sending the page heading first if needed) and finish the page by &footer.
# $msg is inserted to the page as is (callers pass HTML); it defaults to "UNKNOWN".
sub fatal (;$)
{
    my($msg)=@_;

    $msg||="UNKNOWN";
    cluck "FATAL: $msg";

    # Do not send the heading unconditionally:
    # The initial duplicated '<?xml...' crashes Gecko parser.
    # It also prevents warn: Headers already sent
    $W->{"heading_done"}=0 if $W->{"header_only"};
    unless ($W->{"heading_done"}) {
        $W->{"indexme"}=0;      # For the case no heading was sent yet.
        $W->{"header_only"}=0;  # assurance for &heading
        My::Web->heading();
    }

    my $admin_link=a_href("mailto:".$W->{"admin_mail"},"admin of this website");
    Wprint("\n".vskip("3ex")."<hr /><h1 class=\"error\">FATAL ERROR: $msg!</h1>\n"
            ."<p>You can report this problem's details to"
            ." ".$admin_link.".</p>\n");
    footer();
}
470
# Returns $CVS_ID of the given package; undef if the package does not set it.
sub _package_cvs_id($)
{
    my($package)=@_;

    no strict 'refs';
    return ${$package."::CVS_ID"};
}

# Print the page footer and finish the request by: exit 0
# Prints the optional delimiters, the CVS ids of all the packages recorded as
# used by the toplevel package (visibly if $W->{"footer_ids"}, always as
# HTML comments), $W->{"footing"} and the closing tags.
# Never returns; a second entry during one request does: exit 1
sub footer (;$)
{
    exit 1 if $W->{"footer_passed"}++;  # deadlock prevention:

    Wprint(vskip()) if $W->{"footer_delimit"};

    my $footing_delimit=$W->{"footing_delimit"};
    Wprint($footing_delimit) if $footing_delimit;

    Wprint("<hr />\n") if $W->{"footer"};

    # No recorded package must not die on the dereference below.
    my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}} || [];

    if ($W->{"footer_ids"}) {
        Wprint('<p class="cvs-id">');
        my @lines;
        for my $package (@$packages_used) {
            my $cvs_id=_package_cvs_id($package);
            next if !$cvs_id;
            $cvs_id='$'.$cvs_id.'$';    # Eaten by 'q' operator.
            my @cvs_id_split=split / +/,$cvs_id;
            # Only the fully expanded keyword gets its fields turned into links.
            if (@cvs_id_split==8) {
                my $file=$package;
                $file=~s#::#/#g;
                my $ext;
                my @tried;
                my $found=0;
                for my $ext_try (qw(.pm)) {
                    $ext=$ext_try;
                    my $path_abs_disk=path_abs_disk("/$file$ext");
                    push @tried,$path_abs_disk;
                    next if !-r $path_abs_disk;
                    $found=1;
                    last;
                }
                # The last tried extension is used for the links even if nothing was found.
                cluck "Class file $file not found; tried: ".join(" ",@tried) if !$found;
                $file.=$ext;

                # Revision links to the file contents, filename links to its history page.
                my $viewcvs_checkout=$W->{"viewcvs"};
                $viewcvs_checkout=~s#(/viewcvs/)#$1~checkout~/#;
                $cvs_id_split[2]=""
                        .a_href($viewcvs_checkout."$file?rev=".$cvs_id_split[2],
                                $cvs_id_split[2]);
                $cvs_id_split[1]=a_href($W->{"viewcvs"}.$file,
                        ($package!~/^Apache2::/ ? $package : $cvs_id_split[1]));
                $cvs_id_split[5]=$W->{"cvs_id_author_sub"}->($cvs_id_split[5]);
            }
            push @lines,join(" ",@cvs_id_split);
        }
        Wprint(join("<br />\n",@lines));
        Wprint("</p>\n");
    }

    for my $package (@$packages_used) {
        my $cvs_id=_package_cvs_id($package);
        Wprint('<!-- '.$package.' - $'.$cvs_id.'$ -->'."\n") if $cvs_id;
    }

    my $footing=$W->{"footing"};
    Wprint($footing) if $footing;

    Wprint("</body></html>\n");
    exit 0;
}
533
# Set the given response headers: name=>value
# Existing entries are overwritten.
# After the heading has been sent each pair is refused by a cluck.
sub header(%)
{
    my(%pairs)=@_;

    for my $key (keys(%pairs)) {
        if ($W->{"heading_done"}) {
            cluck "Headers already sent";
            next;
        }
        $W->{"r"}->headers_out()->set($key,$pairs{$key});
    }
}
544
# Format a size in bytes for display: plain bytes below 4096,
# kilobytes below 1MB and megabytes otherwise (one decimal place).
# Examples: "100B", "4.0KB", "1.5MB"
sub size_display ($)
{
    my($size)=@_;

    unless ($size<4096) {
        my($divisor,$unit)=($size<1024*1024 ? (1024,"K") : (1024*1024,"M"));
        $size=sprintf "%.1f%s",$size/$divisor,$unit;
    }
    $size.="B";
    return $size;
}
558
# Returns: 1 if $in points to this website, 0 otherwise.
# $in is local if its form relative to the current request URI
# has neither a scheme nor an authority part.
sub uri_is_local($)
{
    my($in)=@_;

    my $uri_rel=in_to_uri_abs($in)->rel(unparsed_uri());
    # Do not: defined $uri_rel->("userinfo"|"host"|"port")();
    # as they fail to be called for schemes not supporting them.
    my $foreign=$uri_rel->scheme() || $uri_rel->authority();
    return ($foreign ? 0 : 1);
}
570
# Returns $uri in the form to be put into an HTML attribute.
# &path_web still may be required for &uri_escaped !
# Local URIs get always HTML-escaped. Foreign URIs are escaped according to
# $W->{"have_ent"}; if it is unknown the link goes through /My/Redirect.pm instead.
sub uri_escaped($)
{
    my($uri)=@_;

    cluck if !ref $uri;
    my $urient=escapeHTML($uri);
    # Nothing to escape, nothing to decide.
    return $uri if $uri eq $urient;
    request_check();
    return $urient if uri_is_local($uri);

    my $have_ent=$W->{"have_ent"};
    if (defined $have_ent) {
        return $urient if $have_ent;    # ent client
        return $uri;                    # non-ent client
    }
    # Unknown client, &escapeHTML should not be needed here:
    my $location=uri_escape($uri->abs(unparsed_uri()));
    return escapeHTML(path_web('/My/Redirect.pm?location='.$location));
}
586
# True while links are suppressed, see: &a_href_inhibit
our $a_href_inhibited;

# Returns the HTML link to $in with the (HTML) $contents.
# $contents defaults to the escaped $in; any links nested in it are stripped.
# $args{"basename"}=1 to default $contents only to the basename of $in.
# $args{"attr"} are additional attributes of the link element.
# $args{"size"}: 1 (default) appends the file size to local links to archive files,
#                2 or more appends it to any local link, 0 never.
# The remaining %args are passed to &path_web and &path_abs_disk.
sub a_href($;$%)
{
    my($in,$contents,%args)=@_;

    request_check();
    $args{"size"}=1 if !defined $args{"size"};
    if (!defined $contents) {
        $contents=$in;
        $contents=File::Basename::basename($contents) if $args{"basename"};
        $contents=escapeHTML($contents);
    }
    # Links cannot be nested.
    for my $tag_re (qr#<a\b[^>]*>#i,qr#</a>#i) {
        $contents=~s/$tag_re//g;
    }
    return $contents if $a_href_inhibited;

    my $path_web=path_web($in,%args);
    my $attr=($args{"attr"} ? " ".$args{"attr"} : "");
    my $r='<a href="'.uri_escaped($path_web).'"'.$attr.'>'.$contents.'</a>';

    # Downloadable?
    my $size=$args{"size"};
    if ($size && uri_is_local($in) && ($size>=2 || $in=~/[.](?:gz|Z|rpm|zip|deb|lha)/)) {
        my $path_abs_disk=path_abs_disk($in,%args);
        cluck "File not readable: $path_abs_disk" if !-r $path_abs_disk;
        $r.='&nbsp;('.size_display((stat($path_abs_disk))[7]).')';
    }
    return $r;
}
617
# Call $sub with @sub_args while &a_href returns just the link contents (no links).
# Input: $self is required!
# Returns: whatever $sub returns.
sub a_href_inhibit($$;@)
{
    my($self,$sub,@sub_args)=@_;

    local $a_href_inhibited=1;
    return $sub->(@sub_args);
}
625
# Returns the hidden <input> elements carrying the current values of all the
# $W->{"args_persistent"} arguments, to be put into a form.
# Arguments not passed in the current request are skipped.
# A multi-valued argument (stored as an array reference in $W->{"args"})
# produces one element for each of its values.
sub input_hidden_persistents()
{
    request_check();
    my $r="";
    for my $key (keys(%{$W->{"args_persistent"}})) {
        my $val=$W->{"args"}{$key};
        next if !defined $val;
        # Do not: escapeHTML($val)
        # for the array reference as it would output: ARRAY(0x...)
        for my $single (ref($val) eq "ARRAY" ? @$val : $val) {
            $r.='<input type="hidden"'
                    .' name="'.escapeHTML($key).'"'
                    .' value="'.escapeHTML($single).'"'
                    .' />'."\n";
        }
    }
    return $r;
}
638
# Redirect the client to $url and finish the request; never returns.
# Input: $self is required!
# $status defaults to: HTTP_MOVED_TEMPORARILY
sub http_moved($$;$)
{
    my($self,$url,$status)=@_;

    my $location=path_web($url,"abs"=>1);
    my $r=$W->{"r"};
    $r->status($status || HTTP_MOVED_TEMPORARILY);
    $r->headers_out()->{"Location"}=$location;
    # Only the headers are sent for the redirect.
    $W->{"header_only"}=1;
    My::Web->heading();
    exit;
    die "NOTREACHED";
}
652
# Returns the IP address of the client: the last item of the
# "X-Forwarded-For" header if present, the connection address otherwise.
sub remote_ip ()
{
    # Do not: PerlModule                 Apache2::ForwardedFor
    #         PerlPostReadRequestHandler Apache2::ForwardedFor
    # As 'Apache2::ForwardedFor' takes the first of $ENV{"HTTP_X_FORWARDED_FOR"}
    # while the contents is '127.0.0.1, 213.220.195.171' if client has its own proxy.
    # We must take the last item ourselves.
    # Be VERY sure you always retrieve all the headers unconditionally to hit: My::Hash::RecordKeys
    my $headers_in=$W->{"headers_in"};
    my $forwarded_last=$headers_in->{"X-Forwarded-For"};
    my $connection_ip=$headers_in->{"_remote_ip"};

    $forwarded_last=~s/^.*,\s*// if $forwarded_last;
    return $forwarded_last || $connection_ip;
}
669
# Returns the link chosen by the country code of the client (by Geo::IP if available).
# $url={"JP"=>"http://specific",...};
# $url={""=>"http://default",...};
# Plain $contents is returned if neither the country nor "" has its entry.
sub a_href_cc($$;%)
{
    my($url,$contents,%args)=@_;

    # A bit ineffective but we must process all the possibilities to get stable 'headers_in' hits!
    my %link_for;
    for my $country (keys(%$url)) {
        $link_for{$country}=a_href($url->{$country},$contents,%args);
    }

    my $cc;
    $cc=Geo::IP->new()->country_code_by_addr(remote_ip()) if $have_Geo_IP;
    $cc||="";
    return $link_for{$cc} || $contents;
}
685
# Run the shell command $cmd by bash(1) under flock(1) held on &dir_top_abs_disk;
# stdout of the command is redirected to stderr. The spawned command is logged.
# Returns: the result of &system
sub make ($)
{
    my($cmd)=@_;

    # FIXME: &alarm, --timeout is now infinite.
    # FIXME: Try to remove bash(1).
    # FIXME: Use: @PATH_FLOCK@
    my @locker=('flock',dir_top_abs_disk());
    my @shell=('bash','-c',$cmd.' >&2');
    my @argv=(@locker,@shell);
    print STDERR join(" ","SPAWN:",@argv)."\n";
    system @argv;
}
697
# Build the missing file $file by running make(1) for it in its directory.
# Input: $self is required! $file should be an absolute pathname.
# Returns: nothing if $file already exists, the result of &make otherwise.
sub make_file($$)
{
    my($self,$file)=@_;

    cluck "Pathname not absolute: $file" if $file!~m{^/};
    return if -f $file;
    # TODO: Somehow quickly check dependencies?
    # The command is interpreted by a shell: quote both the parts so that
    # even an apostrophe inside the pathname cannot break out of the quoting.
    my($dir_quoted,$target_quoted)=map({
            my $part=$_;
            $part=~s/'/'\\''/g;
            "'".$part."'";
            } File::Basename::dirname($file),File::Basename::basename($file));
    return make('make -s --no-print-directory'
            .' -C '.$dir_quoted.' '.$target_quoted);
}
708
# Returns the attributes of an image element for the given pixel dimensions:
# zero border (by CSS if $W->{"have_style"}, by the attribute otherwise), width and height.
sub img_size ($$)
{
    my($width,$height)=@_;

    cluck if !defined $width || !defined $height;
    my $border;
    if ($W->{"have_style"}) {
        $border="style=\"border:0;width:${width}px;height:${height}px\"";
    }
    else {
        $border="border=\"0\"";
    }
    return $border." width=\"$width\" height=\"$height\"";
}
717
# Convert the named variant attributes to the positional array reference
# passed as a variant to &HTTP::Negotiate::choose; missing attributes are undef.
sub negotiate_variant (%)
{
    my(%args)=@_;

    my @variant;
    for my $field ("id","qs","content-type","encoding","charset","lang","size") {
        push @variant,$args{$field};
    }
    return \@variant;
}
725
# Choose the variant best matching the "Accept*" headers of the request.
# Input: $self is required!
# Input: $variants as for &HTTP::Negotiate::choose; put the fallback variant as the first one.
# Returns: always only scalar! It is the id of the chosen variant.
sub Negotiate_choose($$)
{
    my($self,$variants)=@_;

    # Limit these entries to generate proper 'Vary' header.
    my %accept;
    for my $name (qw(
            Accept
            Accept-Charset
            Accept-Encoding
            Accept-Language
            )) {
        $accept{$name}=$W->{"headers_in"}{$name};
    }
    # Do not: $W->{"r"}
    # to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84.
    # Do not: $W->{"r"}->headers_in()
    # to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84.
    # Do not: HTTP::Headers->new($W->{"r"}->headers_in());
    # to prevent empty result or even: Odd number of elements in anonymous hash
    my $headers=HTTP::Headers->new(%accept);
    my $best=HTTP::Negotiate::choose($variants,$headers);
    # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed?
    return $best || $variants->[0][0];
}
751
# Image formats offered for references given without an extension;
# "qs" is the source quality passed to the content negotiation.
my @img_variants=(
        { "id"=>"png","qs"=>0.9,"content-type"=>"image/png" },
        { "id"=>"gif","qs"=>0.7,"content-type"=>"image/gif" },
        );
# Matches paths already carrying a known image extension: "jpeg" or any variant id.
my @img_known_extensions=("jpeg");
push @img_known_extensions,$_->{"id"} for @img_variants;
my $img_variants_re='[.](?:'.join('|',@img_known_extensions).')$';
757
# Resolve the local image reference $in; %args are passed to &path_web and &path_abs_disk.
# A reference with a known image extension is used as is. Otherwise each
# variant of @img_variants gets built by &make_file and the one to use
# is chosen by the content negotiation.
# Returns: ($path_web,$path_abs_disk)
# URI path segments support ignored here. Where is it used? (';' path segment options)
sub _img_src($%)
{
    my($in,%args)=@_;

    cluck if !uri_is_local($in);
    my $uri=in_to_uri_abs($in);
    my $path_abs_disk=path_abs_disk($uri,%args,"uri_as_in"=>1,"register"=>0);

    # Known image extension?
    if ($uri->path()=~m#$img_variants_re#o) {
        return path_web($uri,%args,"uri_as_in"=>1),$path_abs_disk;
    }

    my @nego_variants;
    for my $variant (@img_variants) {
        my $variant_abs_disk=$path_abs_disk.".".$variant->{"id"};
        path_abs_disk_register($variant_abs_disk);
        __PACKAGE__->make_file($variant_abs_disk);
        push @nego_variants,negotiate_variant(
                %$variant,
                "size"=>(stat $variant_abs_disk)[7],
                );
    }
    my $ext=__PACKAGE__->Negotiate_choose(\@nego_variants);

    $uri->path($uri->path().".$ext");
    return path_web($uri,%args,"uri_as_in"=>1),path_abs_disk($uri,%args,"uri_as_in"=>1);
}
786
# Produce the "<img ... />" element for image $in with the alternative text $alt.
# $args{"attr"}: extra attributes text appended verbatim into the element.
# $args{"a_href_img"}: wrap the element by a link to this (another) image.
# $args{"a_href"}: wrap the element by a link to this URI; "a_href_img" takes precedence.
sub img ($$%)
{
my($in,$alt,%args)=@_;

        request_check();
        my($src_web,$src_disk)=_img_src($in,%args);
        my($width,$height)=Image::Size::imgsize($src_disk);

        # The text is used as an attribute value: drop any markup, escape the rest.
        $alt=~s/<[^>]*>//g;
        $alt=escapeHTML($alt);

        my $attr_extra=($args{"attr"} ? " ".$args{"attr"} : "");
        my $content='<img src="'.uri_escaped($src_web).'" alt="'.$alt.'" title="'.$alt.'" '
                        .img_size($width,$height).$attr_extra." />";

        if (my $link_img=$args{"a_href_img"}) {
                return a_href((_img_src($link_img))[0],$content,"uri_as_in"=>1);
                }
        if (my $link=$args{"a_href"}) {
                return a_href($link,$content);
                }
        return $content;
}
803
# Produce a full-width one-row table with each image centered in its own cell.
# Arguments: either a list of array references, each holding the &img arguments
# of one image, or directly the &img arguments of one single image.
sub centerimg
{
        # Plain arguments mean just one image.
        my @images=(ref($_[0]) ? @_ : ( [@_] ));
        # Call &img by its reference to bypass its prototype.
        my @cells=map { "\t".'<td align="center">'.&{\&img}(@$_).'</td>'."\n" } @images;
        return join("",
                        '<table border="0" width="100%"><tr>'."\n",
                        @cells,
                        '</tr></table>'."\n",
                        );
}
815
# Produce a full-width table with $text in its left cell and an image in its right cell.
# $text: HTML content put in verbatim.
# @args_img: arguments passed to &img.
sub rightimg
{
my($text,@args_img)=@_;

        # Call &img by its reference to bypass its prototype.
        my @image=&{\&img}(@args_img);

        # FIXME: Workaround bug of 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)':
        #        <col width="@{[ (!$W->{"browser"}->ie() ? "1*" : "90%" ) ]}" />
        #        <col width="@{[ (!$W->{"browser"}->ie() ? "0*" : "10%" ) ]}" />
        # causes whole invisible projects in: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.10) Gecko/20050719 Galeon/1.3.21
        return <<"HERE";
<table border="0" width="100%">
        <tr>
                <td align="left">
                        $text
                </td>
                <td align="right">
                        @image
                </td>
        </tr>
</table>
HERE
}
837
# Read the whole content of a file. To be called as a class method.
# $class: unused.
# $filename: name of the file to read; it is always opened just for reading,
#            no mode or pipe characters inside the name get interpreted.
# Returns: the file content as a single string; undef if the file cannot be opened
#          (the failure is reported by &cluck).
sub readfile($$)
{
my($class,$filename)=@_;

        # Three-argument open() on a lexical handle: a two-argument open() would
        # execute names like "command |" and truncate files named like ">file".
        my $fh;
        if (!open $fh,"<",$filename) {
                cluck "Cannot open \"$filename\": $!";
                # Do not go on reading and closing the handle which was never opened.
                return undef;
                }
        # Undefined $/ turns on the slurp mode.
        my $content=do { local $/=undef; <$fh>; };
        close $fh or cluck "Cannot close \"$filename\": $!";
        return $content;
}
848
# Set the response headers forbidding any caching of the response.
# $self: unused.
sub _no_cache($)
{
my($self)=@_;

        my @cache_control=(
                        "no-cache",
                        "no-store",
                        "must-revalidate",
                        "max-age=0",
                        "pre-check=0",  # MSIE
                        "post-check=0", # MSIE
                        );

        # Expired long ago:
        header("Expires"=>HTTP::Date::time2str(1000000000));
        # Modified right now, on each request:
        header("Last-Modified"=>HTTP::Date::time2str());
        # HTTP/1.1:
        header("Cache-Control"=>join(", ",@cache_control));
        # HTTP/1.0:
        header("Pragma"=>"no-cache");
        # The content may be based on unpredictable sources:
        header("Vary"=>"*");
}
866
# Pick the given request headers out of $W->{"headers_in"}.
# @keys: names of the headers to pick.
# Returns: flat list of (name=>value) pairs in the order of @keys;
#          the value is undef for a header which is not present.
sub headers_in_filtered(@)
{
my(@keys)=@_;

        my @pairs;
        for my $key (@keys) {
                push @pairs,$key,$W->{"headers_in"}{$key};
                }
        return @pairs;
}
873
# Caching tables filled in by &cache_finish and consulted by &cache_start:
#   frozen URI+args => array reference of the request header names the page reads
#   frozen URI+args+values of such request headers => hash reference of the response headers
our(
                %uri_args_frozen_to_headers_in_keys,
                %uri_args_headers_in_frozen_to_headers_out,
                );
876
# Build the lookup key of %uri_args_headers_in_frozen_to_headers_out for the current request.
# $headers_in_keys_arrayref: names of the request headers to make a part of the key.
# Returns: Storable-frozen string of $W->{"uri_args_frozen"} together with
#          the names and values of the given request headers.
sub uri_args_headers_in_frozen_get($)
{
my($headers_in_keys_arrayref)=@_;

        my %cache_key;
        $cache_key{"uri_args_frozen"}=$W->{"uri_args_frozen"};
        $cache_key{"headers_in"}={ headers_in_filtered(@$headers_in_keys_arrayref) };

        # Sorted hash keys: the same data must always freeze to the same string.
        local $Storable::canonical=1;
        return Storable::freeze(\%cache_key);
}
887
# Output filter installed by &cache_start: pass the response body through unchanged
# while feeding it to the MD5 digest object $W->{"digest-md5"}.
# $f: the filter object providing read() and print().
# Returns: OK
sub cache_output_filter($)
{
my($f)=@_;

        my $chunk;
        while ($f->read($chunk,0x400)) {
                # Possible here at all?
                cluck "utf-8 untested" if Encode::is_utf8($chunk);
                $f->print($chunk);
                $W->{"digest-md5"}->add($chunk);
                }
        return OK;
}
899
# Start the caching support of the current response.
# Unless $W->{"http_safe"} is set, caching gets forbidden by &_no_cache and nothing else is done.
# Otherwise, if the response headers of this request are already known from its former run,
# they get sent again and the conditional request headers get evaluated: if httpd
# decides the response body is not needed, the status is set and the process exits here.
# In all the other cases the MD5 digesting output filter gets installed for &cache_finish.
sub cache_start()
{
        unless ($W->{"http_safe"}) {
                __PACKAGE__->_no_cache();
                return;
                }

        KNOWN_RESPONSE: {
                # &Wrequire it here even if it will not be later used; to be stable!
                Wrequire 'My::Hash::RestrictTo';
                my %uri_args_hash=(
                        "uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(),
                        "args"=>$W->{"args_orig_array"},
                        );
                $W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze(\%uri_args_hash); };

                # First request of this URI+args? Nothing is known yet.
                my $known_keys=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}};
                last KNOWN_RESPONSE if !$known_keys;

                # Protection to be sure we are stable:
                $W->{"headers_in"}=My::Hash::RestrictTo->new($W->{"headers_in"},@$known_keys);

                $W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get($known_keys);
                my $known_headers_out=$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}};
                last KNOWN_RESPONSE if !$known_headers_out;
                header(%$known_headers_out);

                # &meets_conditions will always deny the attempt if !2xx status().
                # At least ap_read_request() sets: r->status=HTTP_REQUEST_TIME_OUT;     /* Until we get a request */
                my $r=$W->{"r"};
                my $status_saved=$r->status();
                $r->status(HTTP_OK);
                # Update httpd's 'r->mtime' as the header "Last-Modified" is just not enough for ap_meets_conditions():
                # &update_mtime() argument is really in _secs_, not in _msecs_ as the docs claim.
                # Be aware '*1000000' would overflow Perl integer anyway.
                # &set_last_modified would also override the "Last-Modified" headers_out!
                # &mtime may exist but somehow does not work.
                $r->update_mtime(HTTP::Date::str2time($known_headers_out->{"Last-Modified"}));
                my $status=$r->meets_conditions();
                $r->status($status_saved);

                # OK means the full response has to be generated.
                last KNOWN_RESPONSE if $status==OK;
                $r->status($status);
                exit 0;
                die "NOTREACHED";
                }

        $W->{"digest-md5"}=Digest::MD5->new();
        $W->{"cache_active"}=1;
        $W->{"r"}->add_output_filter(\&cache_output_filter);
}
948
# Find the "Last-Modified" header value of the current response.
# The ".pm" files of all the packages recorded as used by the package $W->{"__PACKAGE__"}
# get registered first; the newest mtime of all the files registered in
# $W->{"path_abs_disk_register"} is then taken.
# Returns: the HTTP date string; missing mtimes are reported by &cluck.
sub cache_finish_last_modified()
{
        my $package=$W->{"__PACKAGE__"};

        cluck "Not yet done now? W __PACKAGE__: ".$package
                        if !$packages_used_hash{$package}{"_done"};
        for my $package_used (@{$packages_used_array{$package}}) {
                # Package::Name -> /Package/Name.pm
                (my $module_file=$package_used.".pm")=~s{::}{/}g;
                path_abs_disk("/$module_file","register"=>1);
                }

        my $mtime_newest;
        for my $file (keys(%{$W->{"path_abs_disk_register"}})) {
                my $mtime=(stat $file)[9];
                if (!$mtime) {
                        cluck "No mtime for: $file";
                        next;
                        }
                $mtime_newest=$mtime if !$mtime_newest || $mtime_newest<$mtime;
                }
        cluck "No mtime_newest found for the current W __PACKAGE__: ".$package
                        if !$mtime_newest;
        return HTTP::Date::time2str($mtime_newest);
}
968
969
# Finish the caching started by &cache_start after the whole response has been generated:
# remember which request headers the page has read and which response headers
# ("Content-MD5", "ETag", "Content-Length", "Vary", "Last-Modified") belong to it,
# so that &cache_start can answer a future request of the same page from these tables.
# Does nothing unless &cache_start has set $W->{"cache_active"}.
# Any difference from the formerly stored data is reported by &cluck and the new data wins.
sub cache_finish()
{
        # Do not: return if !$W->{"uri_args_frozen"};
        # as we may have just given 304 and 'exit 0;' without starting the caching.
        return if !$W->{"cache_active"};

        # Fill-in/check: %uri_args_frozen_to_headers_in_keys
        my $headers_in_keys_stored_arrayref_ref=\$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}};
        # NOTE(review): presumably the names of the request headers read so far - confirm with the tie class.
        my @headers_in_keys=tied(%{$W->{"headers_in_RecordKeys"}})->accessed();
        if (!$$headers_in_keys_stored_arrayref_ref
                        || !Data::Compare::Compare(\@headers_in_keys,$$headers_in_keys_stored_arrayref_ref)) {
                cluck "Non-matching generated 'headers_in_keys' per 'uri_args_frozen' key:\n"
                                                .Dumper(\@headers_in_keys,$$headers_in_keys_stored_arrayref_ref)
                                if $$headers_in_keys_stored_arrayref_ref;
                # Build or possibly prevent such further warn dupes:
                $$headers_in_keys_stored_arrayref_ref=\@headers_in_keys;
                # Build or regenerate as obsoleted now:
                $W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get(\@headers_in_keys);
                }

        # Prepare 'headers_out' for the future reuse:
        my %headers_out;
        # Do not: $W->{"digest-md5"}->b64digest();
        # as it does not provide the trailing padding '='s; the example of RFC 1864 does have them.
        # Do not omit the empty second argument of &encode_base64: its default line ending "\n"
        # would get appended to the value and it would corrupt the header.
        # FIXME: Should we somehow provide "\r\n" newlines for the text data? Which content is "text"?
        $headers_out{"Content-MD5"}=MIME::Base64::encode_base64($W->{"digest-md5"}->digest(),"");
        # In fact we could also use MD5 for ETag as if we know ETag we also know MD5.
        # But this way we do not need to calculate MD5 and we still can provide such ETag. So.
        # $W->{"r"}->set_etag() ?
        $headers_out{"ETag"}='"'.Digest::MD5::md5_base64($W->{"uri_args_headers_in_frozen"}).'"';
        # $W->{"r"}->set_content_length() ?
        $headers_out{"Content-Length"}=$W->{"r"}->bytes_sent();
        # "Vary" lists the request headers read; any recorded key starting
        # with '_' cannot be named this way and so it degrades the whole list to "*".
        my %Vary=map(($_=>1),(@headers_in_keys));
        for (keys(%Vary)) {
                next if !/^_/;
                $Vary{"*"}=1;
                delete $Vary{$_};
                }
        %Vary=("*"=>1) if $Vary{"*"};
        $headers_out{"Vary"}=join(", ",sort keys(%Vary));
        # $W->{"r"}->set_last_modified() ?
        $headers_out{"Last-Modified"}=cache_finish_last_modified();

        # Fill-in/check: %uri_args_headers_in_frozen_to_headers_out
        my $headers_out_stored_hashref_ref=\$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}};
        if (!$$headers_out_stored_hashref_ref
                        || !Data::Compare::Compare(\%headers_out,$$headers_out_stored_hashref_ref)) {
                cluck "Non-matching generated 'headers_out' per 'uri_args_headers_in_frozen' key:\n"
                                                .Dumper(\%headers_out,$$headers_out_stored_hashref_ref)
                                if $$headers_out_stored_hashref_ref;
                # Build or possibly prevent such further warn dupes:
                $$headers_out_stored_hashref_ref=\%headers_out;
                }

###print STDERR Dumper(\%uri_args_frozen_to_headers_in_keys,\%uri_args_headers_in_frozen_to_headers_out);
}
1027
# Send the response headers and print the opening part of the page. To be called as a class method.
# The content type gets negotiated between "text/html" and "application/xhtml+xml",
# &cache_start is called and then the XML declaration, the DOCTYPE, the whole <head>
# and the opening <body> tag get printed.
# Nothing gets printed if $W->{"header_only"} is set or if the heading has been already
# printed; only the XML declaration gets printed if $W->{"xml_header_only"} is set.
sub heading()
{
my($class)=@_;

        unless ($W->{"header_only"}) {
                header("Content-Style-Type"=>"text/css");
                header("Content-Script-Type"=>"text/javascript");
                # $W->{"r"}->content_languages() ?
                for my $language ($W->{"language"}) {
                        header("Content-Language"=>$language) if $language;
                        }
                }
        # TODO: Support also: private
        header("Cache-Control"=>"public");      # HTTP/1.1

        # $ENV{"CLIENT_CHARSET"} ignored (mod_czech support dropped!)
        my $client_charset=$W->{"force_charset"} || "us-ascii";

        # Workaround bug
        #   https://bugzilla.mozilla.org/show_bug.cgi?id=120556
        # of at least
        #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
        my $mime;
        # http://validator.w3.org/ does not send ANY "Accept" headers!
        if (!$W->{"headers_in"}{"Accept"}
                        && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i) {
                $mime="application/xhtml+xml";
                }
        if (!$mime) {
                # Put the fallback variant as the first one.
                # Rate both variants the same to prefer "text/html" for undecided clients.
                # At least
                #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
                # prefers "application/xhtml+xml" over "text/html" itself:
                #   text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
                my @variants;
                for my $type ("text/html","application/xhtml+xml") {
                        push @variants,negotiate_variant(
                                        "id"=>$type,
                                        "content-type"=>$type,
                                        "qs"=>0.6,
                                        "charset"=>$client_charset,
                                        "lang"=>$W->{"language"},
                                        );
                        }
                # application/xml ?
                # text/xml ?
                $mime=$class->Negotiate_choose(\@variants);
                }
        # mod_perl doc: If you set this header via the headers_out table directly, it
        #               will be ignored by Apache. So do not do that.
        $W->{"r"}->content_type("$mime; charset=$client_charset");

        cache_start();
        return if $W->{"header_only"};
        # We still can append headers before we put out some text.
        # FIXME: It is not clean to still append them without overwriting.
        return if $W->{"heading_done"}++;

        if ($mime=~m{^application/\w+[+]xml$}) {
                Wprint '<?xml version="1.0" encoding="'.$client_charset.'"?>'."\n";
                }
        return if $W->{"xml_header_only"};
        Wprint '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'."\n";
        Wprint '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="'.$W->{"language"}.'">'."\n";

        # The title is the prefix optionally followed by ": " and the page title.
        my $title=$W->{"title_prefix"}.(!$W->{"title"} ? "" : ': '.$W->{"title"});
        # Do not: cluck if $title=~/[<>]/;
        # as it is not solved just by: &a_href_inhibit
        # as sometimes titles use also: <i>...</i>
        $title=~s#<[^>]*>##g;
        Wprint "<head>";
        Wprint "<title>$title</title>\n";
        if ($W->{"have_css"}) {
                # Everything can get overridden later.
                # $W->{"css_push"} is either a single stylesheet or an array reference of them.
                my @stylesheets=("/My/Web.css");
                for my $pushed ($W->{"css_push"}) {
                        next if !$pushed;
                        push @stylesheets,("ARRAY" ne ref($pushed) ? $pushed : @$pushed);
                        }
                for my $css (@stylesheets) {
                        Wprint <<"HERE";
<link rel="stylesheet" type="text/css" href="@{[ uri_escaped(path_web $css) ]}" />
HERE
                        }
                if ($W->{"css_inherit"}) {
                        Wprint <<"HERE";
<script type="text/javascript" src="@{[ uri_escaped(path_web('/My/css_inherit.js')) ]}" />
HERE
                        }
                }
        Wprint '<meta name="robots" content="'.($W->{"indexme"} ? "" : "no" ).'index,follow" />'."\n";
        Wprint $W->{"head"};
        for my $rel (qw(prev next index contents start up)) {
                for my $target ($W->{"rel_$rel"}) {
                        next if !$target;
                        Wprint '<link rel="'.$rel.'" href="'.uri_escaped(path_web $target).'" />'."\n";
                        }
                }
        Wprint "</head><body";
#       Wprint ' bgcolor="black" text="white" link="aqua" vlink="teal"'
#                       if $W->{"browser"}->netscape() && (!$W->{"browser"}->major() || $W->{"browser"}->major()<=4);
        Wprint $W->{"body_attr"};
        Wprint ">\n";

        for my $heading_html ($W->{"heading"}) {
                Wprint $heading_html if $heading_html;
                }
}
1125
# The module is compiled by now: drop the marker set by the BEGIN block at the top of this file.
BEGIN { delete $W->{"__My::Web_init"}; }
1129
1130 1;