&remote_ip: Fixed for stable headers hitting.
[MyWeb.git] / Web.pm
1 # $Id$
2 # Common functions for HTML/XHTML output generation
3 # Copyright (C) 2003-2005 Jan Kratochvil <project-www.jankratochvil.net@jankratochvil.net>
4
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; exactly version 2 of June 1991 is required
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18
19 package My::Web;
20 require 5.6.0;  # at least 'use warnings;' but we need some 5.6.0+ modules anyway
21 our $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };
22 our $CVS_ID=q$Id$;
23 use strict;
24 use warnings;
25
26 use Exporter;
27 sub Wrequire($);
28 sub Wuse($@);
29 our $W;
30 our @EXPORT=qw(
31                 &Wrequire &Wuse
32                 &path_web &path_abs_disk
33                 &uri_escaped
34                 &a_href &a_href_cc
35                 &vskip
36                 &img &centerimg &rightimg
37                 $W
38                 &input_hidden_persistents
39                 &escapeHTML
40                 );
41 our @ISA=qw(Tie::Handle Exporter);
42
43 my %packages_used_hash; # $packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}=1;
44 my %packages_used_array;
45
46 BEGIN
47 {
48         use Carp qw(cluck confess);
49         $W->{"__My::Web_init"}=1;
50
51         sub Wrequire ($)
52         {
53         my($file)=@_;
54
55 #               print STDERR "Wrequire $file\n";
56                 $file=~s#/#::#g;
57                 $file=~s/[.]pm$//;
58                 my $class=$file;
59                 $file=~s#::#/#g;
60                 $file.=".pm";
61                 my %callers;
62                 for (my $depth=0;defined caller($depth);$depth++) {
63                         $callers{caller($depth)}=1;
64                         }
65                 my $selfpkg=__PACKAGE__;
66                 $callers{$selfpkg}=1;
67                 for my $target ($class,__PACKAGE__) {
68                         for my $caller (keys(%callers)) {
69                                 next if $caller eq $target;
70                                 next if $packages_used_hash{$caller}{$target}++;
71                                 push @{$packages_used_array{$caller}},$target;
72                                 }
73                         }
74                 eval { CORE::require "$file"; } or confess $@;
75                 1;      # Otherwise 'require' would already file above.
76         }
77
78         sub Wuse ($@)
79         {
80         my($file,@list)=@_;
81
82 #               print STDERR "Wuse $file\n";
83                 Wrequire $file;
84                 local $Exporter::ExportLevel=$Exporter::ExportLevel+1;
85                 $file->import(@list);
86                 1;
87         }
88
89         sub import
90         {
91         my($class,@rest)=@_;
92
93                 local $Exporter::ExportLevel=$Exporter::ExportLevel+1;
94                 Wrequire("$class");
95                 return $class->SUPER::import(@rest);
96         }
97 }
98
99 use WebConfig;  # see also below: Wuse 'WebConfig';
100 require CGI;
101 require Image::Size;    # for &imgsize
102 use File::Basename;     # &basename
103 use Carp qw(cluck confess);
104 use URI::Escape;
105 require HTTP::BrowserDetect;
106 require HTTP::Negotiate;
107 our $have_Geo_IP; BEGIN { $have_Geo_IP=eval { require Geo::IP; 1; }; }
108 # Do not: use ModPerl::Util qw(exit);
109 # to prevent in mod_perl2: "exit" is not exported by the ModPerl::Util module
110 # I do not know why.
111 use POSIX qw(strftime);
112 use Tie::Handle;
113 use Apache2::Const qw(HTTP_MOVED_TEMPORARILY OK HTTP_OK);
114 use URI;
115 use URI::QueryParam;
116 use Cwd;
117 require HTTP::Date;
118 require Storable;
119 require Digest::MD5;
120 require Data::Compare;
121 use Data::Dumper;
122 require Encode;
123 use Apache2::Filter;
124 use Apache2::Connection;
125
126
127 #our $W;
128
# Cleanup handler (pushed as "PerlCleanupHandler" by &init).
# Marks the toplevel package of the finished request as "_done", calls
# &cache_finish and drops the global $W.
# Returns: OK
sub cleanup($)
{
my($apache_request)=@_;        # unused here

        my $toplevel=$W->{"__PACKAGE__"};
        $packages_used_hash{$toplevel}{"_done"}=1;
        cache_finish();
        # Sanity protection: nothing may reach the finished request through $W.
        $W=undef;
        return OK;
}
139
# Die (through &confess) unless an Apache request object is currently available.
sub request_check(;$)
{
my($self)=@_;

        # Use &eval to prevent: Global $r object is not available. Set:\n\tPerlOptions +GlobalRequest\nin ...
        # CGI requires valid "r": check it beforehand here.
        my $request=eval { Apache2::RequestUtil->request(); };
        # Do not: confess "Calling sensitive dynamic code without My::Web::init" if !$W->{"__PACKAGE__"};
        # as it is valid at least while preparing arguments to call: &project::Lib::init
        confess "Calling sensitive dynamic code from a static code" if !$request;
}
150
# Per-request initialization.
# Builds the global $W from %WebConfig overridden by %args, fills in the
# option defaults, binds the Apache request, ties STDOUT to it, parses the
# request arguments into $W->{"args"}, wraps the request headers and runs
# the optional $W->{"args_check"} validation.
# Input: $class; %args are per-page settings overriding %WebConfig.
# Returns: $W blessed into $class.
sub init ($%)
{
my($class,%args)=@_;

        print STDERR "$class->init ".Apache2::RequestUtil->request()->unparsed_uri()."\n";

        # We need to track package dependencies, so we need to call it from &init.
        # We cannot do it in BEGIN { } block
        # as it would not be tracked for each of the toplevel users later.
        Wuse 'WebConfig';
        Wrequire 'My::Hash';

        $W=My::Hash->new({
                "__PACKAGE__"=>scalar(caller()),
                %WebConfig,
                %args,  # override %WebConfig settings
                },"My::Hash::Sub","My::Hash::Push");

        # {"__PACKAGE__"} is mandatory for mod_perl-2.0;
        # $Apache2::Registry::curstash is no longer supported.
        cluck "No __PACKAGE__" if !$W->{"__PACKAGE__"};

        # See: &escapeHTML
        my $charset=CGI::charset();
        cluck "charset==$charset, expecting ISO-8859-1" if $charset ne "ISO-8859-1";
        CGI::charset("utf-8");

        # Defaults for the options not given by %WebConfig or %args.
        my @defaults=(
                "detect_ent"    =>0,
                "detect_js"     =>0,
                "have_css"      =>1,    # AFAIK it does not hurt anyone.
                "css_inherit"   =>0,
                "footer"        =>1,
                "footer_delimit"=>1,
                "footer_ids"    =>1,
                "indexme"       =>1,
                "head"          =>"",
                "body_attr"     =>"",
                "language"      =>"en-US",
                );
        while (my($option,$default)=splice(@defaults,0,2)) {
                $W->{$option}=$default if !defined $W->{$option};
                }

        # The footer is pointless without any of its parts
        # and its parts are disabled together with the footer.
        my $footer_any=0;
        for my $part (qw(footer_ids)) {
                $W->{$part}=0 if !$W->{"footer"};
                $footer_any=1 if $W->{$part};
                }
        $W->{"footer"}=0 if !$footer_any;
        $W->{"footer_delimit"}=0 if !$W->{"footer"};

        $W->{"r"}=Apache2::RequestUtil->request();

        $W->{"r"}->push_handlers("PerlCleanupHandler"=>\&cleanup);

        $W->{"web_hostname"}||=$W->{"r"}->hostname();

        tie *STDOUT,$W->{"r"};
        select *STDOUT;
        $|=1;

        # Entities support detection: a client which understands "&amp;" in URLs
        # requests "&have_ent", other clients request the literal "&amp;have_ent".
        $W->{"QUERY_STRING"}=$W->{"r"}->args() || "";
        if ($W->{"detect_ent"}) {
                if ($W->{"QUERY_STRING"}=~/[&]amp;have_ent/) {
                        $W->{"have_ent"}=0;
                        }
                elsif ($W->{"QUERY_STRING"}=~/[&]have_ent/) {
                        $W->{"have_ent"}=1;
                        }
                else {
                        delete $W->{"have_ent"};
                        }
                if (!defined $W->{"have_ent"} && $W->{"r"}->method() eq "GET") {
                        $W->{"head"}.='<meta http-equiv="Refresh" content="0; URL='
                                        .escapeHTML("http://".$W->{"web_hostname"}."/".($W->{"r"}->uri()=~m#^/*(.*)$#)[0]
                                                        ."?".($W->{"QUERY_STRING"} || "detect_ent_glue=1").'&have_ent=detect')
                                        .'" />'."\n";
                        }
                }
        $W->{"QUERY_STRING"}=~s/([&])amp;/$1/g;
        $W->{"r"}->args($W->{"QUERY_STRING"});
        # Workaround: &CGI::Vars behaves weird if strings passed both as POST data and in: $QUERY_STRING
        if ($W->{"r"}->method() eq "POST") {
                $W->{"r"}->args("");
                delete $ENV{"QUERY_STRING"};
                }
        # Do not: $W->{"r"}->args()
        # as it parses only QUERY_STRING (not POST data).
        $W->{"args_orig_array"}=[ CGI->new($W->{"r"})->Vars() ];
        $W->{"args"}={ @{$W->{"args_orig_array"}} };
        for my $name (keys(%{$W->{"args"}})) {
                # &CGI::Vars packs multiple values into one "\0"-separated string.
                # LIMIT -1 keeps the trailing empty values ("a\0" is "a" and "");
                # without it such argument would stay a scalar with embedded "\0".
                my @vals=split /\x00/,$W->{"args"}{$name},-1;
                next if @vals<=1;
                $W->{"args"}{$name}=[@vals];
                }

        # Request headers extended by the pseudo-header "_remote_ip", read-only.
        $W->{"headers_in"}=$W->{"r"}->headers_in();
        Wrequire 'My::Hash::Merge';
        $W->{"headers_in"}=My::Hash::Merge->new(
                        $W->{"headers_in"},
                        My::Hash::Sub->new({
                                "_remote_ip"=>sub { return $W->{"r"}->connection()->remote_ip(); },
                                }),
                        );
        $W->{"headers_in"}=My::Hash::Readonly->new($W->{"headers_in"});

        # GET and HEAD default to HTTP-Safe, any other method to not HTTP-Safe.
        my $method=$W->{"r"}->method();
        if ($method eq "GET" || $method eq "HEAD") {
                # Extend the current ETag system instead if you would need it:
                cluck "Explicitely NOT HTTP-Safe for method \"".$method."\"?!?"
                                if defined($W->{"http_safe"}) && !$W->{"http_safe"};
                $W->{"http_safe"}=1 if !defined $W->{"http_safe"};
                }
        else {
                cluck "Undefined HTTP-Safe-ty for method \"".$method."\"!"
                                if !defined($W->{"http_safe"});
                $W->{"http_safe"}=0 if !defined $W->{"http_safe"};
                }
        if ($W->{"http_safe"}) {
                # Wrap the headers by My::Hash::RecordKeys for the HTTP-Safe requests.
                Wrequire 'My::Hash::RecordKeys';
                $W->{"headers_in_RecordKeys"}=My::Hash::RecordKeys->new($W->{"headers_in"});
                $W->{"headers_in"}=$W->{"headers_in_RecordKeys"};
                }

        $W->{"browser"}=HTTP::BrowserDetect->new($W->{"headers_in"}{"User-Agent"});

        if (!defined $W->{"have_style"}) {
                $W->{"have_style"}=(!$W->{"browser"}->netscape() || ($W->{"browser"}->major() && $W->{"browser"}->major()>4) ? 1 : 0);
                }

        $W->{"have_js"}=($W->{"args"}{"have_js"} ? 1 : 0);
        if ($W->{"detect_js"} && !$W->{"have_js"}) {
                $W->{"head"}.='<script type="text/javascript" src="'.uri_escaped(path_web('/have_js.pm')).'"></script>'."\n";
                }

        _args_check(%{$W->{"args_check"}}) if $W->{"args_check"};

        return bless $W,$class;
}
282
283 # Be aware other parts of code (non-My::Web) will NOT use this function!
284 # Do not: Wprint $W->{"heading"},"undef"=>1;
285 # as we would need to undef() it to turn it off and it would get defaulted in such case.
286 # Do not: exists $W->{"heading"}
287 # as we use a lot of 'for $W->{"heading"}' which instantiates it with the value: undef()
# Send $text to the client through $W->{"r"}->puts().
# $args{"undef"}=1 permits (and silently ignores) undefined $text;
# any other key of %args is reported as invalid.
sub Wprint($%)
{
my($text,%args)=@_;

        my $undef_permitted=delete $args{"undef"};
        cluck "undef Wprint" if !defined $text && !$undef_permitted;
        cluck join(" ","Invalid arguments:",keys(%args)) if keys(%args);
        return if !defined $text;
        cluck "utf-8 untested" if Encode::is_utf8($text);
        $W->{"r"}->puts($text);
}
299
# &CGI::escapeHTML wrapper forcing the CGI charset to "utf-8" first.
sub escapeHTML($)
{
my($text)=@_;

        # Prevent &CGI::escapeHTML breaking utf-8 strings like: \xC4\x9B eq \x{11B}
        our $init;
        if ($ENV{"MOD_PERL"}) {
                # Prevent case if we run under mod_perl but still just initializing:
                request_check();
                }
        elsif ($init++) {
                # Generally we are initialized from &init but we may be used without it without mod_perl
                # and in such case check the change on all non-first invocations.
                my $charset=CGI::charset();
                cluck "charset==$charset" if $charset ne "utf-8";
                }
        CGI::charset("utf-8");

        return CGI::escapeHTML($text);
}
317
318 # local *FH;
319 # tie *FH,ref($W),$W;
# Constructor for: tie *FH,$class,$W;
# Stores the given (mandatory, true) $W under the key "W" of the new object.
sub TIEHANDLE($)
{
my($class,$W)=@_;

        confess "Missing W" if !$W;
        return bless { "W"=>$W },$class;
}
328
# Tied handle output method; the arguments follow syswrite():
# write $length characters of $scalar starting at $offset.
# $offset defaults to 0 and an undefined $length means "up to the end".
# The data are passed to &Wprint.
sub WRITE
{
my($self,$scalar,$length,$offset)=@_;

        $offset=0 if !defined $offset;
        my $chunk=(defined $length
                        ? substr($scalar,$offset,$length)
                        : substr($scalar,$offset));
        Wprint($chunk);
}
335
336 # /home/user/www/webdir
# Directory this package was loaded from: its %INC entry with the
# package-relative path (and the slashes preceding it) stripped.
# The result is cached in the package variable $dir_top_abs_disk.
# Returns: the directory; nothing (after a warning) if it cannot be found.
sub dir_top_abs_disk()
{
        our $dir_top_abs_disk;
        return $dir_top_abs_disk if $dir_top_abs_disk;

        (my $relpath=__PACKAGE__)=~s{::}{/}g;
        $relpath.=".pm";

        my $abspath=$INC{$relpath};
        if (!$abspath) {
                cluck "Unable to find self package $relpath";
                return;
                }
        unless ($abspath=~s{/*\Q$relpath\E$}{}) {
                cluck "Unable to strip myself \"$relpath\" from the abspath: $abspath";
                return;
                }
        cluck "INC{myself} is relative?: $abspath" if $abspath!~m{^/};

        $dir_top_abs_disk=$abspath;
        return $dir_top_abs_disk;
}
357
# Absolute URI object of the current request, cached in $W->{"unparsed_uri"}.
# The request's unparsed URI is resolved against "http://<web_hostname>/".
sub unparsed_uri()
{
        request_check();
        return $W->{"unparsed_uri"} if $W->{"unparsed_uri"};

        # Do not: $W->{"r"}
        # as we may be called before &init from: &My::Project::init
        my $r=Apache2::RequestUtil->request();
        cluck 'Calling &unparsed_uri from a static code, going to fail' if !$r;
        my $uri_string=$r->unparsed_uri() or cluck "Valid 'r' missing unparsed_uri()?";
        my $base="http://".$W->{"web_hostname"}."/";
        $W->{"unparsed_uri"}=URI->new_abs($uri_string,$base);
        return $W->{"unparsed_uri"};
}
372
# Convert $in (URI string or URI object) to a canonical URI object made
# absolute against the URI of the current request.
sub in_to_uri_abs($)
{
my($in)=@_;

        # Otherwise we may have been already processed and thus legally relativized.
        # FIXME data: Currently disabled, all the data are too violating such rule.
        if (0 && !ref $in) {
                my $uri_check=URI->new($in);
                $uri_check->scheme() || $in=~m{^\Q./\E} || $in=~m{^/}
                                or cluck "Use './' or '/' prefix for all the local references: $in";
                }
        return URI->new_abs($in,unparsed_uri())->canonical();
}
388
389 # $args{"uri_as_in"}=1 to permit passing URI objects as: $in
390 # $args{"abs"}=1;
# Convert $in to a URI object usable in the generated page.
# Local URIs get the defined "args_persistent" arguments of the current
# request added to their query; values already present in $in win.
# Returns: the URI relative to the current request, or absolute
# if $args{"abs"} or the request argument "Wabs" is set.
sub path_web($%)
{
my($in,%args)=@_;

        cluck if !$args{"uri_as_in"} && ref $in;
        my $uri=in_to_uri_abs($in);
        if (uri_is_local($uri)) {
                my @persistent;
                for my $key (keys(%{$W->{"args_persistent"}})) {
                        my $val=$W->{"args"}{$key};
                        push @persistent,$key=>$val if defined $val;
                        }
                # Prefer the $uri values over "args_persistent" values.
                $uri->query_form_hash({
                                @persistent,
                                %{$uri->query_form_hash()},
                                });
                }
        my $want_abs=($W->{"args"}{"Wabs"} || $args{"abs"});
        return ($want_abs ? $uri->abs(unparsed_uri()) : $uri->rel(unparsed_uri()));
}
411
# Remember $path_abs_disk as a key of $W->{"path_abs_disk_register"}.
sub path_abs_disk_register($)
{
        my $path_abs_disk=shift;
        $W->{"path_abs_disk_register"}{$path_abs_disk}=1;
}
418
419 # $args{"uri_as_in"}=1 to permit passing URI objects as: $in
# Map the local URI $in to a disk pathname below &dir_top_abs_disk.
# The pathname gets registered by &path_abs_disk_register
# unless $args{"register"} is given as a defined false value.
sub path_abs_disk($%)
{
my($in,%args)=@_;

        cluck if !$args{"uri_as_in"} && ref $in;
        my $uri=in_to_uri_abs($in);
        cluck if !uri_is_local($uri);

        my $path=$uri->path();
        cluck "URI compatibility: ->path() not w/leading slash of URI \"$uri\"; path: $path" if $path!~m{^/};

        my $disk_path=dir_top_abs_disk().$path;
        my $register=(defined $args{"register"} ? $args{"register"} : 1);
        path_abs_disk_register($disk_path) if $register;
        return $disk_path;
}
433
434 sub fatal (;$);
435
# Validate $W->{"args"} against the template: name=>regex for a single
# value, name=>[regex] for a multivalue argument. Missing arguments
# become "" (or [""]); an empty regex accepts anything.
# Any violation is reported through &fatal.
sub _args_check (%)
{
my(%tmpl)=@_;

        for my $name (keys(%tmpl)) {
                my $regex=$tmpl{$name};
                my $multi_expected=ref $regex;
                my $name_html="Parameter <span class=\"quote\">".escapeHTML($name)."</span>";

                $W->{"args"}{$name}="" if !defined $W->{"args"}{$name};
                # A single value passed where multivalue is expected is fine.
                $W->{"args"}{$name}=[ $W->{"args"}{$name} ] if $multi_expected && !ref $W->{"args"}{$name};
                fatal("$name_html passed as multivar although singlevar expected")
                                if ref $W->{"args"}{$name} && !$multi_expected;

                $regex=$regex->[0] if $multi_expected;
                for my $val (ref $W->{"args"}{$name} ? @{$W->{"args"}{$name}} : $W->{"args"}{$name}) {
                        $val="" if !defined $val;
                        next if $regex eq "" || $val=~/$regex/;
                        fatal("$name_html <span class=\"quote\">".escapeHTML($val)."</span>"
                                        ." does not match the required regex <span class=\"quote\">".escapeHTML($regex)."</span> ");
                        }
                }
}
455
# Returns: an HTML paragraph holding just "&nbsp;" used as a vertical gap;
# the optional $height becomes its CSS "height".
sub vskip (;$)
{
my($height)=@_;

        my $style="";
        $style=' style="height: '.$height.';"' if defined $height;
        return "<p${style}>&nbsp;</p>\n";
}
462
# Report a fatal error both to the log (&cluck) and to the client and
# finish the page by &footer. $msg is HTML; it defaults to "UNKNOWN".
sub fatal (;$)
{
my($msg)=@_;

        $msg||="UNKNOWN";
        cluck "FATAL: $msg";

        # Do not send it unconditionally.
        # The initial duplicated '<?xml...' crashes Gecko parser.
        $W->{"heading_done"}=0 if $W->{"header_only"};
        # Do not send it unconditionally.
        # Prevents warn: Headers already sent
        unless ($W->{"heading_done"}) {
                $W->{"indexme"}=0;      # For the case no heading was sent yet.
                $W->{"header_only"}=0;  # assurance for &heading
                My::Web->heading();
                }

        my $admin_link=a_href("mailto:".$W->{"admin_mail"},"admin of this website");
        Wprint("\n".vskip("3ex")."<hr /><h1 class=\"error\">FATAL ERROR: $msg!</h1>\n"
                        ."<p>You can report this problem's details to"
                        ." ".$admin_link.".</p>\n");
        footer();
}
485
# Returns: the value of $CVS_ID of the package $package (undef if not set).
# Uses a symbol table lookup instead of a string eval.
sub _footer_cvs_id
{
my($package)=@_;

        no strict 'refs';
        return ${$package."::CVS_ID"};
}

# Print the page footer and finish the request by: exit 0
# The footer lists the CVS ids of all the packages used by the toplevel
# package (visibly if $W->{"footer_ids"}, always as HTML comments).
# The second invocation during one request does: exit 1
sub footer (;$)
{
        exit 1 if $W->{"footer_passed"}++;      # deadlock prevention:

        Wprint(vskip()) if $W->{"footer_delimit"};

        Wprint($W->{"footing_delimit"}) if $W->{"footing_delimit"};

        Wprint("<hr />\n") if $W->{"footer"};

        # Be safe if no package got registered for the toplevel package.
        my $packages_used=$packages_used_array{$W->{"__PACKAGE__"}} || [];

        if ($W->{"footer_ids"}) {
                Wprint('<p class="cvs-id">');
                my @lines;
                for my $package (@$packages_used) {
                        my $cvs_id=_footer_cvs_id($package);
                        next if !$cvs_id;
                        $cvs_id='$'.$cvs_id.'$';        # Eaten by 'q' operator.
                        my @cvs_id_split=split / +/,$cvs_id;
                        # Only the full 8-field form gets decorated by links.
                        if (@cvs_id_split==8) {
                                (my $file=$package)=~s#::#/#g;
                                my $ext;
                                my @tried;
                                my $found=0;
                                for my $candidate (qw(.pm)) {
                                        $ext=$candidate;
                                        my $path_abs_disk=path_abs_disk("/$file$ext");
                                        push @tried,$path_abs_disk;
                                        next if !-r $path_abs_disk;
                                        $found=1;
                                        last;
                                        }
                                # The original check (!$ext) could never fire.
                                cluck "Class file $file not found; tried: ".join(" ",@tried) if !$found;
                                $file.=$ext;
                                # Revision: link to the checkout of that revision.
                                (my $checkout=$W->{"viewcvs"})=~s#/viewcvs/#$&~checkout~/#;
                                $cvs_id_split[2]=a_href($checkout."$file?rev=".$cvs_id_split[2],
                                                $cvs_id_split[2]);
                                # Filename: link to the file history, shown as the package name.
                                $cvs_id_split[1]=a_href($W->{"viewcvs"}.$file,
                                                ($package!~/^Apache2::/ ? $package : $cvs_id_split[1]));
                                $cvs_id_split[5]=&{$W->{"cvs_id_author_sub"}}($cvs_id_split[5]);
                                }
                        push @lines,join(" ",@cvs_id_split);
                        }
                Wprint(join("<br />\n",@lines));
                Wprint("</p>\n");
                }

        for my $package (@$packages_used) {
                my $cvs_id=_footer_cvs_id($package);
                Wprint('<!-- '.$package.' - $'.$cvs_id.'$ -->'."\n") if $cvs_id;
                }

        Wprint($W->{"footing"}) if $W->{"footing"};

        Wprint("</body></html>\n");
        exit 0;
}
548
549 # Existing entries are overwritten.
# Set the given response headers (name=>value pairs) on the current request.
# Each pair arriving after the heading was sent is only warned about.
sub header(%)
{
my(%pairs)=@_;

        for my $key (keys(%pairs)) {
                if ($W->{"heading_done"}) {
                        cluck "Headers already sent";
                        next;
                        }
                $W->{"r"}->headers_out()->set($key,$pairs{$key});
                }
}
559
# Returns: human readable form of $size (bytes): plain bytes below 4096,
# kilobytes below 1MB and megabytes above, always with the "B" suffix.
sub size_display ($)
{
my($size)=@_;

        my $display;
        if ($size>=1024*1024) {
                $display=sprintf("%.1fM",$size/1024/1024);
                }
        elsif ($size>=4096) {
                $display=sprintf("%.1fK",$size/1024);
                }
        else {
                $display=$size;
                }
        return $display."B";
}
573
# Returns: 1 if $in relativized to the URI of the current request
# has neither a scheme nor an authority part; 0 otherwise.
sub uri_is_local($)
{
my($in)=@_;

        my $relative=in_to_uri_abs($in)->rel(unparsed_uri());
        # Do not: defined $uri_rel->("userinfo"|"host"|"port")();
        # as they fail to be called for schemes not supporting them.
        return ($relative->scheme() || $relative->authority() ? 0 : 1);
}
585
586 # &path_web still may be required for &uri_escaped !
# Returns: $uri in the form to be put into an HTML attribute.
# Local URIs are always HTML-escaped; foreign URIs are escaped according
# to the detected entities support of the client ($W->{"have_ent"}) and
# passed through '/Redirect.pm' if the support is unknown.
sub uri_escaped($)
{
my($uri)=@_;

        cluck if !ref $uri;
        my $urient=escapeHTML($uri);
        # Nothing to escape at all.
        return $uri if $uri eq $urient;

        request_check();
        return $urient if uri_is_local($uri);
        if (defined $W->{"have_ent"}) {
                return ($W->{"have_ent"}
                                ? $urient       # ent client
                                : $uri);        # non-ent client
                }
        # Unknown client, &escapeHTML should not be needed here:
        my $location=uri_escape($uri->abs(unparsed_uri()));
        return escapeHTML(path_web('/Redirect.pm?location='.$location));
}
601
602 our $a_href_inhibited;
# Returns: HTML link '<a href="...">$contents</a>' to $in.
# $contents defaults to the escaped $in (its basename if $args{"basename"});
# any nested <a> tags of $contents are removed.
# $args{"attr"}: additional attributes of the <a> tag.
# $args{"size"}: 0 never appends the file size, 1 (default) appends it
#                for local links to archive files, 2 for any local link.
# Only $contents is returned while links are inhibited by &a_href_inhibit.
sub a_href($;$%)
{
my($in,$contents,%args)=@_;

        request_check();
        $args{"size"}=1 if !defined $args{"size"};
        if (!defined $contents) {
                $contents=$in;
                $contents=File::Basename::basename($contents) if $args{"basename"};
                $contents=escapeHTML($contents);
                }
        $contents=~s#<a\b[^>]*>##gi;
        $contents=~s#</a>##gi;
        return $contents if $a_href_inhibited;

        my $path_web=path_web $in,%args;
        my $r='<a href="'.uri_escaped($path_web).'"';
        $r.=" ".$args{"attr"} if $args{"attr"};
        $r.='>'.$contents.'</a>';

        # Downloadable?
        # The extension must end the path (or precede the query/fragment),
        # otherwise names like "x.debug.html" would be taken as archives.
        my $downloadable=($args{"size"}>=2
                        || $in=~m{[.](?:gz|Z|rpm|zip|deb|lha)(?:[?\#]|$)});
        if ($args{"size"} && uri_is_local($in) && $downloadable) {
                my $path_abs_disk=path_abs_disk $in,%args;
                cluck "File not readable: $path_abs_disk" if !-r $path_abs_disk;
                # Do not print the bogus "(B)" if the file cannot be stat(2)ed.
                my $size=(stat($path_abs_disk))[7];
                $r.='&nbsp;('.size_display($size).')' if defined $size;
                }
        return $r;
}
632
# Call $sub with @sub_args while $a_href_inhibited is locally set to 1.
# Returns: whatever $sub returns.
sub a_href_inhibit($$;@)
{
my($self,$sub,@sub_args)=@_;

        local $a_href_inhibited=1;
        return $sub->(@sub_args);
}
640
# Returns: '<input type="hidden" ... />' lines for all the
# "args_persistent" arguments defined in the current request.
sub input_hidden_persistents()
{
        request_check();
        my $html="";
        for my $key (keys(%{$W->{"args_persistent"}})) {
                my $val=$W->{"args"}{$key};
                next if !defined $val;
                $html.='<input type="hidden"'
                                .' name="'.escapeHTML($key).'"'
                                .' value="'.escapeHTML($val).'"'
                                .' />'."\n";
                }
        return $html;
}
653
# Redirect the client to $url (made absolute by &path_web) with $status
# (HTTP_MOVED_TEMPORARILY by default), send only the headers and exit.
sub http_moved($$;$)
{
my($self,$url,$status)=@_;

        my $location=path_web($url,"abs"=>1);
        $status=HTTP_MOVED_TEMPORARILY if !$status;
        $W->{"r"}->status($status);
        $W->{"r"}->headers_out()->{"Location"}=$location;
        $W->{"header_only"}=1;
        My::Web->heading();
        exit;
        die "NOTREACHED";
}
667
# Returns: the last address of the "X-Forwarded-For" request header,
# falling back to the "_remote_ip" pseudo-header.
sub remote_ip ()
{
        # Do not: PerlModule                 Apache2::ForwardedFor
        #         PerlPostReadRequestHandler Apache2::ForwardedFor
        # As 'Apache2::ForwardedFor' takes the first of $ENV{"HTTP_X_FORWARDED_FOR"}
        # while the contents is '127.0.0.1, 213.220.195.171' if client has its own proxy.
        # We must take the last item ourselves.
        # Be VERY sure you always retrieve all the headers unconditionally to hit: My::Hash::RecordKeys
        my $forwarded=$W->{"headers_in"}{"X-Forwarded-For"};
        my $direct=$W->{"headers_in"}{"_remote_ip"};
        # Keep only the part after the last comma.
        $forwarded=~s/^.*,\s*// if $forwarded;
        return $forwarded || $direct;
}
684
685 # $url={"JP"=>"http://specific",...};
686 # $url={""=>"http://default",...};
# Country specific link: $url is a hash of country code=>URL where the
# key "" holds the default URL.
# The country is looked up by Geo::IP (if available) from &remote_ip.
# The default URL is used both if the country is unknown and if the hash
# has no entry for the detected country; an existing but false entry
# still suppresses the link for that country.
# Returns: &a_href link, or plain $contents if there is no URL to use.
sub a_href_cc($$;%)
{
my($url,$contents,%args)=@_;

        my $cc="";
        if ($have_Geo_IP) {
                $cc=Geo::IP->new()->country_code_by_addr(remote_ip()) || "";
                }
        my $target=(exists $url->{$cc} ? $url->{$cc} : $url->{""});
        return $contents if !$target;
        return a_href $target,$contents,%args;
}
698
# Run the shell command $cmd by bash(1) under flock(1) on &dir_top_abs_disk
# with its stdout redirected to stderr. The command line is logged to STDERR.
# Returns: the result of system().
sub make ($)
{
my($cmd)=@_;

        # FIXME: &alarm, --timeout is now infinite.
        # FIXME: Try to remove bash(1).
        # FIXME: Use: @PATH_FLOCK@
        my $lock_dir=dir_top_abs_disk();
        my @argv=('flock',$lock_dir,'bash','-c',$cmd.' >&2');
        print STDERR join(" ","SPAWN:",@argv)."\n";
        return system(@argv);
}
710
# Returns: $text as one single-quoted shell word;
# embedded single quotes are emitted as: '\''
sub _make_file_shell_quote
{
my($text)=@_;

        $text=~s/'/'\\''/g;
        return "'".$text."'";
}

# Build the missing file $file (absolute pathname) by running make(1)
# for its basename in its directory through &make.
# Returns: nothing if $file already exists; the result of &make otherwise.
sub make_file($$)
{
my($self,$file)=@_;

        cluck "Pathname not absolute: $file" if $file!~m{^/};
        return if -f $file;
        # TODO: Somehow quickly check dependencies?
        # The pathname goes through bash(1): quote it properly.
        return make('make -s --no-print-directory'
                        .' -C '._make_file_shell_quote(File::Basename::dirname($file))
                        .' '._make_file_shell_quote(File::Basename::basename($file)));
}
721
# Returns: the size (and no-border) attributes for an <img> tag:
# a CSS "style" attribute if $W->{"have_style"}, border="0" otherwise,
# followed by the plain "width" and "height" attributes in both cases.
sub img_size ($$)
{
my($width,$height)=@_;

        cluck unless defined $width && defined $height;
        my $border;
        if ($W->{"have_style"}) {
                $border="style=\"border:0;width:${width}px;height:${height}px\"";
                }
        else {
                $border="border=\"0\"";
                }
        return $border." width=\"$width\" height=\"$height\"";
}
730
# Convert the named variant description to the positional array
# [id,qs,content-type,encoding,charset,lang,size] as passed to
# &HTTP::Negotiate::choose; fields not given are undef.
sub negotiate_variant (%)
{
my(%args)=@_;

        return [ @args{"id","qs","content-type","encoding","charset","lang","size"} ];
}
738
739 # Input: $self is required!
740 # Input: Put the fallback variant as the first one.
741 # Returns: always only scalar!
# Choose the best of $variants (array of &negotiate_variant results) by
# &HTTP::Negotiate::choose according to the "Accept*" request headers.
# Returns: the id of the chosen variant, the id of the first variant
# if the negotiation did not choose any.
sub Negotiate_choose($$)
{
my($self,$variants)=@_;

        # Limit these entries to generate proper 'Vary' header.
        my @accept_names=qw(
                        Accept
                        Accept-Charset
                        Accept-Encoding
                        Accept-Language
                        );
        my %hash;
        $hash{$_}=$W->{"headers_in"}{$_} for @accept_names;

        # Do not: $W->{"r"}
        # to prevent: Can't locate object method "scan" via package "Apache2::RequestRec" at HTTP/Negotiate.pm line 84.
        # Do not: $W->{"r"}->headers_in()
        # to prevent: Can't locate object method "scan" via package "APR::Table" at HTTP/Negotiate.pm line 84.
        # Do not: HTTP::Headers->new($W->{"r"}->headers_in());
        # to prevent empty result or even: Odd number of elements in anonymous hash
        my $headers=HTTP::Headers->new(%hash);
        my $best=HTTP::Negotiate::choose($variants,$headers);
        # $variants->[0]{"id"}; &HTTP::Negotiate::choose failed?
        $best=$variants->[0][0] if !$best;
        return $best;
}
764
# Image formats offered by the content negotiation of &_img_src,
# the preferred one (higher "qs") first.
my @img_variants=(
                {
                        "id"=>"png",
                        "qs"=>0.9,
                        "content-type"=>"image/png",
                },
                {
                        "id"=>"gif",
                        "qs"=>0.7,
                        "content-type"=>"image/gif",
                },
                );
# Regex source matching a path already ending by an image extension:
# "jpeg" or the id of any negotiated variant.
my $img_variants_re=do {
                my @extensions=("jpeg");
                push @extensions,$_->{"id"} for @img_variants;
                '[.](?:'.join('|',@extensions).')$';
                };
770
771 # Returns: ($path_web,$path_abs_disk)
772 # URI path segments support ignored here. Where it is used? (';' path segment options)
# Resolve the local image reference $in.
# A path already ending by a known image extension is used as is.
# Otherwise each variant of @img_variants ("$path.$id") gets registered and
# built by &make_file and the extension is chosen by the content negotiation.
# Returns: ($path_web,$path_abs_disk)
sub _img_src($%)
{
my($in,%args)=@_;

        cluck if !uri_is_local $in;
        my $uri=in_to_uri_abs $in;
        my $path_abs_disk=path_abs_disk $uri,%args,"uri_as_in"=>1,"register"=>0;

        # Known image extension?
        return path_web($uri,%args,"uri_as_in"=>1),$path_abs_disk if $uri->path()=~m#$img_variants_re#o;

        my @nego_variants;
        for my $var (@img_variants) {
                my $path_abs_disk_variant=$path_abs_disk.".".$var->{"id"};
                path_abs_disk_register($path_abs_disk_variant);
                __PACKAGE__->make_file($path_abs_disk_variant);
                # Scalar assignment: a failed stat() gives an empty list which
                # would otherwise leave "size" without its value in the list below.
                my $size=(stat $path_abs_disk_variant)[7];
                push @nego_variants,negotiate_variant(
                                %$var,
                                "size"=>$size,
                                );
                }
        my $ext=__PACKAGE__->Negotiate_choose(\@nego_variants);

        $uri->path($uri->path().".$ext");
        return path_web($uri,%args,"uri_as_in"=>1),path_abs_disk($uri,%args,"uri_as_in"=>1);
}
799
# Produce an <img /> element for an image, optionally wrapped into a link.
# $in: image reference resolved by &_img_src
# $alt: alternative text; markup tags are stripped from it and the rest is
#       escaped by &escapeHTML; it is used for both "alt" and "title"
# $args{"attr"}: text appended verbatim inside the <img /> tag
# $args{"a_href_img"}: image reference to link the picture to (resolved by &_img_src)
# $args{"a_href"}: link target given to &a_href; used only if "a_href_img" is not set
# The whole %args is also passed to &_img_src for $in.
sub img ($$%)
{
my($in,$alt,%args)=@_;

        request_check();
        my($path_web,$path_abs_disk)=_img_src($in,%args);
        my($width,$height)=Image::Size::imgsize($path_abs_disk);
        $alt=~s/<[^>]*>//g;
        $alt=escapeHTML($alt);
        my $attr_extra=($args{"attr"} ? " ".$args{"attr"} : "");
        my $content='<img src="'.uri_escaped($path_web).'" alt="'.$alt.'" title="'.$alt.'" '
                        .img_size($width,$height).$attr_extra." />";
        # Link to another image takes precedence over a plain link.
        if (my $link_img=$args{"a_href_img"}) {
                return a_href((_img_src($link_img))[0],$content,"uri_as_in"=>1);
                }
        if (my $link=$args{"a_href"}) {
                return a_href($link,$content);
                }
        return $content;
}
816
# Lay out one or more images centered in the cells of a single table row.
# Arguments: either the argument list of one &img call or a list of array
# references, each holding the argument list of one &img call.
# Returns the generated HTML text.
sub centerimg
{
        # A non-reference first argument means a single &img argument list.
        my @img_calls=(ref $_[0] ? @_ : ([@_]));
        # &{\&img}(...) calls &img without applying its prototype.
        my @cells=map("\t".'<td align="center">'.&{\&img}(@$_).'</td>'."\n",@img_calls);
        return join("",
                        '<table border="0" width="100%"><tr>'."\n",
                        @cells,
                        '</tr></table>'."\n",
                        );
}
828
# Lay out a text on the left and an image on the right of a full-width table.
# First argument: the text (HTML) of the left cell.
# Remaining arguments: the argument list for &img.
# Returns the generated HTML text.
sub rightimg
{
        my $text=shift;
        # &{\&img}(...) calls &img without applying its prototype; it is
        # evaluated in list context and interpolated as a list.
        my @img_html=&{\&img}(@_);

        # FIXME: Workaround bug of 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)':
        #        <col width="@{[ (!$W->{"browser"}->ie() ? "1*" : "90%" ) ]}" />
        #        <col width="@{[ (!$W->{"browser"}->ie() ? "0*" : "10%" ) ]}" />
        # causes whole invisible projects in: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.10) Gecko/20050719 Galeon/1.3.21
        return <<"HERE";
<table border="0" width="100%">
        <tr>
                <td align="left">
                        $text
                </td>
                <td align="right">
                        @img_html
                </td>
        </tr>
</table>
HERE
}
850
# Read the whole content of a file.
# $class: ignored (permits the class method call form)
# $filename: name of the file; always taken literally, leading/trailing
#            mode characters ('<', '>', '|') are NOT interpreted
# Returns: the file content as one string; undef if the file cannot be
#          opened (a diagnostic with a stack trace is printed in such case)
sub readfile($$)
{
my($class,$filename)=@_;

        my $content;
        # 3-argument open() with a lexical handle: the filename cannot inject an open mode.
        if (open my $fh,"<",$filename) {
                $content=do { local $/=undef(); <$fh>; };
                close $fh or cluck "Cannot close \"$filename\": $!";
                }
        else {
                # Do not try to read or close a handle which was never opened.
                cluck "Cannot open \"$filename\": $!";
                }
        return $content;
}
861
# Set the response headers forbidding any caching of the response.
# Invoked as a class method; the invocant is not used.
sub _no_cache($)
{
        my $date_past=HTTP::Date::time2str(1000000000);
        my $date_now=HTTP::Date::time2str();
        my @cache_control=(
                        "no-cache",
                        "no-store",
                        "must-revalidate",
                        "max-age=0",
                        "pre-check=0",  # MSIE
                        "post-check=0", # MSIE
                        );

        # Date in the past:
        header("Expires"=>$date_past);
        # Always modified:
        header("Last-Modified"=>$date_now);
        # HTTP/1.1
        header("Cache-Control"=>join(", ",@cache_control));
        # HTTP/1.0
        header("Pragma"=>"no-cache");
        # Content may be based on unpredictable sources:
        header("Vary"=>"*");
}
879
# Pick the given request headers out of $W->{"headers_in"}.
# Arguments: list of header names
# Returns: flat list of (name=>value) pairs in the order of the arguments;
#          one pair for each name given.
sub headers_in_filtered(@)
{
        my @pairs;
        for my $name (@_) {
                push @pairs,$name,$W->{"headers_in"}{$name};
                }
        return @pairs;
}
886
# Cache tables filled by &cache_finish and consulted by &cache_start:
#  %uri_args_frozen_to_headers_in_keys:
#       $W->{"uri_args_frozen"} => array reference of the request header keys accessed
#  %uri_args_headers_in_frozen_to_headers_out:
#       $W->{"uri_args_headers_in_frozen"} => hash reference of the response headers
our(
                %uri_args_frozen_to_headers_in_keys,
                %uri_args_headers_in_frozen_to_headers_out,
                );
889
# Build the cache key combining the request identification with the values
# of the request headers the response depends on.
# $headers_in_keys_arrayref: array reference of the request header names
# Returns: canonical Storable image of $W->{"uri_args_frozen"} together with
#          the headers picked by &headers_in_filtered.
sub uri_args_headers_in_frozen_get($)
{
my($headers_in_keys_arrayref)=@_;

        my $key_data={
                "uri_args_frozen"=>$W->{"uri_args_frozen"},
                "headers_in"=>{ headers_in_filtered(@$headers_in_keys_arrayref) },
                };
        # Canonical (sorted keys) form to get the same image for the same data.
        local $Storable::canonical=1;
        return Storable::freeze($key_data);
}
900
# Output filter installed by &cache_start: passes the data through
# unchanged while feeding them to the $W->{"digest-md5"} digest.
# Argument: the filter object providing read() and print()
# Returns: OK
sub cache_output_filter($)
{
my($filter)=@_;

        while ($filter->read(my $chunk,1024)) {
                # Possible here at all?
                if (Encode::is_utf8($chunk)) {
                        cluck "utf-8 untested";
                        }
                $filter->print($chunk);
                $W->{"digest-md5"}->add($chunk);
                }
        return OK;
}
912
# Start the HTTP caching support for the current request.
# If $W->{"http_safe"} is not set only the no-cache headers get set.
# Otherwise the cache tables are looked up by the request URI and arguments:
# on a hit the stored response headers are set and the conditional request
# is evaluated by meets_conditions(); if it does not return OK that status
# gets set and the processing ends by 'exit 0;'.
# In all the other cases the MD5 digest and the output filter
# &cache_output_filter get prepared and $W->{"cache_active"} gets set.
sub cache_start()
{
        if (!$W->{"http_safe"}) {
                __PACKAGE__->_no_cache();
                return;
                }

        LOOKUP: {
                # &Wrequire it here even if it will not be later used; to be stable!
                Wrequire('My::Hash::RestrictTo');
                my $uri_args_hashref={
                        "uri"=>"http://".$W->{"web_hostname"}."/".$W->{"r"}->uri(),
                        "args"=>$W->{"args_orig_array"},
                        };
                $W->{"uri_args_frozen"}=do { local $Storable::canonical=1; Storable::freeze($uri_args_hashref); };
                # Unknown request: nothing stored yet.
                my $headers_in_keys_arrayref=$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}};
                last LOOKUP if !$headers_in_keys_arrayref;

                # Protection to be sure we are stable:
                $W->{"headers_in"}=My::Hash::RestrictTo->new($W->{"headers_in"},@$headers_in_keys_arrayref);

                $W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get($headers_in_keys_arrayref);
                # No response headers stored for these request header values yet.
                my $headers_out_hashref=$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}};
                last LOOKUP if !$headers_out_hashref;
                header(%$headers_out_hashref);

                my $r=$W->{"r"};
                # &meets_conditions will always deny the attempt if !2xx status().
                # At least ap_read_request() sets: r->status=HTTP_REQUEST_TIME_OUT;     /* Until we get a request */
                my $status_old=$r->status();
                $r->status(HTTP_OK);
                # Update httpd's 'r->mtime' as the header "Last-Modified" is just not enough for ap_meets_conditions():
                # &update_mtime() argument is really in _secs_, not in _msecs_ as the docs claim.
                # Be aware '*1000000' would overflow Perl integer anyway.
                # &set_last_modified would also override the "Last-Modified" headers_out!
                # &mtime may exist but somehow does not work.
                $r->update_mtime(HTTP::Date::str2time($headers_out_hashref->{"Last-Modified"}));
                my $status=$r->meets_conditions();
                $r->status($status_old);

                # OK: the response has to be generated as usual.
                last LOOKUP if OK==$status;
                $r->status($status);
                exit 0;
                die "NOTREACHED";
                }

        $W->{"digest-md5"}=Digest::MD5->new();
        $W->{"cache_active"}=1;
        $W->{"r"}->add_output_filter(\&cache_output_filter);
}
961
# Find the "Last-Modified" value for the current response.
# The module files of all the packages recorded in %packages_used_array for
# the current $W->{"__PACKAGE__"} get registered first; the newest
# modification time of all the files of $W->{"path_abs_disk_register"}
# is used then.
# Returns: the date formatted by &HTTP::Date::time2str
sub cache_finish_last_modified()
{
        my $package_current=$W->{"__PACKAGE__"};

        if (!$packages_used_hash{$W->{"__PACKAGE__"}}{"_done"}) {
                cluck "Not yet done now? W __PACKAGE__: ".$package_current;
                }
        for my $package_orig (@{$packages_used_array{$W->{"__PACKAGE__"}}}) {
                # 'Some::Package' -> '/Some/Package.pm'
                (my $module_file=$package_orig.".pm")=~s{::}{/}g;
                path_abs_disk("/$module_file","register"=>1);
                }

        my $mtime_newest;
        for my $path_abs_disk (keys(%{$W->{"path_abs_disk_register"}})) {
                my $mtime=(stat $path_abs_disk)[9];
                if (!$mtime) {
                        cluck "No mtime for: $path_abs_disk";
                        next;
                        }
                if (!$mtime_newest || $mtime_newest<$mtime) {
                        $mtime_newest=$mtime;
                        }
                }
        if (!$mtime_newest) {
                cluck "No mtime_newest found for the current W __PACKAGE__: ".$W->{"__PACKAGE__"};
                }
        return HTTP::Date::time2str($mtime_newest);
}
981
982
# Finish the HTTP caching support for the current request: store the request
# header keys accessed during the request and the response headers generated
# for it to the cache tables for the future requests.
# Does nothing unless &cache_start has set $W->{"cache_active"}.
sub cache_finish()
{
        # Do not: return if !$W->{"uri_args_frozen"};
        # as we may have just given 304 and 'exit 0;' without starting the caching.
        return if !$W->{"cache_active"};

        # Fill-in/check: %uri_args_frozen_to_headers_in_keys
        my $keys_slot_ref=\$uri_args_frozen_to_headers_in_keys{$W->{"uri_args_frozen"}};
        my @headers_in_keys=tied(%{$W->{"headers_in_RecordKeys"}})->accessed();
        my $keys_stored=$$keys_slot_ref;
        if (!$keys_stored || !Data::Compare::Compare(\@headers_in_keys,$keys_stored)) {
                if ($keys_stored) {
                        cluck "Non-matching generated 'headers_in_keys' per 'uri_args_frozen' key:\n"
                                        .Dumper(\@headers_in_keys,$keys_stored);
                        }
                # Build or possibly prevent such further warn dupes:
                $$keys_slot_ref=\@headers_in_keys;
                # Build or regenerate as obsoleted now:
                $W->{"uri_args_headers_in_frozen"}=uri_args_headers_in_frozen_get(\@headers_in_keys);
                }

        # Prepare 'headers_out' for the future reuse:
        my %headers_out;
        $headers_out{"Content-MD5"}=$W->{"digest-md5"}->b64digest();
        # In fact we could also use MD5 for ETag as if we know ETag we also know MD5.
        # But this way we do not need to calculate MD5 and we still can provide such ETag. So.
        # $W->{"r"}->set_etag() ?
        $headers_out{"ETag"}='"'.Digest::MD5::md5_base64($W->{"uri_args_headers_in_frozen"}).'"';
        # $W->{"r"}->set_content_length() ?
        $headers_out{"Content-Length"}=$W->{"r"}->bytes_sent();
        # "Vary" lists the accessed request header keys; any key starting
        # with '_' (or a "*" key itself) turns the whole list to just "*".
        my %Vary=map(($_=>1),@headers_in_keys);
        my @keys_underscore=grep(/^_/,keys(%Vary));
        if (@keys_underscore) {
                delete @Vary{@keys_underscore};
                $Vary{"*"}=1;
                }
        if ($Vary{"*"}) {
                %Vary=("*"=>1);
                }
        $headers_out{"Vary"}=join(", ",sort keys(%Vary));
        # $W->{"r"}->set_last_modified() ?
        $headers_out{"Last-Modified"}=cache_finish_last_modified();

        # Fill-in/check: %uri_args_headers_in_frozen_to_headers_out
        my $headers_out_slot_ref=\$uri_args_headers_in_frozen_to_headers_out{$W->{"uri_args_headers_in_frozen"}};
        my $headers_out_stored=$$headers_out_slot_ref;
        if (!$headers_out_stored || !Data::Compare::Compare(\%headers_out,$headers_out_stored)) {
                if ($headers_out_stored) {
                        cluck "Non-matching generated 'headers_out' per 'uri_args_headers_in_frozen' key:\n"
                                        .Dumper(\%headers_out,$headers_out_stored);
                        }
                # Build or possibly prevent such further warn dupes:
                $$headers_out_slot_ref=\%headers_out;
                }

###print STDERR Dumper(\%uri_args_frozen_to_headers_in_keys,\%uri_args_headers_in_frozen_to_headers_out);
}
1036
# Set the HTTP response headers and print the opening part of the XHTML
# document up to (and including) $W->{"heading"}.
# Invoked as a class method; $class is used to call &Negotiate_choose.
# Steps:
#  - set the Content-* headers (unless $W->{"header_only"}) and "Cache-Control"
#  - choose the MIME type: "text/html" or "application/xhtml+xml"
#  - call &cache_start
#  - print the XML declaration (for the XML MIME types), DOCTYPE, <head>
#    and the opening <body> tag
# Returns early if $W->{"header_only"} is set, if the heading has been
# already printed ($W->{"heading_done"}) or - after the XML declaration -
# if $W->{"xml_header_only"} is set.
sub heading()
{
my($class)=@_;

        if (!$W->{"header_only"}) {
                header("Content-Style-Type"=>"text/css");
                header("Content-Script-Type"=>"text/javascript");
                # $W->{"r"}->content_languages() ?
                for my $language ($W->{"language"}) {
                        header("Content-Language"=>$language) if $language;
                        }
                }
        # TODO: Support also: private
        header("Cache-Control"=>"public");      # HTTP/1.1

        # $ENV{"CLIENT_CHARSET"} ignored (mod_czech support dropped!)
        my $client_charset=$W->{"force_charset"} || "us-ascii";

        # Workaround bug
        #   https://bugzilla.mozilla.org/show_bug.cgi?id=120556
        # of at least
        #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
        my $mime;
        # http://validator.w3.org/ does not send ANY "Accept" headers!
        if (!$W->{"headers_in"}{"Accept"}
                        && ($W->{"headers_in"}{"User-Agent"}||"")=~m{^W3C_Validator/}i) {
                $mime="application/xhtml+xml";
                }
        if (!$mime) {
                $mime=$class->Negotiate_choose([
                                # Put the fallback variant as the first one.
                                # Rate both variants the same to prefer "text/html" for undecided clients.
                                # At least
                                #   Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8b) Gecko/20050217
                                # prefers "application/xhtml+xml" over "text/html" itself:
                                #   text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
                                negotiate_variant(
                                                "id"=>"text/html",
                                                "content-type"=>"text/html",
                                                "qs"=>0.6,
                                                "charset"=>$client_charset,
                                                "lang"=>$W->{"language"},
                                                ),
                                negotiate_variant(
                                                "id"=>"application/xhtml+xml",
                                                "content-type"=>"application/xhtml+xml",
                                                "qs"=>0.6,
                                                "charset"=>$client_charset,
                                                "lang"=>$W->{"language"},
                                                ),
                                # application/xml ?
                                # text/xml ?
                                ]);
                }
        # mod_perl doc: If you set this header via the headers_out table directly, it
        #               will be ignored by Apache. So do not do that.
        $W->{"r"}->content_type("$mime; charset=$client_charset");

        cache_start();
        return if $W->{"header_only"};
        # We still can append headers before we put out some text.
        # FIXME: It is not clean to still append them without overwriting.
        return if $W->{"heading_done"}++;

        if ($mime=~m{^application/\w+[+]xml$}) {
                Wprint '<?xml version="1.0" encoding="'.$client_charset.'"?>'."\n";
                }
        return if $W->{"xml_header_only"};
        Wprint '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'."\n";
        Wprint '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="'.$W->{"language"}.'">'."\n";
        my $title=$W->{"title_prefix"}.($W->{"title"} ? ': '.$W->{"title"} : "");
        # Do not: cluck if $title=~/[<>]/;
        # as it is not solved just by: &a_href_inhibit
        # as sometimes titles use also: <i>...</i>
        $title=~s#<[^>]*>##g;
        Wprint "<head>";
        Wprint "<title>$title</title>\n";
        if ($W->{"have_css"}) {
                # Everything can get overridden later.
                # $W->{"css_push"} may be a single stylesheet or an array reference of them.
                my @stylesheets=("/My/Web.css",map((!$_ ? () : ("ARRAY" ne ref($_) ? $_ : @$_)),$W->{"css_push"}));
                for my $css (@stylesheets) {
                        Wprint <<"HERE";
<link rel="stylesheet" type="text/css" href="@{[ uri_escaped(path_web($css)) ]}" />
HERE
                        }
                if ($W->{"css_inherit"}) {
                        Wprint <<"HERE";
<script type="text/javascript" src="@{[ uri_escaped(path_web('/My/css_inherit.js')) ]}" />
HERE
                        }
                }
        Wprint '<meta name="robots" content="'.($W->{"indexme"} ? "" : "no" ).'index,follow" />'."\n";
        Wprint $W->{"head"};
        for my $type (qw(prev next index contents start up)) {
                for my $rel_target ($W->{"rel_$type"}) {
                        Wprint '<link rel="'.$type.'" href="'.uri_escaped(path_web($rel_target)).'" />'."\n" if $rel_target;
                        }
                }
        Wprint "</head><body";
#       Wprint ' bgcolor="black" text="white" link="aqua" vlink="teal"'
#                       if $W->{"browser"}->netscape() && (!$W->{"browser"}->major() || $W->{"browser"}->major()<=4);
        Wprint $W->{"body_attr"};
        Wprint ">\n";

        for my $heading_text ($W->{"heading"}) {
                Wprint $heading_text if $heading_text;
                }
}
1134
# The package is compiled now: drop the "__My::Web_init" marker which was
# set by the BEGIN block at the top of this file.
BEGIN {
        delete($W->{"__My::Web_init"});
        }
1138
1139 1;