4 # The Intltool Message Merger
6 # Copyright (C) 2000, 2003 Free Software Foundation.
7 # Copyright (C) 2000, 2001 Eazel, Inc
9 # Intltool is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # version 2 published by the Free Software Foundation.
13 # Intltool is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 # As a special exception to the GNU General Public License, if you
23 # distribute this file as part of a program that contains a
24 # configuration script generated by Autoconf, you may include it under
25 # the same distribution terms that you use for the rest of that program.
27 # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28 # Kenneth Christiansen <kenneth@gnu.org>
29 # Darin Adler <darin@bentspoon.com>
31 # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
34 ## Release information
35 my $PROGRAM = "intltool-merge";
36 my $PACKAGE = "intltool";
44 ## Scalars used by the option stuff
48 my $XML_STYLE_ARG = 0;
49 my $KEYS_STYLE_ARG = 0;
50 my $DESKTOP_STYLE_ARG = 0;
51 my $SCHEMAS_STYLE_ARG = 0;
52 my $RFC822DEB_STYLE_ARG = 0;
54 my $PASS_THROUGH_ARG = 0;
62 "version" => \$VERSION_ARG,
63 "quiet|q" => \$QUIET_ARG,
64 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
65 "ba-style|b" => \$BA_STYLE_ARG,
66 "xml-style|x" => \$XML_STYLE_ARG,
67 "keys-style|k" => \$KEYS_STYLE_ARG,
68 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
69 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
70 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
71 "pass-through|p" => \$PASS_THROUGH_ARG,
72 "utf8|u" => \$UTF8_ARG,
73 "cache|c=s" => \$cache_file
80 my %po_files_by_lang = ();
81 my %translations = ();
82 my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
84 # Use this instead of \w for XML files to handle more possible characters.
85 my $w = "[-A-Za-z0-9._:]";
87 # XML quoted string contents
100 elsif ($BA_STYLE_ARG && @ARGV > 2)
104 &ba_merge_translations;
107 elsif ($XML_STYLE_ARG && @ARGV > 2)
112 &xml_merge_translations;
115 elsif ($KEYS_STYLE_ARG && @ARGV > 2)
120 &keys_merge_translations;
123 elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
127 &desktop_merge_translations;
130 elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
134 &schemas_merge_translations;
137 elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
141 &rfc822deb_merge_translations;
151 ## Sub for printing release information
155 ${PROGRAM} (${PACKAGE}) ${VERSION}
156 Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
158 Copyright (C) 2000-2003 Free Software Foundation, Inc.
159 Copyright (C) 2000-2001 Eazel, Inc.
160 This is free software; see the source for copying conditions. There is NO
161 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
166 ## Sub for printing usage information
170 Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
171 Generates an output file that includes some localized attributes from an
172 untranslated source file.
174 Mandatory options: (exactly one must be specified)
175 -b, --ba-style includes translations in the bonobo-activation style
176 -d, --desktop-style includes translations in the desktop style
177 -k, --keys-style includes translations in the keys style
178 -s, --schemas-style includes translations in the schemas style
179 -r, --rfc822deb-style includes translations in the RFC822 style
180 -x, --xml-style includes translations in the standard xml style
183 -u, --utf8 convert all strings to UTF-8 before merging
184 -p, --pass-through use strings as found in .po files, without
185 conversion (STRONGLY unrecommended with -x)
186 -c, --cache=FILE specify cache file name
187 (usually \$top_builddir/po/.intltool-merge-cache)
188 -q, --quiet suppress most messages
189 --help display this help and exit
190 --version output version information and exit
192 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
193 or send email to <xml-i18n-tools\@gnome.org>.
199 ## Sub for printing error messages
202 print STDERR "Try `${PROGRAM} --help' for more information.\n";
209 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
220 &get_translation_database;
223 # General-purpose code for looking up translations in .po files
228 $tmp =~ s/^.*\/(.*)\.po$/$1/;
234 for my $po_file (glob "$PO_DIR/*.po") {
235 $po_files_by_lang{po_file2lang($po_file)} = $po_file;
239 sub get_local_charset
242 my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/gnome/head/INSTALL/lib/charset.alias";
244 # seek character encoding aliases in charset.alias (glib)
246 if (open CHARSET_ALIAS, $alias_file)
248 while (<CHARSET_ALIAS>)
251 return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
257 # if not found, return input string
264 my ($in_po_file) = @_;
267 open IN_PO_FILE, $in_po_file or die;
270 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
271 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
281 print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
282 $encoding = "ISO-8859-1";
285 system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
287 $encoding = get_local_charset($encoding);
293 sub utf8_sanity_check
297 if (!$PASS_THROUGH_ARG)
299 $PASS_THROUGH_ARG="1";
304 sub get_translation_database
307 &get_cached_translation_database;
309 &create_translation_database;
313 sub get_newest_po_age
317 foreach my $file (values %po_files_by_lang)
319 my $file_age = -M $file;
320 $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
323 $newest_age = 0 if !$newest_age;
330 print "Generating and caching the translation database\n" unless $QUIET_ARG;
332 &create_translation_database;
334 open CACHE, ">$cache_file" || die;
335 print CACHE join "\x01", %translations;
341 print "Found cached translation database\n" unless $QUIET_ARG;
344 open CACHE, "<$cache_file" || die;
350 %translations = split "\x01", $contents;
353 sub get_cached_translation_database
355 my $cache_file_age = -M $cache_file;
356 if (defined $cache_file_age)
358 if ($cache_file_age <= &get_newest_po_age)
363 print "Found too-old cached translation database\n" unless $QUIET_ARG;
369 sub create_translation_database
371 for my $lang (keys %po_files_by_lang)
373 my $po_file = $po_files_by_lang{$lang};
377 my $encoding = get_po_encoding ($po_file);
379 if (lc $encoding eq "utf-8")
381 open PO_FILE, "<$po_file";
385 print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
387 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
392 open PO_FILE, "<$po_file";
403 $nextfuzzy = 1 if /^#, fuzzy/;
405 if (/^msgid "((\\.|[^\\])*)"/ )
407 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
414 $msgid = unescape_po_string($1);
421 if (/^msgstr "((\\.|[^\\])*)"/)
423 $msgstr = unescape_po_string($1);
428 if (/^"((\\.|[^\\])*)"/)
430 $msgid .= unescape_po_string($1) if $inmsgid;
431 $msgstr .= unescape_po_string($1) if $inmsgstr;
434 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
442 sub unescape_one_sequence
446 return "\\" if $sequence eq "\\\\";
447 return "\"" if $sequence eq "\\\"";
448 return "\n" if $sequence eq "\\n";
450 # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
451 # \xXX (hex) and has a comment saying they want to handle \u and \U.
456 sub unescape_po_string
460 $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
465 ## NOTE: deal with < - < but not > - > because it seems its ok to have
466 ## > in the entity. For further info please look at #84738.
479 # entity_encode: (string)
481 # Encode the given string to XML format (encode '<' etc). It also
482 # encodes high bit if not in UTF-8 mode.
486 my ($pre_encoded) = @_;
488 my @list_of_chars = unpack ('C*', $pre_encoded);
490 if ($PASS_THROUGH_ARG)
492 return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
496 # with UTF-8 we only encode minimalistic
497 return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
501 sub entity_encode_int_minimalist
503 return """ if $_ == 34;
504 return "&" if $_ == 38;
505 return "'" if $_ == 39;
506 return "<" if $_ == 60;
510 sub entity_encode_int_even_high_bit
512 if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
514 # the ($_ > 127) should probably be removed
515 return "&#" . $_ . ";";
523 sub entity_encoded_translation
525 my ($lang, $string) = @_;
527 my $translation = $translations{$lang, $string};
528 return $string if !$translation;
529 return entity_encode ($translation);
532 ## XML (bonobo-activation specific) merge code
534 sub ba_merge_translations
539 local $/; # slurp mode
540 open INPUT, "<$FILE" or die "can't open $FILE: $!";
545 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
547 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
551 my $node = $2 . "\n";
555 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
556 push @strings, entity_decode($3);
561 for my $string (@strings)
563 for my $lang (keys %po_files_by_lang)
565 $langs{$lang} = 1 if $translations{$lang, $string};
569 for my $lang (sort keys %langs)
572 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
573 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
578 print OUTPUT $source;
584 ## XML (non-bonobo-activation) merge code
586 sub xml_merge_translations
591 local $/; # slurp mode
592 open INPUT, "<$FILE" or die "can't open $FILE: $!";
597 open OUTPUT, ">$OUTFILE" or die;
599 # FIXME: support attribute translations
601 # Empty nodes never need translation, so unmark all of them.
602 # For example, <_foo/> is just replaced by <foo/>.
603 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
605 # Support for <_foo>blah</_foo> style translations.
606 while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s)
614 print OUTPUT "$spaces<$tag>$string</$tag>\n";
616 $string =~ s/\s+/ /g;
619 $string = entity_decode($string);
621 for my $lang (sort keys %po_files_by_lang)
623 my $translation = $translations{$lang, $string};
624 next if !$translation;
625 $translation = entity_encode($translation);
626 print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
630 print OUTPUT $source;
635 sub keys_merge_translations
637 open INPUT, "<${FILE}" or die;
638 open OUTPUT, ">${OUTFILE}" or die;
642 if (s/^(\s*)_(\w+=(.*))/$1$2/)
648 my $non_translated_line = $_;
650 for my $lang (sort keys %po_files_by_lang)
652 my $translation = $translations{$lang, $string};
653 next if !$translation;
655 $_ = $non_translated_line;
656 s/(\w+)=.*/[$lang]$1=$translation/;
670 sub desktop_merge_translations
672 open INPUT, "<${FILE}" or die;
673 open OUTPUT, ">${OUTFILE}" or die;
677 if (s/^(\s*)_(\w+=(.*))/$1$2/)
683 my $non_translated_line = $_;
685 for my $lang (sort keys %po_files_by_lang)
687 my $translation = $translations{$lang, $string};
688 next if !$translation;
690 $_ = $non_translated_line;
691 s/(\w+)=.*/${1}[$lang]=$translation/;
705 sub schemas_merge_translations
710 local $/; # slurp mode
711 open INPUT, "<$FILE" or die "can't open $FILE: $!";
716 open OUTPUT, ">$OUTFILE" or die;
718 # FIXME: support attribute translations
720 # Empty nodes never need translation, so unmark all of them.
721 # For example, <_foo/> is just replaced by <foo/>.
722 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
726 (\s+)(<locale\ name="C">(\s*)
727 (<default>\s*(.*?)\s*<\/default>)?(\s*)
728 (<short>\s*(.*?)\s*<\/short>)?(\s*)
729 (<long>\s*(.*?)\s*<\/long>)?(\s*)
735 my $locale_start_spaces = $2 ? $2 : '';
736 my $default_spaces = $4 ? $4 : '';
737 my $short_spaces = $7 ? $7 : '';
738 my $long_spaces = $10 ? $10 : '';
739 my $locale_end_spaces = $13 ? $13 : '';
740 my $c_default_block = $3 ? $3 : '';
741 my $default_string = $6 ? $6 : '';
742 my $short_string = $9 ? $9 : '';
743 my $long_string = $12 ? $12 : '';
745 $c_default_block =~ s/default>\[.*?\]/default>/s;
747 print OUTPUT "$locale_start_spaces$c_default_block";
749 $default_string =~ s/\s+/ /g;
750 $default_string = entity_decode($default_string);
751 $short_string =~ s/\s+/ /g;
752 $short_string = entity_decode($short_string);
753 $long_string =~ s/\s+/ /g;
754 $long_string = entity_decode($long_string);
756 for my $lang (sort keys %po_files_by_lang)
758 my $default_translation = $translations{$lang, $default_string};
759 my $short_translation = $translations{$lang, $short_string};
760 my $long_translation = $translations{$lang, $long_string};
762 next if (!$default_translation && !$short_translation &&
765 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
767 print OUTPUT "$default_spaces";
769 if ($default_translation)
771 $default_translation = entity_encode($default_translation);
772 print OUTPUT "<default>$default_translation</default>";
775 print OUTPUT "$short_spaces";
777 if ($short_translation)
779 $short_translation = entity_encode($short_translation);
780 print OUTPUT "<short>$short_translation</short>";
783 print OUTPUT "$long_spaces";
785 if ($long_translation)
787 $long_translation = entity_encode($long_translation);
788 print OUTPUT "<long>$long_translation</long>";
791 print OUTPUT "$locale_end_spaces</locale>";
795 print OUTPUT $source;
800 sub rfc822deb_merge_translations
804 $Text::Wrap::huge = 'overflow';
807 local $/; # slurp mode
808 open INPUT, "<$FILE" or die "can't open $FILE: $!";
813 open OUTPUT, ">${OUTFILE}" or die;
815 while ($source =~ /(^|\n+)(_)?([^:_\n]+)(:\s*)(.*?)(?=\n[\S\n]|$)/sg)
818 my $non_translated_line = $3.$4;
820 my $is_translatable = defined($2);
821 # Remove [] dummy strings
822 $string =~ s/\[\s[^\[\]]*\]$//;
823 $non_translated_line .= $string;
825 print OUTPUT $sep.$non_translated_line;
827 if ($is_translatable)
829 my @str_list = rfc822deb_split($string);
831 for my $lang (sort keys %po_files_by_lang)
833 my $is_translated = 1;
834 my $str_translated = '';
837 for my $str (@str_list)
839 my $translation = $translations{$lang, $str};
847 # $translation may also contain [] dummy
848 # strings, mostly to indicate an empty string
849 $translation =~ s/\[\s[^\[\]]*\]$//;
854 Text::Tabs::expand($translation) .
859 $str_translated .= Text::Tabs::expand(
860 Text::Wrap::wrap(' ', ' ', $translation)) .
865 # To fix some problems with Text::Wrap::wrap
866 $str_translated =~ s/(\n )+\n/\n .\n/g;
868 next unless $is_translated;
870 $str_translated =~ s/\n \.\n$//;
871 $str_translated =~ s/\s+$//;
873 $_ = $non_translated_line;
874 s/^(\w+):\s*.*/$sep${1}-$lang: $str_translated/s;
887 # Debian defines a special way to deal with rfc822-style files:
888 # when a value contain newlines, it consists of
889 # 1. a short form (first line)
890 # 2. a long description, all lines begin with a space,
891 # and paragraphs are separated by a single dot on a line
892 # This routine returns an array of all paragraphs, and reformat
896 return ($text) if $text !~ /\n/;
898 $text =~ s/([^\n]*)\n//;
902 for my $line (split (/\n/, $text))
914 elsif ($line =~ /^\s/)
916 # Line which must not be reformatted
917 $str .= "\n" if length ($str) && $str !~ /\n$/;
922 # Continuation line, remove newline
923 $str .= " " if length ($str) && $str !~ /[\n ]$/;
929 push(@list, $str) if length ($str);