4
# The Intltool Message Merger
6
# Copyright (C) 2000, 2002 Free Software Foundation.
7
# Copyright (C) 2000, 2001 Eazel, Inc
9
# Intltool is free software; you can redistribute it and/or
10
# modify it under the terms of the GNU General Public License
11
# version 2 published by the Free Software Foundation.
13
# Intltool is distributed in the hope that it will be useful,
14
# but WITHOUT ANY WARRANTY; without even the implied warranty of
15
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
# General Public License for more details.
18
# You should have received a copy of the GNU General Public License
19
# along with this program; if not, write to the Free Software
20
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
# As a special exception to the GNU General Public License, if you
23
# distribute this file as part of a program that contains a
24
# configuration script generated by Autoconf, you may include it under
25
# the same distribution terms that you use for the rest of that program.
27
# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28
# Kenneth Christiansen <kenneth@gnu.org>
29
# Darin Adler <darin@bentspoon.com>
31
# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
34
## Release information
35
my $PROGRAM = "intltool-merge";
36
my $PACKAGE = "intltool";
44
## Scalars used by the option stuff
48
my $XML_STYLE_ARG = 0;
49
my $KEYS_STYLE_ARG = 0;
50
my $DESKTOP_STYLE_ARG = 0;
51
my $SCHEMAS_STYLE_ARG = 0;
52
my $RFC822DEB_STYLE_ARG = 0;
54
my $PASS_THROUGH_ARG = 0;
62
"version" => \$VERSION_ARG,
63
"quiet|q" => \$QUIET_ARG,
64
"oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
65
"ba-style|b" => \$BA_STYLE_ARG,
66
"xml-style|x" => \$XML_STYLE_ARG,
67
"keys-style|k" => \$KEYS_STYLE_ARG,
68
"desktop-style|d" => \$DESKTOP_STYLE_ARG,
69
"schemas-style|s" => \$SCHEMAS_STYLE_ARG,
70
"rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
71
"pass-through|p" => \$PASS_THROUGH_ARG,
72
"utf8|u" => \$UTF8_ARG,
73
"cache|c=s" => \$cache_file
80
my %po_files_by_lang = ();
81
my %translations = ();
83
# Use this instead of \w for XML files to handle more possible characters.
84
my $w = "[-A-Za-z0-9._:]";
86
# XML quoted string contents
99
elsif ($BA_STYLE_ARG && @ARGV > 2)
103
&ba_merge_translations;
106
elsif ($XML_STYLE_ARG && @ARGV > 2)
111
&xml_merge_translations;
114
elsif ($KEYS_STYLE_ARG && @ARGV > 2)
119
&keys_merge_translations;
122
elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
126
&desktop_merge_translations;
129
elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
133
&schemas_merge_translations;
136
elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
140
&rfc822deb_merge_translations;
150
## Sub for printing release information
153
print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
154
print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
155
print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
156
print "Copyright (C) 2000-2001 Eazel, Inc.\n";
157
print "This is free software; see the source for copying conditions. There is NO\n";
158
print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
162
## Sub for printing usage information
165
print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
166
print "Generates an output file that includes translated versions of some attributes,\n";
167
print "from an untranslated source and a po directory that includes translations.\n\n";
168
print " -b, --ba-style includes translations in the bonobo-activation style\n";
169
print " -d, --desktop-style includes translations in the desktop style\n";
170
print " -k, --keys-style includes translations in the keys style\n";
171
print " -s, --schemas-style includes translations in the schemas style\n";
172
print " -r, --rfc822deb-style includes translations in the RFC822 style\n";
173
print " -x, --xml-style includes translations in the standard xml style\n";
174
print " -u, --utf8 convert all strings to UTF-8 before merging\n";
175
print " -p, --pass-through use strings as found in .po files, without\n";
176
print " conversion (STRONGLY unrecommended with -x)\n";
177
print " -q, --quiet suppress most messages\n";
178
print " --help display this help and exit\n";
179
print " --version output version information and exit\n";
180
print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
181
print "<xml-i18n-tools-list\@gnome.org>.\n";
186
## Sub for printing error messages
189
print "Try `${PROGRAM} --help' for more information.\n";
196
print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
207
&get_translation_database;
210
# General-purpose code for looking up translations in .po files
215
$tmp =~ s/^.*\/(.*)\.po$/$1/;
221
for my $po_file (glob "$PO_DIR/*.po") {
222
$po_files_by_lang{po_file2lang($po_file)} = $po_file;
226
sub get_local_charset
229
my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
231
# seek character encoding aliases in charset.alias (glib)
233
if (open CHARSET_ALIAS, $alias_file)
235
while (<CHARSET_ALIAS>)
238
return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
244
# if not found, return input string
251
my ($in_po_file) = @_;
254
open IN_PO_FILE, $in_po_file or die;
257
## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
258
if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
268
print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
269
$encoding = "ISO-8859-1";
272
$encoding = get_local_charset($encoding);
277
sub utf8_sanity_check
281
if (!$PASS_THROUGH_ARG)
283
$PASS_THROUGH_ARG="1";
288
sub get_translation_database
291
&get_cached_translation_database;
293
&create_translation_database;
297
sub get_newest_po_age
301
foreach my $file (values %po_files_by_lang)
303
my $file_age = -M $file;
304
$newest_age = $file_age if !$newest_age || $file_age < $newest_age;
312
print "Generating and caching the translation database\n" unless $QUIET_ARG;
314
&create_translation_database;
316
open CACHE, ">$cache_file" || die;
317
print CACHE join "\x01", %translations;
323
print "Found cached translation database\n" unless $QUIET_ARG;
326
open CACHE, "<$cache_file" || die;
332
%translations = split "\x01", $contents;
335
sub get_cached_translation_database
337
my $cache_file_age = -M $cache_file;
338
if (defined $cache_file_age)
340
if ($cache_file_age <= &get_newest_po_age)
345
print "Found too-old cached translation database\n" unless $QUIET_ARG;
351
sub create_translation_database
353
for my $lang (keys %po_files_by_lang)
355
my $po_file = $po_files_by_lang{$lang};
359
my $encoding = get_po_encoding ($po_file);
361
if (lc $encoding eq "utf-8")
363
open PO_FILE, "<$po_file";
367
my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
368
open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
373
open PO_FILE, "<$po_file";
384
$nextfuzzy = 1 if /^#, fuzzy/;
386
if (/^msgid "((\\.|[^\\])*)"/ )
388
$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
395
$msgid = unescape_po_string($1);
402
if (/^msgstr "((\\.|[^\\])*)"/)
404
$msgstr = unescape_po_string($1);
409
if (/^"((\\.|[^\\])*)"/)
411
$msgid .= unescape_po_string($1) if $inmsgid;
412
$msgstr .= unescape_po_string($1) if $inmsgstr;
415
$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
423
sub unescape_one_sequence
427
return "\\" if $sequence eq "\\\\";
428
return "\"" if $sequence eq "\\\"";
429
return "\n" if $sequence eq "\\n";
431
# gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
432
# \xXX (hex) and has a comment saying they want to handle \u and \U.
437
sub unescape_po_string
441
$string =~ s/(\\.)/unescape_one_sequence($1)/eg;
446
## NOTE: deal with < - < but not > - > because it seems its ok to have
447
## > in the entity. For further info please look at #84738.
462
my ($pre_encoded) = @_;
464
my @list_of_chars = unpack ('C*', $pre_encoded);
466
if ($PASS_THROUGH_ARG)
468
return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
472
return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
476
sub entity_encode_int_minimalist
478
return """ if $_ == 34;
479
return "&" if $_ == 38;
480
return "'" if $_ == 39;
481
return "<" if $_ == 60;
485
sub entity_encode_int_even_high_bit
487
if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
489
# the ($_ > 127) should probably be removed
490
return "&#" . $_ . ";";
498
sub entity_encoded_translation
500
my ($lang, $string) = @_;
502
my $translation = $translations{$lang, $string};
503
return $string if !$translation;
504
return entity_encode ($translation);
507
## XML (bonobo-activation specific) merge code
509
sub ba_merge_translations
514
local $/; # slurp mode
515
open INPUT, "<$FILE" or die "can't open $FILE: $!";
520
open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
522
while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
526
my $node = $2 . "\n";
530
while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
531
push @strings, entity_decode($3);
536
for my $string (@strings)
538
for my $lang (keys %po_files_by_lang)
540
$langs{$lang} = 1 if $translations{$lang, $string};
544
for my $lang (sort keys %langs)
547
s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
548
s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
553
print OUTPUT $source;
559
## XML (non-bonobo-activation) merge code
561
sub xml_merge_translations
566
local $/; # slurp mode
567
open INPUT, "<$FILE" or die "can't open $FILE: $!";
572
open OUTPUT, ">$OUTFILE" or die;
574
# FIXME: support attribute translations
576
# Empty nodes never need translation, so unmark all of them.
577
# For example, <_foo/> is just replaced by <foo/>.
578
$source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
580
# Support for <_foo>blah</_foo> style translations.
581
while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s)
589
print OUTPUT "$spaces<$tag>$string</$tag>\n";
591
$string =~ s/\s+/ /g;
594
$string = entity_decode($string);
596
for my $lang (sort keys %po_files_by_lang)
598
my $translation = $translations{$lang, $string};
599
next if !$translation;
600
$translation = entity_encode($translation);
601
print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
605
print OUTPUT $source;
610
sub keys_merge_translations
612
open INPUT, "<${FILE}" or die;
613
open OUTPUT, ">${OUTFILE}" or die;
617
if (s/^(\s*)_(\w+=(.*))/$1$2/)
623
my $non_translated_line = $_;
625
for my $lang (sort keys %po_files_by_lang)
627
my $translation = $translations{$lang, $string};
628
next if !$translation;
630
$_ = $non_translated_line;
631
s/(\w+)=.*/[$lang]$1=$translation/;
645
sub desktop_merge_translations
647
open INPUT, "<${FILE}" or die;
648
open OUTPUT, ">${OUTFILE}" or die;
652
if (s/^(\s*)_(\w+=(.*))/$1$2/)
658
my $non_translated_line = $_;
660
for my $lang (sort keys %po_files_by_lang)
662
my $translation = $translations{$lang, $string};
663
next if !$translation;
665
$_ = $non_translated_line;
666
s/(\w+)=.*/${1}[$lang]=$translation/;
680
sub schemas_merge_translations
685
local $/; # slurp mode
686
open INPUT, "<$FILE" or die "can't open $FILE: $!";
691
open OUTPUT, ">$OUTFILE" or die;
693
# FIXME: support attribute translations
695
# Empty nodes never need translation, so unmark all of them.
696
# For example, <_foo/> is just replaced by <foo/>.
697
$source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
701
(\s+)(<locale\ name="C">(\s*)
702
(<default>\s*(.*?)\s*<\/default>)?(\s*)
703
(<short>\s*(.*?)\s*<\/short>)?(\s*)
704
(<long>\s*(.*?)\s*<\/long>)?(\s*)
710
my $locale_start_spaces = $2 ? $2 : '';
711
my $default_spaces = $4 ? $4 : '';
712
my $short_spaces = $7 ? $7 : '';
713
my $long_spaces = $10 ? $10 : '';
714
my $locale_end_spaces = $13 ? $13 : '';
715
my $c_default_block = $3 ? $3 : '';
716
my $default_string = $6 ? $6 : '';
717
my $short_string = $9 ? $9 : '';
718
my $long_string = $12 ? $12 : '';
720
$c_default_block =~ s/default>\[.*?\]/default>/s;
722
print OUTPUT "$locale_start_spaces$c_default_block";
724
$default_string =~ s/\s+/ /g;
725
$default_string = entity_decode($default_string);
726
$short_string =~ s/\s+/ /g;
727
$short_string = entity_decode($short_string);
728
$long_string =~ s/\s+/ /g;
729
$long_string = entity_decode($long_string);
731
for my $lang (sort keys %po_files_by_lang)
733
my $default_translation = $translations{$lang, $default_string};
734
my $short_translation = $translations{$lang, $short_string};
735
my $long_translation = $translations{$lang, $long_string};
737
next if (!$default_translation && !$short_translation &&
740
print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
742
print OUTPUT "$default_spaces";
744
if ($default_translation)
746
$default_translation = entity_encode($default_translation);
747
print OUTPUT "<default>$default_translation</default>";
750
print OUTPUT "$short_spaces";
752
if ($short_translation)
754
$short_translation = entity_encode($short_translation);
755
print OUTPUT "<short>$short_translation</short>";
758
print OUTPUT "$long_spaces";
760
if ($long_translation)
762
$long_translation = entity_encode($long_translation);
763
print OUTPUT "<long>$long_translation</long>";
766
print OUTPUT "$locale_end_spaces</locale>";
770
print OUTPUT $source;
775
sub rfc822deb_merge_translations
779
$Text::Wrap::huge = 'overflow';
782
local $/; # slurp mode
783
open INPUT, "<$FILE" or die "can't open $FILE: $!";
788
open OUTPUT, ">${OUTFILE}" or die;
790
while ($source =~ /(^|\n+)(_)?([^:_\n]+)(:\s*)(.*?)(?=\n[\S\n]|$)/sg)
793
my $non_translated_line = $3.$4;
795
my $is_translatable = defined($2);
796
# Remove [] dummy strings
797
$string =~ s/\[\s[^\[\]]*\]$//;
798
$non_translated_line .= $string;
800
print OUTPUT $sep.$non_translated_line;
802
if ($is_translatable)
804
my @str_list = rfc822deb_split($string);
806
for my $lang (sort keys %po_files_by_lang)
808
my $is_translated = 1;
809
my $str_translated = '';
812
for my $str (@str_list)
814
my $translation = $translations{$lang, $str};
822
# $translation may also contain [] dummy
823
# strings, mostly to indicate an empty string
824
$translation =~ s/\[\s[^\[\]]*\]$//;
829
Text::Tabs::expand($translation) .
834
$str_translated .= Text::Tabs::expand(
835
Text::Wrap::wrap(' ', ' ', $translation)) .
840
# To fix some problems with Text::Wrap::wrap
841
$str_translated =~ s/(\n )+\n/\n .\n/g;
843
next unless $is_translated;
845
$str_translated =~ s/\n \.\n$//;
846
$str_translated =~ s/\s+$//;
848
$_ = $non_translated_line;
849
s/^(\w+):\s*.*/$sep${1}-$lang: $str_translated/s;
862
# Debian defines a special way to deal with rfc822-style files:
863
# when a value contain newlines, it consists of
864
# 1. a short form (first line)
865
# 2. a long description, all lines begin with a space,
866
# and paragraphs are separated by a single dot on a line
867
# This routine returns an array of all paragraphs, and reformat
871
return ($text) if $text !~ /\n/;
873
$text =~ s/([^\n]*)\n//;
877
for my $line (split (/\n/, $text))
889
elsif ($line =~ /^\s/)
891
# Line which must not be reformatted
892
$str .= "\n" if length ($str) && $str !~ /\n$/;
897
# Continuation line, remove newline
898
$str .= " " if length ($str) && $str !~ /[\n ]$/;
904
push(@list, $str) if length ($str);