4
# The Intltool Message Merger
6
# Copyright (C) 2000, 2002 Free Software Foundation.
7
# Copyright (C) 2000, 2001 Eazel, Inc
9
# Intltool is free software; you can redistribute it and/or
10
# modify it under the terms of the GNU General Public License
11
# version 2 published by the Free Software Foundation.
13
# Intltool is distributed in the hope that it will be useful,
14
# but WITHOUT ANY WARRANTY; without even the implied warranty of
15
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
# General Public License for more details.
18
# You should have received a copy of the GNU General Public License
19
# along with this program; if not, write to the Free Software
20
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
# As a special exception to the GNU General Public License, if you
23
# distribute this file as part of a program that contains a
24
# configuration script generated by Autoconf, you may include it under
25
# the same distribution terms that you use for the rest of that program.
27
# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28
# Kenneth Christiansen <kenneth@gnu.org>
29
# Darin Adler <darin@bentspoon.com>
31
# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
34
## Release information
35
my $PROGRAM = "intltool-merge";
36
my $PACKAGE = "@PACKAGE@";
37
my $VERSION = "@VERSION@";
43
## Scalars used by the option stuff
47
my $XML_STYLE_ARG = 0;
48
my $KEYS_STYLE_ARG = 0;
49
my $DESKTOP_STYLE_ARG = 0;
51
my $PASS_THROUGH_ARG = 0;
59
"version" => \$VERSION_ARG,
60
"quiet|q" => \$QUIET_ARG,
61
"oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
62
"ba-style|b" => \$BA_STYLE_ARG,
63
"xml-style|x" => \$XML_STYLE_ARG,
64
"keys-style|k" => \$KEYS_STYLE_ARG,
65
"desktop-style|d" => \$DESKTOP_STYLE_ARG,
66
"pass-through|p" => \$PASS_THROUGH_ARG,
67
"utf8|u" => \$UTF8_ARG,
68
"cache|c=s" => \$cache_file
75
my %po_files_by_lang = ();
76
my %translations = ();
78
# Use this instead of \w for XML files to handle more possible characters.
79
my $w = "[-A-Za-z0-9._:]";
81
# XML quoted string contents
90
} elsif ($BA_STYLE_ARG && @ARGV > 2) {
93
&ba_merge_translations;
95
} elsif ($XML_STYLE_ARG && @ARGV > 2) {
99
&xml_merge_translations;
101
} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
105
&keys_merge_translations;
107
} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
110
&desktop_merge_translations;
118
## Sub for printing release information
121
print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
122
print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
123
print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
124
print "Copyright (C) 2000-2001 Eazel, Inc.\n";
125
print "This is free software; see the source for copying conditions. There is NO\n";
126
print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
130
## Sub for printing usage information
133
print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
134
print "Generates an output file that includes translated versions of some attributes,\n";
135
print "from an untranslated source and a po directory that includes translations.\n\n";
136
print " -b, --ba-style includes translations in the bonobo-activation style\n";
137
print " -d, --desktop-style includes translations in the desktop style\n";
138
print " -k, --keys-style includes translations in the keys style\n";
139
print " -x, --xml-style includes translations in the standard xml style\n";
140
print " -u, --utf8 convert all strings to UTF-8 before merging\n";
141
print " -p, --pass-through use strings as found in .po files, without\n";
142
print " conversion (STRONGLY unrecommended with -x)\n";
143
print " -q, --quiet suppress most messages\n";
144
print " --help display this help and exit\n";
145
print " --version output version information and exit\n";
146
print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
147
print "<xml-i18n-tools-list\@gnome.org>.\n";
152
## Sub for printing error messages
155
print "Try `${PROGRAM} --help' for more information.\n";
162
print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
173
&get_translation_database;
176
# General-purpose code for looking up translations in .po files
181
$tmp =~ s/^.*\/(.*)\.po$/$1/;
187
for my $po_file (glob "$PO_DIR/*.po") {
188
$po_files_by_lang{po_file2lang($po_file)} = $po_file;
194
my ($in_po_file) = @_;
197
open IN_PO_FILE, $in_po_file or die;
198
while (<IN_PO_FILE>) {
199
## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
200
if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
208
print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
209
$encoding = "ISO-8859-1";
214
sub utf8_sanity_check
217
if (!$PASS_THROUGH_ARG) {
218
$PASS_THROUGH_ARG="1";
223
sub get_translation_database
226
&get_cached_translation_database;
228
&create_translation_database;
232
sub get_newest_po_age
236
foreach my $file (values %po_files_by_lang) {
237
my $file_age = -M $file;
238
$newest_age = $file_age if !$newest_age || $file_age < $newest_age;
246
print "Generating and caching the translation database\n" unless $QUIET_ARG;
248
&create_translation_database;
250
open CACHE, ">$cache_file" || die;
251
print CACHE join "\x01", %translations;
257
print "Found cached translation database\n" unless $QUIET_ARG;
260
open CACHE, "<$cache_file" || die;
266
%translations = split "\x01", $contents;
269
sub get_cached_translation_database
271
my $cache_file_age = -M $cache_file;
272
if (defined $cache_file_age) {
273
if ($cache_file_age <= &get_newest_po_age) {
277
print "Found too-old cached translation database\n" unless $QUIET_ARG;
283
sub create_translation_database
285
for my $lang (keys %po_files_by_lang) {
286
my $po_file = $po_files_by_lang{$lang};
289
my $encoding = get_po_encoding ($po_file);
290
if (lc $encoding eq "utf-8") {
291
open PO_FILE, "<$po_file";
293
my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
294
open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
297
open PO_FILE, "<$po_file";
306
$nextfuzzy = 1 if /^#, fuzzy/;
307
if (/^msgid "((\\.|[^\\])*)"/ ) {
308
$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
315
$msgid = unescape_po_string($1);
321
if (/^msgstr "((\\.|[^\\])*)"/) {
322
$msgstr = unescape_po_string($1);
326
if (/^"((\\.|[^\\])*)"/) {
327
$msgid .= unescape_po_string($1) if $inmsgid;
328
$msgstr .= unescape_po_string($1) if $inmsgstr;
331
$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
339
sub unescape_one_sequence
343
return "\\" if $sequence eq "\\\\";
344
return "\"" if $sequence eq "\\\"";
346
# gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
347
# \xXX (hex) and has a comment saying they want to handle \u and \U.
352
sub unescape_po_string
356
$string =~ s/(\\.)/unescape_one_sequence($1)/eg;
374
my ($pre_encoded) = @_;
376
my @list_of_chars = unpack ('C*', $pre_encoded);
378
if ($PASS_THROUGH_ARG) {
379
return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
381
return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
385
sub entity_encode_int_minimalist
387
return """ if $_ == 34;
388
return "&" if $_ == 38;
389
return "'" if $_ == 39;
393
sub entity_encode_int_even_high_bit
395
if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
396
# the ($_ > 127) should probably be removed
397
return "&#" . $_ . ";";
403
sub entity_encoded_translation
405
my ($lang, $string) = @_;
407
my $translation = $translations{$lang, $string};
408
return $string if !$translation;
409
return entity_encode ($translation);
412
## XML (bonobo-activation specific) merge code
414
sub ba_merge_translations
419
local $/; # slurp mode
420
open INPUT, "<$FILE" or die "can't open $FILE: $!";
425
open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
427
while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
430
my $node = $2 . "\n";
434
while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
435
push @strings, entity_decode($3);
440
for my $string (@strings) {
441
for my $lang (keys %po_files_by_lang) {
442
$langs{$lang} = 1 if $translations{$lang, $string};
446
for my $lang (sort keys %langs) {
448
s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
449
s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
454
print OUTPUT $source;
460
## XML (non-bonobo-activation) merge code
462
sub xml_merge_translations
467
local $/; # slurp mode
468
open INPUT, "<$FILE" or die "can't open $FILE: $!";
473
open OUTPUT, ">$OUTFILE" or die;
475
# FIXME: support attribute translations
477
# Empty nodes never need translation, so unmark all of them.
478
# For example, <_foo/> is just replaced by <foo/>.
479
$source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
481
# Support for <_foo>blah</_foo> style translations.
482
while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
489
print OUTPUT "$spaces<$tag>$string</$tag>\n";
491
$string =~ s/\s+/ /g;
494
$string = entity_decode($string);
496
for my $lang (sort keys %po_files_by_lang) {
497
my $translation = $translations{$lang, $string};
498
next if !$translation;
499
$translation = entity_encode($translation);
500
print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
504
print OUTPUT $source;
509
sub keys_merge_translations
511
open INPUT, "<${FILE}" or die;
512
open OUTPUT, ">${OUTFILE}" or die;
515
if (s/^(\s*)_(\w+=(.*))/$1$2/) {
520
my $non_translated_line = $_;
522
for my $lang (sort keys %po_files_by_lang) {
523
my $translation = $translations{$lang, $string};
524
next if !$translation;
526
$_ = $non_translated_line;
527
s/(\w+)=.*/[$lang]$1=$translation/;
539
sub desktop_merge_translations
541
open INPUT, "<${FILE}" or die;
542
open OUTPUT, ">${OUTFILE}" or die;
545
if (s/^(\s*)_(\w+=(.*))/$1$2/) {
550
my $non_translated_line = $_;
552
for my $lang (sort keys %po_files_by_lang) {
553
my $translation = $translations{$lang, $string};
554
next if !$translation;
556
$_ = $non_translated_line;
557
s/(\w+)=.*/${1}[$lang]=$translation/;