3
# this is part of langpack-o-matic, by Martin Pitt <martin.pitt@canonical.com>
5
# (C) 2005, 2011 Canonical Ltd.
7
# Update all language-pack-%PKGNAME% source packages (they must be in the state of
8
# the previous upload). If a locale does not yet have a -base package, it gets
11
# After this script finishes successfully, there will be a file
12
# 'updated-packages' which contains the source package directory names of all
13
# packages that were updated.
15
# Usage: import <archive> <distro-release> <target dir>
27
DEFAULT_MIRROR = 'http://archive.ubuntu.com/ubuntu'
29
# change working directory to the directory of this script
30
os.chdir(os.path.dirname(sys.argv[0]))
32
# import our own libraries
33
sys.path.append('lib')
38
import static_translations
42
locinfo = localeinfo.SupportedLocales()
43
macros_map = {} # class -> locale -> LangpackMacros
45
locale_count = {} # locale -> #translated strings
46
pot_count = {} # domain -> #translatable strings
47
pot_priority = {} # domain -> priority
53
'''Parse command line options.
55
Return (options, args) pair.
57
optparser = optparse.OptionParser('%prog <translation tarball> <distro release> <target dir>')
58
optparser.add_option('--mirror', default=DEFAULT_MIRROR,
59
metavar='URL', help='Archive mirror URL')
60
optparser.add_option('-s', '--no-static', action='store_false',
61
dest='static', default=True,
62
help='Disable inclusion of static translations (GNOME help)')
63
optparser.add_option('-u', '--update', action='store_true',
64
help='tarball only has updated translations, update '
65
'existing packages (will not create new packages)')
66
optparser.add_option('--no-classes', action='store_false',
67
dest='classes', default=True,
68
help='Disable splitting by classes (GNOME/KDE/common)')
69
optparser.add_option('--class', dest='custom_class',
70
help='build custom class from a package list (needs --pkglist too)')
71
optparser.add_option('-t', '--treshold', type='int', metavar='PERCENT',
72
help='Only build language packs that cover at least '
73
'the given percentage of all translatable strings '
74
'(ignored when updating already existing packages)')
75
optparser.add_option('-p', '--min-priority', type='int', metavar='PRIORITY',
76
help='Only include domains with at least given priority')
77
optparser.add_option('--pkglist',
78
help='file with source package names for --class')
79
optparser.add_option('--distribution', default='ubuntu',
80
metavar='NAME', help='Distribution name (default: ubuntu)')
81
optparser.add_option('-v', '--verbose', action='store_true', default=False,
82
help='Verbose logging')
84
(opts, args) = optparser.parse_args()
87
optparser.error('incorrect number of arguments; use --help for a short help')
89
if opts.custom_class and not opts.pkglist:
90
optparser.error('need to specify --pkglist with --class')
93
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s')
95
logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s')
100
def get_custom_class_domains(map_file, packages):
101
'''Return set of domains corresponding to packages list'''
103
# read domain map (mapping.txt)
105
with open(map_file) as f:
110
pkg_domains.setdefault(f[0], []).append(f[1])
113
with open(packages) as f:
118
if pkg in pkg_domains:
119
domains.update(pkg_domains[pkg])
121
logging.debug('package %s is in --pkglist, but not in mapping.txt', pkg)
125
def get_current_macros(cls, locale, version):
    '''Return the LangpackMacros object for the given class and locale.

    Objects are cached in the module-level macros_map (class -> locale ->
    LangpackMacros) for performance reasons, so a LangpackMacros is only
    constructed the first time a (cls, locale) pair is requested.

    cls: package class the macros are built for
    locale: target locale
    version: passed to LangpackMacros on first creation only; later calls
             for the same (cls, locale) return the cached object regardless
             of the version given

    The distribution and release are read from the module globals of the
    same name.
    '''
    per_class = macros_map.setdefault(cls, {})
    if locale in per_class:
        return per_class[locale]

    # first request for this class/locale pair: build and remember it
    fresh = macros.LangpackMacros(distribution, locale, cls, release, version)
    per_class[locale] = fresh
    return fresh
136
def package_updated(pkg):
137
'''Check if the given package has already been updated (i. e. it appears in
138
updated-packages).'''
140
if not os.path.isfile('updated-packages'):
142
for p in open('updated-packages'):
143
if p.strip() == pkg.strip():
148
def write_po(locale, domain, pkgdir, contents):
149
'''Write file contents to pkgdir/data/locale/LC_MESSAGES/domain.po.'''
151
logging.debug('Copying %s/%s into package %s', locale, domain, pkgdir)
153
os.makedirs(pkgdir + '/data/' + locale + '/LC_MESSAGES')
156
dest = '%s/data/%s/LC_MESSAGES/%s.po' % (pkgdir, locale, domain)
157
if locale.startswith('en_'):
158
# many languages legitimately have identical strings, such as fr
159
# or pt_BR using the same string as English, but pt does not. So only
160
# use msgequal for English.
161
msgequal = subprocess.Popen(['bin/msgequal', '-', dest],
162
stdin=subprocess.PIPE)
163
msgequal.communicate(contents)
164
assert msgequal.returncode == 0
171
def read_po(locale, domain, pkgdir):
172
'''Read file contents from pkgdir/data/locale/LC_MESSAGES/domain.po.
174
Return None if the file does not exist. Strips off surrounding white
178
return open('%s/data/%s/LC_MESSAGES/%s.po' % (pkgdir, locale, domain)).read().strip()
183
def normalize_po(contents):
184
'''Return PO contents in a canonical format suitable for comparison.
186
Return (normalized, num_strings).
191
msgfmt = subprocess.Popen(['/usr/bin/msgfmt', '--statistics', '-o', '-', '-'],
192
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
193
stderr=subprocess.PIPE)
195
(out, err) = msgfmt.communicate(contents)
197
logging.warning('msgfmt failed with OSError: %s, not normalizing', str(e))
199
if msgfmt.returncode:
200
logging.warning('msgfmt failed with code %i, not normalizing', msgfmt.returncode)
203
num_strings = int(err.split()[0])
205
msgunfmt = subprocess.Popen(['/usr/bin/msgunfmt', '-'],
206
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
207
stderr=subprocess.PIPE)
208
if msgunfmt.returncode:
209
raise Exception('msgunfmt returned with exit code ' + str(msgunfmt.returncode))
210
(out, err) = msgunfmt.communicate(out)
212
# remove X-Launchpad-Export-Date:
213
export_date = out.find('\n"X-Launchpad-Export-Date: ')
215
out = out[:export_date] + out[out.index('\n', export_date + 1):]
216
return (out, num_strings)
219
def install_po_auto_class(locale, domain, contents, data_version,
220
include_static, update_mode):
221
'''Install translation file for automatically classified packages (default).
223
locale: Target locale
224
domain: Translation domain
225
data_version: version number of the PO export
226
contents: The actual translation data (PO file contents)
227
update_mode: delta tarball mode, do not build new packages
229
There is a magic domain None which just causes the class-less base package
230
to be created, but nothing installed into it. This is important since the
231
common package must always exist. If e. g. just -gnome exists, its
232
dependencies are unsatisfiable, and it's missing the extra tarball and
233
locales. (LP #422760, #335307)
236
if domain and classifier:
237
cls = classifier.classify_domain(domain)
241
logging.warning('unknown translation domain: %s', domain)
243
logging.debug('The domain is classified as %s', cls)
245
if cls == 'kde' and release_version >= '12.10':
246
logging.debug('Skipping KDE language pack for release %s', release)
249
macr = get_current_macros(cls, locale, data_version)
251
# workaround for Rosetta exported files without translations
253
(ncontents, num_strings) = normalize_po(contents)
254
if not ncontents or not num_strings:
256
# update #translated strings
257
locale_count[locale] = locale_count.get(locale, 0) + num_strings
259
update_pkg = macr.subst_string(target_dir + '/sources-update/language-pack-%PKGNAME%')
260
base_pkg = macr.subst_string(target_dir + '/sources-base/language-pack-%PKGNAME%-base')
262
# if base package does not exist, create it
263
if not os.path.isdir(base_pkg):
265
logging.debug('Skipping %s/%s in update mode, %s does not already exist',
266
locale, domain, base_pkg)
269
# determine name of tarball with extra files
270
extra_tar = 'extra-files/%s-%s.tar' % (cls, macr['PKGCODE'])
271
if not os.path.isfile(extra_tar):
272
extra_tar = extra_tar + '.gz'
273
if not os.path.isfile(extra_tar):
276
# add locales to extra tarball of base package
279
logging.debug('Creating locale tarball')
280
locale_tar = locinfo.create_locale_tar(macr['PKGCODE'])
281
if extra_tar is not None:
282
raise Exception('Not yet implemented: tarball merging (locale+extra.tar)')
284
extra_tar = locale_tar
286
makepkg.make_pkg('skel-base', base_pkg, macr, extra_tar)
288
if locale_tar is not None:
289
os.unlink(locale_tar)
291
# add static translations
293
if not os.path.isdir(static_tar_dir):
294
logging.debug('Downloading and preparing static translations...')
295
# lazily download static tars, so that we don't have to do it when
296
# building updates only
297
os.mkdir(static_tar_dir)
298
if distribution == 'ubuntu':
299
# we don't always rebuild everything in a release, so
300
# search for tarballs in previous releases too
301
tarballs = static_translations.get_static_translation_tarballs('ubuntu', 'trusty', release)
303
tarballs = static_translations.get_static_translation_tarballs(distribution, release, release)
304
static_translations.create_static_tarballs(tarballs, static_tar_dir)
306
static_tar = os.path.join(static_tar_dir, macr['PKGNAME'] + '.tar')
307
if os.path.exists(static_tar):
308
logging.debug('Adding static tarball %s', static_tar)
309
shutil.move(static_tar, os.path.join(base_pkg, 'data', 'static.tar'))
311
# sanity check: we just created -base, so the update package should not
313
if os.path.isdir(update_pkg):
314
raise Exception('Inconsistency: just created fresh base, but update package already exists')
316
# determine %BASEVERDEP% macro (needs to be postponed until here, since we
317
# know where the -base package is, and which version it has, and it is
319
if 'BASEVERDEP' not in macr:
320
macr['BASEVERDEP'] = ' (>= %s)' % makepkg.get_pkg_version(base_pkg)
322
# Create an empty update package
323
makepkg.make_pkg('skel-update', update_pkg, macr)
325
# determine %BASEVERDEP% macro (needs to be postponed until here, since we
326
# know where the -base package is, and which version it has, and is not
328
if 'BASEVERDEP' not in macr:
329
macr['BASEVERDEP'] = ' (>= %s)' % makepkg.get_pkg_version(base_pkg)
334
# ensure that we always have the common package
336
common_base_pkg = macr.subst_string(target_dir + '/sources-base/language-pack-%PKGCODE%-base')
337
if not os.path.isdir(common_base_pkg) and not update_mode:
338
logging.debug('Creating common package for %s', macr['PKGCODE'])
339
install_po_auto_class(locale, None, None, data_version, options.static, False)
341
# prefer to change the base package if we already changed it
342
if package_updated(base_pkg):
343
write_po(locale, domain, base_pkg, contents)
345
if ncontents != normalize_po(read_po(locale, domain, base_pkg))[0] and \
346
ncontents != normalize_po(read_po(locale, domain, update_pkg))[0]:
347
if not package_updated(update_pkg):
348
# if we have an extra tarball, do not install it if the same
349
# version is already in the base package
350
# XXX: deactivated for now, it costs lots of time, does not
351
# respect locales, and is useless ATM
353
# base_extra_tar = os.path.join(base_pkg, 'data',
354
# os.path.basename(extra_tar))
355
# if os.path.isfile(base_extra_tar) and \
356
# filecmp(extra_tar, base_extra_tar):
359
makepkg.make_pkg('skel-update', update_pkg, macr)
361
write_po(locale, domain, update_pkg, contents)
364
def install_po_custom_class(locale, domain, contents, data_version, update_mode):
365
'''Install translation file for custom class
367
locale: Target locale
368
domain: Translation domain
369
contents: The actual translation data (PO file contents)
370
data_version: version number of the PO export
371
update_mode: delta tarball mode, do not build new packages
373
macr = get_current_macros(options.custom_class, locale, data_version)
374
pkg = macr.subst_string(target_dir + '/sources-%CLASS%/language-pack-%PKGNAME%')
375
if not os.path.isdir(pkg):
377
logging.debug('Skipping %s/%s in update mode, %s does not already exist',
381
makepkg.make_pkg('skel-customclass', pkg, macr)
383
if not package_updated(pkg):
384
makepkg.make_pkg('skel-customclass', pkg, macr)
386
write_po(locale, domain, pkg, contents)
388
# update #translated strings
389
msgfmt = subprocess.Popen(['msgfmt', '--statistics', '-o', '/dev/null', '-'],
390
stdin=subprocess.PIPE, stderr=subprocess.PIPE,
391
universal_newlines=True, env={})
392
out = msgfmt.communicate(contents)[1]
393
locale_count[locale] = locale_count.get(locale, 0) + int(out.split()[0])
396
def get_translatable_counts(release):
397
'''Get number of translatable strings per language'''
399
global pot_count, pot_priority
402
os.environ.get('TRANSLATION_STATS_URL', 'http://people.canonical.com/~people-l10n/data/ubuntu-l10n/'),
403
'%s_%s_potemplate-stats.json' % (distribution, release.split('-')[0]))
405
f = urllib.urlopen(url)
410
if i['enabled'] and i['languagepack']:
411
pot_count[i['translation_domain']] = i['total']
412
pot_priority[i['translation_domain']] = i['priority']
415
def discard_languages(langs, custom_class=None):
416
'''Remove built packages for given languages
418
When custom_class is given, only remove packages for that.
424
with open('updated-packages') as f:
426
packages.append(pkg.strip())
427
for pkg in list(packages):
428
if custom_class and 'sources-%s/' % custom_class not in pkg:
431
if pkg.endswith('-' + l) or '-%s-' % l in pkg:
432
logging.debug('Discarding %s', pkg)
436
with open('updated-packages.cleaned', 'w') as f:
439
os.rename('updated-packages.cleaned', 'updated-packages')
446
options, args = parse_argv()
447
(archive_fname, release, target_dir) = args
448
distribution = options.distribution
450
if not os.path.isdir(target_dir):
451
sys.stderr.write('Target directory does not exist\n')
454
release_version = get_current_macros('', 'en_GB.UTF-8', 'invalid').subst_string('%RELEASEVERSION%')
455
assert release_version, 'no release version for ' + release
457
# unpack translation tarball
458
contentdirbase = tempfile.mkdtemp()
460
# extract tarball to a temporary dir
461
if archive_fname[-4:] == '.tar':
462
result = os.spawnlp(os.P_WAIT, 'tar', 'tar', '-C', contentdirbase, '-xf',
463
os.path.abspath(archive_fname))
464
elif archive_fname[-7:] == '.tar.gz':
465
result = os.spawnlp(os.P_WAIT, 'tar', 'tar', '-C', contentdirbase, '-xzf',
466
os.path.abspath(archive_fname))
467
elif archive_fname[-8:] == '.tar.bz2':
468
result = os.spawnlp(os.P_WAIT, 'tar', 'tar', '-C', contentdirbase, '--bzip2 -xf',
469
os.path.abspath(archive_fname))
470
elif archive_fname[-9:] == '.tar.lzma':
471
result = os.spawnlp(os.P_WAIT, 'tar', 'tar', '-C', contentdirbase, '--lzma -xf',
472
os.path.abspath(archive_fname))
474
sys.stderr.write('Unknown tar format')
477
sys.stderr.write('Error executing tar, aborting')
480
toplevel_dirs = os.listdir(contentdirbase)
481
if len(toplevel_dirs) != 1:
482
raise Exception('Archive does not contain a single top level directory')
484
content_dir = os.path.join(contentdirbase, toplevel_dirs[0])
485
static_tar_dir = os.path.join(contentdirbase, 'static-tars')
488
timestamp_file = os.path.join(content_dir, 'timestamp.txt')
489
if not os.path.exists(timestamp_file):
490
raise Exception('Archive does not contain a timestamp')
491
data_version = open(timestamp_file).read().strip()
492
os.unlink(timestamp_file)
494
# initialize domain map
495
map_file = os.path.join(content_dir, 'mapping.txt')
496
if not os.path.exists(map_file):
497
raise Exception('Archive does not contain a domain map file (mapping.txt)')
499
if options.custom_class:
500
custom_class_domains = get_custom_class_domains(map_file, options.pkglist)
501
elif options.classes:
502
classifier = pkg_classify.PackageClassificator(release, map_file,
507
get_translatable_counts(release) # need pot_priority for updates, too!
509
logging.warning('Translations stats missing for %s', release)
510
# hack until http://people.canonical.com/~people-l10n/data/ubuntu-l10n/ exists
512
orig_distribution = distribution
513
distribution = 'ubuntu'
514
get_translatable_counts(release.replace('15.04', 'xenial'))
515
distribution = orig_distribution
517
if not options.update:
518
total_translatable = 0
519
counted_domains = set()
521
def count_domain(domain):
522
global total_translatable, counted_domains
523
if domain in counted_domains:
525
counted_domains.add(domain)
527
total_translatable += pot_count[domain]
529
logging.warning('translation stats are missing domain %s', domain)
531
def count_domain(domain):
532
# not necessary in update mode
535
# Process every .po file
538
for root, dirs, files in os.walk(content_dir):
540
logging.debug('Considering %s', f)
541
if not f.endswith('.po'):
544
file = os.path.join(root, f)
545
comp = file.split(os.sep)[-3:]
547
# Verify that we have locale/LC_MESSAGES/domain.po
548
if len(comp) < 3 or comp[2] == '':
550
if comp[1] != 'LC_MESSAGES':
551
raise IOError('Invalid file: %s' % file)
552
(domain, ext) = os.path.splitext(comp[2])
554
raise IOError('Unknown file type: %s' % file)
557
if domain.startswith('iso_'):
560
# Verify that we have a known locale
562
if locale != lastlocale:
563
logging.debug('--------------------------------')
564
logging.debug('Processing locale %s...', locale)
568
lang = (locale.split('_')[0]).split('@')[0]
569
if not (locinfo.known_language(lang) and locinfo.language_locales(lang)):
570
logging.warning('Skipping unknown language: %s', locale)
573
# check country, if present
574
noat = locale.split('@')[0]
575
if noat.find('_') >= 0:
576
(lang, country) = noat.split('_')
577
# XXX: hack: ignore invalid/obsolete per-country locales from LP
578
if lang not in ('zh', 'en') and noat != 'pt_BR':
579
logging.warning('Skipping obsolete locale: %s', locale)
581
if not locinfo.known_country(country):
582
logging.warning('Skipping unknown country: %s', locale)
585
# split into zh_* now
586
logging.warning('Skipping obsolete locale: %s', locale)
589
# check minimal priority
590
if options.min_priority is not None:
592
if pot_priority[domain] < options.min_priority and pot_priority[domain] > 0:
593
logging.debug('Skipping domain %s with too low priority %i' %
594
(domain, pot_priority[domain]))
597
logging.warning('domain %s has no priority' % domain)
599
# Everything is fine, install it
600
with open(file) as f:
602
if options.custom_class:
603
if domain in custom_class_domains:
604
install_po_custom_class(locale, domain, po_data,
605
data_version, options.update)
608
install_po_auto_class(locale, domain, po_data, data_version,
609
options.static, options.update)
612
if not options.update:
613
# translate locale_count into per-language counts; take the dominant locale
614
# as determining the coverage
616
for locale, count in locale_count.items():
617
lang = (locale.split('_')[0]).split('@')[0]
618
if lang_count.setdefault(lang, 0) < count:
619
lang_count[lang] = count
621
# show stats and discard packages which don't reach the threshold
622
print('Translated strings per language (%i translatable in total):' % total_translatable)
624
for l in sorted(lang_count):
625
if total_translatable:
626
pct = lang_count[l] * 100 // total_translatable
627
if options.treshold is not None and pct < options.treshold:
629
status = ' special-cased'
632
status = ' discarded'
635
print(' %s\t%i (%i%%)%s' % (l, lang_count[l], pct, status))
637
print(' %s\t%i' % (l, lang_count[l]))
639
discard_languages(discarded, options.custom_class)
641
shutil.rmtree(contentdirbase, True)