1
1
# -*- coding: utf-8 -*-
3
# Copyright (C) 2007 Edgewall Software
6
# This software is licensed as described in the file COPYING, which
7
# you should have received as part of this distribution. The terms
8
# are also available at http://babel.edgewall.org/wiki/License.
10
# This software consists of voluntary contributions made by many
11
# individuals. For the exact contribution history, see the revision
12
# history and logs, available at http://babel.edgewall.org/log/.
14
"""Data structures for message catalogs."""
6
Data structures for message catalogs.
8
:copyright: (c) 2013 by the Babel Team.
9
:license: BSD, see LICENSE for more details.
16
15
from cgi import parse_header
17
from datetime import datetime
16
from datetime import datetime, time as time_
18
17
from difflib import get_close_matches
19
18
from email import message_from_string
20
19
from copy import copy
24
21
from babel import __version__ as VERSION
25
22
from babel.core import Locale
26
23
from babel.dates import format_datetime
27
24
from babel.messages.plurals import get_plural
28
from babel.util import odict, distinct, set, LOCALTZ, UTC, FixedOffsetTimezone
25
from babel.util import odict, distinct, LOCALTZ, FixedOffsetTimezone
26
from babel._compat import string_types, number_types, PY2, cmp
30
28
__all__ = ['Message', 'Catalog', 'TranslationError']
31
__docformat__ = 'restructuredtext en'
34
31
PYTHON_FORMAT = re.compile(r'''(?x)
47
44
"""Representation of a single message in a catalog."""
49
46
def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
50
user_comments=(), previous_id=(), lineno=None):
47
user_comments=(), previous_id=(), lineno=None, context=None):
51
48
"""Create the message object.
53
50
:param id: the message ID, or a ``(singular, plural)`` tuple for
62
59
tuple for pluralizable messages
63
60
:param lineno: the line number on which the msgid line was found in the
62
:param context: the message context
66
64
self.id = id #: The message ID
67
65
if not string and self.pluralizable:
75
73
self.flags.discard('python-format')
76
74
self.auto_comments = list(distinct(auto_comments))
77
75
self.user_comments = list(distinct(user_comments))
78
if isinstance(previous_id, basestring):
76
if isinstance(previous_id, string_types):
79
77
self.previous_id = [previous_id]
81
79
self.previous_id = list(previous_id)
82
80
self.lineno = lineno
81
self.context = context
84
83
def __repr__(self):
85
84
return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
88
87
def __cmp__(self, obj):
89
88
"""Compare Messages, taking into account plural ids"""
90
if isinstance(obj, Message):
91
plural = self.pluralizable
92
obj_plural = obj.pluralizable
93
if plural and obj_plural:
94
return cmp(self.id[0], obj.id[0])
96
return cmp(self.id[0], obj.id)
98
return cmp(self.id, obj.id[0])
99
return cmp(self.id, obj.id)
89
def values_to_compare():
90
if isinstance(obj, Message):
91
plural = self.pluralizable
92
obj_plural = obj.pluralizable
93
if plural and obj_plural:
94
return self.id[0], obj.id[0]
96
return self.id[0], obj.id
98
return self.id, obj.id[0]
99
return self.id, obj.id
100
this, other = values_to_compare()
101
return cmp(this, other)
103
def __gt__(self, other):
104
return self.__cmp__(other) > 0
106
def __lt__(self, other):
107
return self.__cmp__(other) < 0
109
def __ge__(self, other):
110
return self.__cmp__(other) >= 0
112
def __le__(self, other):
113
return self.__cmp__(other) <= 0
115
def __eq__(self, other):
116
return self.__cmp__(other) == 0
118
def __ne__(self, other):
119
return self.__cmp__(other) != 0
102
122
return Message(*map(copy, (self.id, self.string, self.locations,
103
123
self.flags, self.auto_comments,
104
124
self.user_comments, self.previous_id,
125
self.lineno, self.context)))
107
127
def check(self, catalog=None):
108
128
"""Run various validation checks on the message. Some validations
119
139
for checker in checkers:
121
141
checker(catalog, self)
122
except TranslationError, e:
142
except TranslationError as e:
127
return 'fuzzy' in self.flags
128
fuzzy = property(fuzzy, doc="""\
129
Whether the translation is fuzzy.
148
"""Whether the translation is fuzzy.
131
150
>>> Message('foo').fuzzy
137
156
<Message 'foo' (flags: ['fuzzy'])>
159
return 'fuzzy' in self.flags
142
162
def pluralizable(self):
143
return isinstance(self.id, (list, tuple))
144
pluralizable = property(pluralizable, doc="""\
145
Whether the message is pluralizable.
163
"""Whether the message is pluralizable.
147
165
>>> Message('foo').pluralizable
149
167
>>> Message(('foo', 'bar')).pluralizable
171
return isinstance(self.id, (list, tuple))
155
174
def python_format(self):
175
"""Whether the message contains Python-style parameters.
177
>>> Message('foo %(name)s bar').python_format
179
>>> Message(('foo %(name)s', 'foo %(name)s')).python_format
157
184
if not isinstance(ids, (list, tuple)):
159
return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids]))
160
python_format = property(python_format, doc="""\
161
Whether the message contains Python-style parameters.
163
>>> Message('foo %(name)s bar').python_format
165
>>> Message(('foo %(name)s', 'foo %(name)s')).python_format
186
return any(PYTHON_FORMAT.search(id) for id in ids)
172
189
class TranslationError(Exception):
203
def _parse_header(header_string):
204
# message_from_string only works for str, not for unicode
205
headers = message_from_string(header_string.encode('utf8'))
207
for name, value in headers.items():
208
name = name.decode('utf8')
209
value = value.decode('utf8')
210
decoded_headers[name] = value
211
return decoded_headers
214
_parse_header = message_from_string
185
217
class Catalog(object):
186
218
"""Representation of a message catalog."""
189
221
project=None, version=None, copyright_holder=None,
190
222
msgid_bugs_address=None, creation_date=None,
191
223
revision_date=None, last_translator=None, language_team=None,
192
charset='utf-8', fuzzy=True):
224
charset=None, fuzzy=True):
193
225
"""Initialize the catalog object.
195
227
:param locale: the locale identifier or `Locale` object, or `None`
207
239
:param revision_date: the date the catalog was revised
208
240
:param last_translator: the name and email of the last translator
209
241
:param language_team: the name and email of the language team
210
:param charset: the encoding to use in the output
242
:param charset: the encoding to use in the output (defaults to utf-8)
211
243
:param fuzzy: the fuzzy bit on the catalog header
213
245
self.domain = domain #: The message domain
235
267
creation_date = creation_date.replace(tzinfo=LOCALTZ)
236
268
self.creation_date = creation_date #: Creation date of the template
237
269
if revision_date is None:
238
revision_date = datetime.now(LOCALTZ)
270
revision_date = 'YEAR-MO-DA HO:MI+ZONE'
239
271
elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
240
272
revision_date = revision_date.replace(tzinfo=LOCALTZ)
241
273
self.revision_date = revision_date #: Last revision date of the catalog
248
280
def _get_header_comment(self):
249
281
comment = self._header_comment
282
year = datetime.now(LOCALTZ).strftime('%Y')
283
if hasattr(self.revision_date, 'strftime'):
284
year = self.revision_date.strftime('%Y')
250
285
comment = comment.replace('PROJECT', self.project) \
251
286
.replace('VERSION', self.version) \
252
.replace('YEAR', self.revision_date.strftime('%Y')) \
287
.replace('YEAR', year) \
253
288
.replace('ORGANIZATION', self.copyright_holder)
255
290
comment = comment.replace('Translations template', '%s translations'
300
335
headers.append(('POT-Creation-Date',
301
336
format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
303
if self.locale is None:
304
headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE'))
305
headers.append(('Last-Translator', 'FULL NAME <EMAIL@ADDRESS>'))
306
headers.append(('Language-Team', 'LANGUAGE <LL@li.org>'))
338
if isinstance(self.revision_date, (datetime, time_) + number_types):
308
339
headers.append(('PO-Revision-Date',
309
340
format_datetime(self.revision_date,
310
341
'yyyy-MM-dd HH:mmZ', locale='en')))
311
headers.append(('Last-Translator', self.last_translator))
343
headers.append(('PO-Revision-Date', self.revision_date))
344
headers.append(('Last-Translator', self.last_translator))
345
if (self.locale is not None) and ('LANGUAGE' in self.language_team):
312
346
headers.append(('Language-Team',
313
347
self.language_team.replace('LANGUAGE',
314
348
str(self.locale))))
350
headers.append(('Language-Team', self.language_team))
351
if self.locale is not None:
315
352
headers.append(('Plural-Forms', self.plural_forms))
316
353
headers.append(('MIME-Version', '1.0'))
317
354
headers.append(('Content-Type',
323
360
def _set_mime_headers(self, headers):
324
361
for name, value in headers:
325
if name.lower() == 'content-type':
326
mimetype, params = parse_header(value)
327
if 'charset' in params:
328
self.charset = params['charset'].lower()
330
for name, value in headers:
331
name = name.lower().decode(self.charset)
332
value = value.decode(self.charset)
333
363
if name == 'project-id-version':
334
364
parts = value.split(' ')
335
365
self.project = u' '.join(parts[:-1])
340
370
self.last_translator = value
341
371
elif name == 'language-team':
342
372
self.language_team = value
373
elif name == 'content-type':
374
mimetype, params = parse_header(value)
375
if 'charset' in params:
376
self.charset = params['charset'].lower()
343
377
elif name == 'plural-forms':
344
378
_, params = parse_header(' ;' + value)
345
379
self._num_plurals = int(params.get('nplurals', 2))
413
447
Here's an example of the output for such a catalog template:
449
>>> from babel.dates import UTC
415
450
>>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
416
451
>>> catalog = Catalog(project='Foobar', version='1.0',
417
452
... creation_date=created)
455
491
def num_plurals(self):
492
"""The number of plurals used by the catalog or locale.
494
>>> Catalog(locale='en').num_plurals
496
>>> Catalog(locale='ga').num_plurals
456
500
if self._num_plurals is None:
459
503
num = get_plural(self.locale)[0]
460
504
self._num_plurals = num
461
505
return self._num_plurals
462
num_plurals = property(num_plurals, doc="""\
463
The number of plurals used by the catalog or locale.
465
>>> Catalog(locale='en').num_plurals
467
>>> Catalog(locale='ga').num_plurals
473
508
def plural_expr(self):
509
"""The plural expression used by the catalog or locale.
511
>>> Catalog(locale='en').plural_expr
513
>>> Catalog(locale='ga').plural_expr
514
'(n==1 ? 0 : n==2 ? 1 : 2)'
516
:type: `string_types`"""
474
517
if self._plural_expr is None:
475
518
expr = '(n != 1)'
477
520
expr = get_plural(self.locale)[1]
478
521
self._plural_expr = expr
479
522
return self._plural_expr
480
plural_expr = property(plural_expr, doc="""\
481
The plural expression used by the catalog or locale.
483
>>> Catalog(locale='en').plural_expr
485
>>> Catalog(locale='ga').plural_expr
486
'(n==1 ? 0 : n==2 ? 1 : 2)'
491
525
def plural_forms(self):
526
"""Return the plural forms declaration for the locale.
528
>>> Catalog(locale='en').plural_forms
529
'nplurals=2; plural=(n != 1)'
530
>>> Catalog(locale='pt_BR').plural_forms
531
'nplurals=2; plural=(n > 1)'
492
534
return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)
493
plural_forms = property(plural_forms, doc="""\
494
Return the plural forms declaration for the locale.
496
>>> Catalog(locale='en').plural_forms
497
'nplurals=2; plural=(n != 1)'
498
>>> Catalog(locale='pt_BR').plural_forms
499
'nplurals=2; plural=(n > 1)'
504
536
def __contains__(self, id):
505
537
"""Return whether the catalog has a message with the specified ID."""
508
540
def __len__(self):
509
541
"""The number of messages in the catalog.
511
This does not include the special ``msgid ""`` entry.
543
This does not include the special ``msgid ""`` entry."""
513
544
return len(self._messages)
515
546
def __iter__(self):
516
547
"""Iterates through all the entries in the catalog, in the order they
517
548
were added, yielding a `Message` object for every entry.
550
:rtype: ``iterator``"""
522
552
for name, value in self.mime_headers:
523
553
buf.append('%s: %s' % (name, value))
537
567
def __delitem__(self, id):
538
568
"""Delete the message with the specified ID."""
539
key = self._key_for(id)
540
if key in self._messages:
541
del self._messages[key]
543
571
def __getitem__(self, id):
544
572
"""Return the message with the specified ID.
546
574
:param id: the message ID
547
:return: the message with the specified ID, or `None` if no such message
551
return self._messages.get(self._key_for(id))
553
578
def __setitem__(self, id, message):
554
579
"""Add or update the message with the specified ID.
573
598
:param message: the `Message` object
575
600
assert isinstance(message, Message), 'expected a Message object'
576
key = self._key_for(id)
601
key = self._key_for(id, message.context)
577
602
current = self._messages.get(key)
579
604
if message.pluralizable and not current.pluralizable:
590
615
message = current
592
617
# special treatment for the header message
593
headers = message_from_string(message.string.encode(self.charset))
594
self.mime_headers = headers.items()
595
self.header_comment = '\n'.join(['# %s' % comment for comment
618
self.mime_headers = _parse_header(message.string).items()
619
self.header_comment = '\n'.join([('# %s' % c).rstrip() for c
596
620
in message.user_comments])
597
621
self.fuzzy = message.fuzzy
602
626
self._messages[key] = message
604
628
def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
605
user_comments=(), previous_id=(), lineno=None):
629
user_comments=(), previous_id=(), lineno=None, context=None):
606
630
"""Add or update the message with the specified ID.
608
632
>>> catalog = Catalog()
609
633
>>> catalog.add(u'foo')
610
635
>>> catalog[u'foo']
611
636
<Message u'foo' (flags: [])>
625
650
tuple for pluralizable messages
626
651
:param lineno: the line number on which the msgid line was found in the
653
:param context: the message context
629
self[id] = Message(id, string, list(locations), flags, auto_comments,
630
user_comments, previous_id, lineno=lineno)
655
message = Message(id, string, list(locations), flags, auto_comments,
656
user_comments, previous_id, lineno=lineno,
633
662
"""Run various validation checks on the translations in the catalog.
644
673
yield message, errors
675
def get(self, id, context=None):
676
"""Return the message with the specified ID and context.
678
:param id: the message ID
679
:param context: the message context, or ``None`` for no context
681
return self._messages.get(self._key_for(id, context))
683
def delete(self, id, context=None):
684
"""Delete the message with the specified ID and context.
686
:param id: the message ID
687
:param context: the message context, or ``None`` for no context
689
key = self._key_for(id, context)
690
if key in self._messages:
691
del self._messages[key]
646
693
def update(self, template, no_fuzzy_matching=False):
647
694
"""Update the catalog based on the given template catalog.
649
696
>>> from babel.messages import Catalog
650
697
>>> template = Catalog()
651
698
>>> template.add('green', locations=[('main.py', 99)])
652
700
>>> template.add('blue', locations=[('main.py', 100)])
653
702
>>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
654
704
>>> catalog = Catalog(locale='de_DE')
655
705
>>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
656
707
>>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
657
709
>>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
658
710
... locations=[('util.py', 38)])
660
713
>>> catalog.update(template)
697
750
# Prepare for fuzzy matching
698
751
fuzzy_candidates = []
699
752
if not no_fuzzy_matching:
701
self._key_for(msgid) for msgid in messages
702
if msgid and messages[msgid].string
753
fuzzy_candidates = dict([
754
(self._key_for(msgid), messages[msgid].context)
755
for msgid in messages if msgid and messages[msgid].string
704
757
fuzzy_matches = set()
706
759
def _merge(message, oldkey, newkey):
711
764
fuzzy_matches.add(oldkey)
712
765
oldmsg = messages.get(oldkey)
713
if isinstance(oldmsg.id, basestring):
766
if isinstance(oldmsg.id, string_types):
714
767
message.previous_id = [oldmsg.id]
716
769
message.previous_id = list(oldmsg.id)
737
790
for message in template:
739
key = self._key_for(message.id)
792
key = self._key_for(message.id, message.context)
740
793
if key in messages:
741
794
_merge(message, key, key)
743
796
if no_fuzzy_matching is False:
744
797
# do some fuzzy matching with difflib
745
matches = get_close_matches(key.lower().strip(),
798
if isinstance(key, tuple):
799
matchkey = key[0] # just the msgid, no context
802
matches = get_close_matches(matchkey.lower().strip(),
803
fuzzy_candidates.keys(), 1)
748
_merge(message, matches[0], key)
806
newctxt = fuzzy_candidates[newkey]
807
if newctxt is not None:
808
newkey = newkey, newctxt
809
_merge(message, newkey, key)
751
812
self[message.id] = message
753
self.obsolete = odict()
754
814
for msgid in remaining:
755
815
if no_fuzzy_matching or msgid not in fuzzy_matches:
756
816
self.obsolete[msgid] = remaining[msgid]
758
818
# used to update the catalog
759
819
self.creation_date = template.creation_date
761
def _key_for(self, id):
821
def _key_for(self, id, context=None):
762
822
"""The key for a message is just the singular ID even for pluralizable
823
messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
766
827
if isinstance(key, (list, tuple)):
829
if context is not None: