1
1
# -*- coding: utf-8 -*-
3
# Copyright (C) 2007 Edgewall Software
6
# This software is licensed as described in the file COPYING, which
7
# you should have received as part of this distribution. The terms
8
# are also available at http://babel.edgewall.org/wiki/License.
10
# This software consists of voluntary contributions made by many
11
# individuals. For the exact contribution history, see the revision
12
# history and logs, available at http://babel.edgewall.org/log/.
14
"""Reading and writing of files in the ``gettext`` PO (portable object)
17
:see: `The Format of PO Files
18
<http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
21
from datetime import date, datetime
6
Reading and writing of files in the ``gettext`` PO (portable object)
9
:copyright: (c) 2013 by the Babel Team.
10
:license: BSD, see LICENSE for more details.
25
from babel import __version__ as VERSION
26
16
from babel.messages.catalog import Catalog, Message
27
from babel.util import set, wraptext, LOCALTZ
17
from babel.util import wraptext
18
from babel._compat import text_type
29
__all__ = ['read_po', 'write_po']
30
__docformat__ = 'restructuredtext en'
32
21
def unescape(string):
33
22
r"""Reverse `escape` the given string.
40
29
:param string: the string to unescape
41
:return: the unescaped string
42
:rtype: `str` or `unicode`
44
return string[1:-1].replace('\\\\', '\\') \
45
.replace('\\t', '\t') \
46
.replace('\\r', '\r') \
47
.replace('\\n', '\n') \
31
def replace_escapes(match):
41
return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])
50
44
def denormalize(string):
51
45
r"""Reverse the normalization done by the `normalize` function.
69
63
:param string: the string to denormalize
70
:return: the denormalized string
71
:rtype: `unicode` or `str`
73
if string.startswith('""'):
75
for line in string.splitlines()[1:]:
76
lines.append(unescape(line))
66
escaped_lines = string.splitlines()
67
if string.startswith('""'):
68
escaped_lines = escaped_lines[1:]
69
lines = map(unescape, escaped_lines)
77
70
return ''.join(lines)
79
72
return unescape(string)
81
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
75
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
82
76
"""Read messages from a ``gettext`` PO (portable object) file from the given
83
77
file-like object and return a `Catalog`.
79
>>> from datetime import datetime
85
80
>>> from StringIO import StringIO
86
81
>>> buf = StringIO('''
88
83
... #, fuzzy, python-format
89
84
... msgid "foo %(name)s"
85
... msgstr "quux %(name)s"
92
87
... # A user comment
93
88
... #. An auto comment
96
91
... msgid_plural "baz"
100
95
>>> catalog = read_po(buf)
101
96
>>> catalog.revision_date = datetime(2007, 04, 01)
105
100
... print (message.id, message.string)
106
101
... print ' ', (message.locations, message.flags)
107
102
... print ' ', (message.user_comments, message.auto_comments)
108
(u'foo %(name)s', '')
103
(u'foo %(name)s', u'quux %(name)s')
109
104
([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
111
((u'bar', u'baz'), ('', ''))
106
((u'bar', u'baz'), (u'bar', u'baaz'))
112
107
([(u'main.py', 3)], set([]))
113
108
([u'A user comment'], [u'An auto comment'])
110
.. versionadded:: 1.0
111
Added support for explicit charset argument.
115
113
:param fileobj: the file-like object to read the PO file from
116
114
:param locale: the locale identifier or `Locale` object, or `None`
117
115
if the catalog is not bound to a locale (which basically
118
116
means it's a template)
119
117
:param domain: the message domain
120
118
:param ignore_obsolete: whether to ignore obsolete messages in the input
121
:return: an iterator over ``(message, translation, location)`` tuples
119
:param charset: the character set of the catalog.
124
catalog = Catalog(locale=locale, domain=domain)
121
catalog = Catalog(locale=locale, domain=domain, charset=charset)
151
150
string = tuple([denormalize(t[1]) for t in string])
153
152
string = denormalize(translations[0][1])
154
msgctxt = denormalize('\n'.join(context))
154
157
message = Message(msgid, string, list(locations), set(flags),
155
auto_comments, user_comments, lineno=offset[0] + 1)
158
auto_comments, user_comments, lineno=offset[0] + 1,
157
161
if not ignore_obsolete:
158
162
catalog.obsolete[msgid] = message
160
164
catalog[msgid] = message
161
del messages[:]; del translations[:]; del locations[:];
162
del flags[:]; del auto_comments[:]; del user_comments[:]
165
del messages[:]; del translations[:]; del context[:]; del locations[:];
166
del flags[:]; del auto_comments[:]; del user_comments[:];
163
167
obsolete[0] = False
184
188
translations.append([int(idx), msg.lstrip()])
186
190
translations.append([0, msg])
191
elif line.startswith('msgctxt'):
194
in_msgid[0] = in_msgstr[0] = False
195
context.append(line[7:].lstrip())
187
196
elif line.startswith('"'):
189
198
messages[-1] += u'\n' + line.rstrip()
190
199
elif in_msgstr[0]:
191
200
translations[-1][1] += u'\n' + line.rstrip()
202
context.append(line.rstrip())
193
204
for lineno, line in enumerate(fileobj.readlines()):
194
205
line = line.strip()
195
if not isinstance(line, unicode):
206
if not isinstance(line, text_type):
196
207
line = line.decode(catalog.charset)
197
208
if line.startswith('#'):
198
209
in_msgid[0] = in_msgstr[0] = False
239
251
WORD_SEP = re.compile('('
240
252
r'\s+|' # any whitespace
241
253
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
242
254
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash
245
258
def escape(string):
246
259
r"""Escape the given string so that it can be included in double-quoted
247
260
strings in ``PO`` files.
252
265
'"Say:\\n \\"hello, world!\\"\\n"'
254
267
:param string: the string to escape
255
:return: the escaped string
256
:rtype: `str` or `unicode`
258
269
return '"%s"' % string.replace('\\', '\\\\') \
259
270
.replace('\t', '\\t') \
284
296
:param prefix: a string that should be prepended to every line
285
297
:param width: the maximum line width; use `None`, 0, or a negative number
286
298
to completely disable line wrapping
287
:return: the normalized string
290
300
if width and width > 0:
291
301
prefixlen = len(prefix)
293
for idx, line in enumerate(string.splitlines(True)):
303
for line in string.splitlines(True):
294
304
if len(escape(line)) + prefixlen > width:
295
305
chunks = WORD_SEP.split(line)
323
333
lines[-1] += '\n'
324
334
return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines])
326
337
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
327
338
sort_output=False, sort_by_file=False, ignore_obsolete=False,
328
339
include_previous=False):
332
343
>>> catalog = Catalog()
333
344
>>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
334
345
... flags=('fuzzy',))
335
347
>>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
336
>>> from StringIO import StringIO
349
>>> from io import BytesIO
338
351
>>> write_po(buf, catalog, omit_header=True)
339
352
>>> print buf.getvalue()
367
380
updating the catalog
369
382
def _normalize(key, prefix=''):
370
return normalize(key, prefix=prefix, width=width) \
371
.encode(catalog.charset, 'backslashreplace')
383
return normalize(key, prefix=prefix, width=width)
373
385
def _write(text):
374
if isinstance(text, unicode):
375
text = text.encode(catalog.charset)
386
if isinstance(text, text_type):
387
text = text.encode(catalog.charset, 'backslashreplace')
376
388
fileobj.write(text)
378
390
def _write_comment(comment, prefix=''):
388
400
def _write_message(message, prefix=''):
389
401
if isinstance(message.id, (list, tuple)):
403
_write('%smsgctxt %s\n' % (prefix,
404
_normalize(message.context, prefix)))
390
405
_write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
391
406
_write('%smsgid_plural %s\n' % (
392
407
prefix, _normalize(message.id[1], prefix)
401
416
prefix, idx, _normalize(string, prefix)
420
_write('%smsgctxt %s\n' % (prefix,
421
_normalize(message.context, prefix)))
404
422
_write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
405
423
_write('%smsgstr %s\n' % (
406
424
prefix, _normalize(message.string or '', prefix)
422
440
for line in comment_header.splitlines():
423
441
lines += wraptext(line, width=width,
424
442
subsequent_indent='# ')
425
comment_header = u'\n'.join(lines) + u'\n'
426
_write(comment_header)
443
comment_header = u'\n'.join(lines)
444
_write(comment_header + u'\n')
428
446
for comment in message.user_comments:
429
447
_write_comment(comment)
435
453
for filename, lineno in message.locations])
436
454
_write_comment(locs, prefix=':')
437
455
if message.flags:
438
_write('#%s\n' % ', '.join([''] + list(message.flags)))
456
_write('#%s\n' % ', '.join([''] + sorted(message.flags)))
440
458
if message.previous_id and include_previous:
441
459
_write_comment('msgid %s' % _normalize(message.previous_id[0]),