1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
|
# Public API: only these two names are exported by a star-import of this module.
__all__ = [ "load_grd", "save_grd" ]
import os
import logging
from hashlib import md5
from .xtb import load_xtb
from .utils import OrderedXMLParser, ET
class UnknownTag(ValueError):
    """Raised when an XML element carries a tag the GRD handling code does not expect."""
def example_message(message_node):
    r"""Build a human-readable sample of a message from its ``<ex>`` examples.

    The message's own leading text and the text that follows each ``<ph>``
    placeholder are kept verbatim; every placeholder is replaced by the
    stripped text of each of its direct ``<ex>`` children, wrapped in double
    braces.  Text inside the placeholder itself (such as ``$1``) is ignored.

    Returns the stripped sample string, or None when no placeholder supplies
    an example.  Raises UnknownTag for any direct child that is not ``<ph>``.

    >>> example_message(ET.fromstring('<message> a <ph name="PRODUCT_NAME">$1<ex>Chromium</ex>b</ph> Preferences</message>'))
    'a {{Chromium}} Preferences'
    >>> example_message(ET.fromstring('<message><ph name="PRODUCT_NAME"><ex>Chromium</ex></ph> Preferences</message>'))
    '{{Chromium}} Preferences'
    >>> example_message(ET.fromstring('<message><ph name="FILENAME">%s<ex>My Cool Image.gif</ex></ph><ph name="WIDTH">%+0.2d<ex>400</ex></ph>×<ph name="HEIGHT">%d<ex>600</ex></ph></message>'))
    '{{My Cool Image.gif}}{{400}}×{{600}}'
    >>> example_message(ET.fromstring('<message>Hello <ph name="A">$1<ex>x</ex></ph> and <ph name="B">$2<ex>y</ex></ph></message>'))
    'Hello {{x}} and {{y}}'
    """
    found_example = False
    ex_parts = []
    # The leading text belongs to the message once, not once per placeholder.
    if message_node.text:
        ex_parts.append(message_node.text)
    for child in message_node:
        if child.tag != "ph":
            # Report the offending element itself; no placeholder may have
            # been seen yet at this point.
            raise UnknownTag(child.tag)
        for ex in child.findall("ex"):
            # An empty <ex/> has text None; treat it as an empty example.
            ex_parts.append(r"{{%s}}" % ((ex.text or "").strip(),))
            found_example = True
        if child.tail:
            ex_parts.append(child.tail)
    if not found_example:
        return None  # No example here.
    return "".join(ex_parts).strip()
def xtb_message(message_node):
    r"""Compute our canonical template id from the leading 64 bits of the
    md5 of the string after all PlaceHolders are stripped out.

    Returns a ``(simplified_message, grdsid)`` tuple.  The simplified message
    is the message text with every ``<ph>`` child collapsed to
    ``<ph name="NAME" />``, stripped of surrounding whitespace.  The grdsid is
    a decimal string: the md5 of the stripped text in which each placeholder
    is replaced by its bare name, truncated to its first 64 bits with the top
    bit cleared.

    Raises UnknownTag for any direct child that is not ``<ph>``, and KeyError
    for a ``<ph>`` without a ``name`` attribute.

    >>> xtb_message(ET.fromstring('<message name="IDS_OPTIONS_SETTINGS_INTERNET_OPTIONS_PURCHASE_UNLIMITED_DATA" desc="In settings Internet options, the label that shows user has purchased an unliminted mobile data plan.">\n You purchased unlimited data on <ph name="DATE">$1<ex>January 1, 2011</ex></ph>\n </message>'))
    ('You purchased unlimited data on <ph name="DATE" />', '...')
    >>> xtb_message(ET.fromstring('<message name="IDS_PAUSE_ON_ALL_EXCEPTIONS" desc="Current state: pause on all exceptions. After the \n separator: a hint on how to switch the state.">\nPause on all exceptions.\nClick to Pause on uncaught exceptions.\n</message>'))
    ('Pause on all exceptions.\nClick to Pause on uncaught exceptions.', ...)
    >>> xtb_message(ET.fromstring('<message desc="Text to identify Bluetooth devices of unknown or unsupported class." name="IDS_BLUETOOTH_DEVICE_UNKNOWN"> Unknown or Unsupported Device (<ph name="ADDRESS"/>) </message>'))
    ('Unknown or Unsupported Device (<ph name="ADDRESS" />)', ...)
    >>> xtb_message(ET.fromstring('<message><ph name="PRODUCT_NAME"><ex>Chromium</ex></ph> Preferences</message>'))
    ('<ph name="PRODUCT_NAME" /> Preferences', ...)
    >>> xtb_message(ET.fromstring('<message><ph name="BEGIN_BOLD" />Warning:<ph name="END_BOLD" /> Chromium cannot prevent extensions from recording your browsing history. To disable this extension in incognito mode, unselect this option.</message>'))
    ('<ph name="BEGIN_BOLD" />Warning:<ph name="END_BOLD" /> Chromium cannot prevent extensions from recording your browsing history. To disable this extension in incognito mode, unselect this option.', ...)
    >>> xtb_message(ET.fromstring('<message>f&oo "<ph name="IDS_XX">$1<ex>blabla</ex></ph>" bar</message>'))
    (..., '8733787356824206756')
    >>> xtb_message(ET.fromstring("<message>The Chromium Authors</message>"))
    (..., '985602178874221306')
    >>> xtb_message(ET.fromstring('<message>Relaunch Chrome</message>'))
    (..., '8914504000324227558')
    >>> xtb_message(ET.fromstring('<message> Relaunch Chrome </message>'))
    (..., '8914504000324227558')
    >>> xtb_message(ET.fromstring("<message>OK</message>"))
    (..., '6965382102122355670')
    >>> xtb_message(ET.fromstring("<message>Cancel</message>"))
    (..., '7658239707568436148')
    >>> xtb_message(ET.fromstring("<message>Ins</message>"))
    (..., '1871244248791675517')
    >>> xtb_message(ET.fromstring("<message>Del</message>"))
    (..., '6135826906199951471')
    >>> xtb_message(ET.fromstring("<message>Scroll Up</message>"))
    (..., '8331626408530291785')
    >>> xtb_message(ET.fromstring('<message><ph name="PRODUCT_NAME"><ex>Chromium</ex></ph> Preferences</message>'))
    (..., '3664704721673470303')
    >>> xtb_message(ET.fromstring('''<message><ph name="FILENAME">%s<ex>My Cool Image.gif</ex></ph><ph name="WIDTH">%d<ex>400</ex></ph>×<ph name="HEIGHT">%d<ex>600</ex></ph></message>'''))
    (..., '4611115858363067980')
    >>> xtb_message(ET.fromstring('''<message><ph name="BEGIN_BOLD" />Warning:<ph name="END_BOLD" /> Chromium cannot prevent extensions from recording your browsing history. To disable this extension in incognito mode, unselect this option.</message>'''))
    (..., '4207043877577553402')
    """
    simple_message_parts, grd_idfood_parts = [], []
    if message_node.text:
        simple_message_parts.append(message_node.text)
        grd_idfood_parts.append(message_node.text)
    for child in message_node:
        if child.tag != "ph":
            # Report the offending element itself; no placeholder may have
            # been seen yet at this point.
            raise UnknownTag(child.tag)
        name = child.attrib["name"]
        # The placeholder's own text (e.g. "$1") and its <ex> children are
        # deliberately left out of both the simplified text and the id input.
        simple_message_parts.append('<ph name="' + name + '" />')
        grd_idfood_parts.append(name)
        if child.tail:
            simple_message_parts.append(child.tail)
            grd_idfood_parts.append(child.tail)
    # See svn log http://src.chromium.org/svn/trunk/src/tools/grit/grit/extern/FP.py
    hex128 = md5("".join(grd_idfood_parts).strip().encode("UTF-8")).hexdigest()
    hex64 = int(hex128[:16], 16)  # leading 64 bits of the digest
    grdsid = str(hex64 & 0x7FFFFFFFFFFFFFFF)  # clear the top bit
    return "".join(simple_message_parts).strip(), grdsid
def load_grd(txln_info, file_name, conditions, toplevel_grd):
    """Start parsing a GRD file given a filename. This begins the entire
    method of GRIT parsing. GRD files can refer to other partial GRD files,
    and the parsing mechanism recurses back here. Additionally, this parsing
    drives reading the GRIT translation files "XTB".

    txln_info is the mapping that the parse handlers fill in; conditions and
    toplevel_grd are passed through unchanged to the handlers.  A missing
    file is logged as a warning and otherwise ignored.  Returns None.
    """
    try:
        tree = ET.parse(file_name, parser=OrderedXMLParser())
    except FileNotFoundError:
        # logging.warn is a deprecated alias; the rest of the module already
        # uses logging.warning.
        logging.warning("Can't find %s as requested by %s", file_name, toplevel_grd)
        return
    parsegrd_walker(txln_info, file_name, tree.getroot(), conditions, toplevel_grd)
def parsegrd_walker(txln_info, file_name, node, conditions, toplevel_grd):
    """Walk the children of a structural node as directed by GRD_STRUCTURE.

    The entry for node's tag says, per child tag, what to do: None skips the
    child, a string makes the walker recurse into the child, and anything
    else is called as a handler with the same five arguments.  Children whose
    tag is not listed are logged as errors and skipped.
    """
    allowed = GRD_STRUCTURE[node.tag]
    if allowed is None:
        return
    for element in node:
        tag = element.tag
        if tag not in allowed:
            logging.error("Unknown tag %s in %s in %s", element, node, file_name)
            continue
        action = allowed[tag]
        if action is None:
            continue
        if isinstance(action, str):
            parsegrd_walker(txln_info, file_name, element, conditions, toplevel_grd)
        else:
            action(txln_info, file_name, element, conditions, toplevel_grd)
def parsegrd_message(txln_info, file_name, node, conditions, _):
    """Record one <message> element: the template string translators need.

    Messages marked translateable="false" or use_name_for_id="true", and
    messages whose simplified text is empty, are skipped.  Otherwise the
    entry for the message's GRDSID in txln_info gets its "text", optionally
    an "example", and one more item in "refs" describing this occurrence.
    """
    attributes = node.attrib
    if attributes.get("translateable", "true").lower().strip() == "false":
        return
    if attributes.get("use_name_for_id", "false").lower().strip() == "true":
        logging.debug("Skipping GRD item %s that uses its name as its GRDSID.", node)
        return

    simplified_message, grdsid = xtb_message(node)
    if not simplified_message:
        return

    entry = txln_info.setdefault(grdsid, dict())
    entry["text"] = simplified_message

    example = example_message(node)
    if example:
        entry["example"] = example

    references = entry.setdefault("refs", list())
    assert file_name.endswith((".grd", ".grdp")), file_name
    references.append({
        "note": attributes.get("desc", ""),
        "id": attributes["name"],
        "conditions": conditions,
        "grdfile": file_name,  # back reference to GRD who wants that GRDID
    })
def parsegrd_file(txln_info, file_name, node, conditions, toplevel_grd):
    """This is where we jump into parsing a translation "XTB" file for some
    language.

    The XTB path comes from the node's "path" attribute, resolved relative to
    the directory of file_name; the language comes from its "lang" attribute.
    A missing XTB file is logged as a warning and otherwise ignored.
    """
    # This has "grd" in the name because it's an event in parsing a GRD file,
    # but here is where we jump into parsing an XTB file!
    xtb_filename = os.path.normpath(os.path.join(os.path.dirname(file_name), node.attrib["path"]))
    lang = node.attrib["lang"]
    try:
        load_xtb(lang, txln_info, toplevel_grd, xtb_filename, conditions)
    except FileNotFoundError:
        # logging.warn is a deprecated alias; the rest of the module already
        # uses logging.warning.
        logging.warning("file %s, asked for by %s, could not be found, so we didn't import anything from it.", xtb_filename, toplevel_grd)
def parsegrd_if(txln_info, file_name, node, conditions, toplevel_grd):
    """Descend into an <if> element, remembering its condition.

    The node's "expr" attribute is appended to the conditions tuple handed to
    everything parsed beneath it, so the condition is stored with the data
    and can be evaluated later, when the gettext files are made and the
    translator is told through comments whether to translate or not.
    """
    extended_conditions = conditions + (node.attrib["expr"],)
    parsegrd_walker(txln_info, file_name, node, extended_conditions, toplevel_grd)
def _parsegrd_part(txln_info, file_name, node, conditions, toplevel_grd):
    """Load the partial GRD named by a <part> node's "file" attribute,
    resolved relative to the directory of the including file."""
    part_file_name = os.path.normpath(os.path.join(os.path.dirname(file_name), node.attrib["file"]))
    return load_grd(txln_info, part_file_name, conditions, toplevel_grd)


# Expected structure of a GRD or GRDP file, together with what we intend to do
# with each piece.  The top-level keys are the places where parsing can start
# or restart.  Each value maps the tag names allowed inside that element to an
# action: None means ignore the element and all its children, a string names
# the parsing state to jump to, and a function is a custom handler to run.
GRD_STRUCTURE = {
    "grit": {
        "file": None,
        "output": None,
        "translations": "translations",
        "release": "release",
        "outputs": None,
    },
    "grit-part": {
        "file": None,
        "output": None,
        "translations": "translations",
        "release": "release",
        "message": parsegrd_message,
        "if": parsegrd_if,
    },
    "translations": {
        "file": parsegrd_file,
        "if": parsegrd_if,
    },
    "release": {
        "structures": None,
        "includes": None,
        "messages": "messages",
    },
    "messages": {
        "if": parsegrd_if,
        "message": parsegrd_message,
        "part": _parsegrd_part,
    },
    # "if" seemingly unconnected. We go through condition function first,
    # then jump back in here.
    "if": {
        "if": parsegrd_if,
        "message": parsegrd_message,
        "file": parsegrd_file,
        "part": _parsegrd_part,
    },
}
def get_grdsids(root, source_file_name):
    """Return the GRDSIDs found by walking the first <release> child of root.

    The walk fills a throwaway mapping, so nothing outside this function is
    modified.  Raises IndexError when root has no <release> child.
    """
    first_release = root.findall("release")[0]
    collected = {}
    parsegrd_walker(collected, source_file_name, first_release, (), source_file_name)
    return list(collected)
def store_new_xtb_file(txln_info, dest_filename, ref_grd_filename, lang_to_add, grdsid_list_to_insert):
    """We just updated the referring GRD file for this language, so now we dump
    all known strings for this language.

    For every grdsid in grdsid_list_to_insert that has a translation for
    lang_to_add in txln_info, one <translation> element is written.  Its id
    is taken from the ref whose "grdfile" path shares the most components
    (directory, base name) with ref_grd_filename; ties go to the earliest
    ref.  Entries that are unknown, have no "langs", have no translation for
    this language, or have no refs are logged and skipped.

    The result is written as UTF-8 XML to dest_filename, creating its parent
    directory when needed.  txln_info is not modified.
    """
    assert ref_grd_filename.endswith((".grd", ".grdp")), ref_grd_filename
    root = ET.Element("translationbundle")
    root.text = "\n"
    tree = ET.ElementTree(root)

    # Loop-invariant: path components of the GRD we are writing for.
    ref_grd_parts = set(os.path.split(ref_grd_filename))

    def shared_path_parts(ref):
        """Count the path components a ref's GRD file shares with ours."""
        return len(set(os.path.split(ref["grdfile"])) & ref_grd_parts)

    for grdsid in grdsid_list_to_insert:
        template_info = txln_info.get(grdsid)
        if not template_info:
            logging.warning("grsid %s, which we want to store, is not in txln_info", grdsid)
            continue
        if "langs" not in template_info:
            logging.debug("In writing XTB for %s, grdsid %s is apparently unwanted.", lang_to_add, grdsid)
            continue
        txln = template_info["langs"].get(lang_to_add)
        if not txln:
            logging.debug("In writing XTB for %s, grdsid %s doesn't have anything to send to LP.", lang_to_add, grdsid)
            continue

        # Find the best ref.  A missing or empty list is the same failure.
        refs = template_info.get("refs")
        if not refs:
            logging.error("Wanted to find the best ref for %r, for %r, but there are no refs!", dest_filename, grdsid)
            continue
        # max() returns the first maximal item, which is what a stable sort
        # would put first, without reordering the caller's list in place.
        best_ref = max(refs, key=shared_path_parts)

        translation = ET.Element("translation", dict(id=best_ref["id"]))
        assert isinstance(txln[0][0], str), txln
        translation.text = txln[0][0]  # string from first available tuple TODO(cm) Decide if this is best.
        root.append(translation)

    dest_dir = os.path.dirname(dest_filename)
    if dest_dir:  # os.makedirs("") fails; a bare file name needs no directory
        try:
            os.makedirs(dest_dir, 0o755, exist_ok=True)  # pylint: disable=E1123
        except FileExistsError:  # pylint: disable=E0602
            pass
    with open(dest_filename, "wb") as f:
        tree.write(f, encoding="utf-8")
    logging.debug("Wrote XTB %s", dest_filename)
def add_missing_grd_files_and_export_xtb(source_txln_info, root, source_grd_filename, dest_grd_filename, destination_container):
    """Add <file> entries for languages the GRD lacks and write their XTBs.

    The languages that have translations for this GRD's messages (per
    source_txln_info) are compared with the languages already listed under
    the GRD's single <translations> element.  For each language that is only
    in the former, a <file> element is added inside a new
    <if expr="pp_ifdef('use_third_party_translations')"> element appended to
    <translations>, and a matching XTB file is written below
    destination_container.  root is modified in place; writing the GRD itself
    is left to the caller.

    Nothing is done when the GRD has no <translations> element or more than
    one.  Raises UnknownTag when <translations> contains anything other than
    <file> elements or <if> elements holding only <file> elements.
    """
    grdsid_list_to_insert = get_grdsids(root, source_grd_filename)

    translations_tags = root.findall("translations")
    if not translations_tags:
        logging.debug("No translations tag in %r, so not exporting. Skipping it.", source_grd_filename,)
        return
    if len(translations_tags) > 1:
        logging.error("%s translations tag to update in %r. I don't know what to do with these.", len(translations_tags), source_grd_filename)
        return
    translations_tag = translations_tags[0]

    # Languages for which at least one of this GRD's messages is translated.
    languages_in_grdandgettext = set()
    for grdsid in grdsid_list_to_insert:
        string_info = source_txln_info.get(grdsid)
        if not string_info:
            logging.error("A grdsid %s from grd+gettext is not in txln_info. Discarding!", grdsid)
            continue
        languages_in_grdandgettext.update(string_info.get("langs", ()))

    # Languages the GRD already references, directly or inside one <if>.
    languages_in_grd = set()
    for child in translations_tag:
        if child.tag == "file":
            languages_in_grd.add(child.attrib["lang"])
        elif child.tag == "if":
            for grandchild in child:
                if grandchild.tag != "file":
                    raise UnknownTag(grandchild.tag)
                languages_in_grd.add(grandchild.attrib["lang"])
        else:
            raise UnknownTag(child.tag)

    iflpxln_tag = ET.Element("if", dict(expr="pp_ifdef('use_third_party_translations')"))
    iflpxln_tag.text = "\n "
    iflpxln_tag.tail = "\n"
    translations_tag.append(iflpxln_tag)

    grd_container = os.path.dirname(dest_grd_filename)
    xtb_container = os.path.join(destination_container, os.path.splitext(os.path.basename(dest_grd_filename))[0])
    # Sorted so the generated GRD lists languages in a reproducible order;
    # plain set iteration order for strings can differ from run to run.
    for gettext_lang_to_add in sorted(languages_in_grdandgettext - languages_in_grd):
        assert "." not in gettext_lang_to_add
        assert "/" not in gettext_lang_to_add

        xtb_filename = os.path.join(xtb_container, gettext_lang_to_add + ".xtb")
        xtb_filename_relative_to_grd = os.path.relpath(xtb_filename, start=grd_container)

        # update GRD
        file_tag = ET.Element("file", dict(lang=gettext_lang_to_add, path=xtb_filename_relative_to_grd))
        file_tag.tail = "\n "
        iflpxln_tag.append(file_tag)

        # make new XTB
        store_new_xtb_file(source_txln_info, xtb_filename, source_grd_filename, gettext_lang_to_add, grdsid_list_to_insert)

    # TODO: languages present in both sets (languages_in_grdandgettext &
    # languages_in_grd) are left untouched; updating their existing XTB files
    # is not implemented.
def save_grd(txln_info, source_file_name, source_container, destination_container):
    """Write out a GRIT GRD file, based on the file we started with.

    The source GRD is parsed again, extended with <file> entries (and XTB
    files) for languages it does not list yet, and written to the same
    relative location under destination_container, as UTF-8.  The relative
    location is whatever follows the first len(source_container) characters
    of source_file_name, minus leading path separators.
    """
    relative_source = source_file_name[len(source_container):].lstrip(os.sep)
    dest_filename = os.path.join(destination_container, relative_source)

    tree = ET.parse(source_file_name, parser=OrderedXMLParser())
    add_missing_grd_files_and_export_xtb(txln_info, tree.getroot(), source_file_name, dest_filename, destination_container)

    try:
        os.makedirs(os.path.dirname(dest_filename), 0o755, exist_ok=True)  # pylint: disable=E1123
    except FileExistsError:  # pylint: disable=E0602
        pass

    with open(dest_filename, "wb") as grd_out:
        tree.write(grd_out, encoding="utf-8")
    logging.debug("Wrote GRD %s", dest_filename)
|