~chromium-team/chromium-browser/translations-pump

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#!/usr/bin/python3

"""
Copyright Canonical, 2014, 2015.
Author: Chad Miller

A translations intermediary for Chromium Project and Launchpad.

A translation is driven by a GRD file (and perhaps GRDP files a GRD refers to),
which contains the strings and IDs that programs need and references to XTB
files for some number of languages.

This tool exports PO and POT files that Launchpad can consume and process, and
this tool imports PO and POT files to create new language entries in GRD and
new XTB language files.

Suppose an upstream-translated string is changed in Launchpad? We always prefer
the Launchpad version to set in Chromium's XTB files, but we send a comment to
Launchpad so humans can resolve the conflict by keeping it or changing it to
what Chromium's official translators set.

We really only care deeply about translations for the stable Chromium version,
but Launchpad translations are not instantaneous so we read the Beta and Dev
Chromium versions' templates so we can send translation requests to Launchpad,
and later receive them in time for release of new versions.


So:  GRD+GRDP are a kind of POT, and XTB are a kind of PO.

There's a minor problem in that the ID used by GRD and XTB is an integer,
generated from the text of the template literal string.

The GRD+GRDP files contain conditions as Python-interpretable expressions. We
try to test those conditions and avoid exporting lots of ChromeOS or Android or
ObscureOtherOS strings.

Finally, the GRD+GRDP and XTB files have some screwy language codes like "no",
"pt-PT", "iw". We treat those as canonical, and only convert to sane versions
at the edges to/from Launchpad PO files. GRD is king, inside this program, and
we convert at the edges, like Unicode internal and encoding on in/out.

Also, conditions are evaluated on generating launchpad items only.
"""

import sys
import argparse
import logging
import os


from fileformats.grd import load_grd, save_grd
from fileformats.po import load_po, save_all_po
from fileformats.pot import save_pot

class UnknownGrdTranslationDomain(ValueError):
    """A GRD file's namespace is unknown.

    Raised when a GRD file on disk has no entry in the table that maps GRD
    path fragments to gettext domains, so the operator has to decide which
    domain (or None, to ignore it) the new file belongs to.
    """

# Maps the trailing part of a GRD file's path to the gettext domain its
# strings belong to.  Lookups strip leading directories from a file's path
# until one of these keys matches.
# A None domain is something we wish to ignore.
GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN = {
    "accessibility/extensions/strings/accessibility_extensions_strings.grd": "chromium-browser",
    "android/java/strings/android_chrome_strings.grd": None,
    "android/java/strings/android_ui_strings.grd": None,
    "android_webview/java/strings/android_webview_strings.grd": None,
    "android_webview/ui/aw_resources.grd": None,
    "android_webview/ui/aw_strings.grd": None,
    "app/address_input_strings.grd": "chromium-browser",
    "app/chromium_strings.grd": "chromium_strings",
    "app/generated_resources.grd": "chromium-browser",
    "app/google_chrome_strings.grd": "chromium-browser",
    "app_list/resources/app_list_resources.grd": "chromium-browser",
    "app/resources/locale_settings_chromiumos.grd": None,
    "app/resources/locale_settings_google_chromeos.grd": None,
    "app/resources/locale_settings.grd": None,   # Not strings.
    "app/resources/locale_settings_linux.grd": None,   # Not strings.
    "app/resources/locale_settings_mac.grd": None,
    "app/resources/locale_settings_win.grd": None,
    "app/settings_strings.grd": "chromium-browser",
    "app/theme/chrome_unscaled_resources.grd": None,
    "app/theme/theme_resources.grd": None,
    "ash/ash_strings.grd": None,
    "ash/resources/ash_resources.grd": None,
    "blimp/client/android/java/strings/android_blimp_strings.grd": None,
    "browser/browser_resources.grd": "chromium-browser",
    "browser/devtools/device/webrtc/resources.grd": "chromium-browser",
    "browser/resources/chromeos/chromevox/strings/chromevox_strings.grd": "chromium-browser",
    "browser/resources/component_extension_resources.grd": "chromium-browser",
    "browser/resources/invalidations_resources.grd": "chromium-browser",
    "browser/resources/memory_internals_resources.grd": "chromium-browser",
    "browser/resources/net_internals_resources.grd": "chromium-browser",
    "browser/resources/options_resources.grd": "chromium-browser",
    "browser/resources/options_test_resources.grd": None,
    "browser/resources/password_manager_internals_resources.grd": "chromium-browser",
    "browser/resources/quota_internals_resources.grd": "chromium-browser",
    "browser/resources/settings/settings_resources.grd": "chromium-browser",
    "browser/resources/signin_internals_resources.grd": "chromium-browser",
    "browser/resources/sync_file_system_internals_resources.grd": "chromium-browser",
    "browser/resources/sync_internals_resources.grd": "chromium-browser",
    "browser/resources/translate_internals_resources.grd": "chromium-browser",
    "chromecast/app/resources/chromecast_settings.grd": None,
    "chromecast/app/resources/shell_resources.grd": None,
    "chromeos/resources/ui_chromeos_resources.grd": None,
    "chromeos/ui_chromeos_strings.grd": None,
    "cloud_print/service/win/service_resources.grd": None,
    "cloud_print/virtual_driver/win/install/virtual_driver_setup_resources.grd": None,
    "common/common_resources.grd": "chromium-browser",
    "common/extensions_api_resources.grd": "chromium-browser",
    "components/chrome_apps/chrome_apps_resources.grd": None,
    "components/components_chromium_strings.grd": "chromium-browser",
    "components/components_google_chrome_strings.grd": "chromium-browser",
    "components/components_strings.grd": "chromium-browser",
    "components/html_viewer/html_viewer_resources.grd": None,
    "components/policy/resources/policy_templates.grd": "chromium-browser",
    "components/resources/components_resources.grd": None,
    "components/resources/components_scaled_resources.grd": None,
    "content/app/resources/content_resources.grd": "chromium-browser",
    "content/app/strings/content_strings.grd": "chromium-browser",
    "content/content_resources.grd": "chromium-browser",
    "content/public/android/java/strings/android_content_strings.grd": None,
    "content/shell/shell_resources.grd": "chromium-browser",
    "device/bluetooth/bluetooth_strings.grd": None,
    "extensions/browser/resources/extensions_browser_resources.grd": "chromium-browser",
    "extensions/extensions_resources.grd": "chromium-browser",
    "extensions/extensions_strings.grd": "chromium-browser",
    "extensions/renderer/resources/extensions_renderer_resources.grd": "chromium-browser",
    "extensions/shell/app_shell_resources.grd": "chromium-browser",
    "file_manager/file_manager_resources.grd": "chromium-browser",
    "ios/chrome/app/strings/ios_chromium_strings.grd": None,
    "ios/chrome/app/strings/ios_google_chrome_strings.grd": None,
    "ios/chrome/app/strings/ios_locale_settings.grd": None,
    "ios/chrome/app/strings/ios_strings.grd": None,
    "ios/chrome/app/strings/ios_strings_resources.grd": None,
    "ios/chrome/app/theme/ios_theme_resources.grd": None,
    "ios/chrome/today_extension/strings/ios_today_extension_strings.grd": None,
    "keyboard/keyboard_resources.grd": "chromium-browser",
    "login/login_resources.grd": "chromium-browser",
    "net/base/net_resources.grd": "chromium-browser",
    "remoting/resources/remoting_strings.grd": "remoting_strings",
    "renderer/resources/renderer_resources.grd": "chromium-browser",
    "resources/ui_resources.grd": "chromium-browser",
    "resources/ui_unscaled_resources.grd": "chromium-browser",
    "strings/app_locale_settings.grd": "chromium-browser",
    "strings/ui_strings.grd": "chromium-browser",
    "test/data/webui_test_resources.grd": None,
    "third_party/libaddressinput/src/cpp/res/messages.grd": "chromium-browser",
    "third_party/WebKit/public/blink_image_resources.grd": "chromium-browser",
    "third_party/WebKit/public/blink_resources.grd": "chromium-browser",
    "tools/grit/grit/testdata/buildinfo.grd": None,
    "tools/grit/grit/testdata/chrome/app/generated_resources.grd": None,
    "tools/grit/grit/testdata/substitute.grd": None,
    "tools/grit/grit/testdata/substitute_no_ids.grd": None,
    "tools/grit/grit/testdata/whitelist_resources.grd": None,
    "tools/grit/grit/testdata/whitelist_strings.grd": None,
    "views/resources/views_resources.grd": "chromium-browser",
    "webui/resources/webui_resources.grd": "chromium-browser",
}
# Keys of the table above that matched at least one GRD file during import;
# the remainder are reported as unused so stale entries can be noticed.
used_grd_pathfragment_to_gettext_domain = set()

def test_grit_sanity(txln_info):
    """A few simple tests that note problems in imported data."""
    in_grd_not_xtb = list()
    in_xtb_not_grd = list()
    for sid in txln_info:
        if "text" not in txln_info[sid] and "langs" not in txln_info[sid]:
            logging.debug("WTF %s  %s", sid, txln_info[sid])
        elif "text" not in txln_info[sid]:
            logging.debug("%s has no template   %s", sid, txln_info[sid]["langs"])
            in_xtb_not_grd.append(sid)
        elif "langs" not in txln_info[sid]:
            logging.debug("%s has no translation  %r", sid, txln_info[sid]["text"])
            in_grd_not_xtb.append(sid)
    if in_grd_not_xtb:
        logging.warn("There are %d childless template strings in grd, but not in xtb.", len(in_grd_not_xtb))
    if in_xtb_not_grd:
        logging.warn("There are %d orphaned translations in xtb, but not ref in grd.", len(in_xtb_not_grd))
    orphan_childless_overlap = set(in_grd_not_xtb) & set(in_xtb_not_grd)
    if orphan_childless_overlap:
        logging.warn("Whoa, %d pairless translations and templates overlap! This might be a bug.\n%s", len(orphan_childless_overlap), sorted(orphan_childless_overlap))


# GRD files found on disk that have no entry (not even None) in
# GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.
grd_refs_with_unknown_domains = set()


def read_in_grit_data(txlns_infos, cr_roots, oswalk=os.walk, load_grd=load_grd):
    """Find GRD files and read templates and, from XTB files, translations.

    Walks every directory tree in ``cr_roots`` and, for each ``*.grd`` file,
    looks up its translation domain by stripping leading directories from its
    path until the remainder is a key of GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.

    * A file whose domain is None is skipped.
    * A file with no matching key is recorded in
      ``grd_refs_with_unknown_domains`` and skipped; nothing is raised here.
    * Otherwise ``load_grd`` is called with that domain's dict from
      ``txlns_infos`` (created on first use).

    Every key that matched is added to
    ``used_grd_pathfragment_to_gettext_domain``.

    ``oswalk`` and ``load_grd`` are injectable replacements for ``os.walk``
    and the GRD loader.  Returns ``txlns_infos``, which is updated in place.
    """
    for cr_root in cr_roots:
        for dir_path, _, file_names in oswalk(cr_root):
            for filename in file_names:
                if not filename.endswith(".grd"):
                    continue
                filename = os.path.normpath(os.path.join(dir_path, filename))

                # False is "not found".  None is "found but unwanted".  We
                # don't let unknown ones slip through: they are collected so
                # the runner can decide what the new file's domain is.
                translation_domain = False

                fragment = filename
                while os.sep in fragment:
                    # Strip off a leading directory, then try the remainder.
                    fragment = fragment.split(os.sep, 1)[1]

                    translation_domain = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.get(fragment, False)

                    # Identity test: only the False sentinel means "keep
                    # searching"; None is a real (quashing) answer.
                    if translation_domain is not False:
                        used_grd_pathfragment_to_gettext_domain.add(fragment)
                        break

                if translation_domain is None:
                    logging.debug("Skipping %s file because it's in a quashed translation domain.", filename)
                    continue

                if translation_domain is False:
                    grd_refs_with_unknown_domains.add(filename)
                    continue

                if translation_domain not in txlns_infos:
                    txlns_infos[translation_domain] = dict()

                load_grd(txlns_infos[translation_domain], filename, tuple(), filename)

    return txlns_infos


def read_in_gettext_data(txlns_infos, lp_roots, oswalk=os.walk):
    """Load what Launchpad already knows, so it can be sent back unchanged.

    For every translation domain already present in ``txlns_infos`` and every
    root in ``lp_roots``, walks ``<root>/<domain>`` and passes each ``*.po``
    file to ``load_po`` together with that domain's dict.  The language handed
    to ``load_po`` is the file's base name without the ".po" suffix.

    ``oswalk`` is an injectable replacement for ``os.walk``.  Returns
    ``txlns_infos``.
    """
    po_suffix = ".po"
    for domain, domain_info in txlns_infos.items():
        for root in lp_roots:
            domain_dir = os.path.join(root, domain)
            for parent, _subdirs, names in oswalk(domain_dir):
                for name in names:
                    if name.endswith(po_suffix):
                        po_path = os.path.normpath(os.path.join(parent, name))
                        language = os.path.basename(po_path)[:-len(po_suffix)]
                        load_po(domain_info, language, po_path)
    return txlns_infos
    

def export_grid_data(txlns_infos, import_cr_roots, export_cr_root, oswalk=os.walk):
    """Write GRIT data, using the source GRDs again as the master list.

    Walks every tree in ``import_cr_roots``; for each ``*.grd`` file, finds
    its translation domain by stripping leading directories from its path
    until the remainder is a key of GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN, then
    calls ``save_grd`` with that domain's dict from ``txlns_infos``.

    Files whose domain is None are skipped.  ``oswalk`` is an injectable
    replacement for ``os.walk``.  Returns ``txlns_infos``.

    Raises:
        UnknownGrdTranslationDomain: a GRD file matches no key in the table.
    """
    for import_cr_root in import_cr_roots:
        for dir_path, _, file_names in oswalk(import_cr_root):
            for filename in file_names:
                if not filename.endswith(".grd"):
                    continue
                filename = os.path.normpath(os.path.join(dir_path, filename))

                # False is "not found"; None is "found but unwanted".
                translation_domain = False

                fragment = filename
                # Loop only while there is still a directory to strip.
                # Indexing the split result of a separator-free fragment
                # would raise IndexError and hide the intended error below.
                while os.sep in fragment:
                    fragment = fragment.split(os.sep, 1)[1]  # Strip off a leading directory
                    translation_domain = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.get(fragment, False)
                    if translation_domain is not False:
                        break
                else:  # We exhausted the search without finding anything and "break"ing out.
                    raise UnknownGrdTranslationDomain(filename)

                if not translation_domain:
                    logging.debug("Skipping %s file because it's in a quashed translation domain.", filename)
                    continue

                save_grd(txlns_infos[translation_domain], filename, import_cr_root, export_cr_root)
    return txlns_infos
    
def export_gettext_data(txlns_infos, lp_out_dir):
    """Write gettext files for importing to LP.

    For each translation domain in ``txlns_infos``, calls ``save_pot`` and
    then ``save_all_po`` with that domain's dict, ``lp_out_dir`` and the
    domain name.
    """
    for domain, domain_info in txlns_infos.items():
        save_pot(domain_info, lp_out_dir, domain)
        save_all_po(domain_info, lp_out_dir, domain)

def process(cr_in_dirs, cr_out_dir, lp_in_dirs, lp_out_dir):
    """The top-level function: import both sides, report, then export both.

    Reads GRIT data from ``cr_in_dirs`` and gettext data from ``lp_in_dirs``
    into one shared structure, prints any domain-table keys that matched no
    GRD file, and then writes gettext data to ``lp_out_dir`` and GRIT data to
    ``cr_out_dir``.

    Raises:
        UnknownGrdTranslationDomain: at least one GRD file had no entry in
            the domain table; the offending paths are printed first and
            nothing is exported.
    """
    txlns_infos = {}

    logging.info("Importing GRIT (GRD + XTB) data.")
    read_in_grit_data(txlns_infos, cr_in_dirs)
    logging.info("Importing gettext (POT + PO) data.")
    read_in_gettext_data(txlns_infos, lp_in_dirs)

    # Table entries that no file on disk matched.
    unused_keys = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.keys() - used_grd_pathfragment_to_gettext_domain
    for unused_key in sorted(unused_keys):
        print("Unused grd/po-domain key: ", unused_key)

    if grd_refs_with_unknown_domains:
        def below_first_two_components(file_path):
            # Sort on the path with its first two components dropped.
            return file_path.split(os.sep)[2:]

        for unknown_path in sorted(grd_refs_with_unknown_domains, key=below_first_two_components):
            print("Unknown GRD file POT domain: ", unknown_path)
        raise UnknownGrdTranslationDomain()

    logging.info("Exporting gettext (POT + PO) data.")
    export_gettext_data(txlns_infos, lp_out_dir)
    logging.info("Exporting GRIT (GRD + XTB) data.")
    export_grid_data(txlns_infos, cr_in_dirs, cr_out_dir)


if __name__ == "__main__":
    import doctest
    import fileformats

    # Run the doctests of this module and of the file-format modules before
    # doing any real work.  The first failure aborts the run.
    # NOTE(review): fileformats.xtb and fileformats.gettext are reached as
    # attributes of the package without an explicit import here; presumably
    # another fileformats module imports them -- confirm.
    try:
        doctest.testmod(raise_on_error=True, exclude_empty=True, verbose=False)
        doctest.testmod(fileformats.xtb, raise_on_error=True, verbose=False)
        doctest.testmod(fileformats.grd, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
        doctest.testmod(fileformats.po, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
        doctest.testmod(fileformats.pot, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
        doctest.testmod(fileformats.gettext, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
    except doctest.DocTestFailure as exc:
        # A doctest produced the wrong output: show it, then let the
        # exception propagate.
        print("for test %s" % (exc.example.source.rstrip()))
        print("expected %s" % (exc.example.want.rstrip()))
        print(" but got %s" % (exc.got.rstrip()))
        raise
    except doctest.UnexpectedException as exc:
        # A doctest raised: print the original exception and traceback, then
        # exit with a failure status.
        import traceback
        print("%s  %s" % (exc.exc_info[0], exc.exc_info[1]))
        for line in traceback.format_tb(exc.exc_info[2]):
            print(line.rstrip())
        sys.exit(1)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    arg_parser.add_argument("-p", "--profile", dest="profile", action="store_true")
    arg_parser.add_argument("-c", "--chromium_source", action="append", required=True)
    arg_parser.add_argument("-C", "--chromium_destination", action="store", required=True)
    arg_parser.add_argument("-l", "--launchpad_source", action="append", required=True)
    arg_parser.add_argument("-L", "--launchpad_destination", action="store", required=True)
    args = arg_parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO, format="%(module)s %(lineno)3d: %(message)s", stream=sys.stdout)
        # Logged at INFO: the level configured just above would drop a DEBUG
        # record, so the confirmation would never be seen.
        logging.info("Verbose logging on.")
    else:
        logging.basicConfig(level=logging.WARNING, format="%(module)s %(lineno)3d: %(message)s", filename="run.log")

    # A separate, non-propagating logger whose records go only to the
    # "notes-to-translators" file, which is truncated on every run.
    translator_logger = logging.getLogger("to launchpad")
    translator_logger.propagate = False
    translator_logger.addHandler(logging.FileHandler("notes-to-translators", "w"))
    translator_logger.setLevel(logging.DEBUG)

    if args.profile:
        import cProfile
        cProfile.run('process(args.chromium_source, args.chromium_destination, args.launchpad_source, args.launchpad_destination)')
    else:
        process(args.chromium_source, args.chromium_destination, args.launchpad_source, args.launchpad_destination)

    sys.exit(0)