~chromium-team/chromium-browser/translations-pump

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
#!/usr/bin/python3
# -*- coding: utf-8 -*-

"""A translations intermediary for Chromium Project and Launchpad.

A translation is driven by a GRD file (and perhaps GRDP files a GRD refers to),
which contains the strings and IDs that programs need and references to XTB
files for some number of languages.

This tool exports PO and POT files that Launchpad can consume and process, and
this tool imports PO and POT files to create new language entries in GRD and
new XTB language files.

What if an upstream-translated string is changed in Launchpad? We always prefer
the Launchpad version, but we send a comment to Launchpad about the conflict.

We really only care deeply about translations for the stable Chromium version,
but Launchpad translations are not instantaneous so we read the Beta and Dev
Chromium versions' templates so we can send translation requests to Launchpad,
and later receive them in time for release of new versions.


So:  GRD+GRDP are a kind of POT, and XTB are a kind of PO.

There's a trick in that the IDs for GRD and XTB are an integer ID, generated
from the text of the template literal string.  

The GRD+GRDP files contain conditions as Python-interpretable expressions. We
try to test those conditions and avoid exporting lots of ChromeOS or Android or
ObscureOtherOS strings.

Finally, the GRD+GRDP and XTB files have some screwy language codes like "no",
"pt-PT", "iw". We treat those as canonical, and only convert to sane versions
at the edges to/from Launchpad files. GRD is king.

Also, conditions are evaluated on generating launchpad items only.
"""

import sys
import argparse
import logging
import os


from fileformats.grd import load_grd, save_grd
from fileformats.po import load_po, save_all_po
from fileformats.pot import save_pot

class UnknownGrdTranslationDomain(ValueError):
    """A GRD file's namespace is unknown.

    Raised, with the offending file path as its only argument, when no
    trailing fragment of a ``.grd`` file's path is listed in
    GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.
    """


# Maps the trailing path fragment of every known .grd file to the gettext
# translation domain its strings belong to.
#
#   * A string value names the domain; several .grd files may share one.
#   * None means the file is known but deliberately ignored (the readers and
#     writers below log it as a "quashed translation domain" and skip it).
#   * A .grd file whose path matches no key at all is treated as an error
#     (UnknownGrdTranslationDomain), so new upstream files must be added here.
GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN = {
    "ash/ash_strings.grd": None,
    "ash/resources/ash_resources.grd": None,
    "ash/resources/ash_wallpaper_resources.grd": None,
    "chrome/android/java/strings/android_chrome_strings.grd": None,
    "chrome/app/chromium_strings.grd": "chromium_strings",
    "chrome/app/generated_resources.grd": "generated_resources",
    "chrome/app/google_chrome_strings.grd": "app_strings",
    "chrome/app/policy/policy_templates.grd": "policy_templates",
    "chrome/app/resources/locale_settings_chromiumos.grd": None,
    "chrome/app/resources/locale_settings_google_chromeos.grd": None,
    "chrome/app/resources/locale_settings.grd": None,   # Not strings.
    "chrome/app/resources/locale_settings_linux.grd": None,   # Not strings.
    "chrome/app/resources/locale_settings_mac.grd": None,
    "chrome/app/resources/locale_settings_win.grd": None,
    "chrome/app/theme/chrome_unscaled_resources.grd": None,
    "chrome/app/theme/theme_resources.grd": None,
    "chrome/browser/browser_resources.grd": "chromium-browser",
    "chrome/browser/devtools/frontend/devtools_discovery_page_resources.grd": "chromium-browser",
    "chrome/browser/resources/component_extension_resources.grd": "chromium-browser",
    "chrome/browser/resources/memory_internals_resources.grd": "chromium-browser",
    "chrome/browser/resources/net_internals_resources.grd": "chromium-browser",
    "chrome/browser/resources/options_resources.grd": "chromium-browser",
    "chrome/browser/resources/quota_internals_resources.grd": "chromium-browser",
    "chrome/browser/resources/signin_internals_resources.grd": "chromium-browser",
    "chrome/browser/resources/sync_file_system_internals_resources.grd": "chromium-browser",
    "chrome/browser/resources/sync_internals_resources.grd": "chromium-browser",
    "chrome/browser/resources/translate_internals_resources.grd": "chromium-browser",
    "chrome/common/common_resources.grd": "chromium-browser",
    "chrome/common/extensions_api_resources.grd": "chromium-browser",
    "chrome_frame/resources/chrome_frame_dialogs.grd": "chromium-browser",
    "chrome_frame/resources/chrome_frame_resources.grd": "chromium-browser",
    "chrome/renderer/resources/renderer_resources.grd": "chromium-browser",
    "cloud_print/service/win/service_resources.grd": None,
    "cloud_print/virtual_driver/win/install/virtual_driver_setup_resources.grd": None,
    "components/component_strings.grd": "chromium-browser",
    "content/content_resources.grd": "chromium-browser",
    "content/public/android/java/strings/android_content_strings.grd": None,
    "content/shell/shell_resources.grd": "chromium-browser",
    "device/device_bluetooth_strings.grd": None,
    "device/bluetooth/bluetooth_strings.grd": None,
    "net/base/net_resources.grd": "chromium-browser",
    "remoting/resources/common_resources.grd": "remoting_strings",
    "remoting/resources/remoting_strings.grd": "remoting_strings",
    "third_party/WebKit/public/blink_resources.grd": "webkit_strings",
    "third_party/WebKit/Source/WebKit/chromium/WebKit.grd": "chromium-browser",
    "tools/grit/grit/testdata/buildinfo.grd": None,
    "tools/grit/grit/testdata/chrome/app/generated_resources.grd": None,
    "tools/grit/grit/testdata/substitute.grd": None,
    "ui/android/java/strings/android_ui_strings.grd": None,
    "ui/base/strings/app_locale_settings.grd": "chromium-browser",
    "ui/base/strings/ui_strings.grd": "chromium-browser",
    "ui/keyboard/keyboard_resources.grd": "chromium-browser",
    "ui/resources/ui_resources.grd": "chromium-browser",
    "ui/resources/ui_unscaled_resources.grd": "chromium-browser",
    "ui/webui/resources/webui_resources.grd": "chromium-browser",
    "webkit/glue/inspector_strings.grd": "inspector_strings",
    "webkit/glue/resources/webkit_resources.grd": "webkit_strings",
    "webkit/glue/webkit_strings.grd": "webkit_strings",
    "webkit/tools/test_shell/test_shell_resources.grd": None,
}

def test_grit_sanity(txln_info):
    """Log problems noticed in imported translation data.

    ``txln_info`` is a mapping from a string id to a per-string record.  Each
    record is checked for two keys: "text" (the template string) and "langs"
    (the translations).  Records lacking one or both are reported at debug
    level, and a summary count of each kind is reported at warning level.

    Nothing is modified and nothing is returned; the function only logs.
    """
    in_grd_not_xtb = []
    in_xtb_not_grd = []
    for sid in txln_info:
        entry = txln_info[sid]
        has_text = "text" in entry
        has_langs = "langs" in entry
        if not has_text and not has_langs:
            logging.debug("WTF %s  %s", sid, entry)
        elif not has_text:
            logging.debug("%s has no template   %s", sid, entry["langs"])
            in_xtb_not_grd.append(sid)
        elif not has_langs:
            logging.debug("%s has no translation  %r", sid, entry["text"])
            in_grd_not_xtb.append(sid)

    # logging.warning() is used throughout: logging.warn() is a deprecated
    # alias that emits a DeprecationWarning.
    if in_grd_not_xtb:
        logging.warning("There are %d childless template strings in grd, but not in xtb.", len(in_grd_not_xtb))
    if in_xtb_not_grd:
        logging.warning("There are %d orphaned translations in xtb, but not ref in grd.", len(in_xtb_not_grd))

    # Each id lands in at most one of the two lists above, so for a plain dict
    # this intersection should always be empty; it is kept as a cheap
    # self-check in case the classification above is ever changed.
    orphan_childless_overlap = set(in_grd_not_xtb) & set(in_xtb_not_grd)
    if orphan_childless_overlap:
        logging.warning("Whoa, %d pairless translations and templates overlap! This might be a bug.\n%s", len(orphan_childless_overlap), sorted(orphan_childless_overlap))


def read_in_grit_data(txlns_infos, cr_roots, oswalk=os.walk, load_grd=load_grd):
    """Find GRD files and read templates and, from XTB files, translations.

    Every directory in ``cr_roots`` is walked with ``oswalk``.  For each file
    whose name ends in ".grd", the translation domain is looked up in
    GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN, trying the whole normalized path first
    and then ever-shorter trailing fragments (one leading directory removed
    per step), so the longest matching fragment wins.

    Files mapped to None are skipped.  For all others ``load_grd`` is called
    with the per-domain dict (created on first use), the file path, an empty
    tuple and the file path again.

    ``oswalk`` and ``load_grd`` are injectable replacements for ``os.walk``
    and the GRD loader.

    Returns ``txlns_infos``, the same dict that was passed in and mutated.

    Raises UnknownGrdTranslationDomain if no fragment of a ".grd" path is
    listed in the table.
    """
    # Private sentinel for "not in the table"; None cannot be used because it
    # is a meaningful table value ("known, but skip this file").
    unknown = object()

    for cr_root in cr_roots:
        for dir_path, _, file_names in oswalk(cr_root):
            for base_name in file_names:
                if not base_name.endswith(".grd"):
                    continue
                filename = os.path.normpath(os.path.join(dir_path, base_name))

                # Probe the full path before stripping anything, so a relative
                # path that is itself a table key matches its own entry rather
                # than failing or matching a shorter, unrelated suffix.
                fragment = filename
                translation_domain = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.get(fragment, unknown)
                while translation_domain is unknown:
                    # Strip off a leading directory.
                    _, separator, fragment = fragment.partition(os.sep)
                    if not separator or not fragment:
                        # Nothing left to strip off; the file is not listed.
                        raise UnknownGrdTranslationDomain(filename)
                    translation_domain = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.get(fragment, unknown)

                if not translation_domain:
                    logging.debug("Skipping %s file because it's in a quashed translation domain.", filename)
                    continue

                if translation_domain not in txlns_infos:
                    txlns_infos[translation_domain] = dict()

                load_grd(txlns_infos[translation_domain], filename, tuple(), filename)
    return txlns_infos


def read_in_gettext_data(txlns_infos, lp_roots, oswalk=os.walk):
    """Load what Launchpad already knows so it can be sent back unchanged.

    For every translation domain already present in ``txlns_infos`` and every
    directory in ``lp_roots``, the tree ``<lp_root>/<domain>`` is walked with
    ``oswalk``.  Each file whose name ends in ".po" is handed to ``load_po``
    together with the domain's dict and the language code, which is the file's
    base name with the ".po" suffix removed.

    Returns ``txlns_infos``, the same object that was passed in.
    """
    po_suffix = ".po"
    for domain in txlns_infos:
        for root in lp_roots:
            domain_dir = os.path.join(root, domain)
            for parent, _subdirs, names in oswalk(domain_dir):
                # Lazily build the normalized path of each PO file found here.
                po_paths = (
                    os.path.normpath(os.path.join(parent, name))
                    for name in names
                    if name.endswith(po_suffix)
                )
                for po_path in po_paths:
                    language = os.path.basename(po_path)[:-len(po_suffix)]
                    load_po(txlns_infos[domain], language, po_path)
    return txlns_infos
    

def export_grid_data(txlns_infos, import_cr_roots, export_cr_root, oswalk=os.walk):
    """Write GRIT data, using the source GRDs again as the master list.

    Every directory in ``import_cr_roots`` is walked with ``oswalk``.  For each
    file whose name ends in ".grd", the translation domain is looked up in
    GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN, trying the whole normalized path first
    and then ever-shorter trailing fragments (one leading directory removed
    per step), so the longest matching fragment wins.

    Files mapped to None are skipped.  For all others ``save_grd`` is called
    with the domain's dict from ``txlns_infos``, the source file path, the
    import root it was found under, and ``export_cr_root``.

    Returns ``txlns_infos``, the same object that was passed in.

    Raises UnknownGrdTranslationDomain if no fragment of a ".grd" path is
    listed in the table.
    """
    # Private sentinel for "not in the table"; None cannot be used because it
    # is a meaningful table value ("known, but skip this file").
    unknown = object()

    for import_cr_root in import_cr_roots:
        for dir_path, _, file_names in oswalk(import_cr_root):
            for base_name in file_names:
                if not base_name.endswith(".grd"):
                    continue
                filename = os.path.normpath(os.path.join(dir_path, base_name))

                # Probe the full path before stripping anything, so a relative
                # path that is itself a table key matches its own entry rather
                # than failing or matching a shorter, unrelated suffix.
                fragment = filename
                translation_domain = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.get(fragment, unknown)
                while translation_domain is unknown:
                    # Strip off a leading directory.
                    _, separator, fragment = fragment.partition(os.sep)
                    if not separator or not fragment:
                        # Nothing left to strip off.  Report the unlisted file
                        # with the dedicated exception rather than letting a
                        # bare IndexError escape, matching read_in_grit_data.
                        raise UnknownGrdTranslationDomain(filename)
                    translation_domain = GRD_PATHFRAGMENT_TO_GETTEXT_DOMAIN.get(fragment, unknown)

                if not translation_domain:
                    logging.debug("Skipping %s file because it's in a quashed translation domain.", filename)
                    continue

                save_grd(txlns_infos[translation_domain], filename, import_cr_root, export_cr_root)
    return txlns_infos
    
def export_gettext_data(txlns_infos, lp_out_dir):
    """Write the gettext files that get imported into Launchpad.

    For each translation domain in ``txlns_infos``, first ``save_pot`` and
    then ``save_all_po`` is called with the domain's dict, ``lp_out_dir`` and
    the domain name.
    """
    for domain in txlns_infos:
        # Template first, then the per-language files.
        for writer in (save_pot, save_all_po):
            writer(txlns_infos[domain], lp_out_dir, domain)

def process(cr_in_dirs, cr_out_dir, lp_in_dirs, lp_out_dir):
    """Run the whole pump: import both sides, then export both sides.

    ``cr_in_dirs`` and ``lp_in_dirs`` are the Chromium and Launchpad input
    directories; ``cr_out_dir`` and ``lp_out_dir`` are where the GRIT and
    gettext results are written.  All stages share one freshly created dict
    that the import stages fill and the export stages read.
    """
    txlns_infos = {}
    # Each stage: the message announcing it, the function, and its arguments.
    stages = (
        ("Importing GRIT (GRD + XTB) data.", read_in_grit_data, (txlns_infos, cr_in_dirs)),
        ("Importing gettext (POT + PO) data.", read_in_gettext_data, (txlns_infos, lp_in_dirs)),
        ("Exporting gettext (POT + PO) data.", export_gettext_data, (txlns_infos, lp_out_dir)),
        ("Exporting GRIT (GRD + XTB) data.", export_grid_data, (txlns_infos, cr_in_dirs, cr_out_dir)),
    )
    for banner, stage, stage_args in stages:
        logging.info(banner)
        stage(*stage_args)


# Script entry point: run the doctests, parse the command line, configure
# logging, then run process() (optionally under the profiler).
if __name__ == "__main__":
    import doctest, fileformats
    
    # The doctests of this module and of the file-format modules are run on
    # every invocation, before any argument parsing.  raise_on_error=True makes
    # the first failing example raise instead of being collected in a report.
    # NOTE(review): fileformats.xtb and fileformats.gettext are reached as
    # attributes of the package; this file itself only imports the grd, po and
    # pot submodules, so presumably those import xtb/gettext -- confirm.
    try:
        doctest.testmod(raise_on_error=True, exclude_empty=True, verbose=False)
        doctest.testmod(fileformats.xtb, raise_on_error=True, verbose=False)
        doctest.testmod(fileformats.grd, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
        doctest.testmod(fileformats.po, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
        doctest.testmod(fileformats.pot, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
        doctest.testmod(fileformats.gettext, raise_on_error=True, verbose=False, optionflags=doctest.ELLIPSIS)
    except doctest.DocTestFailure as exc:
        # An example produced the wrong output: show it, then re-raise so the
        # run aborts with a traceback.
        print("for test %s" % (exc.example.source.rstrip()))
        print("expected %s" % (exc.example.want.rstrip()))
        print(" but got %s" % (exc.got.rstrip()))
        raise
    except doctest.UnexpectedException as exc:
        # An example raised: print the original exception and its traceback,
        # then exit with status 1.
        import traceback
        print("%s  %s" % (exc.exc_info[0], exc.exc_info[1]))
        for line in traceback.format_tb(exc.exc_info[2]):
            print(line.rstrip())
        sys.exit(1)

    # -c and -l may be repeated (action="append") to give several input trees;
    # -C and -L each name a single output directory.  All four are required.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    arg_parser.add_argument("-p", "--profile", dest="profile", action="store_true")
    arg_parser.add_argument("-c", "--chromium_source", action="append", required=True)
    arg_parser.add_argument("-C", "--chromium_destination", action="store", required=True)
    arg_parser.add_argument("-l", "--launchpad_source", action="append", required=True)
    arg_parser.add_argument("-L", "--launchpad_destination", action="store", required=True)
    args = arg_parser.parse_args()

    # Verbose: INFO and above to stdout.  Otherwise: WARNING and above to the
    # file "run.log".
    if args.verbose:
        logging.basicConfig(level=logging.INFO, format="%(levelname).1s t%(relativeCreated)+d  %(module)s %(lineno)3d: %(message)s", stream=sys.stdout)
        # NOTE(review): this is a DEBUG message but the level configured just
        # above is INFO, so it appears to be filtered out -- confirm intent.
        logging.debug("Verbose logging on.")
    else:
        logging.basicConfig(level=logging.WARNING, format="%(module)s %(lineno)3d: %(message)s", filename="run.log")

    # A separate logger named "to launchpad" writes everything (DEBUG and up)
    # to the file "notes-to-translators", which is opened in "w" mode and so
    # starts empty on each run.  propagate=False keeps these records out of
    # the root logger's output configured above.
    translator_logger = logging.getLogger("to launchpad")
    translator_logger.propagate = False
    translator_logger.addHandler(logging.FileHandler("notes-to-translators", "w"))
    translator_logger.setLevel(logging.DEBUG)


    if args.profile:
        import cProfile
        # cProfile.run() takes the statement as a string; it relies on
        # `process` and `args` being visible in the __main__ namespace.
        cProfile.run('process(args.chromium_source, args.chromium_destination, args.launchpad_source, args.launchpad_destination)')
    else:
        process(args.chromium_source, args.chromium_destination, args.launchpad_source, args.launchpad_destination)

    sys.exit(0)