~dholbach/help-app/1426304

« back to all changes in this revision

Viewing changes to edit-here/translations.py

  • Committer: Daniel Holbach
  • Date: 2015-03-02 10:30:45 UTC
  • Revision ID: daniel.holbach@canonical.com-20150302103045-3togoihgvfrj6yho
- move all functionality of generate-* scripts into translations.py
- make find_bcp47_code() a function
- standardise variable names
- refactor code: 
  - class PO: everything related to generation/manipulation of po/pot files
  - class Documents: everything related to reading/manipulating documents

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
)
27
27
 
28
28
 
 
29
def find_bcp47_code(gettext_code, overrides=None):
    """Return the BCP 47 style language code for a gettext language code.

    ``overrides`` is an iterable of sequences whose first item is a gettext
    code and whose second item is the code to return for it.  It defaults
    to the module-level ``BCP47_OVERRIDES``.  The first entry whose gettext
    code equals ``gettext_code`` wins.

    Codes without an override are lower-cased and have every ``_``
    replaced by ``-`` (``pt_BR`` -> ``pt-br``).
    """
    if overrides is None:
        overrides = BCP47_OVERRIDES
    # One pass over the table instead of building two throw-away lists.
    for override in overrides:
        if override[0] == gettext_code:
            return override[1]
    return gettext_code.lower().replace('_', '-')
 
34
 
 
35
 
29
36
class PO4A(object):
30
 
    def __init__(self, pot_file):
 
37
    def __init__(self):
31
38
        self.default_args = [
32
39
            '-f', 'text',
33
40
            '-o', 'markdown',
34
41
            '-M', 'utf-8',
35
42
            ]
36
 
        self.pot_file = pot_file
37
43
        if not shutil.which('po4a'):
38
44
            print('Missing prerequisites. Please run: "sudo apt install po4a"')
39
45
            sys.exit(1)
47
53
            ret = subprocess.call([po4a_command]+args)
48
54
        return ret
49
55
 
50
 
    def gettextize(self, documents):
 
56
    def gettextize(self, document_fns, pot_file):
51
57
        args = []
52
 
        for document in documents:
53
 
            args += ['-m', document]
 
58
        for document_fn in document_fns:
 
59
            args += ['-m', document_fn]
54
60
        args += [
55
 
            '-p', self.pot_file,
 
61
            '-p', pot_file,
56
62
            '-L', 'utf-8',
57
63
            ]
58
 
        return self.run('po4a-gettextize', args)
 
64
        ret = self.run('po4a-gettextize', args)
 
65
        return (not ret)
59
66
 
60
67
    def updatepo(self, langs, documents):
61
 
        for lang in langs:
 
68
        for po_fn in langs:
62
69
            args = []
63
70
            for document in documents:
64
71
                args += ['-m', document]
65
 
            args += ['-p', lang.file_name]
 
72
            args += ['-p', po_fn]
66
73
            ret = self.run('po4a-updatepo', args)
67
74
            if ret:
68
75
                return False
69
76
        return True
70
77
 
71
 
    def translate(self, doc, lang):
 
78
    def translate(self, doc, po_fn):
72
79
        args = [
73
80
            '-k', TRANSLATION_COMPLETION_PERCENTAGE,
74
81
            '-m', doc,
75
 
            '-p', lang.file_name,
 
82
            '-p', po_fn,
76
83
            '-L', 'utf-8',
77
84
            ]
78
85
        return self.run('po4a-translate', args, with_output=True)
79
86
 
80
87
 
 
88
class PO(object):
    """Generation and manipulation of the .po/.pot files.

    ``langs`` maps each .po file name to a dict with the keys ``'bcp47'``
    and ``'gettext_code'``.
    """

    # Markdown link whose target starts with the "{filename}" placeholder;
    # the group captures the target text following the placeholder.
    _LINK_REGEX = re.compile(r'\[.+?\]\(\{filename\}(.+?)\)')

    def __init__(self, po4a):
        """Register every ``*.po`` file found in the translations directory.

        ``po4a`` is the object whose ``gettextize`` and ``updatepo``
        methods are used by ``generate_pot_file``.
        """
        self.translations_dir = os.path.abspath(os.path.join(PATH, '../po'))
        self.fake_lang_code = 'en_US'
        self.fake_po_fn = os.path.join(self.translations_dir,
                                       '%s.po' % self.fake_lang_code)
        self.pot_fn = os.path.join(self.translations_dir, 'help.pot')
        self.po4a = po4a
        self.langs = {}
        for po_fn in glob.glob(self.translations_dir+'/*.po'):
            self.add_language(po_fn)

    def add_language(self, po_fn):
        """Record ``po_fn`` in ``self.langs`` together with its codes.

        The gettext code is the base name of ``po_fn`` up to ``.po``.
        """
        gettext_code = os.path.basename(po_fn).split('.po')[0]
        self.langs[po_fn] = {
            'bcp47': find_bcp47_code(gettext_code),
            'gettext_code': gettext_code,
        }

    def _remove_fake_po_fn(self):
        """Delete the fake .po file; a missing file is not an error."""
        fake_po_fn = getattr(self, 'fake_po_fn', None)
        if fake_po_fn is None:
            # Reached from __del__ after __init__ failed early: nothing
            # was created, so there is nothing to clean up.
            return
        try:
            os.remove(fake_po_fn)
        except FileNotFoundError:
            pass

    def __del__(self):
        """Make sure the fake .po file does not outlive this object."""
        self._remove_fake_po_fn()

    def generate_pot_file(self, document_fns):
        """Regenerate the .pot file, then update all known .po files.

        Returns False if ``po4a.gettextize`` reports failure, otherwise
        the result of ``po4a.updatepo``.
        """
        if not self.po4a.gettextize(document_fns, self.pot_fn):
            return False
        return self.po4a.updatepo(self.langs, document_fns)

    # we generate a fake translation for en-US which is going to be
    # the default
    def generate_fake_pofile(self):
        """Copy the .pot file to the fake .po file and register it."""
        self._remove_fake_po_fn()
        shutil.copy(self.pot_fn, self.fake_po_fn)
        self.add_language(self.fake_po_fn)

    def rewrite_links(self, documents):
        """Point ``{filename}`` links in every .po file at translated docs.

        ``documents`` must provide ``translated_doc_fn(fn, bcp47_code)``.
        Each .po file in ``self.langs`` is loaded, its translated, fuzzy
        and untranslated entries are processed and the file is saved back.
        Only the first link of an entry is considered.
        """
        for po_fn in self.langs:
            po_file = polib.pofile(po_fn)
            bcp47_code = self.langs[po_fn]['bcp47']
            # Build all three lists before touching any entry: rewriting
            # an untranslated entry fills in its msgstr.
            entry_groups = [po_file.translated_entries(),
                            po_file.fuzzy_entries(),
                            po_file.untranslated_entries()]
            for entry_group in entry_groups:
                for entry in entry_group:
                    self._rewrite_entry_link(entry, documents, bcp47_code)
            po_file.save(po_fn)

    def _rewrite_entry_link(self, entry, documents, bcp47_code):
        """Rewrite the first ``{filename}`` link of a single entry."""
        links_msgid = self._LINK_REGEX.findall(entry.msgid)
        if not links_msgid:
            # No link at all, or "{filename}" used outside a markdown
            # link: there is nothing to rewrite.
            return
        link_msgid = links_msgid[0]
        translated_doc_fn = documents.translated_doc_fn(link_msgid,
                                                        bcp47_code)
        if translated_doc_fn is None:
            # The link target matches no known document; leave the entry
            # untouched instead of crashing in os.path.basename(None).
            print('Warning: no document found for link target "%s"'
                  % link_msgid)
            return
        translated_doc_fn = os.path.basename(translated_doc_fn)
        links_msgstr = self._LINK_REGEX.findall(entry.msgstr)
        if links_msgstr:
            link_msgstr = links_msgstr[0]
        else:
            # No link in the translation: fall back to the original text
            # so that there is a link to rewrite.
            entry.msgstr = entry.msgid
            link_msgstr = link_msgid
        entry.msgstr = entry.msgstr.replace(link_msgstr, translated_doc_fn)
 
146
 
 
147
 
81
148
class Documents(object):
82
149
    def __init__(self):
83
150
        self.docs = []
84
 
        for dirpath, dirnames, filenames in os.walk(PATH):
85
 
            for filename in filenames:
86
 
                self.docs += [os.path.join(dirpath, filename)]
 
151
        for dirpath, dirnames, fns in os.walk(PATH):
 
152
            for fn in fns:
 
153
                self.docs += [os.path.join(dirpath, fn)]
87
154
 
88
 
    def translated_doc(self, file_name, lang):
 
155
    def translated_doc_fn(self, fn, bcp47_code):
89
156
        match = [doc for doc in self.docs
90
 
                 if os.path.basename(doc) == os.path.basename(file_name)]
 
157
                 if os.path.basename(doc) == os.path.basename(fn)]
91
158
        if not match:
92
159
            return None
93
160
        return '%s.%s.md' % (match[0].split('.md')[0],
94
 
                             lang.bcp47_code)
 
161
                             bcp47_code)
95
162
 
96
 
    def _call_po4a_translate(self, doc, lang, po4a):
97
 
        res = po4a.translate(doc, lang)
 
163
    def _call_po4a_translate(self, doc, po_fn, po4a):
 
164
        res = po4a.translate(doc, po_fn)
98
165
        output = codecs.decode(res.communicate()[0])
 
166
        print(output)
99
167
        broken_title_line = [line for line in output.split('\n')
100
168
                             if line.lower().startswith('title:')][0]
101
169
        rest = [line for line in output.split('\n')
103
171
        output = '\n'.join(rest)
104
172
        return (broken_title_line, output)
105
173
 
106
 
    def write_translated_markdown(self, lang, po4a):
107
 
        for doc in self.docs:
108
 
            (broken_title_line, output) = \
109
 
                self._call_po4a_translate(doc, lang, po4a)
110
 
            new_path = self.translated_doc(doc, lang)
111
 
            text = "%s\nDate:\n\n" % (broken_title_line)
112
 
            text += output
113
 
            if os.path.exists(new_path):
114
 
                os.remove(new_path)
115
 
            if not os.path.exists(os.path.dirname(new_path)):
116
 
                os.makedirs(os.path.dirname(new_path))
117
 
            with open(new_path, 'w', encoding='utf-8') as f:
118
 
                f.write(text)
119
 
 
120
 
 
121
 
class Language(object):
122
 
    def __init__(self, po_file, documents):
123
 
        self.file_name = po_file
124
 
        self.gettext_code = os.path.basename(po_file).split('.po')[0]
125
 
        self.bcp47_code = self._find_bcp47_code()
126
 
        self.documents = documents
127
 
 
128
 
    def _find_bcp47_code(self):
129
 
        if self.gettext_code not in [c[0] for c in BCP47_OVERRIDES]:
130
 
            return self.gettext_code.lower().replace('_', '-')
131
 
        return [c[1] for c in BCP47_OVERRIDES
132
 
                if c[0] == self.gettext_code][0]
133
 
 
134
 
    def rewrite_links(self):
135
 
        po_file = polib.pofile(self.file_name)
136
 
        link_regex = r'\[.+?\]\(\{filename\}(.+?)\)'
137
 
        for entry_group in [po_file.translated_entries(),
138
 
                            po_file.fuzzy_entries(),
139
 
                            po_file.untranslated_entries()]:
140
 
            for entry in entry_group:
141
 
                if '{filename}' in entry.msgid:
142
 
                    link_msgid = re.findall(link_regex, entry.msgid)[0]
143
 
                    link_msgstr = list(re.findall(link_regex, entry.msgstr))
144
 
                    translated_doc = os.path.basename(
145
 
                        self.documents.translated_doc(
146
 
                            link_msgid, self))
147
 
                    if not link_msgstr:
148
 
                        entry.msgstr = entry.msgid
149
 
                        link_msgstr = [link_msgid]
150
 
                    entry.msgstr = entry.msgstr.replace(link_msgstr[0],
151
 
                                                        translated_doc)
152
 
        po_file.save(self.file_name)
 
174
    def write_translated_markdown(self, langs, po4a):
 
175
        for po_fn in langs:
 
176
            for doc in self.docs:
 
177
                (broken_title_line, output) = \
 
178
                    self._call_po4a_translate(doc, po_fn, po4a)
 
179
                new_path = self.translated_doc_fn(doc, langs[po_fn]['bcp47'])
 
180
                text = "%s\nDate:\n\n" % (broken_title_line)
 
181
                text += output
 
182
                if os.path.exists(new_path):
 
183
                    os.remove(new_path)
 
184
                if not os.path.exists(os.path.dirname(new_path)):
 
185
                    os.makedirs(os.path.dirname(new_path))
 
186
                with open(new_path, 'w', encoding='utf-8') as f:
 
187
                    f.write(text)
153
188
 
154
189
 
155
190
class Translations(object):
156
191
    def __init__(self):
157
192
        self._cleanup()
158
 
        self.translations_dir = os.path.abspath(os.path.join(PATH, '../po'))
159
 
        self.available_languages = []
160
193
        self.documents = Documents()
161
 
        for po_filename in glob.glob(self.translations_dir+'/*.po'):
162
 
            self.available_languages += [Language(po_filename, self.documents)]
163
 
        self.fake_lang_code = 'en_US'
164
 
        self.fake_po_file = os.path.join(self.translations_dir,
165
 
                                         '%s.po' % self.fake_lang_code)
166
 
        self.pot_file = os.path.join(self.translations_dir,
167
 
                                     "help.pot")
168
 
        self.po4a = PO4A(self.pot_file)
169
 
 
170
 
    def __del__(self):
171
 
        if os.path.exists(self.fake_po_file):
172
 
            os.remove(self.fake_po_file)
 
194
        self.po4a = PO4A()
 
195
        self.po = PO(self.po4a)
173
196
 
174
197
    def _cleanup(self):
175
198
        r = subprocess.Popen(['bzr', 'ignored'], stdout=subprocess.PIPE)
176
 
        files = [os.path.join(PATH, '../..', f.split(' ')[0])
177
 
                 for f in codecs.decode(r.communicate()[0]).split('\n')
178
 
                 if f.strip() != '']
179
 
        files = [f for f in files if os.path.exists(f)]
180
 
        for f in files:
 
199
        fns = [os.path.join(PATH, '../..', f.split(' ')[0])
 
200
               for f in codecs.decode(r.communicate()[0]).split('\n')
 
201
               if f.strip() != '']
 
202
        fns = [f for f in fns if os.path.exists(f)]
 
203
        for f in fns:
181
204
            try:
182
205
                shutil.rmtree(f)
183
206
            except NotADirectoryError:
184
207
                os.remove(f)
185
208
 
186
209
    def generate_pot_file(self):
187
 
        return self.po4a.gettextize(self.documents.docs)
188
 
 
189
 
    def update_po_files(self):
190
 
        return self.po4a.updatepo(self.available_languages,
191
 
                                  self.documents.docs)
192
 
 
193
 
    def rewrite_links(self):
194
 
        self._generate_fake_pofile()
195
 
        for lang in self.available_languages:
196
 
            lang.rewrite_links()
197
 
 
198
 
    # we generate a fake translation for en-US which is going to be
199
 
    # the default
200
 
    def _generate_fake_pofile(self):
201
 
        if os.path.exists(self.fake_po_file):
202
 
            os.remove(self.fake_po_file)
203
 
        shutil.copy(self.pot_file, self.fake_po_file)
204
 
        self.available_languages += [Language(self.fake_po_file,
205
 
                                              self.documents)]
 
210
        return self.po.generate_pot_file(self.documents.docs)
206
211
 
207
212
    def generate_translations(self):
208
 
        for lang in self.available_languages:
209
 
            self.documents.write_translated_markdown(lang, self.po4a)
 
213
        self.po.generate_fake_pofile()
 
214
        self.po.rewrite_links(self.documents)
 
215
        self.documents.write_translated_markdown(self.po.langs, self.po4a)