~dholbach/help-app/1428618

« back to all changes in this revision

Viewing changes to edit-here/translations.py

  • Committer: Daniel Holbach
  • Date: 2015-02-26 12:46:07 UTC
  • mfrom: (48.1.48 help.1425010)
  • Revision ID: daniel.holbach@canonical.com-20150226124607-d8r1pespk41a1wh3
merged lp:~dholbach/ubuntu-devices-help/1425010

Show diffs side-by-side

added added

removed removed

Lines of Context:
6
6
import re
7
7
import shutil
8
8
import subprocess
9
 
import tempfile
10
9
 
11
10
from pelicanconf import PATH
12
11
 
13
 
METADATA_TAGS = [
14
 
    'title',
15
 
    'lang',
16
 
    'date',
17
 
    ]
18
 
 
19
12
# This defines how complete we expect translations to be before we
20
13
# generate HTML from them. Needs to be string.
21
14
TRANSLATION_COMPLETION_PERCENTAGE = '0'
22
15
 
 
16
BCP47_OVERRIDES = (
 
17
    ('zh_CN', 'zh-hans'),
 
18
    ('zh_TW', 'zh-hant'),
 
19
)
 
20
 
23
21
 
24
22
class PO4A(object):
25
 
    def __init__(self, translations_dir, temp_dir):
 
23
    def __init__(self, pot_file):
26
24
        self.default_args = [
27
25
            '-f', 'text',
28
26
            '-o', 'markdown',
29
27
            '-M', 'utf-8',
30
28
            ]
31
 
        self.translations_dir = translations_dir
32
 
        self.temp_dir = temp_dir
 
29
        self.pot_file = pot_file
33
30
 
34
 
    def run(self, po4a_command, additional_args, with_output=False,
35
 
            working_dir=None):
36
 
        if not working_dir:
37
 
            working_dir = self.temp_dir
38
 
        pwd = os.getcwd()
 
31
    def run(self, po4a_command, additional_args, with_output=False):
39
32
        args = copy.copy(self.default_args)
40
33
        args += additional_args
41
 
        os.chdir(working_dir)
42
34
        if with_output:
43
35
            ret = subprocess.Popen([po4a_command]+args, stdout=subprocess.PIPE)
44
36
        else:
45
37
            ret = subprocess.call([po4a_command]+args)
46
 
        os.chdir(pwd)
47
38
        return ret
48
39
 
49
40
    def gettextize(self, documents):
51
42
        for document in documents:
52
43
            args += ['-m', document]
53
44
        args += [
54
 
            '-p', os.path.join(self.translations_dir, 'help.pot'),
 
45
            '-p', self.pot_file,
55
46
            '-L', 'utf-8',
56
47
            ]
57
48
        return self.run('po4a-gettextize', args)
74
65
            '-p', lang.file_name,
75
66
            '-L', 'utf-8',
76
67
            ]
77
 
        return self.run('po4a-translate', args, with_output=True,
78
 
                        working_dir=os.path.join(PATH, '..'))
 
68
        return self.run('po4a-translate', args, with_output=True)
 
69
 
 
70
 
 
71
class Documents(object):
 
72
    def __init__(self):
 
73
        self.docs = []
 
74
        for dirpath, dirnames, filenames in os.walk(PATH):
 
75
            for filename in filenames:
 
76
                self.docs += [os.path.join(dirpath, filename)]
 
77
 
 
78
    def translated_doc(self, file_name, lang):
 
79
        match = [doc for doc in self.docs
 
80
                 if os.path.basename(doc) == os.path.basename(file_name)]
 
81
        if not match:
 
82
            return None
 
83
        return '%s.%s.md' % (match[0].split('.md')[0],
 
84
                             lang.bcp47_code)
 
85
 
 
86
    def _call_po4a_translate(self, doc, lang, po4a):
 
87
        res = po4a.translate(doc, lang)
 
88
        output = codecs.decode(res.communicate()[0])
 
89
        broken_title_line = [line for line in output.split('\n')
 
90
                             if line.lower().startswith('title:')][0]
 
91
        rest = [line for line in output.split('\n')
 
92
                if not line.lower().startswith('title')]
 
93
        output = '\n'.join(rest)
 
94
        return (broken_title_line, output)
 
95
 
 
96
    def write_translated_markdown(self, lang, po4a):
 
97
        for doc in self.docs:
 
98
            (broken_title_line, output) = \
 
99
                self._call_po4a_translate(doc, lang, po4a)
 
100
            new_path = self.translated_doc(doc, lang)
 
101
            text = "%s\nDate:\n\n" % (broken_title_line)
 
102
            text += output
 
103
            if os.path.exists(new_path):
 
104
                os.remove(new_path)
 
105
            if not os.path.exists(os.path.dirname(new_path)):
 
106
                os.makedirs(os.path.dirname(new_path))
 
107
            with open(new_path, 'w', encoding='utf-8') as f:
 
108
                f.write(text)
79
109
 
80
110
 
81
111
class Language(object):
82
 
    def __init__(self, po_file):
 
112
    def __init__(self, po_file, documents):
83
113
        self.file_name = po_file
84
 
        self.gettext_lang_code = os.path.basename(po_file).split('.po')[0]
 
114
        self.gettext_code = os.path.basename(po_file).split('.po')[0]
 
115
        self.bcp47_code = self._find_bcp47_code()
 
116
        self.documents = documents
 
117
 
 
118
    def _find_bcp47_code(self):
 
119
        if self.gettext_code not in [c[0] for c in BCP47_OVERRIDES]:
 
120
            return self.gettext_code.lower().replace('_', '-')
 
121
        return [c[1] for c in BCP47_OVERRIDES
 
122
                if c[0] == self.gettext_code][0]
85
123
 
86
124
    def rewrite_links(self):
87
125
        po_file = polib.pofile(self.file_name)
88
 
        for entry_group in [po_file.untranslated_entries(),
89
 
                            po_file.translated_entries(),
90
 
                            po_file.fuzzy_entries()]:
 
126
        link_regex = r'\[.+?\]\(\{filename\}(.+?)\)'
 
127
        for entry_group in [po_file.translated_entries(),
 
128
                            po_file.fuzzy_entries(),
 
129
                            po_file.untranslated_entries()]:
91
130
            for entry in entry_group:
92
131
                if '{filename}' in entry.msgid:
93
 
                    if not entry.msgstr:
 
132
                    link_msgid = re.findall(link_regex, entry.msgid)[0]
 
133
                    link_msgstr = list(re.findall(link_regex, entry.msgstr))
 
134
                    translated_doc = os.path.basename(
 
135
                        self.documents.translated_doc(
 
136
                            link_msgid, self))
 
137
                    if not link_msgstr:
94
138
                        entry.msgstr = entry.msgid
95
 
                    link = re.findall(r'\[.+?\]\(\{filename\}(.+?)\)',
96
 
                                      entry.msgid)[0]
97
 
                    entry.msgstr = entry.msgstr.replace(
98
 
                        link,
99
 
                        'lang-%s-%s' % (self.gettext_lang_code, link))
 
139
                        link_msgstr = [link_msgid]
 
140
                    entry.msgstr = entry.msgstr.replace(link_msgstr[0],
 
141
                                                        translated_doc)
100
142
        po_file.save(self.file_name)
101
143
 
102
144
 
105
147
        self._cleanup()
106
148
        self.translations_dir = os.path.abspath(os.path.join(PATH, '../po'))
107
149
        self.available_languages = []
 
150
        self.documents = Documents()
108
151
        for po_filename in glob.glob(self.translations_dir+'/*.po'):
109
 
            self.available_languages += [Language(po_filename)]
110
 
        self.documents = self._find_documents()
111
 
        self.temp_dir = tempfile.mkdtemp()
112
 
        self.po4a = PO4A(self.translations_dir, self.temp_dir)
 
152
            self.available_languages += [Language(po_filename, self.documents)]
 
153
        self.fake_lang_code = 'en_US'
 
154
        self.fake_po_file = os.path.join(self.translations_dir,
 
155
                                         '%s.po' % self.fake_lang_code)
 
156
        self.pot_file = os.path.join(self.translations_dir,
 
157
                                     "help.pot")
 
158
        self.po4a = PO4A(self.pot_file)
113
159
 
114
160
    def __del__(self):
115
 
        shutil.rmtree(self.temp_dir)
116
 
 
117
 
    def _find_documents(self):
118
 
        documents = []
119
 
        for dirpath, dirnames, filenames in os.walk(PATH):
120
 
            for filename in filenames:
121
 
                documents += [os.path.join(dirpath, filename)]
122
 
        return documents
 
161
        if os.path.exists(self.fake_po_file):
 
162
            os.remove(self.fake_po_file)
123
163
 
124
164
    def _cleanup(self):
125
165
        r = subprocess.Popen(['bzr', 'ignored'], stdout=subprocess.PIPE)
133
173
            except NotADirectoryError:
134
174
                os.remove(f)
135
175
 
136
 
    def _remove_metadata(self, filename):
137
 
        new_dir = os.path.join(self.temp_dir, os.path.dirname(filename))
138
 
        os.makedirs(new_dir, exist_ok=True)
139
 
        new_filename = os.path.join(new_dir, os.path.basename(filename))
140
 
        shutil.copy(filename, new_filename)
141
 
        lines = open(new_filename).readlines()
142
 
        title_line = [x for x in lines if
143
 
                      (x.startswith('Title:') or x.startswith('title:'))]
144
 
        index = 0
145
 
        for line in lines:
146
 
            if not [x for x in METADATA_TAGS
147
 
                    if line.lower().startswith(x+':')] and \
148
 
               line.strip() != '':
149
 
                index = lines.index(line)
150
 
                break
151
 
        if not title_line:
152
 
            print('No line starting with "Title: " found in "%s".' %
153
 
                  new_filename)
154
 
            return False
155
 
        os.remove(new_filename)
156
 
        with open(new_filename, 'w', encoding='utf-8') as new_file:
157
 
            new_file.write(title_line[0]+'\n')
158
 
            new_file.write(''.join(lines[index:]))
159
 
        return True
160
 
 
161
 
    def clean_documents(self):
162
 
        for document in self.documents:
163
 
            if not self._remove_metadata(document):
164
 
                return False
165
 
        return True
166
 
 
167
176
    def generate_pot_file(self):
168
 
        return self.po4a.gettextize(self.documents)
 
177
        return self.po4a.gettextize(self.documents.docs)
169
178
 
170
179
    def update_po_files(self):
171
 
        return self.po4a.updatepo(self.available_languages, self.documents)
172
 
 
173
 
    def _call_po4a_translate(self, doc, lang):
174
 
        res = self.po4a.translate(doc, lang)
175
 
        output = codecs.decode(res.communicate()[0])
176
 
        broken_title_line = [line for line in output.split('\n')
177
 
                             if line.lower().startswith('title:')][0]
178
 
        rest = [line for line in output.split('\n')
179
 
                if not line.lower().startswith('title')]
180
 
        output = '\n'.join(rest)
181
 
        return (broken_title_line, output)
182
 
 
183
 
    def _new_header(self, lang, broken_title_line):
184
 
        title_line = broken_title_line
185
 
        for metadata_tag in [x for x in METADATA_TAGS if not x == 'title']:
186
 
            title_line = title_line.split(metadata_tag)[0]
187
 
            title_line = title_line.split(metadata_tag.capitalize())[0]
188
 
            title_line = title_line.split(metadata_tag.upper())[0]
189
 
        return "%s\nLang: %s\nDate:\n\n" % \
190
 
            (title_line, lang.gettext_lang_code)
 
180
        return self.po4a.updatepo(self.available_languages,
 
181
                                  self.documents.docs)
191
182
 
192
183
    def rewrite_links(self):
 
184
        self._generate_fake_pofile()
193
185
        for lang in self.available_languages:
194
186
            lang.rewrite_links()
195
187
 
 
188
    # we generate a fake translation for en-US which is going to be
 
189
    # the default
 
190
    def _generate_fake_pofile(self):
 
191
        if os.path.exists(self.fake_po_file):
 
192
            os.remove(self.fake_po_file)
 
193
        shutil.copy(self.pot_file, self.fake_po_file)
 
194
        self.available_languages += [Language(self.fake_po_file,
 
195
                                              self.documents)]
 
196
 
196
197
    def generate_translations(self):
197
198
        for lang in self.available_languages:
198
 
            for doc in self.documents:
199
 
                (broken_title_line, output) = \
200
 
                    self._call_po4a_translate(doc, lang)
201
 
                new_path = os.path.join(PATH, 'pages',
202
 
                                        'lang-%s-%s' %
203
 
                                        (lang.gettext_lang_code,
204
 
                                         os.path.basename(doc)))
205
 
                text = self._new_header(lang, broken_title_line)
206
 
                text += output
207
 
                if os.path.exists(new_path):
208
 
                    os.remove(new_path)
209
 
                if not os.path.exists(os.path.dirname(new_path)):
210
 
                    os.makedirs(os.path.dirname(new_path))
211
 
                with open(new_path, 'w', encoding='utf-8') as f:
212
 
                    f.write(text)
 
199
            self.documents.write_translated_markdown(lang, self.po4a)