"""Uses the Translate Toolkit to extract glossaries from our po files. These
extracted glossaries are then used to fill gaps in the given Transifex
glossary.

You will need to have the Translate Toolkit installed in order for the
extraction to work:
http://toolkit.translatehouse.org/

For Debian-based Linux: sudo apt-get install translate-toolkit

You will need to provide an export of the Transifex glossary and specify it at
the command line. Make sure to select "Include glossary notes in file" when
exporting the csv from Transifex.

The resulting file then needs to be uploaded manually to Transifex as well.
"""
import os
import re
import subprocess
import sys
import time
import traceback
from collections import defaultdict
from subprocess import call, check_output, CalledProcessError

from file_utils import read_csv_file, make_path, delete_path
#############################################################################
34
#############################################################################
38
"""An entry in our parsed glossaries."""
43
# The term's translation
45
# Comment for the source term
46
self.term_comment = ''
47
# Comment for the term's translation
48
self.translation_comment = ''
49
# Wordclass of the source term
53
#############################################################################
55
#############################################################################
58
def load_extracted_glossary(glossary_file, locale):
    """Build a defaultdict(GlossaryEntry) glossary from the given extracted
    glossary csv file for the given locale, raising an error for entries that
    have no translation."""
    result = defaultdict(GlossaryEntry)
    term_index = 0
    translation_index = 0
    counter = 0
    # Hoisted out of the loop: used to strip source information that
    # poterminology appends to fuzzy matches, e.g. 'term {source info}'.
    regex = re.compile(r'(.+)( \{.*\})(.*)')
    for row in read_csv_file(glossary_file):
        # The first row is the header; detect the column indices from it.
        if counter == 0:
            for colum_counter, header in enumerate(row):
                if header == 'source':
                    term_index = colum_counter
                elif header == 'target':
                    translation_index = colum_counter
        # If there is a translation, parse the entry
        elif row[translation_index].strip() != '':
            # No 'target' column was found, so the extracted glossary
            # cannot contain any translations for this locale.
            if translation_index == 0:
                raise Exception(
                    'Glossary extracted for %s contains no translations.' % locale)
            entry = GlossaryEntry()
            entry.term = row[term_index].strip()
            entry.translation = row[translation_index].strip()
            # Remove source information with fuzzy matches; repeat until
            # no '{...}' chunk is left in the translation.
            match = regex.match(entry.translation)
            while match:
                entry.translation = match.group(1) + match.group(3)
                match = regex.match(entry.translation)
            result[entry.term] = entry
        counter = counter + 1
    return result
95
def load_transifex_glossary(glossary_file, locale):
    """Build a defaultdict(GlossaryEntry) glossary from the given Transifex
    glossary csv file for the given locale.

    Include empty translations in the result.
    """
    result = defaultdict(GlossaryEntry)
    term_index = 0
    term_comment_index = 0
    translation_index = 0
    comment_index = 0
    counter = 0
    for row in read_csv_file(glossary_file):
        # The first row is the header; detect the column indices from it.
        if counter == 0:
            for colum_counter, header in enumerate(row):
                if header == 'term':
                    term_index = colum_counter
                elif header == 'comment':
                    term_comment_index = colum_counter
                elif header == 'translation_' + locale or header == locale:
                    translation_index = colum_counter
                elif header == 'comment_' + locale:
                    comment_index = colum_counter
        else:
            # Index 0 means the header for this locale was never found.
            if translation_index == 0:
                raise Exception(
                    'Locale %s is missing from glossary file.' % locale)
            if comment_index == 0:
                raise Exception(
                    'Comment field for locale %s is missing from glossary file.' % locale)
            entry = GlossaryEntry()
            entry.term = row[term_index].strip()
            entry.term_comment = row[term_comment_index].strip()
            entry.translation = row[translation_index].strip()
            entry.translation_comment = row[comment_index].strip()
            result[entry.term] = entry
        counter = counter + 1
    return result
140
def load_transifex_source_terms(glossary_file):
    """Loads a list of source terms with their comments and word classes as a
    defaultdict(GlossaryEntry) from the given Transifex glossary csv file."""
    result = defaultdict(GlossaryEntry)
    term_index = 0
    term_comment_index = 0
    wordclass_index = 0
    counter = 0
    for row in read_csv_file(glossary_file):
        # The first row is the header; detect the column indices from it.
        if counter == 0:
            for colum_counter, header in enumerate(row):
                if header == 'term':
                    term_index = colum_counter
                elif header == 'comment':
                    term_comment_index = colum_counter
                elif header == 'pos':
                    wordclass_index = colum_counter
        else:
            entry = GlossaryEntry()
            entry.term = row[term_index].strip()
            entry.term_comment = row[term_comment_index].strip()
            entry.wordclass = row[wordclass_index].strip()
            result[entry.term] = entry
        counter = counter + 1
    return result
171
#############################################################################
173
#############################################################################
175
def generate_glossary(po_dir, output_path, input_glossary, output_glossary, only_locale):
    """Create a glossary csv file with translation gaps filled in.

    Uses poterminology from the Translate Toolkit to collect glossary
    entries for all files in 'po_dir' for the given 'only_locale'. If
    'only_locale' = "all", processes all locales. Then reads the
    <input_glossary>, adds new entries that were obtained by the glossary
    generation if there are any gaps, and then writes the results to
    <output_glossary>.

    Returns True on success, False if running poterminology failed.
    """
    # Find the locale files to process
    print('Locale: ' + only_locale)
    locales = []
    glossaries = defaultdict(list)
    if only_locale != 'all':
        locales.append(only_locale)
    else:
        # Get locales from the Transifex glossary file
        header_row = read_csv_file(input_glossary)[0]
        regex = re.compile(r'^(translation_)(.+)$')
        for header in header_row:
            match = regex.match(header)
            if match:
                locales.append(match.group(2))

    temp_path = make_path(output_path, 'temp_glossary')

    for locale in locales:
        print('Processing locale: ' + locale)
        # Generate the pot glossary
        input_path = po_dir + '/*/' + locale + '.po'
        pot_path = os.path.join(temp_path, 'glossary_' + locale + '.po')
        try:
            # We need shell=True for the wildcards.
            # decode() so we get a str on both Python 2 and Python 3.
            poterminology_result = check_output(
                ['poterminology ' + input_path + ' -o ' + pot_path],
                stderr=subprocess.STDOUT, shell=True).decode('utf-8')
            if 'Error' in poterminology_result:
                print('Error running poterminology:\n FILE: ' + input_path + '\n OUTPUT PATH: ' +
                      output_path + '\n ' + poterminology_result.split('\n', 1)[1])
                return False
        except CalledProcessError as error:
            # NOTE: use the exception's captured output here;
            # poterminology_result is unbound when check_output raises.
            print('Failed to run poterminology:\n FILE: ' + input_path + '\n OUTPUT PATH: ' +
                  output_path + '\n ' + error.output.decode('utf-8'))
            return False

        # Convert to csv for easy parsing
        csv_file = os.path.join(temp_path, 'glossary_' + locale + '.csv')
        call(['po2csv', '--progress=none', pot_path, csv_file])
        # The po file is no longer needed, delete it.
        os.remove(pot_path)

        transifex_glossary = load_transifex_glossary(input_glossary, locale)
        extracted_glossary = load_extracted_glossary(csv_file, locale)

        # Add generated translation if necessary
        for key in transifex_glossary.keys():
            # 'in' instead of Py2-only has_key(); membership tests do not
            # create entries in the defaultdict.
            if transifex_glossary[key].translation == '' and key in extracted_glossary:
                extracted_entry = extracted_glossary[key]
                if extracted_entry.translation != '':
                    transifex_entry = transifex_glossary[key]
                    transifex_entry.translation = extracted_entry.translation
                    transifex_entry.translation_comment = 'AUTOGENERATED - PLEASE PROOFREAD!'
                    transifex_glossary[key] = transifex_entry
        glossaries[locale] = transifex_glossary

    # Now collect the data for the global csv file
    print('Writing results to ' + output_glossary)
    result = 'term,pos,comment,'
    for locale in locales:
        result = result + 'translation_' + locale + ','
        result = result + 'comment_' + locale + ','
    result = result[0:-1] + '\n'

    source_terms = load_transifex_source_terms(input_glossary)
    # Collect all translations for each source term
    for key in source_terms:
        result = result + '"%s","%s","%s",' % (
            source_terms[key].term.replace('"', '""'),
            source_terms[key].wordclass.replace('"', '""'),
            source_terms[key].term_comment.replace('"', '""'))
        for locale in locales:
            glossary = glossaries[locale]
            translation = ''
            translation_comment = ''
            if key in glossary:
                translation = glossary[key].translation.replace('"', '""')
                translation_comment = glossary[
                    key].translation_comment.replace('"', '""')
            result = result + \
                '"%s","%s",' % (translation, translation_comment)
        result = result[0:-1] + '\n'

    # Now write the file.
    with open(output_glossary, 'wt') as dest_file:
        dest_file.write(result)

    # Cleanup.
    delete_path(temp_path)
    if not os.listdir(output_path):
        os.rmdir(output_path)
    return True
279
"""Checks whether we are in the correct directory and everything's there,
280
then collects glossary entries from all PO files and writes a new glossary
283
Output is restricted to source terms that are already in the
287
if len(sys.argv) == 3 or len(sys.argv) == 4:
288
print('Generating glossary:')
291
'Usage: generate_glossary.py <input-glossary> <output-glossary> [locale]')
295
print('Current time: %s' % time.ctime())
297
input_glossary = os.path.abspath(os.path.join(
298
os.path.dirname(__file__), sys.argv[1]))
299
output_glossary = os.path.abspath(os.path.join(
300
os.path.dirname(__file__), sys.argv[2]))
302
if len(sys.argv) == 4:
305
if (not (os.path.exists(input_glossary) and os.path.isfile(input_glossary))):
306
print('There is no glossary file at ' + input_glossary)
309
po_dir = os.path.abspath(os.path.join(
310
os.path.dirname(__file__), '../po'))
311
output_path = make_path(os.path.dirname(__file__), '../po_validation')
312
result = generate_glossary(
313
po_dir, output_path, input_glossary, output_glossary, locale)
314
print('Current time: %s' % time.ctime())
318
print('Something went wrong:')
319
traceback.print_exc()
320
delete_path(make_path(output_path, 'temp_glossary'))
323
if __name__ == '__main__':