~widelands-dev/widelands/trunk

Viewing changes to utils/update_translation_stats.py

Committer: The Widelands Bunnybot
Date: 2025-05-30 10:23:42 UTC
Revision ID: bunnybot@widelands.org-20250530102342-q1cu6vgl4rti0iws

Speed up updating translation statistics (CB #5007 / GH #6647)

Co-authored-by: Simon Heimberg <simon.heimberg@heimberg-ea.ch>
Co-authored-by: Tóth András <txa-dev@posteo.hu>
Co-authored-by: Widelands Bunnybot <bunnybot@widelands.org>
Co-committed-by: Tóth András <txa-dev@posteo.hu>

(by bunnybot)
c56ea002875c0884f98ff37366ff22c3c9a52659

files modified:
utils/update_translation_stats.py

Show diffs side-by-side

added added

removed removed

utils/update_translation_stats.py

def generate_translation_stats(po_dir, output_file):

locale_stats = defaultdict(TranslationStats)

sys.stdout.write('Fetching translation stats ')

print('Running pocount...')

proc = subprocess.run(

['pocount', '--csv', po_dir],

text=True, encoding='utf-8', capture_output=True

)

if len(proc.stderr) > 0:

print('\npocount stderr:')

print(proc.stderr)

if proc.returncode != 0 or len(proc.stderr) > 0:

print('\nError running pocount, return code:', proc.returncode)

return 1

print('pocount finished, processing translation stats...')

# Regex to extract the locale from the po filenames.

regex_po = re.compile(r'/\S+/(\w+)\.po')

# We get errors for non-po files in the base po dir, so we have to walk

# the subdirs.

for subdir in sorted(os.listdir(po_dir), key=str.lower):

subdir = os.path.join(po_dir, subdir)

if not os.path.isdir(subdir):

continue

sys.stdout.write('.')

sys.stdout.flush()

try:

stats_output = subprocess.check_output(

['pocount', '--csv', subdir],

encoding='utf-8',

stderr=subprocess.STDOUT,

)

if 'ERROR' in stats_output:

print('\nError running pocount:\n' + stats_output +

'\nAborted creating translation statistics.')

return 1

except subprocess.CalledProcessError:

print('Failed to run pocount:\n FILE: ' + po_dir +

'\n ' + stats_output.split('\n', 1)[1])

COLUMNS = {

'filename': 'Filename',

'total': 'Total Source Words',

'translated': 'Translated Source Words'

}

result = csv.DictReader(csv.StringIO(proc.stdout), dialect='unix', skipinitialspace=True)

missing_cols = set(COLUMNS.values()) - set(result.fieldnames)

if missing_cols:

sys.exit(

'Column(s) "{}" not found in output of pocount'.format('", "'.join(missing_cols)))

### Now do the actual counting

for row in result:

po_filename = row[COLUMNS['filename']]

name_match = regex_po.fullmatch(po_filename)

if name_match:

locale = name_match.group(1)

entry = locale_stats[locale]

entry.total += int(row[COLUMNS['total']])

entry.translated += int(row[COLUMNS['translated']])

if entry.translated > entry.total:

print(('Error! Translated {entry.translated} ({c_translated}) is bigger than ' +

'the total of {entry.total} ({c_total}) on line {line}\n').format(

entry=entry, c_translated=row[COLUMNS['translated']],

c_total=row[COLUMNS['total']], line=result.line_num))

sys.exit(1)

locale_stats[locale] = entry

elif not po_filename.endswith('.pot'):

print('\nUnexpected line in pocount output:\n ' + row[COLUMNS['filename']] +

'\nAborted creating translation statistics.')

return 1

COLUMNS = {

'filename': 'Filename',

'total': 'Total Source Words',

'translated': 'Translated Source Words'

}

result = csv.DictReader(csv.StringIO(stats_output), dialect='unix', skipinitialspace=True)

missing_cols = set(COLUMNS.values()) - set(result.fieldnames)

if missing_cols:

sys.exit(

'Column(s) "{}" not found in output of pocount'.format('", "'.join(missing_cols)))

# Now do the actual counting for the current textdomain

for row in result:

po_filename = row[COLUMNS['filename']]

if po_filename.endswith('.po'):

locale = regex_po.match(po_filename).group(1)

entry = locale_stats[locale]

entry.total += int(row[COLUMNS['total']])

entry.translated += int(row[COLUMNS['translated']])

if entry.translated > entry.total:

print(('Error! Translated {entry.translated} ({c_translated}) is bigger than ' +

'the total of {entry.total} ({c_total}) on line {line}\n').format(

entry=entry, c_translated=row[COLUMNS['translated']],

c_total=row[COLUMNS['total']], line=result.line_num))

100

sys.exit(1)

101

locale_stats[locale] = entry

102

103

print('\n\nLocale\tTotal\tTranslated')

### Counting done, start output

print('\nLocale\tTotal\tTranslated')

104

100

print('------\t-----\t----------')

105

101

106

102

# The total goes in a [global] section and is identical for all locales

Older »