~widelands-dev/widelands/trunk

« back to all changes in this revision

Viewing changes to utils/update_translation_stats.py

  • Committer: The Widelands Bunnybot
  • Date: 2025-05-30 10:23:42 UTC
  • Revision ID: bunnybot@widelands.org-20250530102342-q1cu6vgl4rti0iws
Speed up updating translation statistics (CB #5007 / GH #6647)

Co-authored-by: Simon Heimberg <simon.heimberg@heimberg-ea.ch>
Co-authored-by: Tóth András <txa-dev@posteo.hu>
Co-authored-by: Widelands Bunnybot <bunnybot@widelands.org>
Co-committed-by: Tóth András <txa-dev@posteo.hu>

(by bunnybot)
c56ea002875c0884f98ff37366ff22c3c9a52659

Show diffs side-by-side

added added

removed removed

Lines of Context:
42
42
def generate_translation_stats(po_dir, output_file):
43
43
    locale_stats = defaultdict(TranslationStats)
44
44
 
45
 
    sys.stdout.write('Fetching translation stats ')
 
45
    print('Running pocount...')
 
46
 
 
47
    proc = subprocess.run(
 
48
        ['pocount', '--csv', po_dir],
 
49
        text=True, encoding='utf-8', capture_output=True
 
50
    )
 
51
 
 
52
    if len(proc.stderr) > 0:
 
53
        print('\npocount stderr:')
 
54
        print(proc.stderr)
 
55
 
 
56
    if proc.returncode != 0 or len(proc.stderr) > 0:
 
57
        print('\nError running pocount, return code:', proc.returncode)
 
58
        return 1
 
59
 
 
60
    print('pocount finished, processing translation stats...')
46
61
 
47
62
    # Regex to extract the locale from the po filenames.
48
63
    regex_po = re.compile(r'/\S+/(\w+)\.po')
49
64
 
50
 
    # We get errors for non-po files in the base po dir, so we have to walk
51
 
    # the subdirs.
52
 
    for subdir in sorted(os.listdir(po_dir), key=str.lower):
53
 
        subdir = os.path.join(po_dir, subdir)
54
 
        if not os.path.isdir(subdir):
55
 
            continue
56
 
 
57
 
        sys.stdout.write('.')
58
 
        sys.stdout.flush()
59
 
 
60
 
        try:
61
 
            stats_output = subprocess.check_output(
62
 
                ['pocount', '--csv', subdir],
63
 
                encoding='utf-8',
64
 
                stderr=subprocess.STDOUT,
65
 
            )
66
 
            if 'ERROR' in stats_output:
67
 
                print('\nError running pocount:\n' + stats_output +
68
 
                      '\nAborted creating translation statistics.')
69
 
                return 1
70
 
 
71
 
        except subprocess.CalledProcessError:
72
 
            print('Failed to run pocount:\n  FILE: ' + po_dir +
73
 
                  '\n  ' + stats_output.split('\n', 1)[1])
 
65
    COLUMNS = {
 
66
        'filename': 'Filename',
 
67
        'total': 'Total Source Words',
 
68
        'translated': 'Translated Source Words'
 
69
    }
 
70
 
 
71
    result = csv.DictReader(csv.StringIO(proc.stdout), dialect='unix', skipinitialspace=True)
 
72
    missing_cols = set(COLUMNS.values()) - set(result.fieldnames)
 
73
    if missing_cols:
 
74
        sys.exit(
 
75
            'Column(s) "{}" not found in output of pocount'.format('", "'.join(missing_cols)))
 
76
 
 
77
    ### Now do the actual counting
 
78
    for row in result:
 
79
        po_filename = row[COLUMNS['filename']]
 
80
        name_match = regex_po.fullmatch(po_filename)
 
81
        if name_match:
 
82
            locale = name_match.group(1)
 
83
            entry = locale_stats[locale]
 
84
            entry.total += int(row[COLUMNS['total']])
 
85
            entry.translated += int(row[COLUMNS['translated']])
 
86
            if entry.translated > entry.total:
 
87
                print(('Error! Translated {entry.translated} ({c_translated}) is bigger than ' +
 
88
                       'the total of {entry.total} ({c_total}) on line {line}\n').format(
 
89
                    entry=entry, c_translated=row[COLUMNS['translated']],
 
90
                    c_total=row[COLUMNS['total']], line=result.line_num))
 
91
                sys.exit(1)
 
92
            locale_stats[locale] = entry
 
93
        elif not po_filename.endswith('.pot'):
 
94
            print('\nUnexpected line in pocount output:\n  ' + row[COLUMNS['filename']] +
 
95
                  '\nAborted creating translation statistics.')
74
96
            return 1
75
97
 
76
 
        COLUMNS = {
77
 
            'filename': 'Filename',
78
 
            'total': 'Total Source Words',
79
 
            'translated': 'Translated Source Words'
80
 
        }
81
 
        result = csv.DictReader(csv.StringIO(stats_output), dialect='unix', skipinitialspace=True)
82
 
        missing_cols = set(COLUMNS.values()) - set(result.fieldnames)
83
 
        if missing_cols:
84
 
            sys.exit(
85
 
                'Column(s) "{}" not found in output of pocount'.format('", "'.join(missing_cols)))
86
 
 
87
 
        # Now do the actual counting for the current textdomain
88
 
        for row in result:
89
 
            po_filename = row[COLUMNS['filename']]
90
 
            if po_filename.endswith('.po'):
91
 
                locale = regex_po.match(po_filename).group(1)
92
 
                entry = locale_stats[locale]
93
 
                entry.total += int(row[COLUMNS['total']])
94
 
                entry.translated += int(row[COLUMNS['translated']])
95
 
                if entry.translated > entry.total:
96
 
                    print(('Error! Translated {entry.translated} ({c_translated}) is bigger than ' +
97
 
                           'the total of {entry.total} ({c_total}) on line {line}\n').format(
98
 
                        entry=entry, c_translated=row[COLUMNS['translated']],
99
 
                        c_total=row[COLUMNS['total']], line=result.line_num))
100
 
                    sys.exit(1)
101
 
                locale_stats[locale] = entry
102
 
 
103
 
    print('\n\nLocale\tTotal\tTranslated')
 
98
    ### Counting done, start output
 
99
    print('\nLocale\tTotal\tTranslated')
104
100
    print('------\t-----\t----------')
105
101
 
106
102
    # The total goes in a [global] section and is identical for all locales