~ubuntu-langpack/langpack-o-matic/main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/python3

# this is part of langpack-o-matic
#
# (C) 2019 Canonical Ltd.
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
#
# Print out a sanity-test report of the generated language-packs, comparing it
# with the previous langpack contents.


import os
import sys
import logging
import subprocess

from collections import defaultdict
from optparse import OptionParser


_logger = logging.getLogger('langpack-sanity-report')


def scan_langpacks(packdir):
    """Collect the size and .po file listing of every langpack in packdir.

    Every directory found directly under the 'sources-base' and
    'sources-update' subdirectories of packdir is treated as one
    language-pack.

    Returns a defaultdict mapping the langpack directory name to a dict with
    two keys:
      'size'    - integer size of the langpack directory as reported by
                  `du -sk` (1024-byte units)
      'pofiles' - list of *.po paths found under the langpack's 'data'
                  directory, relative to that directory

    Raises subprocess.CalledProcessError when du or find exit with a non-zero
    status, and whatever os.listdir() raises when a family directory cannot
    be listed.
    """
    langpacks = defaultdict(dict)
    for family in ('sources-base', 'sources-update'):
        familydir = os.path.join(packdir, family)
        for pack in os.listdir(familydir):
            pack_path = os.path.join(familydir, pack)
            if not os.path.isdir(pack_path):
                continue
            _logger.debug('Scanning %s...', pack_path)
            # Get the overall size of the langpack.  -k pins the unit to KiB
            # so the number does not depend on POSIXLY_CORRECT or the
            # BLOCKSIZE-style environment variables du honours.
            cmd = ['du', '-sk', pack_path]
            try:
                du = subprocess.check_output(
                    cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call du properly')
                raise
            # du prints "<size>\t<path>"
            size = int(du.split('\t')[0])
            po_dir = os.path.join(pack_path, 'data')
            # Get the list of pofiles for the given langpack
            cmd = ['find', po_dir, '-name', '*.po', '-printf', '%P\n']
            try:
                find = subprocess.check_output(
                    cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call find properly')
                raise
            # Every record ends with a newline, so a plain split() leaves a
            # bogus empty entry behind; drop it.
            pofiles = [name for name in find.split('\n') if name]
            langpacks[pack]['size'] = size
            langpacks[pack]['pofiles'] = pofiles
    return langpacks


def sanity_check(prev_packs, next_packs, threshold):
    """Print a report comparing the previous and the new set of langpacks.

    prev_packs and next_packs map a langpack name to a dict holding a
    numeric 'size' and an iterable of .po file names under 'pofiles'.
    threshold is the largest decrease in 'size' that is still tolerated.

    Three kinds of findings are printed to stdout:
      - langpacks present in prev_packs but missing from next_packs,
      - langpacks whose size dropped by more than threshold,
      - .po files that a langpack had before but no longer has.
    Langpacks that only exist in next_packs are ignored.  All listings are
    sorted so that the report is reproducible and easy to diff.

    Returns None.
    """
    # Check if any languages have been dropped
    print('\nChecking for dropped languages...')
    missing_set = set(prev_packs) - set(next_packs)
    if missing_set:
        print('The following language-packs have been dropped:')
        for pack in sorted(missing_set):
            print(pack)
    print('\nRunning per-language-pack checks...')
    # Iterate by sorted name; the mapping's own order is not meaningful
    for langpack in sorted(next_packs):
        if langpack not in prev_packs:
            # New language-pack, ignoring
            continue
        data = next_packs[langpack]
        prev_data = prev_packs[langpack]
        # Check if there are any langpacks that shrunk in size drastically
        next_size = data['size']
        prev_size = prev_data['size']
        size_diff = next_size - prev_size
        if size_diff < (-threshold):
            # The langpack got worryingly smaller in comparison, report
            print('{} shrunk in size above threshold (was {}, is {})'.format(
                langpack, prev_size, next_size))
        # Check if any po files have been dropped completely
        dropped_set = set(prev_data['pofiles']) - set(data['pofiles'])
        if dropped_set:
            print('{} dropped the following .po files:\n {}'.format(
                langpack,
                '\n '.join(sorted(dropped_set))))


def main():
    """Parse the command line and run the langpack sanity report.

    Expects two positional arguments: the directory with the old langpacks
    and the directory with the new ones.  Returns 1 when fewer than two
    are given; otherwise scans both directories, prints the comparison
    report and returns None.
    """
    option_parser = OptionParser(usage='Usage: %prog [options] olddir newdir')
    option_parser.add_option(
        '-d', '--debug', action='store_true', dest='debug')
    option_parser.add_option(
        '-t', '--threshold', type='int', dest='threshold', default=64,
        help="Maximum allowed decrease in a langpack's size before treating "
             "it as something suspicious - in KiB (default: 64).")
    options, positional = option_parser.parse_args()

    # Both directories are mandatory; bail out before touching anything
    if len(positional) < 2:
        print('Please provide paths to the old and new langpack directories.')
        return 1

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Perform the sanity check (any extra positional arguments are ignored)
    prev_dir, next_dir = positional[:2]
    banner = ('Performing language-pack sanity checks, comparing between '
              '{} and {}')
    print(banner.format(prev_dir, next_dir))
    old_packs = scan_langpacks(prev_dir)
    new_packs = scan_langpacks(next_dir)
    sanity_check(old_packs, new_packs, options.threshold)


# Run the report only when executed as a script; whatever main() returns is
# handed to sys.exit() and becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())