~ubuntu-langpack/langpack-o-matic/main

587 by Łukasz 'sil2100' Zemczak
Add a basic sanity-report script for quick sanity testing of generated langpacks, outputting a useful report. This is a tool to be used internally on the l-o-m instance.
1
#!/usr/bin/python3
2
3
# this is part of langpack-o-matic
4
#
5
# (C) 2019 Canonical Ltd.
6
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
7
#
8
# Print out a sanity-test report of the generated language-packs, comparing it
9
# with the previous langpack contents.
10
11
12
import os
13
import sys
14
import logging
15
import subprocess
16
17
from collections import defaultdict
18
from optparse import OptionParser
19
20
21
# Module-level logger for this script; main() turns on DEBUG-level output
# when the -d/--debug option is given.
_logger = logging.getLogger('langpack-sanity-report')
22
23
24
def scan_langpacks(packdir):
    """Collect the size and .po file listing of every langpack in *packdir*.

    Every sub-directory of ``<packdir>/sources-base`` and
    ``<packdir>/sources-update`` is treated as one langpack.

    Returns a defaultdict mapping the langpack directory name to a dict with
    two keys:
      'size'    - disk usage of the langpack directory in KiB (int), as
                  reported by ``du -sk``
      'pofiles' - list of the ``*.po`` files found below the langpack's
                  ``data`` directory, as paths relative to that directory

    Raises subprocess.CalledProcessError if ``du`` or ``find`` exits with a
    non-zero status.
    """
    langpacks = defaultdict(dict)
    for family in ('sources-base', 'sources-update'):
        familydir = os.path.join(packdir, family)
        for pack in os.listdir(familydir):
            pack_path = os.path.join(familydir, pack)
            if not os.path.isdir(pack_path):
                continue
            _logger.debug('Scanning %s...', pack_path)

            # Get the overall size of the langpack.  -k pins the unit to
            # KiB (matching the --threshold option) regardless of any
            # block-size settings in the environment.
            cmd = ['du', '-sk', pack_path]
            try:
                du = subprocess.check_output(cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call du properly')
                raise
            size = int(du.partition('\t')[0])

            # Get the list of pofiles for the given langpack
            po_dir = os.path.join(pack_path, 'data')
            cmd = ['find', po_dir, '-name', '*.po', '-printf', '%P\n']
            try:
                find = subprocess.check_output(cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call find properly')
                raise
            # splitlines() (unlike split('\n')) does not leave a bogus empty
            # entry behind for the trailing newline or for empty output.
            pofiles = find.splitlines()

            # Entries are keyed by directory name only, so a name present in
            # both families keeps the data of the one scanned last.
            langpacks[pack]['size'] = size
            langpacks[pack]['pofiles'] = pofiles
    return langpacks
54
55
56
def sanity_check(prev_packs, next_packs, threshold):
    """Print a sanity report comparing the new langpacks with the old ones.

    prev_packs and next_packs map a langpack name to a dict holding its
    'size' (a number) and 'pofiles' (an iterable of .po file names).
    threshold is the largest size decrease, in the same unit as 'size',
    that is still considered harmless.

    The report, written to stdout, lists:
      - langpacks present in prev_packs but missing from next_packs,
      - langpacks whose size shrunk by more than threshold,
      - .po files that disappeared from a langpack.
    Langpacks that only exist in next_packs are ignored.  All listings are
    sorted so that the report is stable from one run to the next.
    """
    # Check if any languages have been dropped
    print('\nChecking for dropped languages...')
    missing_packs = sorted(set(prev_packs) - set(next_packs))
    if missing_packs:
        print('The following language-packs have been dropped:')
        for pack in missing_packs:
            print(pack)

    print('\nRunning per-language-pack checks...')
    for langpack in sorted(next_packs):
        if langpack not in prev_packs:
            # New language-pack, ignoring
            continue
        data = next_packs[langpack]
        previous = prev_packs[langpack]

        # Check if there are any langpacks that shrunk in size drastically
        next_size = data['size']
        prev_size = previous['size']
        if next_size - prev_size < -threshold:
            # The langpack got worryingly smaller in comparison, report
            print('{} shrunk in size above threshold (was {}, is {})'.format(
                langpack, prev_size, next_size))

        # Check if any po files have been dropped completely
        dropped = sorted(set(previous['pofiles']) - set(data['pofiles']))
        if dropped:
            print('{} dropped the following .po files:\n {}'.format(
                langpack,
                '\n '.join(dropped)))
87
88
89
def main():
    """Parse the command line and print the langpack sanity report.

    Two positional arguments are expected: the directory holding the old
    langpacks and the directory holding the new ones.  Returns 1 when fewer
    than two are given; otherwise the report is printed and the function
    falls off the end (returning None).
    """
    option_parser = OptionParser(usage='Usage: %prog [options] olddir newdir')
    option_parser.add_option(
        '-d', '--debug', dest='debug', action='store_true')
    threshold_help = (
        'Maximum allowed decrease in a langpack\'s size before treating '
        'it as something suspicious - in KiB (default: 64).')
    option_parser.add_option(
        '-t', '--threshold', dest='threshold', type='int',
        help=threshold_help, default=64)
    options, positional = option_parser.parse_args()

    # Both directories are mandatory; bail out early without them.
    if len(positional) < 2:
        print('Please provide paths to the old and new langpack directories.')
        return 1

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Perform the sanity check; any extra positional arguments are ignored.
    old_dir, new_dir = positional[:2]
    banner = ('Performing language-pack sanity checks, comparing between '
              '{} and {}').format(old_dir, new_dir)
    print(banner)
    old_packs = scan_langpacks(old_dir)
    new_packs = scan_langpacks(new_dir)
    sanity_check(old_packs, new_packs, options.threshold)
114
115
116
# Script entry point: whatever main() returns becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
118