~ubuntu-langpack/langpack-o-matic/main

« back to all changes in this revision

Viewing changes to sanity-report

  • Committer: Łukasz 'sil2100' Zemczak
  • Date: 2019-11-26 18:35:20 UTC
  • Revision ID: lukasz.zemczak@canonical.com-20191126183520-l21uqbn6yo4n4g1d
Add a basic sanity-report script for quick sanity testing of generated langpacks, outputting a useful report. This is a tool to be used internally on the l-o-m instance.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/python3
 
2
 
 
3
# this is part of langpack-o-matic
 
4
#
 
5
# (C) 2019 Canonical Ltd.
 
6
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
 
7
#
 
8
# Print out a sanity-test report of the generated language-packs, comparing it
 
9
# with the previous langpack contents.
 
10
 
 
11
 
 
12
import os
 
13
import sys
 
14
import logging
 
15
import subprocess
 
16
 
 
17
from collections import defaultdict
 
18
from optparse import OptionParser
 
19
 
 
20
 
 
21
# Module-level logger used for the script's debug output; main() configures
# DEBUG-level logging when the --debug option is given.
_logger = logging.getLogger('langpack-sanity-report')
 
22
 
 
23
 
 
24
def scan_langpacks(packdir):
    """Collect size and .po file listings for every langpack under *packdir*.

    Both the 'sources-base' and 'sources-update' subdirectories of *packdir*
    are scanned; every directory found directly inside them is treated as one
    language-pack.

    Returns a defaultdict mapping the pack directory name to a dict with:
      'size'    - disk usage of the pack as reported by ``du -sk`` (KiB)
      'pofiles' - list of paths (relative to the pack's 'data' directory)
                  of entries matching '*.po', as reported by ``find``

    Raises subprocess.CalledProcessError if ``du`` or ``find`` exits with a
    non-zero status, and OSError if a family directory cannot be listed.
    """
    langpacks = defaultdict(dict)
    for family in ('sources-base', 'sources-update'):
        familydir = os.path.join(packdir, family)
        for pack in os.listdir(familydir):
            pack_path = os.path.join(familydir, pack)
            if not os.path.isdir(pack_path):
                continue
            _logger.debug('Scanning %s...', pack_path)
            # Get the overall size of the langpack.  -k pins the unit to
            # KiB so the result does not depend on POSIXLY_CORRECT or
            # BLOCKSIZE being set in the environment.
            du = _run_tool(['du', '-sk', pack_path], 'du')
            size = int(du.split('\t')[0])
            # Get the list of pofiles for the given langpack
            po_dir = os.path.join(pack_path, 'data')
            find = _run_tool(
                ['find', po_dir, '-name', '*.po', '-printf', '%P\n'], 'find')
            # The output is newline-terminated, so a plain split leaves an
            # empty trailing entry; drop it.
            pofiles = [name for name in find.split('\n') if name]
            langpacks[pack]['size'] = size
            langpacks[pack]['pofiles'] = pofiles
    return langpacks


def _run_tool(cmd, name):
    """Run *cmd* and return its stdout as text; report a failure of *name*."""
    try:
        return subprocess.check_output(cmd, universal_newlines=True)
    except subprocess.CalledProcessError:
        # Diagnostics go to stderr so they do not mix with the report.
        print('Could not call {} properly'.format(name), file=sys.stderr)
        raise
 
54
 
 
55
 
 
56
def sanity_check(prev_packs, next_packs, threshold):
    """Print a sanity report comparing two langpack scans.

    *prev_packs* and *next_packs* map a langpack name to a dict with a
    'size' number and a 'pofiles' iterable of path strings (the structure
    returned by scan_langpacks).  *threshold* is the largest size decrease,
    in the same unit as the 'size' values, that is not reported.

    Three things are reported on stdout: langpacks present in *prev_packs*
    but absent from *next_packs*, langpacks whose size decreased by more
    than *threshold*, and .po files that disappeared from a langpack.
    Langpacks that only exist in *next_packs* are ignored.  All listings
    are sorted so that the report is reproducible between runs.
    """
    # Check if any languages have been dropped
    print('\nChecking for dropped languages...')
    missing = sorted(set(prev_packs) - set(next_packs))
    if missing:
        print('The following language-packs have been dropped:')
        for pack in missing:
            print(pack)
    print('\nRunning per-language-pack checks...')
    for langpack in sorted(next_packs):
        if langpack not in prev_packs:
            # New language-pack, ignoring
            continue
        data = next_packs[langpack]
        prev_data = prev_packs[langpack]
        # Check if there are any langpacks that shrunk in size drastically
        next_size = data['size']
        prev_size = prev_data['size']
        if next_size - prev_size < -threshold:
            # The langpack got worryingly smaller in comparison, report
            print('{} shrunk in size above threshold (was {}, is {})'.format(
                langpack, prev_size, next_size))
        # Check if any po files have been dropped completely
        dropped = sorted(set(prev_data['pofiles']) - set(data['pofiles']))
        if dropped:
            print('{} dropped the following .po files:\n {}'.format(
                langpack,
                '\n '.join(dropped)))
 
87
 
 
88
 
 
89
def main():
    """Command-line entry point: compare an old and a new langpack tree.

    Expects two positional arguments (old directory, new directory) and
    accepts -d/--debug and -t/--threshold.  Returns 1 when fewer than two
    directories were given; otherwise prints the report and returns None.
    """
    parser = OptionParser(usage='Usage: %prog [options] olddir newdir')
    parser.add_option('-d', '--debug', action='store_true', dest='debug')
    parser.add_option(
        '-t', '--threshold',
        type='int',
        dest='threshold',
        default=64,
        help="Maximum allowed decrease in a langpack's size before treating "
             "it as something suspicious - in KiB (default: 64).")
    options, positional = parser.parse_args()

    # Both directories are mandatory; bail out before doing any work.
    if len(positional) < 2:
        print('Please provide paths to the old and new langpack directories.')
        return 1

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Perform the sanity check
    prev_dir, next_dir = positional[0], positional[1]
    banner = ('Performing language-pack sanity checks, comparing between '
              '{} and {}')
    print(banner.format(prev_dir, next_dir))
    # The old tree is scanned first, then the new one.
    old_scan = scan_langpacks(prev_dir)
    new_scan = scan_langpacks(next_dir)
    sanity_check(old_scan, new_scan, options.threshold)
 
114
 
 
115
 
 
116
# Run the report only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
 
118