587
by Łukasz 'sil2100' Zemczak
Add a basic sanity-report script for quick sanity testing of generated langpacks, outputting a useful report. This is a tool to be used internally on the l-o-m instance. |
1 |
#!/usr/bin/python3
|
2 |
||
3 |
# this is part of langpack-o-matic
|
|
4 |
#
|
|
5 |
# (C) 2019 Canonical Ltd.
|
|
6 |
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
|
|
7 |
#
|
|
8 |
# Print out a sanity-test report of the generated language-packs, comparing it
|
|
9 |
# with the previous langpack contents.
|
|
10 |
||
11 |
||
12 |
import os |
|
13 |
import sys |
|
14 |
import logging |
|
15 |
import subprocess |
|
16 |
||
17 |
from collections import defaultdict |
|
18 |
from optparse import OptionParser |
|
19 |
||
20 |
||
21 |
# Module-wide logger; debug output is only shown when main() enables it.
_logger = logging.getLogger('langpack-sanity-report')


def scan_langpacks(packdir):
    """Collect the size and the .po file listing of every langpack.

    Both the 'sources-base' and 'sources-update' sub-directories of
    packdir are scanned; every directory found directly inside them is
    treated as one language-pack. Non-directory entries are ignored.

    Args:
        packdir: path to a directory holding the 'sources-base' and
            'sources-update' family directories.

    Returns:
        A defaultdict mapping the pack directory name to a dict with:
          'size'    - int, disk usage of the pack as reported by
                      `du -sk` (KiB),
          'pofiles' - list of *.po paths relative to the pack's 'data'
                      directory, as printed by `find -printf '%P\\n'`.

    Raises:
        subprocess.CalledProcessError: if du or find exits non-zero
            (e.g. the pack has no 'data' directory).
        OSError: propagated from os.listdir() when a family directory
            cannot be listed.
    """
    langpacks = defaultdict(dict)
    for family in ('sources-base', 'sources-update'):
        familydir = os.path.join(packdir, family)
        for pack in os.listdir(familydir):
            pack_path = os.path.join(familydir, pack)
            if not os.path.isdir(pack_path):
                continue
            _logger.debug('Scanning %s...', pack_path)

            # Get the overall size of the langpack. -k pins the unit to
            # KiB so the result does not depend on POSIXLY_CORRECT or the
            # *BLOCK_SIZE environment variables; the size threshold used
            # by the report is expressed in KiB.
            cmd = ['du', '-sk', pack_path]
            try:
                du = subprocess.check_output(cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call du properly')
                raise
            size = int(du.split('\t')[0])

            # Get the list of pofiles for the given langpack
            po_dir = os.path.join(pack_path, 'data')
            cmd = ['find', po_dir, '-name', '*.po', '-printf', '%P\n']
            try:
                find = subprocess.check_output(cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call find properly')
                raise
            # find terminates every entry with a newline; drop the empty
            # trailing element so only real paths end up in the list.
            pofiles = [line for line in find.split('\n') if line]

            langpacks[pack]['size'] = size
            langpacks[pack]['pofiles'] = pofiles
    return langpacks
|
54 |
||
55 |
||
56 |
def sanity_check(prev_packs, next_packs, threshold):
    """Print a sanity report comparing two langpack scans.

    Three things are reported on stdout:
      * language-packs present in prev_packs but missing in next_packs,
      * language-packs whose size decreased by more than threshold,
      * .po files present in the previous pack but not in the new one.

    Packs that only exist in next_packs are considered new and skipped.
    All listings are printed in sorted order so that consecutive reports
    are stable and can be compared with each other.

    Args:
        prev_packs: mapping of pack name to a dict with the 'size' and
            'pofiles' keys, describing the previous langpacks.
        next_packs: same structure, describing the new langpacks.
        threshold: maximum tolerated decrease of a pack's 'size', in the
            same unit as the 'size' values.

    Returns:
        None; the report is only printed.
    """
    # Check if any languages have been dropped
    print('\nChecking for dropped languages...')
    missing_set = set(prev_packs) - set(next_packs)
    if missing_set:
        print('The following language-packs have been dropped:')
        for pack in sorted(missing_set):
            print(pack)

    print('\nRunning per-language-pack checks...')
    for langpack in sorted(next_packs):
        if langpack not in prev_packs:
            # New language-pack, ignoring
            continue
        data = next_packs[langpack]
        previous = prev_packs[langpack]

        # Check if there are any langpacks that shrunk in size drastically
        next_size = data['size']
        prev_size = previous['size']
        if next_size - prev_size < -threshold:
            # The langpack got worryingly smaller in comparison, report
            print('{} shrunk in size above threshold (was {}, is {})'.format(
                langpack, prev_size, next_size))

        # Check if any po files have been dropped completely
        dropped_set = set(previous['pofiles']) - set(data['pofiles'])
        if dropped_set:
            print('{} dropped the following .po files:\n {}'.format(
                langpack,
                '\n '.join(sorted(dropped_set))))
|
87 |
||
88 |
||
89 |
def main():
    """Command-line entry point of the sanity report.

    Expects two positional arguments: the directory with the previous
    langpacks and the directory with the new ones. Supports -d/--debug
    to enable debug logging and -t/--threshold to set the tolerated
    size decrease (default 64).

    Returns:
        1 when fewer than two positional arguments were given, None
        otherwise (after the report has been printed).
    """
    parser = OptionParser(usage='Usage: %prog [options] olddir newdir')
    parser.add_option('-d', '--debug', dest='debug', action='store_true')
    threshold_help = (
        "Maximum allowed decrease in a langpack's size before treating "
        "it as something suspicious - in KiB (default: 64).")
    parser.add_option(
        '-t', '--threshold',
        dest='threshold', type='int', default=64, help=threshold_help)
    options, positional = parser.parse_args()

    if len(positional) < 2:
        print('Please provide paths to the old and new langpack directories.')
        return 1

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Perform the sanity check
    old_dir, new_dir = positional[0], positional[1]
    banner = ('Performing language-pack sanity checks, comparing between '
              '{} and {}')
    print(banner.format(old_dir, new_dir))

    # Scan the old tree first, then the new one, and compare the results.
    old_scan = scan_langpacks(old_dir)
    new_scan = scan_langpacks(new_dir)
    sanity_check(old_scan, new_scan, options.threshold)
|
114 |
||
115 |
||
116 |
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
118 |