#!/usr/bin/python3
# this is part of langpack-o-matic
#
# (C) 2019 Canonical Ltd.
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
#
# Print out a sanity-test report of the generated language-packs, comparing it
# with the previous langpack contents.
import os
import sys
import logging
import subprocess
from collections import defaultdict
from optparse import OptionParser
_logger = logging.getLogger('langpack-sanity-report')
def scan_langpacks(packdir):
    """Collect size and .po file information for every langpack in *packdir*.

    The ``sources-base`` and ``sources-update`` subdirectories of *packdir*
    are listed, and every directory found directly inside them is treated as
    one language-pack.  Non-directory entries are ignored.

    Args:
        packdir: Path of the directory holding the ``sources-base`` and
            ``sources-update`` subdirectories.

    Returns:
        A ``defaultdict(dict)`` keyed by langpack directory name.  Each value
        has two entries: ``'size'``, the integer reported by ``du -s`` for
        the langpack directory, and ``'pofiles'``, the list of ``*.po`` files
        found under the langpack's ``data`` directory, as paths relative to
        that directory.  If the same name exists in both families, the data
        of the one scanned last (``sources-update``) wins.

    Raises:
        OSError: If one of the two family directories cannot be listed.
        subprocess.CalledProcessError: If ``du`` or ``find`` exits with a
            non-zero status.
    """
    langpacks = defaultdict(dict)

    for family in ('sources-base', 'sources-update'):
        familydir = os.path.join(packdir, family)
        for pack in os.listdir(familydir):
            pack_path = os.path.join(familydir, pack)
            if not os.path.isdir(pack_path):
                continue
            _logger.debug('Scanning %s...', pack_path)

            # Get the overall size of the langpack
            cmd = ['du', '-s', pack_path]
            try:
                du = subprocess.check_output(cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call du properly')
                raise
            # du prints "<size>\t<path>"; only the size is of interest.
            size = int(du.split('\t')[0])

            # Get the list of pofiles for the given langpack
            po_dir = os.path.join(pack_path, 'data')
            cmd = ['find', po_dir, '-name', '*.po', '-printf', '%P\n']
            try:
                find = subprocess.check_output(cmd, universal_newlines=True)
            except subprocess.CalledProcessError:
                print('Could not call find properly')
                raise
            # The output ends with a newline (or is empty when nothing
            # matched), so a plain split would leave a bogus '' entry.
            pofiles = [name for name in find.split('\n') if name]

            langpacks[pack]['size'] = size
            langpacks[pack]['pofiles'] = pofiles

    return langpacks
def sanity_check(prev_packs, next_packs, threshold):
    """Print a sanity report comparing two langpack scans.

    Three things are reported on standard output: langpacks present in
    *prev_packs* but missing from *next_packs*, langpacks whose size dropped
    by more than *threshold*, and .po files that disappeared from a langpack.
    Langpacks that only exist in *next_packs* are skipped.  All listings are
    sorted so that the report is identical from run to run.

    Args:
        prev_packs: Mapping of langpack name to a dict with the keys
            ``'size'`` (int) and ``'pofiles'`` (iterable of str) describing
            the previous langpacks.
        next_packs: Mapping of the same shape describing the new langpacks.
        threshold: Largest decrease of ``'size'`` that is still considered
            normal, in the same unit as ``'size'``.

    Returns:
        None.
    """
    # Check if any languages have been dropped
    print('\nChecking for dropped languages...')
    missing_packs = sorted(set(prev_packs) - set(next_packs))
    if missing_packs:
        print('The following language-packs have been dropped:')
        for pack in missing_packs:
            print(pack)

    print('\nRunning per-language-pack checks...')
    for langpack in sorted(next_packs):
        if langpack not in prev_packs:
            # New language-pack, ignoring
            continue
        data = next_packs[langpack]
        prev_data = prev_packs[langpack]

        # Check if there are any langpacks that shrunk in size drastically
        next_size = data['size']
        prev_size = prev_data['size']
        if next_size - prev_size < -threshold:
            # The langpack got worryingly smaller in comparison, report
            print('{} shrunk in size above threshold (was {}, is {})'.format(
                langpack, prev_size, next_size))

        # Check if any po files have been dropped completely
        dropped = sorted(set(prev_data['pofiles']) - set(data['pofiles']))
        if dropped:
            print('{} dropped the following .po files:\n {}'.format(
                langpack,
                '\n '.join(dropped)))
def main(argv=None):
    """Parse the command line and print the langpack sanity report.

    Args:
        argv: Optional list of command-line arguments, without the program
            name.  When ``None`` (the default) the arguments are taken from
            ``sys.argv[1:]``.

    Returns:
        1 if fewer than two directories were given, 0 otherwise.  The value
        is meant to be used as the process exit status.
    """
    parser = OptionParser(
        usage='Usage: %prog [options] olddir newdir')
    parser.add_option(
        '-d', '--debug', dest='debug', action='store_true')
    parser.add_option(
        '-t', '--threshold', dest='threshold', type='int',
        help='Maximum allowed decrease in a langpack\'s size before treating '
             'it as something suspicious - in KiB (default: 64).',
        default=64)

    # parse_args(None) falls back to sys.argv[1:].
    opts, args = parser.parse_args(argv)
    if len(args) < 2:
        print('Please provide paths to the old and new langpack directories.')
        return 1

    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Perform the sanity check
    prev_dir = args[0]
    next_dir = args[1]
    print('Performing language-pack sanity checks, comparing between '
          '{} and {}'.format(
              prev_dir, next_dir))
    prev_packs = scan_langpacks(prev_dir)
    next_packs = scan_langpacks(next_dir)
    sanity_check(prev_packs, next_packs, opts.threshold)
    return 0
# Run the report only when executed as a script, using main()'s return value
# as the process exit status.
if __name__ == '__main__':
    sys.exit(main())