~ubuntu-branches/debian/sid/calligraplan/sid

« back to all changes in this revision

Viewing changes to devtools/scripts/profileOfficeFileLoading.py

  • Committer: Package Import Robot
  • Author(s): Pino Toscano
  • Date: 2018-02-01 18:20:19 UTC
  • Revision ID: package-import@ubuntu.com-20180201182019-1qo7qaim5wejm5k9
Tags: upstream-3.1.0
ImportĀ upstreamĀ versionĀ 3.1.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#! /usr/bin/python -Qwarnall
 
2
 
 
3
# This script profiles loading of documents in Calligra.
 
4
# It is called like this:
 
5
#  profileOfficeFileLoading.py $dir $outputfile
 
6
# The script outputs an csv file that contains the times various functions took
 
7
# for each file in directory $dir. The directory $dir is scanned recursively.
 
8
#
 
9
# Copyright 2010 Jos van den Oever <jos@vandenoever.info>
 
10
 
 
11
import sys, os, tempfile, time, signal, subprocess, re, lxml.etree, zipfile
 
12
 
 
13
applications = {
 
14
  'calligrawords': ['odt', 'doc', 'docx'],
 
15
  'calligrastage': ['odp', 'ppt', 'pptx'],
 
16
  'calligrasheets': ['ods', 'xls', 'xlsx']
 
17
}
 
18
extensions = {
 
19
  'odt': 'odt',
 
20
  'doc': 'odt',
 
21
  'docx': 'odt',
 
22
  'odp': 'odp',
 
23
  'ppt': 'odp',
 
24
  'pptx': 'odp',
 
25
  'ods': 'ods',
 
26
  'xls': 'ods',
 
27
  'xlsx': 'ods'
 
28
}
 
29
 
 
30
# limit how many backtraces are recordes, since it takes a lot of time
 
31
maxbacktraces = 50
 
32
 
 
33
def scanDirectory(rootdir, extensions):
 
34
        if os.path.isfile(rootdir):
 
35
                return [rootdir]
 
36
 
 
37
        filext = map(lambda e: "." + e, extensions)
 
38
        list = []
 
39
        for root, directories, files in os.walk(rootdir):
 
40
                for file in files:
 
41
                        if file.startswith('.'):
 
42
                                continue
 
43
                        if any(map(lambda e: file.endswith(e), filext)):
 
44
                                list.append(os.path.join(root, file))
 
45
        return frozenset(list)
 
46
 
 
47
def getExtensions(list):
 
48
        extensions = []
 
49
        for file in list:
 
50
                (path, ext) = os.path.splitext(file)
 
51
                extensions.append(ext)
 
52
        return frozenset(extensions)
 
53
 
 
54
class object:
 
55
        def __init__(self):
 
56
                pass
 
57
 
 
58
# functions for logging, in this case to TeamCity
 
59
class logger:
 
60
        def __init__(self):
 
61
                self.suitename = None
 
62
        def escape(self, text):
 
63
                def escape(m):
 
64
                        if m.group(0) == '\n':
 
65
                                return '|n'
 
66
                        if m.group(0) == '\r':
 
67
                                return '|r'
 
68
                        return '|'+m.group(0)
 
69
                return re.sub('[\'\n\r\|\]]', escape, text)
 
70
        def startTestSuite(self, name):
 
71
                self.suitename = name
 
72
                print "##teamcity[testSuiteStarted name='" + self.suitename \
 
73
                        + "']"
 
74
                sys.stdout.flush()
 
75
        def endTestSuite(self):
 
76
                if not self.suitename: return
 
77
                print "##teamcity[testSuiteFinished name='" + self.suitename \
 
78
                        + "']"
 
79
                sys.stdout.flush()
 
80
                self.suitename = None
 
81
        def startTest(self, name):
 
82
                if not self.suitename: return
 
83
                self.testname = name
 
84
                print "##teamcity[testStarted name='" + self.testname + "']"
 
85
                sys.stdout.flush()
 
86
        # fail the current test
 
87
        def failTest(self, backtrace):
 
88
                if not self.suitename or not self.testname: return
 
89
                bt = ''
 
90
                for l in backtrace:
 
91
                        bt = bt + self.escape(l)
 
92
                print "##teamcity[testFailed name='" + self.testname \
 
93
                        + "' details='" + bt + "']"
 
94
                sys.stdout.flush()
 
95
        # end test, pass duration as integer representing the milliseconds
 
96
        def endTest(self, duration):
 
97
                if not self.suitename or not self.testname: return
 
98
                print "##teamcity[testFinished name='" + self.testname \
 
99
                        + "' duration='" + str(duration) + "']"
 
100
                sys.stdout.flush()
 
101
                self.testname = None
 
102
 
 
103
def containsRealError(err):
 
104
        # some errors reported by libxml2 are false positives, we filter them
 
105
        # out
 
106
        if str(err).find("ERROR:RELAXNGV:RELAXNG_ERR_CONTENTVALID: Element styles failed to validate content") != -1:
 
107
                return None
 
108
        if str(err).find("ERROR:RELAXNGV:RELAXNG_ERR_CONTENTVALID: Element automatic-styles failed to validate content") != -1:
 
109
                return None
 
110
        return err
 
111
 
 
112
class odfvalidator:
 
113
        def __init__(self):
 
114
                path = sys.path[0]
 
115
                self.relaxNGValidator = lxml.etree.RelaxNG( \
 
116
                                lxml.etree.parse(open(os.path.join(path, \
 
117
                                'OpenDocument-v1.2-cs01-schema-calligra.rng'),
 
118
                                'r')))
 
119
                self.relaxNGManifextValidator = lxml.etree.RelaxNG( \
 
120
                                lxml.etree.parse(open(os.path.join(path, \
 
121
                                'OpenDocument-v1.2-cs01-manifest-schema.rng'), \
 
122
                                'r')))
 
123
        # returns error string on error, None otherwise
 
124
        def validate(self, odtpath): 
 
125
                try:
 
126
                        zip = zipfile.ZipFile(odtpath, 'r')
 
127
                except zipfile.BadZipfile as e:
 
128
                        return str(e)
 
129
                err = self.validateFile(zip, 'content.xml',
 
130
                                self.relaxNGValidator)
 
131
                if (containsRealError(err)):
 
132
                        return err
 
133
                err = self.validateFile(zip, 'styles.xml',
 
134
                                self.relaxNGValidator)
 
135
                if (containsRealError(err)):
 
136
                        return err
 
137
                err = self.validateFile(zip, 'META-INF/manifest.xml',
 
138
                                self.relaxNGManifextValidator)
 
139
                if (containsRealError(err)):
 
140
                        return err
 
141
                return None
 
142
 
 
143
        def validateFile(self, zip, file, validator):
 
144
                try:
 
145
                        data = zip.read(file)
 
146
                        xml = lxml.etree.XML(data);
 
147
                except lxml.etree.XMLSyntaxError as e:
 
148
                        return e
 
149
                except KeyError as e:
 
150
                        return e
 
151
                if len(data) > 1000000:
 
152
                        # if the xml file is larger than 1M, the validator may hang
 
153
                        return
 
154
                if not validator.validate(xml):
 
155
                        return validator.error_log.last_error
 
156
 
 
157
def getExecutablePath(exe):
 
158
        exepath = None
 
159
        env = os.environ
 
160
        for p in env['PATH'].split(':'):
 
161
                exepath = os.path.join(p, exe)
 
162
                if os.path.exists(exepath):
 
163
                        break
 
164
        return exepath
 
165
 
 
166
def runCommand(exepath, arguments, captureStdOut):
 
167
        env = os.environ
 
168
        stdout = None
 
169
        if captureStdOut:
 
170
                (fileno, tmpfilename) = tempfile.mkstemp()
 
171
                stdout = os.fdopen(fileno, 'w')
 
172
 
 
173
        args = 'ulimit -v 1000000 -t 60 -c 0;' + exepath
 
174
        for s in arguments:
 
175
                args += ' "' + s + '"'
 
176
 
 
177
        process = subprocess.Popen(['/bin/bash', '-c', args], env=env,
 
178
                close_fds=True, stdout=stdout, stderr=None)
 
179
        s = os.wait4(process.pid, os.WNOHANG)
 
180
        waited = 0
 
181
        waitstep = 0.1
 
182
        maxwaittime = 65
 
183
        while  s[0] == 0 and s[1] == 0 and waited < maxwaittime:
 
184
                # wait a bit
 
185
                time.sleep(waitstep)
 
186
                waited += waitstep
 
187
                s = os.wait4(process.pid, os.WNOHANG)
 
188
        if waited >= maxwaittime:
 
189
                # if the process did not end nicely, kill it
 
190
                try:
 
191
                        os.killpg(process.pid, signal.SIGKILL)
 
192
                except:
 
193
                        pass
 
194
                try:
 
195
                        s = os.wait4(process.pid, 0)
 
196
                except:
 
197
                        pass
 
198
        r = object()
 
199
        r.utime = s[2].ru_utime
 
200
        r.stime = s[2].ru_stime
 
201
        r.returnValue = s[1]
 
202
        r.stdout = []
 
203
        if captureStdOut:
 
204
                stdout.close()
 
205
                stdout = open(tmpfilename, 'r')
 
206
                r.stdout = stdout.readlines()
 
207
                stdout.close()
 
208
                os.remove(tmpfilename)
 
209
        return r
 
210
 
 
211
def profile(dir, file, logger, validator):
 
212
        global maxbacktraces
 
213
        logger.startTest(file)
 
214
        file = os.path.join(dir, file)
 
215
        (path, ext) = os.path.splitext(file)
 
216
        ext = ext[1:]
 
217
        exe = None
 
218
        for f in applications.keys():
 
219
                if ext in applications[f]:
 
220
                        exe = f
 
221
        exepath = getExecutablePath(exe)
 
222
 
 
223
        # profile
 
224
        (fileno, tmpfilename) = tempfile.mkstemp()
 
225
        roundtripfilename = None
 
226
        args = []
 
227
        # in case of ODF file, do a roundtrip
 
228
        m = re.match('.*(\.od.)$', file)
 
229
        (roundtripfd, roundtripfilename) = tempfile.mkstemp("." + extensions[ext])
 
230
        if m:
 
231
                isOdfFile = True
 
232
        else:
 
233
                isOdfFile = False
 
234
        if isOdfFile:
 
235
                args += ["--roundtrip-filename", roundtripfilename]
 
236
        args += ["--benchmark-loading", "--profile-filename", tmpfilename,
 
237
                "--nocrashhandler", file]
 
238
        r = runCommand(exepath, args, False)
 
239
        outfile = os.fdopen(fileno, 'r')
 
240
        r.lines = outfile.readlines()
 
241
        outfile.close()
 
242
        os.close(roundtripfd)
 
243
        r.backtrace = None
 
244
        if r.returnValue != 0:
 
245
                if maxbacktraces > 0:
 
246
                        maxbacktraces -= 1
 
247
                        # generate a backtrace
 
248
                        args = ["--batch", "--eval-command=run",
 
249
                                "--eval-command=bt", "--args"] + [exepath] + args
 
250
                        exepath = getExecutablePath("gdb")
 
251
                        debugresult = runCommand(exepath, args, True)
 
252
                        r.backtrace = debugresult.stdout
 
253
                        for l in r.backtrace:
 
254
                                print l.rstrip()
 
255
                                sys.stdout.flush()
 
256
                        logger.failTest(r.backtrace)
 
257
                else:
 
258
                        logger.failTest("Crash, no backtrace: limit reached.")
 
259
        else:
 
260
                if not isOdfFile:
 
261
                        # convert ms office file to odf
 
262
                        exepath = getExecutablePath("calligraconverter")
 
263
                        args = [file, roundtripfilename]
 
264
                        cr = runCommand(exepath, args, False)
 
265
 
 
266
                err = validator.validate(roundtripfilename);
 
267
                if err != None:
 
268
                        logger.failTest(str(err))
 
269
 
 
270
        # remove the roundtripfile and the temporary file
 
271
        os.remove(tmpfilename)
 
272
        os.remove(roundtripfilename)
 
273
 
 
274
        logger.endTest(int((r.utime + r.stime)*1000))
 
275
        return r
 
276
 
 
277
def getGlobalMinMax(times):
 
278
        keys = times.min.keys()
 
279
        min = times.min[keys[0]]
 
280
        max = times.max[keys[0]]
 
281
        for k in keys:
 
282
                if times.min[k] <= min:
 
283
                        min = times.min[k]
 
284
                if times.max[k] >= max:
 
285
                        max = times.max[k]
 
286
        return (min, max)
 
287
 
 
288
def getGlobalTask(times):
 
289
        keys = times.min.keys()
 
290
        (min, max) = getGlobalMinMax(times)
 
291
        name = None
 
292
        for k in keys:
 
293
                if times.min[k] == min and times.max[k] == max:
 
294
                        name = k
 
295
        return name
 
296
 
 
297
def getFurthestEnd(start, times, excludedkey):
 
298
        end = start
 
299
        for k in times.min.keys():
 
300
                if times.min[k] <= start and times.max[k] > end and k != excludedkey:
 
301
                        end = times.max[k]
 
302
        if end == start:
 
303
                end = None
 
304
        return end
 
305
 
 
306
def getClosestStart(start, times, excludedkey):
 
307
        s = 10000000
 
308
        for k in times.min.keys():
 
309
                if times.min[k] > start and times.min[k] < s and k != excludedkey:
 
310
                        s = times.min[k]
 
311
        return s
 
312
 
 
313
# if there is one task that starts before all other tasks and ends after all
 
314
# other tasks, then count the time that there is no other task running as a sub-
 
315
# task of this global task
 
316
# if there is no global task, then count the time between the first and last
 
317
# timepoint where there is no task running
 
318
def getUnaccountedTime(lines, times):
 
319
        if len(times.min.keys()) == 0:
 
320
                return 0
 
321
 
 
322
        globaltask = getGlobalTask(times)
 
323
        (min, max) = getGlobalMinMax(times)
 
324
        unaccounted = 0
 
325
        currentmax = min
 
326
        currentmin = min
 
327
        while currentmax != max:
 
328
                s = getFurthestEnd(currentmax, times, globaltask)
 
329
                if s == None:
 
330
                        s = getClosestStart(currentmax, times, globaltask)
 
331
                        if s > max:
 
332
                                s = max
 
333
                        unaccounted += s - currentmax
 
334
                currentmax = s
 
335
        return unaccounted
 
336
 
 
337
def summarize(lines):
 
338
        r = object()
 
339
        r.min = {}
 
340
        r.max = {}
 
341
        for l in lines:
 
342
                (fieldname, time, percentdone) = l.split('\t')
 
343
                time = int(time)
 
344
                if not fieldname in r.min or r.min[fieldname] > time:
 
345
                        r.min[fieldname] = time
 
346
                if not fieldname in r.max or r.max[fieldname] < time:
 
347
                        r.max[fieldname] = time
 
348
        return r
 
349
 
 
350
def profileAll(dir, loggername):
 
351
        exts = []
 
352
        for v in applications.values():
 
353
                for e in v:
 
354
                        exts.append(e);
 
355
        officefiles = scanDirectory(dir, exts);
 
356
        usedExts = getExtensions(officefiles)
 
357
        results = {}
 
358
        log = logger()
 
359
        validator = odfvalidator()
 
360
        for ext in usedExts:
 
361
                if loggername:
 
362
                        log.startTestSuite(loggername + '-' + ext[1:])
 
363
                for f in officefiles:
 
364
                        (path, pathext) = os.path.splitext(f)
 
365
                        if pathext == ext:
 
366
                                relf = os.path.relpath(f, dir)
 
367
                                result = profile(dir, relf, log, validator)
 
368
                                results[f] = result
 
369
                log.endTestSuite()
 
370
        return results
 
371
 
 
372
def addMapEntry(map, start, end):
 
373
        if start in map:
 
374
                a = map[start]
 
375
                if end in a:
 
376
                        a[end] = a[end] + 1
 
377
                else:
 
378
                        a[end] = 1
 
379
        else:
 
380
                a = {}
 
381
                a[end] = 1
 
382
        map[start] = a
 
383
        return map
 
384
 
 
385
def createStackTraceGraph(results):
 
386
        exepath = getExecutablePath('dot')
 
387
        if not exepath:
 
388
                return
 
389
        edges = {}
 
390
        nodes = {}
 
391
        n = 0
 
392
        for file in results.keys():
 
393
                r = results[file]
 
394
                if not r.backtrace:
 
395
                        continue
 
396
                name = os.path.basename(file)
 
397
                nodes[name] = file
 
398
                for l in r.backtrace:
 
399
                        l = l.rstrip()
 
400
                        n += 1
 
401
                        m = re.search('/calligra/.*/([^/]+:\d+)$', l)
 
402
                        if m != None:
 
403
                                key = m.group(1)
 
404
                                nodes[key] = l
 
405
                                edges = addMapEntry(edges, key, name)
 
406
                                name = key
 
407
 
 
408
        (fileno, tmpfilename) = tempfile.mkstemp()
 
409
        out = os.fdopen(fileno, 'w')
 
410
        out.write('digraph {')
 
411
        svn = 'http://websvn.kde.org/trunk'
 
412
        for a in nodes:
 
413
                m = re.search('(/calligra/.*):(\d+)$', nodes[a])
 
414
                n = '"' + a + '" [URL = "'
 
415
                if m:
 
416
                        out.write(n + svn + m.group(1) + '?view=markup#l'
 
417
                                + m.group(2) + '"];')
 
418
                else:
 
419
                        m = re.search('(/calligratests/.*)', nodes[a])
 
420
                        if m:
 
421
                                out.write(n + svn + '/tests' + m.group(1)
 
422
                                        + '"];')
 
423
        for a in edges.keys():
 
424
                for b in edges[a].keys():
 
425
                        out.write('"' + a + '" -> "' + b + '" [penwidth='
 
426
                                + str(edges[a][b]) + '];')
 
427
        out.write('}')
 
428
        os.fdatasync(fileno)
 
429
        out.close()
 
430
 
 
431
        args = ["-Tsvg", "-ostacktraces.svg", tmpfilename]
 
432
        r = runCommand(exepath, args, False)
 
433
        os.remove(tmpfilename)
 
434
 
 
435
if __name__ == '__main__':
 
436
        dir = sys.argv[1]
 
437
        output = sys.argv[2]
 
438
        loggername = None
 
439
        if len(sys.argv) > 3:
 
440
                loggername = sys.argv[3]
 
441
        results = profileAll(dir, loggername)
 
442
 
 
443
        fields = {}
 
444
        for r in results.keys():
 
445
                results[r].times = summarize(results[r].lines)
 
446
                results[r].unaccounted = getUnaccountedTime(results[r].lines,
 
447
                        results[r].times)
 
448
                for f in results[r].times.min:
 
449
                        if not f in fields:
 
450
                                fields[f] = object()
 
451
                                fields[f].count = 0
 
452
                                fields[f].totalduration = 0
 
453
                                fields[f].totaldurationsquared = 0
 
454
                                fields[f].min = 10000000
 
455
                        min = results[r].times.min[f]
 
456
                        duration = results[r].times.max[f] - min
 
457
                        fields[f].totalduration += duration
 
458
                        fields[f].totaldurationsquared += duration*duration
 
459
                        fields[f].count += 1
 
460
                        if fields[f].min > min:
 
461
                                fields[f].min = min
 
462
        fieldnames = fields.keys()
 
463
        def sort(keya, keyb):
 
464
                return cmp(fields[keya].min, fields[keyb].min)
 
465
        fieldnames.sort(sort)
 
466
 
 
467
        # collect all fieldnames
 
468
        out = open(output, 'wb')
 
469
        # write header
 
470
        out.write('filename\text\tsize\tr\tutime\tstime\tunaccounted')
 
471
        for f in fieldnames:
 
472
                out.write('\t' + f)
 
473
        out.write('\n')
 
474
        # write average
 
475
        out.write('average\t\t\t\t\t\t')
 
476
        for f in fieldnames:
 
477
                f = fields[f]
 
478
                out.write('\t')
 
479
                if f.count > 0:
 
480
                        out.write(str(f.totalduration/f.count))
 
481
        out.write('\n')
 
482
        # write for each analyzed file
 
483
        for file in results.keys():
 
484
                r = results[file]
 
485
                (path, ext) = os.path.splitext(file)
 
486
                stat = os.stat(file)
 
487
                out.write(file + '\t' + ext[1:] + '\t' + str(stat.st_size) + '\t' + str(r.returnValue))
 
488
                out.write('\t' + str(r.utime) + '\t' + str(r.stime))
 
489
                out.write('\t' + str(r.unaccounted))
 
490
                for f in fieldnames:
 
491
                        out.write('\t')
 
492
                        if f in r.times.min:
 
493
                                out.write(str(r.times.max[f]-r.times.min[f]))
 
494
                out.write('\n')
 
495
        out.close()
 
496
 
 
497
        createStackTraceGraph(results)