# Copyright (C) 2010, 2012 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Package that handles non-debug, non-file output for run-webkit-tests."""

import math
import optparse

from webkitpy.tool import grammar
from webkitpy.common.net import resultsjsonparser
from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models.test_expectations import TestExpectations, TestExpectationParser
from webkitpy.layout_tests.views.metered_stream import MeteredStream


NUM_SLOW_TESTS_TO_LOG = 10


def print_options():
    return [
        optparse.make_option('-q', '--quiet', action='store_true', default=False,
                             help='run quietly (errors, warnings, and progress only)'),
        optparse.make_option('-v', '--verbose', action='store_true', default=False,
                             help='print a summarized result for every test (one line per test)'),
        optparse.make_option('--details', action='store_true', default=False,
                             help='print detailed results for every test'),
        optparse.make_option('--debug-rwt-logging', action='store_true', default=False,
                             help='print timestamps and debug information for run-webkit-tests itself'),
    ]
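
# Illustrative sketch of how these options are typically consumed. The parser
# below is a hypothetical stand-in; run-webkit-tests assembles its real
# optparse parser from several option groups:
#
#   parser = optparse.OptionParser()
#   for option in print_options():
#       parser.add_option(option)
#   options, args = parser.parse_args(['--verbose'])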


class Printer(object):
    """Class handling all non-debug-logging printing done by run-webkit-tests.

    Printing from run-webkit-tests falls into two buckets: regular output
    that is read only by humans and can be changed at any time, and output
    that is parsed by buildbots (and humans) and hence must be changed more
    carefully and in coordination with the buildbot parsing code (in
    chromium.org's buildbot/master.chromium/scripts/master/
    log_parser/webkit_test_command.py).

    By default the buildbot-parsed output gets logged to stdout, and regular
    output gets logged to stderr."""

    def __init__(self, port, options, regular_output, buildbot_output, logger=None):
        self.num_completed = 0
        self.num_tests = 0
        self._port = port
        self._options = options
        self._buildbot_stream = buildbot_output
        self._meter = MeteredStream(regular_output, options.debug_rwt_logging, logger=logger,
                                    number_of_columns=self._port.host.platform.terminal_width())
        self._running_tests = []
        self._completed_tests = []
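
    # Illustrative usage sketch; the port, options, and streams below are
    # hypothetical stand-ins for what run-webkit-tests actually wires up:
    #
    #   printer = Printer(port, options, regular_output=sys.stderr,
    #                     buildbot_output=sys.stdout)
    #   printer.print_config('/tmp/layout-test-results')
    #   printer.print_started_test('fast/html/article-element.html')
    #   printer.print_finished_test(result, expected, exp_str, got_str)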

    def print_config(self, results_directory):
        self._print_default("Using port '%s'" % self._port.name())
        self._print_default("Test configuration: %s" % self._port.test_configuration())
        self._print_default("Placing test results in %s" % results_directory)

        # FIXME: should these options be in printing_options?
        if self._options.new_baseline:
            self._print_default("Placing new baselines in %s" % self._port.baseline_path())

        fs = self._port.host.filesystem
        fallback_path = [fs.split(x)[1] for x in self._port.baseline_search_path()]
        self._print_default("Baseline search path: %s -> generic" % " -> ".join(fallback_path))

        self._print_default("Using %s build" % self._options.configuration)
        if self._options.pixel_tests:
            self._print_default("Pixel tests enabled")
        else:
            self._print_default("Pixel tests disabled")

        self._print_default("Regular timeout: %s, slow test timeout: %s" %
                            (self._options.time_out_ms, self._options.slow_time_out_ms))

        self._print_default('Command line: ' + ' '.join(self._port.driver_cmd_line()))
        self._print_default('')
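
    # Illustrative output from print_config at the default verbosity; the
    # port name, configuration string, paths, and timeouts are made up:
    #
    #   Using port 'test-mac-leopard'
    #   Placing test results in /tmp/layout-test-results
    #   Baseline search path: test-mac-leopard -> test-mac-snowleopard -> generic
    #   Using Release build
    #   Pixel tests enabled
    #   Regular timeout: 6000, slow test timeout: 30000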

    def print_found(self, num_all_test_files, num_to_run, repeat_each, iterations):
        num_unique_tests = num_to_run / (repeat_each * iterations)
        found_str = 'Found %s; running %d' % (grammar.pluralize('test', num_all_test_files), num_unique_tests)
        if repeat_each * iterations > 1:
            found_str += ' (%d times each: --repeat-each=%d --iterations=%d)' % (repeat_each * iterations, repeat_each, iterations)
        found_str += ', skipping %d' % (num_all_test_files - num_unique_tests)
        self._print_default(found_str + '.')
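
    # Worked example (hypothetical numbers): with num_all_test_files=25,
    # num_to_run=30, repeat_each=3 and iterations=1, num_unique_tests is
    # 30 / 3 = 10 and the printed line is:
    #
    #   Found 25 tests; running 10 (3 times each: --repeat-each=3 --iterations=1), skipping 15.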

    def print_expected(self, result_summary, tests_with_result_type_callback):
        self._print_expected_results_of_type(result_summary, test_expectations.PASS, "passes", tests_with_result_type_callback)
        self._print_expected_results_of_type(result_summary, test_expectations.FAIL, "failures", tests_with_result_type_callback)
        self._print_expected_results_of_type(result_summary, test_expectations.FLAKY, "flaky", tests_with_result_type_callback)
        self._print_debug('')

    def print_workers_and_shards(self, num_workers, num_shards, num_locked_shards):
        driver_name = self._port.driver_name()
        if num_workers == 1:
            self._print_default("Running 1 %s over %s." % (driver_name, grammar.pluralize('shard', num_shards)))
        else:
            self._print_default("Running %d %ss in parallel over %d shards (%d locked)." %
                                (num_workers, driver_name, num_shards, num_locked_shards))
        self._print_default('')

    def _print_expected_results_of_type(self, result_summary, result_type, result_type_str, tests_with_result_type_callback):
        tests = tests_with_result_type_callback(result_type)
        now = result_summary.tests_by_timeline[test_expectations.NOW]
        wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]

        # We use a fancy format string in order to print the data out in a
        # nicely-aligned table.
        fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
                  % (self._num_digits(now), self._num_digits(wontfix)))
        self._print_debug(fmtstr % (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))

    def _num_digits(self, num):
        ndigits = 1
        if len(num):
            ndigits = int(math.log10(len(num))) + 1
        return ndigits
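
    # Illustrative example: if 250 tests are expected to pass "now",
    # _num_digits returns 3, so the "Expect:" table above pads that column to
    # three digits and the rows stay aligned.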

    def print_results(self, run_time, thread_timings, test_timings, individual_test_timings, result_summary, unexpected_results):
        self._print_timing_statistics(run_time, thread_timings, test_timings, individual_test_timings, result_summary)
        self._print_result_summary(result_summary)
        self._print_one_line_summary(result_summary.total - result_summary.expected_skips,
                                     result_summary.expected - result_summary.expected_skips,
                                     result_summary.unexpected)
        self._print_unexpected_results(unexpected_results)

    def _print_timing_statistics(self, total_time, thread_timings,
                                 directory_test_timings, individual_test_timings,
                                 result_summary):
        self._print_debug("Test timing:")
        self._print_debug("  %6.2f total testing time" % total_time)
        self._print_debug("")
        self._print_debug("Thread timing:")
        cuml_time = 0
        for t in thread_timings:
            self._print_debug("    %10s: %5d tests, %6.2f secs" % (t['name'], t['num_tests'], t['total_time']))
            cuml_time += t['total_time']
        self._print_debug("   %6.2f cumulative, %6.2f optimal" % (cuml_time, cuml_time / int(self._options.child_processes)))
        self._print_debug("")

        self._print_aggregate_test_statistics(individual_test_timings)
        self._print_individual_test_times(individual_test_timings, result_summary)
        self._print_directory_timings(directory_test_timings)

    def _print_aggregate_test_statistics(self, individual_test_timings):
        times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
        self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):", times_for_dump_render_tree)

    def _print_individual_test_times(self, individual_test_timings, result_summary):
        # Reverse-sort by the time spent in DumpRenderTree.
        individual_test_timings.sort(lambda a, b: cmp(b.test_run_time, a.test_run_time))
        num_printed = 0
        slow_tests = []
        timeout_or_crash_tests = []
        unexpected_slow_tests = []
        for test_tuple in individual_test_timings:
            test_name = test_tuple.test_name
            is_timeout_crash_or_slow = False
            if test_name in result_summary.slow_tests:
                is_timeout_crash_or_slow = True
                slow_tests.append(test_tuple)

            if test_name in result_summary.failures:
                result = result_summary.results[test_name].type
                if (result == test_expectations.TIMEOUT or
                        result == test_expectations.CRASH):
                    is_timeout_crash_or_slow = True
                    timeout_or_crash_tests.append(test_tuple)

            if (not is_timeout_crash_or_slow and num_printed < NUM_SLOW_TESTS_TO_LOG):
                num_printed = num_printed + 1
                unexpected_slow_tests.append(test_tuple)

        self._print_debug("")
        self._print_test_list_timing("%s slowest tests that are not marked as SLOW and did not timeout/crash:" %
                                     NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
        self._print_debug("")
        self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
        self._print_debug("")
        self._print_test_list_timing("Tests that timed out or crashed:", timeout_or_crash_tests)
        self._print_debug("")

    def _print_test_list_timing(self, title, test_list):
        self._print_debug(title)
        for test_tuple in test_list:
            test_run_time = round(test_tuple.test_run_time, 1)
            self._print_debug("  %s took %s seconds" % (test_tuple.test_name, test_run_time))

    def _print_directory_timings(self, directory_test_timings):
        timings = []
        for directory in directory_test_timings:
            num_tests, time_for_directory = directory_test_timings[directory]
            timings.append((round(time_for_directory, 1), directory, num_tests))
        timings.sort()

        self._print_debug("Time to process slowest subdirectories:")
        min_seconds_to_print = 10
        for timing in timings:
            if timing[0] > min_seconds_to_print:
                self._print_debug("  %s took %s seconds to run %s tests." % (timing[1], timing[0], timing[2]))
        self._print_debug("")

    def _print_statistics_for_test_timings(self, title, timings):
        self._print_debug(title)
        timings.sort()

        num_tests = len(timings)
        if not num_tests:
            return
        percentile90 = timings[int(.9 * num_tests)]
        percentile99 = timings[int(.99 * num_tests)]

        if num_tests % 2 == 1:
            median = timings[(num_tests - 1) / 2]
        else:
            lower = timings[num_tests / 2 - 1]
            upper = timings[num_tests / 2]
            median = (float(lower + upper)) / 2

        mean = sum(timings) / num_tests

        # Accumulate the squared deviations so the standard deviation below
        # reflects every timing, not just the last one.
        sum_of_deviations = 0
        for timing in timings:
            sum_of_deviations += math.pow(timing - mean, 2)

        std_deviation = math.sqrt(sum_of_deviations / num_tests)
        self._print_debug("  Median:          %6.3f" % median)
        self._print_debug("  Mean:            %6.3f" % mean)
        self._print_debug("  90th percentile: %6.3f" % percentile90)
        self._print_debug("  99th percentile: %6.3f" % percentile99)
        self._print_debug("  Standard dev:    %6.3f" % std_deviation)
        self._print_debug("")

    def _print_result_summary(self, result_summary):
        if not self._options.debug_rwt_logging:
            return

        failed = result_summary.total_failures
        total = result_summary.total - result_summary.expected_skips
        passed = total - failed - result_summary.remaining
        pct_passed = 0.0
        if total > 0:
            pct_passed = float(passed) * 100 / total

        self._print_for_bot("=> Results: %d/%d tests passed (%.1f%%)" % (passed, total, pct_passed))
        self._print_for_bot("")
        self._print_result_summary_entry(result_summary, test_expectations.NOW, "Tests to be fixed")

        self._print_for_bot("")
        # FIXME: We should be skipping anything marked WONTFIX, so we shouldn't bother logging these stats.
        self._print_result_summary_entry(result_summary, test_expectations.WONTFIX,
                                         "Tests that will only be fixed if they crash (WONTFIX)")
        self._print_for_bot("")

    def _print_result_summary_entry(self, result_summary, timeline, heading):
        total = len(result_summary.tests_by_timeline[timeline])
        not_passing = (total -
                       len(result_summary.tests_by_expectation[test_expectations.PASS] &
                           result_summary.tests_by_timeline[timeline]))
        self._print_for_bot("=> %s (%d):" % (heading, not_passing))

        for result in TestExpectations.EXPECTATION_ORDER:
            if result in (test_expectations.PASS, test_expectations.SKIP):
                continue
            results = (result_summary.tests_by_expectation[result] &
                       result_summary.tests_by_timeline[timeline])
            desc = TestExpectations.EXPECTATION_DESCRIPTIONS[result]
            if not_passing and len(results):
                pct = len(results) * 100.0 / not_passing
                self._print_for_bot("  %5d %-24s (%4.1f%%)" % (len(results), desc, pct))

    def _print_one_line_summary(self, total, expected, unexpected):
        incomplete = total - expected - unexpected
        incomplete_str = ''
        if incomplete:
            self._print_default("")
            incomplete_str = " (%d didn't run)" % incomplete

        if self._options.verbose or self._options.debug_rwt_logging or unexpected:
            self.writeln("")

        if expected == total:
            if expected > 1:
                summary = "All %d tests ran as expected." % expected
            else:
                summary = "The test ran as expected."
        elif not unexpected:
            summary = "%s ran as expected%s." % (grammar.pluralize('test', expected), incomplete_str)
        else:
            summary = "%s ran as expected, %d didn't%s:" % (grammar.pluralize('test', expected), unexpected, incomplete_str)

        self._print_quiet(summary)
        self._print_quiet("")

    def _test_status_line(self, test_name, suffix):
        format_string = '[%d/%d] %s%s'
        status_line = format_string % (self.num_completed, self.num_tests, test_name, suffix)
        if len(status_line) > self._meter.number_of_columns():
            overflow_columns = len(status_line) - self._meter.number_of_columns()
            ellipsis = '...'
            if len(test_name) < overflow_columns + len(ellipsis) + 2:
                # We don't have enough space even if we elide, just show the test filename.
                fs = self._port.host.filesystem
                test_name = fs.split(test_name)[1]
            else:
                new_length = len(test_name) - overflow_columns - len(ellipsis)
                prefix = int(new_length / 2)
                test_name = test_name[:prefix] + ellipsis + test_name[-(new_length - prefix):]
        return format_string % (self.num_completed, self.num_tests, test_name, suffix)
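
    # Illustrative example: on an 80-column terminal, a long name such as
    # 'fast/dom/HTMLObjectElement/beforeload-set-text-crash.xhtml' has its
    # middle replaced by '...' so the '[12/100] ...' line still fits on one row.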

    def print_started_test(self, test_name):
        self._running_tests.append(test_name)
        if len(self._running_tests) > 1:
            suffix = ' (+%d)' % (len(self._running_tests) - 1)
        else:
            suffix = ''
        if self._options.verbose:
            write = self._meter.write_update
        else:
            write = self._meter.write_throttled_update
        write(self._test_status_line(test_name, suffix))

    def print_finished_test(self, result, expected, exp_str, got_str):
        self.num_completed += 1
        test_name = result.test_name

        result_message = self._result_message(result.type, result.failures, expected, self._options.verbose)

        if self._options.details:
            self._print_test_trace(result, exp_str, got_str)
        elif (self._options.verbose and not self._options.debug_rwt_logging) or not expected:
            self.writeln(self._test_status_line(test_name, result_message))
        elif self.num_completed == self.num_tests:
            self._meter.write_update('')
        else:
            if test_name == self._running_tests[0]:
                self._completed_tests.insert(0, [test_name, result_message])
            else:
                self._completed_tests.append([test_name, result_message])

            for test_name, result_message in self._completed_tests:
                self._meter.write_throttled_update(self._test_status_line(test_name, result_message))
            self._completed_tests = []
        # Use result.test_name here; the loop above may have rebound test_name
        # to a different completed test.
        self._running_tests.remove(result.test_name)

    def _result_message(self, result_type, failures, expected, verbose):
        exp_string = ' unexpectedly' if not expected else ''
        if result_type == test_expectations.PASS:
            return ' passed%s' % exp_string
        else:
            return ' failed%s (%s)' % (exp_string, ', '.join(failure.message() for failure in failures))
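
    # Illustrative return values (the failure text is made up): an expected
    # pass yields ' passed', while an unexpected failure yields something like
    # ' failed unexpectedly (text diff, image diff)'.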

    def _print_test_trace(self, result, exp_str, got_str):
        test_name = result.test_name
        self._print_default(self._test_status_line(test_name, ''))

        base = self._port.lookup_virtual_test_base(test_name)
        if base:
            args = ' '.join(self._port.lookup_virtual_test_args(test_name))
            self._print_default(' base: %s' % base)
            self._print_default(' args: %s' % args)

        for extension in ('.txt', '.png', '.wav', '.webarchive'):
            self._print_baseline(test_name, extension)

        self._print_default('  exp: %s' % exp_str)
        self._print_default('  got: %s' % got_str)
        self._print_default(' took: %-.3f' % result.test_run_time)
        self._print_default('')

    def _print_baseline(self, test_name, extension):
        baseline = self._port.expected_filename(test_name, extension)
        if self._port._filesystem.exists(baseline):
            relpath = self._port.relative_test_filename(baseline)
        else:
            relpath = '<none>'
        self._print_default('  %s: %s' % (extension[1:], relpath))

    def _print_unexpected_results(self, unexpected_results):
        # Prints to the buildbot stream.
        passes = {}
        flaky = {}
        regressions = {}

        def add_to_dict_of_lists(dict, key, value):
            dict.setdefault(key, []).append(value)

        def add_result(test, results, passes=passes, flaky=flaky, regressions=regressions):
            actual = results['actual'].split(" ")
            expected = results['expected'].split(" ")
            if actual == ['PASS']:
                if 'CRASH' in expected:
                    add_to_dict_of_lists(passes, 'Expected to crash, but passed', test)
                elif 'TIMEOUT' in expected:
                    add_to_dict_of_lists(passes, 'Expected to timeout, but passed', test)
                else:
                    add_to_dict_of_lists(passes, 'Expected to fail, but passed', test)
            elif len(actual) > 1:
                # We group flaky tests by the first actual result we got.
                add_to_dict_of_lists(flaky, actual[0], test)
            else:
                add_to_dict_of_lists(regressions, results['actual'], test)

        resultsjsonparser.for_each_test(unexpected_results['tests'], add_result)

        if len(passes) or len(flaky) or len(regressions):
            self._print_for_bot("")

        if len(passes):
            for key, tests in passes.iteritems():
                self._print_for_bot("%s: (%d)" % (key, len(tests)))
                tests.sort()
                for test in tests:
                    self._print_for_bot("  %s" % test)
                self._print_for_bot("")
            self._print_for_bot("")

        if len(flaky):
            descriptions = TestExpectations.EXPECTATION_DESCRIPTIONS
            for key, tests in flaky.iteritems():
                result = TestExpectations.EXPECTATIONS[key.lower()]
                self._print_for_bot("Unexpected flakiness: %s (%d)" % (descriptions[result], len(tests)))
                tests.sort()
                for test in tests:
                    result = resultsjsonparser.result_for_test(unexpected_results['tests'], test)
                    actual = result['actual'].split(" ")
                    expected = result['expected'].split(" ")
                    result = TestExpectations.EXPECTATIONS[key.lower()]
                    # FIXME: clean this up once the old syntax is gone
                    new_expectations_list = [TestExpectationParser._inverted_expectation_tokens[exp] for exp in list(set(actual) | set(expected))]
                    self._print_for_bot("  %s [ %s ]" % (test, " ".join(new_expectations_list)))
                self._print_for_bot("")
            self._print_for_bot("")

        if len(regressions):
            descriptions = TestExpectations.EXPECTATION_DESCRIPTIONS
            for key, tests in regressions.iteritems():
                result = TestExpectations.EXPECTATIONS[key.lower()]
                self._print_for_bot("Regressions: Unexpected %s (%d)" % (descriptions[result], len(tests)))
                tests.sort()
                for test in tests:
                    self._print_for_bot("  %s [ %s ]" % (test, TestExpectationParser._inverted_expectation_tokens[key]))
            self._print_for_bot("")

        if len(unexpected_results['tests']) and self._options.debug_rwt_logging:
            self._print_for_bot("%s" % ("-" * 78))
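
    # Illustrative buildbot output (the test names, counts, and expectation
    # descriptions -- which really come from EXPECTATION_DESCRIPTIONS -- are
    # made up here):
    #
    #   Expected to crash, but passed: (1)
    #     fast/forms/input-crash.html
    #
    #   Regressions: Unexpected failures (2)
    #     fast/html/article-element.html [ Failure ]
    #     fast/html/aside-element.html [ Failure ]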

    def _print_quiet(self, msg):
        self.writeln(msg)

    def _print_default(self, msg):
        if not self._options.quiet:
            self.writeln(msg)

    def _print_debug(self, msg):
        if self._options.debug_rwt_logging:
            self.writeln(msg)

    def _print_for_bot(self, msg):
        self._buildbot_stream.write(msg + "\n")

    def write_update(self, msg):
        self._meter.write_update(msg)

    def writeln(self, msg):
        self._meter.writeln(msg)