29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
1 |
#!/usr/bin/env python
|
2 |
||
58
by Michael Hope
Bumped the version as we're almost at 1.0. Tidied up the README. Added an overview comment to one of the scripts. |
3 |
"""Simple harness that benchmarks different variants of the routines,
|
4 |
caches the results, and emits all of the records at the end.

Results are generated for different values of:
 * Source
 * Routine
 * Length
 * Alignment
"""
|
|
12 |
||
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
13 |
import subprocess |
14 |
import math |
|
15 |
import sys |
|
16 |
||
32
by Michael Hope
Added support for different alignments. Flattened out the loops. |
17 |
# Prefix to the executables; run() appends the variant name to this to
# form the path of the binary it invokes.
build = '../build/try-'

# Routine list shared by the variants below that map to ALL.
ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'

# Maps each variant name to a space-separated list of the routines it
# provides.  Consumers call .split() on the value, so keep these as strings.
HAS = {
    'this': 'bounce memchr memcpy memset strchr strcpy strlen',
    'bionic': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-c': ALL,
    'csl': 'memcpy memset',
    'glibc': 'memcpy memset strlen',
    'glibc-c': ALL,
    'newlib': 'memcpy strcmp strcpy strlen',
    'newlib-c': ALL,
    'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
    'plain': 'memset memcpy strcmp strcpy',
}
|
|
34 |
||
49
by Michael Hope
Benchmark more sizes and alignments. Make the figures bigger to make the text smaller. Put the 'this' results first. |
35 |
def run(cache, variant, function, bytes, loops, alignment=8, quiet=False):
    """Perform a single run, exercising the cache as appropriate.

    A run is identified by a key made of variant, function, bytes, loops
    and alignment joined with ':'.  If that key is already in `cache` the
    stored record is reused; otherwise the executable named by the `build`
    prefix plus `variant` is invoked and its stripped output becomes the
    record.  The record is stored in `cache` only after its timing field
    has been parsed successfully.

    Arguments:
      cache: dict mapping run keys to result records; updated in place.
      variant: name appended to the `build` prefix to pick the executable.
      function: routine name, passed to the executable via -t.
      bytes: passed via -c (the name shadows the builtin, but it is part
        of the signature and is kept for callers).
      loops: passed via -l.
      alignment: passed via -a.
      quiet: when true, the record is not echoed to stdout.

    Returns:
      The sixth ':'-separated field of the record as a float (presumably
      the elapsed time -- confirm against the benchmark binary).

    Raises:
      AssertionError: the executable could not be started.
      subprocess.CalledProcessError: the executable exited non-zero.
    """
    key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment))

    if key in cache:
        got = cache[key]
    else:
        # Build the argument vector directly instead of splitting a
        # formatted string, so whitespace in the prefix cannot break it.
        args = [
            '%s%s' % (build, variant),
            '-t', '%s' % function,
            '-c', '%s' % bytes,
            '-l', '%s' % loops,
            '-a', '%s' % alignment,
        ]
        cmd = ' '.join(args)

        try:
            # universal_newlines makes the output text rather than bytes on
            # Python 3, so the ':' split below works on both 2.7 and 3.
            got = subprocess.check_output(args, universal_newlines=True).strip()
        except OSError as ex:
            # Raised explicitly (not via `assert`) so the failure is still
            # reported when Python runs with -O; the type is unchanged.
            raise AssertionError('Error %s while running %s' % (ex, cmd))

    parts = got.split(':')
    took = float(parts[5])

    cache[key] = got

    if not quiet:
        # Parenthesised single argument prints the same on Python 2 and 3.
        print(got)
        sys.stdout.flush()

    return took
|
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
60 |
|
32
by Michael Hope
Added support for different alignments. Flattened out the loops. |
61 |
def run_many(cache, variants, bytes, alignments):
    """Run every routine each variant provides, for all sizes and alignments.

    The data should come out in a useful order, so an alignment and a
    function are fixed and all sizes are done for one variant before
    moving on to the next.  Functions are visited in the order listed for
    'this' in HAS, followed by any functions found only in other variants.

    For each (alignment, function, variant) a quiet tracer run at the
    median size is made first; its reported time is used to scale the loop
    count so that the real runs take roughly `want` (presumably seconds --
    confirm against the benchmark binary).

    Arguments:
      cache: dict of cached records, passed through to run().
      variants: names of the variants to run; each must be a key of HAS.
      bytes: sizes to test; sorted before use.  Nothing is run if empty.
      alignments: alignments to test, in the order they should be run.
    """
    sizes = sorted(bytes)
    if not sizes:
        # Nothing to measure; also avoids indexing an empty list below.
        return

    # Floor division keeps the index an int on Python 3 as well.
    mid = sizes[len(sizes) // 2]

    # Use the ordering in 'this' as the default
    all_functions = HAS['this'].split()

    # Find all other functions
    for functions in HAS.values():
        for function in functions.split():
            if function not in all_functions:
                all_functions.append(function)

    # Split each variant's routine list once instead of on every iteration.
    variants = list(variants)
    provided = dict((variant, HAS[variant].split()) for variant in variants)

    for alignment in alignments:
        for function in all_functions:
            for variant in variants:
                if function not in provided[variant]:
                    continue

                # Run a tracer through and see how long it takes and
                # adjust the number of loops based on that.  Not great
                # for memchr() and similar which are O(n), but it will
                # do
                f = 50000000
                want = 5.0

                loops = int(f / math.sqrt(max(1, mid)))
                took = run(cache, variant, function, mid, loops, alignment, quiet=True)
                # Keep it reasonable for silly routines like bounce.  A
                # tracer that reports no elapsed time gets the maximum
                # factor rather than a division by zero.
                if took > 0:
                    factor = min(20, max(0.05, want / took))
                else:
                    factor = 20
                f = f * factor

                # Round f to a few significant figures
                scale = 10 ** int(math.log10(f) - 1)
                f = scale * int(f / scale)

                for b in sizes:
                    # Figure out the number of loops to give a roughly
                    # consistent run
                    loops = int(f / math.sqrt(max(1, b)))
                    run(cache, variant, function, b, loops, alignment)
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
103 |
|
104 |
def run_top(cache):
    """Build the list of sizes and alignments and run every variant in HAS.

    Sizes are the integer parts of successive powers of each ratio, from
    exponent 0 up to the exponent that reaches the upper limit.  The list
    is handed to run_many() as generated, duplicates included.
    """
    # Upper limit in bytes to test to
    limit = 512 * 1024
    # Test all powers of 2, then intermediate powers of 1.4
    ratios = (2.0, 1.4)

    sizes = []
    for ratio in ratios:
        if not ratio:
            # A falsy ratio disables that series.
            continue
        # Figure out how many steps get us up to the top
        count = int(round(math.log(limit) / math.log(ratio)))
        for exponent in range(count + 1):
            sizes.append(int(ratio ** exponent))

    run_many(cache, sorted(HAS), sizes, [8, 16, 4, 1, 2, 32])
|
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
125 |
|
126 |
def main():
    """Load the cache file, run all benchmarks, then write the cache back.

    Each non-blank line of 'cache.txt' is one record; its first five
    ':'-separated fields form the cache key.  The cache is rewritten, with
    records sorted, even if the benchmark run raises or is interrupted, so
    completed results are kept.
    """
    cachename = 'cache.txt'

    cache = {}

    try:
        with open(cachename) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Skip blank lines so they do not become an empty-key
                    # record that is written back out.
                    continue
                parts = line.split(':')
                cache[':'.join(parts[:5])] = line
    except (IOError, OSError):
        # No readable cache yet: start with an empty one.  Anything else
        # (e.g. KeyboardInterrupt) is deliberately not swallowed.
        pass

    try:
        run_top(cache)
    finally:
        with open(cachename, 'w') as f:
            for line in sorted(cache.values()):
                f.write(line + '\n')
146 |
||
30
by Michael Hope
Added more ranges. changed everything to MB/s. Account for the loop overhead. |
147 |
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == '__main__':
    main()