29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
1 |
#!/usr/bin/env python
|
2 |
||
58
by Michael Hope
Bumped the version as we're almost at 1.0. Tidied up the README. Added an overview comment to one of the scripts. |
3 |
"""Simple harness that benchmarks different variants of the routines,
caches the results, and emits all of the records at the end.

Results are generated for different values of:
 * Source
 * Routine
 * Length
 * Alignment
"""
|
|
12 |
||
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
13 |
import subprocess |
14 |
import math |
|
15 |
import sys |
|
16 |
||
32
by Michael Hope
Added support for different alignments. Flattened out the loops. |
17 |
# Path prefix of the benchmark executables; the variant name is appended
# to it to form the program that gets run.
build = '../build/try-'

# Routine list shared by the variants that provide every routine.
ALL = ' '.join((
    'memchr', 'memcmp', 'memcpy', 'memset',
    'strchr', 'strcmp', 'strcpy', 'strlen',
))

# Space separated names of the routines each variant provides.  A routine
# is only benchmarked for the variants that list it here.
HAS = {
    'this': 'bounce memchr memcpy memset strchr strcpy strlen',
    'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-c': ALL,
    'csl': 'memcpy memset',
    'glibc': 'memcpy memset strlen',
    'glibc-c': ALL,
    'newlib': 'memcpy strcmp strcpy strlen',
    'newlib-c': ALL,
    'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
    'plain': 'memset memcpy strcmp strcpy',
}

# Alignment arguments handed to the executables through '-a'.
# NOTE(review): the 'a:b' form presumably carries one alignment per buffer;
# which side is which is not visible here -- confirm against the executable.
BOUNCE_ALIGNMENTS = ['1']
SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']

# Alignment list to sweep for each routine.
ALIGNMENTS = {'bounce': BOUNCE_ALIGNMENTS}
ALIGNMENTS.update(dict.fromkeys(
    ('memchr', 'memset', 'strchr', 'strlen'), SINGLE_BUFFER_ALIGNMENTS))
ALIGNMENTS.update(dict.fromkeys(
    ('memcmp', 'memcpy', 'strcmp', 'strcpy'), DUAL_BUFFER_ALIGNMENTS))

# Number of recorded runs for every variant/routine/size/alignment.
NUM_RUNS = 5
53 |
||
54 |
def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
    """Perform a single run, exercising the cache as appropriate.

    The record is looked up in `cache` under the key
    'variant:function:bytes:loops:alignment:run_id'.  On a miss the
    executable `build + variant` is run with the -t/-c/-l/-a/-r options and
    its stripped output becomes the record.

    Arguments:
      cache: dict of key -> raw record line; updated in place.
      variant: implementation name, appended to the `build` prefix.
      function: routine name, passed as -t.
      bytes: passed as -c.
      loops: passed as -l.
      alignment: passed as -a.
      run_id: passed as -r.
      quiet: when true the record is not echoed to stdout.

    Returns:
      Field 7 (zero based) of the colon separated record as a float;
      presumably the elapsed time of the run.

    Raises:
      AssertionError: the executable could not be started.
      IndexError, ValueError: the record has no numeric field 7.
    """
    key = ':'.join('%s' % field for field in
                   (variant, function, bytes, loops, alignment, run_id))

    if key in cache:
        got = cache[key]
    else:
        # Build the argument vector directly rather than formatting a
        # string and splitting it again on whitespace.
        argv = ['%s' % arg for arg in (
            build + variant,
            '-t', function,
            '-c', bytes,
            '-l', loops,
            '-a', alignment,
            '-r', run_id,
        )]

        try:
            # universal_newlines gives text rather than bytes on Python 3.
            got = subprocess.check_output(
                argv, universal_newlines=True).strip()
        except OSError as ex:
            # Raised explicitly: an `assert False` would vanish under -O
            # and leave `got` unbound.
            raise AssertionError(
                'Error %s while running %s' % (ex, ' '.join(argv)))

    took = float(got.split(':')[7])

    # Only records that parsed successfully make it into the cache.
    cache[key] = got

    if not quiet:
        sys.stdout.write('%s\n' % got)
        sys.stdout.flush()

    return took
|
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
79 |
|
113
by Will Newton
Allow aligning source and destination buffers separately. |
80 |
def _calibrate(cache, variant, function, size, alignment):
    """Return the loop scale for one variant/function/alignment.

    Runs a quiet tracer at `size` and rescales the base loop budget so a
    run takes roughly `want` (presumably seconds, as reported in the
    record).  Not great for memchr() and similar which are O(n), but it
    will do.
    """
    f = 50000000
    want = 5.0

    loops = int(f / math.sqrt(max(1, size)))
    took = run(cache, variant, function, size, loops, alignment, 0,
               quiet=True)

    # A tracer reported as taking no time at all gets the largest factor
    # instead of dividing by zero.
    ratio = want / took if took != 0 else 20
    # Keep it reasonable for silly routines like bounce
    factor = min(20, max(0.05, ratio))
    f = f * factor

    # Round f to a few significant figures
    scale = 10 ** int(math.log10(f) - 1)
    return scale * int(f / scale)


def run_many(cache, variants, bytes, all_functions):
    """Benchmark each function for every alignment, variant and size.

    Arguments:
      cache: dict of cached records, passed through to run().
      variants: variant names (keys of HAS) to benchmark.
      bytes: sizes to test; sorted and de-duplicated here.
      all_functions: routines to run.  When empty, every routine named in
        HAS is used, starting with the ordering of the 'this' variant.

    Returns None.  Nothing is run when `bytes` is empty.
    """
    # We want the data to come out in a useful order.  So fix an
    # alignment and function, and do all sizes for a variant first.
    # Duplicate sizes would only replay identical records from the cache.
    sizes = sorted(set(bytes))
    if not sizes:
        return

    mid = sizes[int(len(sizes) / 1.5)]

    if not all_functions:
        # Use the ordering in 'this' as the default
        all_functions = HAS['this'].split()

        # Find all other functions
        for functions in HAS.values():
            for function in functions.split():
                if function not in all_functions:
                    all_functions.append(function)

    # Split every variant's routine list once, not once per loop iteration.
    provided = {variant: set(HAS[variant].split()) for variant in variants}

    for function in all_functions:
        for alignment in ALIGNMENTS[function]:
            for variant in variants:
                if function not in provided[variant]:
                    continue

                f = _calibrate(cache, variant, function, mid, alignment)

                for size in sizes:
                    # Figure out the number of loops to give a roughly
                    # consistent run
                    loops = int(f / math.sqrt(max(1, size)))
                    for run_id in range(NUM_RUNS):
                        run(cache, variant, function, size, loops,
                            alignment, run_id)
|
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
126 |
|
127 |
def run_top(cache):
    """Benchmark every variant in HAS over the standard set of sizes.

    The routines to run are taken from the command line (sys.argv[1:]);
    an empty list is passed through to run_many() unchanged.

    Arguments:
      cache: dict of cached records, passed through to run_many().
    """
    variants = sorted(HAS.keys())
    functions = sys.argv[1:]

    # Upper limit in bytes to test to
    top = 512 * 1024
    # Test all powers of 2
    step1 = 2.0
    # Test intermediate powers of 1.4
    step2 = 1.4

    # Collected in a set: the two series overlap (both start at 1, and the
    # low powers of 1.4 truncate to the same integers), and a repeated
    # size would have its records emitted more than once.
    sizes = set()

    for step in (step1, step2):
        if step:
            # Figure out how many steps get us up to the top
            steps = int(round(math.log(top) / math.log(step)))
            sizes.update(int(step ** x) for x in range(steps + 1))

    run_many(cache, variants, sorted(sizes), functions)
29
by Michael Hope
Added some scripts to run the benchmarks and to plot the results. |
147 |
|
148 |
def main():
    """Load the result cache, run the benchmarks and save the cache again.

    The cache lives in 'cache.txt' in the current directory, one record
    per line.  It is rewritten, sorted, even when the run is interrupted
    or fails, so finished results are kept.
    """
    cachename = 'cache.txt'

    cache = {}

    try:
        with open(cachename) as f:
            for line in f:
                line = line.strip()
                parts = line.split(':')
                # run() reads field 7 of every record, so a blank or
                # truncated line can never be a usable result.
                if len(parts) < 8:
                    continue
                # NOTE(review): this key always has 7 fields, while run()
                # joins 6 values and only reaches 7 when `alignment`
                # contains a ':'.  Confirm how single-buffer alignments
                # are echoed, otherwise those records never hit.
                cache[':'.join(parts[:7])] = line
    except (IOError, OSError):
        # No readable cache (e.g. first run): start with an empty one.
        pass

    try:
        run_top(cache)
    finally:
        with open(cachename, 'w') as f:
            for line in sorted(cache.values()):
                f.write('%s\n' % line)
168 |
||
30
by Michael Hope
Added more ranges. changed everything to MB/s. Account for the loop overhead. |
169 |
# Run the harness only when executed as a script, not when imported.
if __name__ == "__main__":
    main()