19
19
'csl': 'memcpy memset'
22
ORDER = 'this'.split()
24
def run(cache, variant, function, bytes, loops, alignment=8):
22
def run(cache, variant, function, bytes, loops, alignment=8, quiet=False):
25
23
"""Perform a single run, exercising the cache as appropriate."""
26
24
key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment))
32
30
cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s' % locals()
35
33
got = subprocess.check_output(cmd.split()).strip()
38
34
except OSError, ex:
39
35
assert False, 'Error %s while running %s' % (ex, cmd)
37
parts = got.split(':')
38
took = float(parts[5])
44
48
def run_many(cache, variants, bytes, alignments):
45
for variant in variants:
46
functions = HAS[variant].split()
48
for function in functions:
49
for alignment in alignments:
49
# We want the data to come out in a useful order. So fix an
50
# alignment and function, and do all sizes for a variant first
52
mid = bytes[len(bytes)/2]
54
# Use the ordering in 'this' as the default
55
all_functions = HAS['this'].split()
57
# Find all other functions
58
for functions in HAS.values():
59
for function in functions.split():
60
if function not in all_functions:
61
all_functions.append(function)
63
for alignment in alignments:
64
for function in all_functions:
65
for variant in variants:
66
if function not in HAS[variant].split():
69
# Run a tracer through and see how long it takes and
70
# adjust the number of loops based on that. Not great
71
# for memchr() and similar which are O(n), but it will
76
loops = int(f / math.sqrt(max(1, mid)))
77
took = run(cache, variant, function, mid, loops, alignment, quiet=True)
78
# Keep it reasonable for silly routines like bounce
79
factor = min(20, max(0.05, want/took))
82
# Round f to a few significant figures
83
scale = 10**int(math.log10(f) - 1)
84
f = scale*int(f/scale)
50
86
for b in sorted(bytes):
51
87
# Figure out the number of loops to give a roughly consistent run
52
loops = int(50000000*5 / math.sqrt(b))
88
loops = int(f / math.sqrt(max(1, b)))
53
89
run(cache, variant, function, b, loops, alignment)
55
91
def run_top(cache):
56
92
variants = sorted(HAS.keys())
59
bytes.update([2**x for x in range(0, 14)])
60
# bytes.extend([2**x - 1 for x in range(1, 14)])
61
# bytes.extend([int(1.3*x) for x in range(1, 45)])
63
alignments = [8, 16] #1, 2, 4, 8, 16, 32]
94
# Upper limit in bytes to test to
96
# Test all powers of 2
98
# Test intermediate powers of 1.4
101
# Figure out how many steps get us up to the top
102
steps1 = int(round(math.log(top) / math.log(step1)))
103
steps2 = int(round(math.log(top) / math.log(step2)))
106
bytes.extend([int(step1**x) for x in range(0, steps1+1)])
107
bytes.extend([int(step2**x) for x in range(0, steps2+1)])
109
alignments = [8, 16, 4, 1, 2, 32]
66
111
run_many(cache, variants, bytes, alignments)