2
// floop14 generated by makeloops.py Thu Jun 30 16:44:56 2011
4
#include <blitz/vector2.h>
5
#include <blitz/array.h>
6
#include <random/uniform.h>
7
#include <blitz/benchext.h>
9
#ifdef BZ_HAVE_VALARRAY
10
#define BENCHMARK_VALARRAY
13
#ifdef BENCHMARK_VALARRAY
21
BZ_USING_NAMESPACE(blitz)
22
BZ_USING_NAMESPACE(std)
24
// Rename the four Fortran kernel identifiers used in this file to the symbol
// spelling selected by whichever BZ_FORTRAN_SYMBOLS_* macro is defined:
// one trailing underscore, two trailing underscores, or all upper case.
#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)
25
#define floop14_f77 floop14_f77_
26
#define floop14_f77overhead floop14_f77overhead_
27
#define floop14_f90 floop14_f90_
28
#define floop14_f90overhead floop14_f90overhead_
29
#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)
30
#define floop14_f77 floop14_f77__
31
#define floop14_f77overhead floop14_f77overhead__
32
#define floop14_f90 floop14_f90__
33
#define floop14_f90overhead floop14_f90overhead__
34
#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)
35
#define floop14_f77 FLOOP14_F77
36
#define floop14_f77overhead FLOOP14_F77OVERHEAD
37
#define floop14_f90 FLOOP14_F90
38
#define floop14_f90overhead FLOOP14_F90OVERHEAD
42
// Prototypes for the F77 and F90 kernels and their "overhead" counterparts.
// Each takes the element count N by const reference and four float buffers
// y, x, a, b.
// NOTE(review): presumably implemented in separately compiled Fortran and
// declared inside an extern "C" block that is not visible here -- confirm.
void floop14_f77(const int& N, float* y, float* x, float* a, float* b);
43
void floop14_f77overhead(const int& N, float* y, float* x, float* a, float* b);
44
void floop14_f90(const int& N, float* y, float* x, float* a, float* b);
45
void floop14_f90overhead(const int& N, float* y, float* x, float* a, float* b);
49
// Forward declarations of the per-implementation benchmark drivers. Each one
// receives the shared BenchmarkExt<int> harness by reference.
void VectorVersion(BenchmarkExt<int>& bench);
50
void ArrayVersion(BenchmarkExt<int>& bench);
51
void ArrayVersion_unaligned(BenchmarkExt<int>& bench);
52
void ArrayVersion_misaligned(BenchmarkExt<int>& bench);
53
void ArrayVersion_index(BenchmarkExt<int>& bench);
54
void doTinyVectorVersion(BenchmarkExt<int>& bench);
55
void F77Version(BenchmarkExt<int>& bench);
57
void F90Version(BenchmarkExt<int>& bench);
59
// The valarray driver is only declared when BENCHMARK_VALARRAY is defined.
#ifdef BENCHMARK_VALARRAY
60
void ValarrayVersion(BenchmarkExt<int>& bench);
63
// Number of problem sizes: passed to bench.setNumParameters() and used as the
// length of the parameters / iters / flops arrays.
const int numSizes = 80;
64
// When true, one extra benchmark is counted in numBenchmarks.
const bool runvector=false; // no point as long as Vector is Array<1>
68
// Driver body: configures the benchmark harness, runs each implementation and
// saves the results as a Matlab graph.
// NOTE(review): the enclosing function header is not visible in this view.

// Count of implementations reported to the harness; one more if runvector.
int numBenchmarks = 5;
69
if (runvector) numBenchmarks++;
70
#ifdef BENCHMARK_VALARRAY
77
BenchmarkExt<int> bench("floop14: $x = $a+$b; $y = $a-$b", numBenchmarks);
79
bench.setNumParameters(numSizes);
81
Array<int,1> parameters(numSizes);
82
Array<long,1> iters(numSizes);
83
Array<double,1> flops(numSizes);
85
// Problem size for index i is (2^0.25)^i + i, stored into an int array.
parameters=pow(pow(2.,0.25),tensor::i)+tensor::i;
86
// Two operations per element (the description string has one add and one
// subtract).
flops = 2 * parameters;
87
// Iteration count is 1e8 divided by the per-iteration op count ...
iters = 100000000L / flops;
88
// ... but never fewer than 2 iterations.
iters = where(iters<2, 2, iters);
89
cout << iters << endl;
91
bench.setParameterVector(parameters);
92
bench.setIterations(iters);
93
bench.setOpsPerIteration(flops);
94
bench.setDependentVariable("flops");
95
bench.beginBenchmarking();
100
// Run the individual implementations against the shared harness.
ArrayVersion_unaligned(bench);
101
ArrayVersion_misaligned(bench);
102
ArrayVersion_index(bench);
103
//doTinyVectorVersion(bench);
108
#ifdef BENCHMARK_VALARRAY
109
ValarrayVersion(bench);
113
// NOTE(review): presumably guarded by `runvector` on a line not visible
// here -- confirm.
VectorVersion(bench);
115
bench.endBenchmarking();
117
bench.saveMatlabGraph("floop14.m");
122
// Writes numElements values drawn from a ranlib::Uniform<T> generator into
// data, at positions 0, stride, 2*stride, ...
// Despite the name, the element type is T, not necessarily double.
// NOTE(review): the template header declaring T is not visible in this view.
void initializeRandomDouble(T* data, int numElements, int stride = 1)
124
ranlib::Uniform<T> rnd;
126
for (int i=0; i < numElements; ++i)
127
// i*stride is multiplied as int and only then converted to size_t.
data[size_t(i*stride)] = rnd.random();
131
// valarray overload: writes numElements values drawn from a
// ranlib::Uniform<T> generator into data, at positions 0, stride, 2*stride, ...
// NOTE(review): the template header declaring T is not visible in this view.
void initializeRandomDouble(valarray<T>& data, int numElements, int stride = 1)
133
ranlib::Uniform<T> rnd;
135
for (int i=0; i < numElements; ++i)
136
// i*stride is multiplied as int and only then converted to size_t.
data[size_t(i*stride)] = rnd.random();
139
// Benchmark driver registered with the harness as "Vector<T>". For every
// parameter setting the harness supplies it reads N and the iteration count,
// fills y, x, a and b with random values, runs a loop of `iters` iterations,
// then runs a second loop of the same length between startOverhead() and
// stopOverhead().
// NOTE(review): the declarations of y, x, a, b and both loop bodies are not
// visible in this view.
void VectorVersion(BenchmarkExt<int>& bench)
141
bench.beginImplementation("Vector<T>");
143
while (!bench.doneImplementationBenchmark())
145
int N = bench.getParameter();
146
long iters = bench.getIterations();
148
cout << bench.currentImplementation() << ": N = " << N << endl;
151
// Fill the first N elements of each operand through its data() pointer.
initializeRandomDouble(y.data(), N);
153
initializeRandomDouble(x.data(), N);
155
initializeRandomDouble(a.data(), N);
157
initializeRandomDouble(b.data(), N);
161
for (long i=0; i < iters; ++i)
168
// Overhead pass: same iteration count, bracketed by start/stopOverhead.
bench.startOverhead();
169
for (long i=0; i < iters; ++i) {
173
bench.stopOverhead();
176
bench.endImplementation();
180
// Benchmark driver registered with the harness as "Array<T,1>". For every
// parameter setting it reads N and the iteration count, fills y, x, a and b
// with random values, runs a loop of `iters` iterations, then runs a second
// loop of the same length between startOverhead() and stopOverhead().
// NOTE(review): the declarations of y, x, a, b and both loop bodies are not
// visible in this view.
void ArrayVersion(BenchmarkExt<int>& bench)
182
bench.beginImplementation("Array<T,1>");
184
while (!bench.doneImplementationBenchmark())
186
int N = bench.getParameter();
187
long iters = bench.getIterations();
189
cout << bench.currentImplementation() << ": N = " << N << endl;
192
// Fill N elements of each operand starting at its dataFirst() pointer.
initializeRandomDouble(y.dataFirst(), N);
194
initializeRandomDouble(x.dataFirst(), N);
196
initializeRandomDouble(a.dataFirst(), N);
198
initializeRandomDouble(b.dataFirst(), N);
202
for (long i=0; i < iters; ++i)
209
// Overhead pass: same iteration count, bracketed by start/stopOverhead.
bench.startOverhead();
210
for (long i=0; i < iters; ++i) {
214
bench.stopOverhead();
217
bench.endImplementation();
221
// Benchmark driver registered as "Array<T,1> (indexexpr.)". Same structure as
// the other Array drivers, but the kernel indexes a and b with the tensor::i
// placeholder.
// NOTE(review): the declarations of y, x, a, b and the overhead loop body are
// not visible in this view.
void ArrayVersion_index(BenchmarkExt<int>& bench)
223
bench.beginImplementation("Array<T,1> (indexexpr.)");
225
while (!bench.doneImplementationBenchmark())
227
int N = bench.getParameter();
228
long iters = bench.getIterations();
230
cout << bench.currentImplementation() << ": N = " << N << endl;
233
// Fill N elements of each operand starting at its dataFirst() pointer.
initializeRandomDouble(y.dataFirst(), N);
235
initializeRandomDouble(x.dataFirst(), N);
237
initializeRandomDouble(a.dataFirst(), N);
239
initializeRandomDouble(b.dataFirst(), N);
243
for (long i=0; i < iters; ++i)
245
// Kernel: x receives a+b and y receives a-b, via index expressions.
x = a(tensor::i)+b(tensor::i); y = a(tensor::i)-b(tensor::i);;
250
// Overhead pass: same iteration count, bracketed by start/stopOverhead.
bench.startOverhead();
251
for (long i=0; i < iters; ++i) {
255
bench.stopOverhead();
258
bench.endImplementation();
261
// Benchmark driver registered as "Array<T,1> (unal.)". Each operand is built
// from elements 1..N of an (N+1)-element float array, so every operand starts
// one element past the beginning of its backing array.
// NOTE(review): presumably the operands are views sharing the *fill storage
// rather than copies -- confirm against the Blitz Array constructor.
// NOTE(review): both loop bodies are not visible in this view.
void ArrayVersion_unaligned(BenchmarkExt<int>& bench)
263
bench.beginImplementation("Array<T,1> (unal.)");
265
while (!bench.doneImplementationBenchmark())
267
int N = bench.getParameter();
268
long iters = bench.getIterations();
270
cout << bench.currentImplementation() << ": N = " << N << endl;
273
// Backing array of N+1 floats; operand covers indices 1..N.
Array<float,1> yfill(N+1);
274
Array<float,1> y(yfill(Range(1,N)));
275
initializeRandomDouble(y.dataFirst(), N);
277
Array<float,1> xfill(N+1);
278
Array<float,1> x(xfill(Range(1,N)));
279
initializeRandomDouble(x.dataFirst(), N);
281
Array<float,1> afill(N+1);
282
Array<float,1> a(afill(Range(1,N)));
283
initializeRandomDouble(a.dataFirst(), N);
285
Array<float,1> bfill(N+1);
286
Array<float,1> b(bfill(Range(1,N)));
287
initializeRandomDouble(b.dataFirst(), N);
291
for (long i=0; i < iters; ++i)
298
// Overhead pass: same iteration count, bracketed by start/stopOverhead.
bench.startOverhead();
299
for (long i=0; i < iters; ++i) {
303
bench.stopOverhead();
306
bench.endImplementation();
309
// Benchmark driver registered as "Array<T,1> (misal.)". Each operand is built
// from an N-element range of an (N+4)-element float array, with a different
// starting index per operand: y at 0, x at 1, a at 2, b at 3.
// NOTE(review): presumably the operands are views sharing the *fill storage
// rather than copies -- confirm against the Blitz Array constructor.
// NOTE(review): both loop bodies are not visible in this view.
void ArrayVersion_misaligned(BenchmarkExt<int>& bench)
311
bench.beginImplementation("Array<T,1> (misal.)");
313
while (!bench.doneImplementationBenchmark())
315
int N = bench.getParameter();
316
long iters = bench.getIterations();
318
cout << bench.currentImplementation() << ": N = " << N << endl;
321
// y: indices 0..N-1 of its backing array.
Array<float,1> yfill(N+4);
322
Array<float,1> y(yfill(Range(0,N+0-1)));
323
initializeRandomDouble(y.dataFirst(), N);
325
// x: indices 1..N.
Array<float,1> xfill(N+4);
326
Array<float,1> x(xfill(Range(1,N+1-1)));
327
initializeRandomDouble(x.dataFirst(), N);
329
// a: indices 2..N+1.
Array<float,1> afill(N+4);
330
Array<float,1> a(afill(Range(2,N+2-1)));
331
initializeRandomDouble(a.dataFirst(), N);
333
// b: indices 3..N+2.
Array<float,1> bfill(N+4);
334
Array<float,1> b(bfill(Range(3,N+3-1)));
335
initializeRandomDouble(b.dataFirst(), N);
339
for (long i=0; i < iters; ++i)
346
// Overhead pass: same iteration count, bracketed by start/stopOverhead.
bench.startOverhead();
347
for (long i=0; i < iters; ++i) {
351
bench.stopOverhead();
354
bench.endImplementation();
357
#ifdef BENCHMARK_VALARRAY
358
// Benchmark driver registered as "valarray<T>". For every parameter setting
// it creates four valarray<float> operands of length N, fills them with
// random values, runs a loop of `iters` iterations, then runs a second loop
// of the same length between startOverhead() and stopOverhead().
// NOTE(review): both loop bodies are not visible in this view.
void ValarrayVersion(BenchmarkExt<int>& bench)
360
bench.beginImplementation("valarray<T>");
362
while (!bench.doneImplementationBenchmark())
364
int N = bench.getParameter();
365
cout << bench.currentImplementation() << ": N = " << N << endl;
367
long iters = bench.getIterations();
369
// Operands are filled through the valarray overload of the initializer.
valarray<float> y(N);
370
initializeRandomDouble(y, N);
371
valarray<float> x(N);
372
initializeRandomDouble(x, N);
373
valarray<float> a(N);
374
initializeRandomDouble(a, N);
375
valarray<float> b(N);
376
initializeRandomDouble(b, N);
380
for (long i=0; i < iters; ++i)
387
// Overhead pass: same iteration count, bracketed by start/stopOverhead.
bench.startOverhead();
388
for (long i=0; i < iters; ++i) {
391
bench.stopOverhead();
394
bench.endImplementation();
398
// Benchmark driver registered as "Fortran 77". For every parameter setting it
// allocates four float buffers of length N with new[], fills them with random
// values, calls floop14_f77 `iters` times, then calls floop14_f77overhead
// `iters` times between startOverhead() and stopOverhead().
// NOTE(review): no delete[] for y, x, a, b is visible in this view -- confirm
// the buffers are released on lines not shown here.
void F77Version(BenchmarkExt<int>& bench)
400
bench.beginImplementation("Fortran 77");
402
while (!bench.doneImplementationBenchmark())
404
int N = bench.getParameter();
405
cout << bench.currentImplementation() << ": N = " << N << endl;
407
// Stored as int here, whereas the C++ drivers in this file use long.
int iters = bench.getIterations();
409
float* y = new float[N];
410
initializeRandomDouble(y, N);
411
float* x = new float[N];
412
initializeRandomDouble(x, N);
413
float* a = new float[N];
414
initializeRandomDouble(a, N);
415
float* b = new float[N];
416
initializeRandomDouble(b, N);
420
// Kernel pass.
for (int iter=0; iter < iters; ++iter)
421
floop14_f77(N, y, x, a, b);
424
// Overhead pass: same call count, using the overhead kernel.
bench.startOverhead();
425
for (int iter=0; iter < iters; ++iter)
426
floop14_f77overhead(N, y, x, a, b);
428
bench.stopOverhead();
437
bench.endImplementation();
441
// Benchmark driver registered as "Fortran 90". For every parameter setting it
// allocates four float buffers of length N with new[], fills them with random
// values, calls floop14_f90 `iters` times, then calls floop14_f90overhead
// `iters` times between startOverhead() and stopOverhead().
// NOTE(review): no delete[] for y, x, a, b is visible in this view -- confirm
// the buffers are released on lines not shown here.
void F90Version(BenchmarkExt<int>& bench)
443
bench.beginImplementation("Fortran 90");
445
while (!bench.doneImplementationBenchmark())
447
int N = bench.getParameter();
448
cout << bench.currentImplementation() << ": N = " << N << endl;
450
// Stored as int here, whereas the C++ drivers in this file use long.
int iters = bench.getIterations();
452
float* y = new float[N];
453
initializeRandomDouble(y, N);
454
float* x = new float[N];
455
initializeRandomDouble(x, N);
456
float* a = new float[N];
457
initializeRandomDouble(a, N);
458
float* b = new float[N];
459
initializeRandomDouble(b, N);
463
// Kernel pass.
for (int iter=0; iter < iters; ++iter)
464
floop14_f90(N, y, x, a, b);
467
// Overhead pass: same call count, using the overhead kernel.
bench.startOverhead();
468
for (int iter=0; iter < iters; ++iter)
469
floop14_f90overhead(N, y, x, a, b);
471
bench.stopOverhead();
479
bench.endImplementation();