41
43
ap->memA = align ? (void*) ((((size_t) cp)/align)*align + align) : cp;
43
45
* Misalign to misalign
46
* We often need to make sure two unaligned addresses share the same modulo
47
* so that they have the *same* degree of misalignment (so that their alignment
48
* can be fixed by simple peeling), and so in this case force the address
49
* modulo the misalign to be the exact align value.
47
if (((size_t)ap->memA)%misalign == 0)
48
ap->memA = ((char*)ap->memA) + align;
52
ap->memA = (void*)((((size_t)ap->memA)/malign)*malign + malign + align);
117
120
for (ap=allocQ; ap && ap->memA != ptr; ap = ap->next) prev = ap;
120
fprintf(stderr, "Couldn't find mem=%ld\nmemQ=\n", ptr);
123
fprintf(stderr, "Couldn't find mem=%ld\nmemQ=\n", (size_t)ptr);
121
124
for (ap=allocQ; ap; ap = ap->next)
122
fprintf(stderr, " %ld, %ld\n", ap->memA, ap->mem);
125
fprintf(stderr, " %ld, %ld\n", (size_t)ap->memA,
125
129
if (ap == allocQ)
198
202
#define TEST_DOT ATL_DOT
205
double DoOneTiming(int N, int nkflop, int cachesize, int incX, int incY)
207
* This method of timing can be used when we have a cycle-accurate timer
208
* available so we don't need to call routine multiple times to get above
213
TYPE TEST_DOT(const int N, const TYPE *X, const int incX,
214
TYPE *Y, const int incY);
217
void TEST_DOT(const int N, const TYPE *X, const int incX,
218
TYPE *Y, const int incY, TYPE *dotc);
222
double *W, dtmp, t0, t1;
224
const int FLBYADDR = (nkflop < 0);
227
Nx = (incX >= 0) ? incX : -incX;
229
X = FA_malloc(ATL_sizeof*Nx, FAx, MAx);
231
Ny = (incY >= 0) ? incY : -incY;
233
Y = FA_malloc(ATL_sizeof*Ny, FAy, MAy);
236
for (i=0; i < N; i++)
238
X[i*incX] = dumb_rand();
239
Y[i*incY] = dumb_rand();
245
ATL_flushCacheByAddr(Nx*ATL_sizeof, X);
246
ATL_flushCacheByAddr(Ny*ATL_sizeof, Y);
248
fprintf(stderr, "No flush by address!!\n");
254
dtmp = ATL_flushcache(cachesize);
255
dtmp += ATL_flushcache(-1);
259
dot = TEST_DOT(N, X, incX, Y, incY);
261
TEST_DOT(N, X, incX, Y, incY, dot);
265
FA_free(X, FAx, MAx);
266
FA_free(Y, FAy, MAy);
201
269
double DoTiming(int N, int nkflop, int cachesize, int incX, int incY)
268
336
for (i=0; i < nrep; i++)
339
tims[i] = DoOneTiming(N, nkflop, cachesize, incX, incY);
270
341
tims[i] = DoTiming(N, nkflop, cachesize, incX, incY);
271
342
fprintf(stdout, " N=%d, tim=%e\n", N, tims[i]);
528
599
FAy = sizeof(TYPE);
531
main(int nargs, char **args)
602
int main(int nargs, char **args)
534
605
int nN, nkflops, cachesize, incX, incY;
536
607
GetFlags(nargs, args, &nN, &Ns, &nkflops, &cachesize, &fout, &incX, &incY);
537
608
DoTimings(nN, Ns, nkflops, cachesize, fout, incX, incY);