1
#ifndef ATLAS_MVTESTTIME_H
2
#define ATLAS_MVTESTTIME_H
4
#include "atlas_mvparse.h"
5
#include "atlas_gentesttime.h"
7
/*
 * SprintAlignStr: writes an ` align="..."` argument into str with sprintf.
 * The quoted text is made of -Fa, -Fx and -Fy entries whose values come from
 * kp->alignA, kp->alignX and kp->alignY (a zero field falls back to `size`)
 * and, when MVF_ALIGNX2A is set, from one shared value derived from
 * ATL_Cachelen.  `i` accumulates the sprintf character counts.
 * NOTE(review): this view of the routine has gaps (the embedded original
 * line numbers skip values), so braces, else-branches and the return
 * statement are not visible; the notes below cover only what is shown.
 */
static int SprintAlignStr
9
char pre, /* precision modifier */
10
ATL_mvnode_t *kp, /* kernel pointer */
11
char *str /* (OUTPUT) string to print to */
14
* Prints alignment string to str, based on alignment setting.
15
* This routine assumes to force the alignment to sizeof(TYPE) unless
16
* restrictions are enabled.
17
* RETURNS: number of chars added to str
20
/* NOTE(review): the ':' arm of this conditional is not visible in this view */
const int size = (pre == 'd' || pre == 's') ? pre2size(pre)
22
int alignA, alignX, alignY, i;
24
/* A zero alignment field in the kernel node falls back to size */
alignA = (kp->alignA) ? kp->alignA : size;
25
alignX = (kp->alignX) ? kp->alignX : size;
26
alignY = (kp->alignY) ? kp->alignY : size;
28
* If we are doing an AXPY-based No-Trans GEMV, it is Y, not X that must
29
* be aligned to A. Therefore, alignX/ALIGNX2A actually refer to Y, and
30
* alignY refers to X. Do this confusing transpose for the non-trans case.
32
if (kp->TA == AtlasNoTrans) /* ALIGNX* affects Y for No-Trans */
37
if (FLAG_IS_SET(kp->flag, MVF_ALIGNX2A))
38
alignY = alignA; /* ALIGNX2A overrides alignY */
40
else if (FLAG_IS_SET(kp->flag, MVF_ALIGNX2A))
43
* If ALIGNX2A, we must force to vectors to have the same remainder when
44
* divided by the vector length. We do this by insisting they have the
45
* the same modulo by ATL_Cachelen, which by definition is always a
46
* multiple of the vector length (eg, veclen=16/32 (SSE/AVX), ATL_cl=32).
48
if (FLAG_IS_SET(kp->flag, MVF_ALIGNX2A))
50
/* Shared value for A and the A-aligned vector starts as ATL_Cachelen-size */
int myalign = ATL_Cachelen - size;
53
/* Round it down to a multiple of alignA (guard for this step not visible) */
myalign = ((ATL_Cachelen-size)/alignA)*alignA;
57
/* No-Trans: A and Y both get myalign, X is emitted from alignX */
if (kp->TA == AtlasNoTrans) /* ALIGNX* affects Y for No-Trans */
59
/*
 * Each operand may get a second, negated value after the first one;
 * presumably the negative form is a bound understood by the tester's -F
 * flag parser -- TODO confirm.  The two sprintf calls below look like
 * if/else alternatives, but the else keyword is not visible here.
 */
if (myalign < ATL_Cachelen)
60
i = sprintf(str, " align=\"-Fa %d -Fa -%d -Fy %d -Fy -%d",
61
myalign, ATL_Cachelen, myalign, ATL_Cachelen);
63
i = sprintf(str, " align=\"-Fa %d -Fy %d", myalign, myalign);
64
/* The last sprintf on this path also writes the closing quote */
if (2*alignX <= ATL_Cachelen)
65
i += sprintf(str+i, " -Fx %d -Fx -%d\"", alignX, 2*alignX);
67
i += sprintf(str+i, " -Fx %d\"", alignX);
71
/* Mirror image of the path above: A and X get myalign, Y uses alignY */
if (myalign < ATL_Cachelen)
72
i = sprintf(str, " align=\"-Fa %d -Fa -%d -Fx %d -Fx -%d",
73
myalign, ATL_Cachelen, myalign, ATL_Cachelen);
75
i = sprintf(str, " align=\"-Fa %d -Fx %d", myalign, myalign);
77
if (2*alignY <= ATL_Cachelen)
78
i += sprintf(str+i, " -Fy %d -Fy -%d\"", alignY, 2*alignY);
80
i += sprintf(str+i, " -Fy %d\"", alignY);
85
/*
 * Presumably the path taken without ALIGNX2A (branch structure not visible):
 * A, X and Y each get their own value, plus a negated 2*align entry when
 * 2*align <= ATL_Cachelen.  The Y entry closes the quoted string.
 */
if (2*alignA <= ATL_Cachelen)
86
i = sprintf(str, " align=\"-Fa %d -Fa -%d", alignA, 2*alignA);
88
i = sprintf(str, " align=\"-Fa %d ", alignA);
89
if (2*alignX <= ATL_Cachelen)
90
i += sprintf(str+i, " -Fx %d -Fx -%d", alignX, 2*alignX);
92
i += sprintf(str+i, " -Fx %d", alignX);
93
if (2*alignY <= ATL_Cachelen)
94
i += sprintf(str+i, " -Fy %d -Fy -%d\"", alignY, 2*alignY);
96
i += sprintf(str+i, " -Fy %d\"", alignY);
102
/*
 * MVKernelFailsTest: assembles a `make <pre>mvnktest` or `make <pre>mvtktest`
 * command line for kernel kn in ln and reports on stderr whether the kernel
 * fails or passes the tester.  If a generator command is involved, it is
 * built first from kn->genstr with its output redirected to a temp file.
 * NOTE(review): the system() calls, the declaration of ln/sp/i, the branch
 * structure and the return statements are not visible in this view; going by
 * the name the result is presumably nonzero on failure -- TODO confirm.
 */
static int MVKernelFailsTest
103
(int verb, char pre, ATL_INT M, ATL_INT N, ATL_INT lda, ATL_mvnode_t *kn)
108
/* static: the temp-file name is kept across calls */
static char outnam[L_tmpnam];
109
static int FirstTime=1;
115
/*
 * NOTE(review): tmpnam() is called inside assert(); when compiled with
 * NDEBUG the whole expression is removed and outnam is never filled in.
 * Presumably guarded by FirstTime (guard not visible here).
 */
assert(tmpnam(outnam));
118
* If the file is generated, call generator to create it
122
/* Generator command: kn->genstr with stdout and stderr sent to outnam */
i = sprintf(ln, "%s", kn->genstr);
124
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
127
/* Failure report for the generator step, then dump its captured output */
fprintf(stderr, "ERROR, LINE %d of %s\n", __LINE__, __FILE__);
128
fprintf(stderr, "UNABLE TO GENERATE WITH COMMAND: %s\n", kn->genstr);
131
fprintf(stderr, "\nOUTPUT OF system():\n");
132
sprintf(ln, "cat %s 1>&2\n", outnam);
140
/* Caller must pass a problem at least as large as the kernel's minimums */
assert (M >= kn->minM);
141
assert (N >= kn->minN);
142
sp = (kn->TA == AtlasNoTrans) ? "mvn" : "mvt";
143
/* Make target and rout variable depend on the transpose setting */
if (kn->TA == AtlasNoTrans)
144
i = sprintf(ln, "make %cmvnktest mvnrout=%s", pre, kn->rout);
146
i = sprintf(ln, "make %cmvtktest mvtrout=%s", pre, kn->rout);
147
i += SprintAlignStr(pre, kn, ln+i);
148
/* With MVF_FNU, Nt is the largest multiple of kn->NU not above 1008 */
if (FLAG_IS_SET(kn->flag, MVF_FNU))
149
i += sprintf(ln+i, " Nt=%d ", (1008/kn->NU)*kn->NU);
151
/* NOTE(review): any guards on exflags/comp/cflags are not visible here */
i += sprintf(ln+i, " %s", kn->exflags);
152
if (1) /* NOTE: replace with test on restrict or not! */
153
i += sprintf(ln+i, " incy=1");
155
i += sprintf(ln+i, " %cMVCC=\"%s\"", pre, kn->comp);
157
i += sprintf(ln+i, " %cMVFLAGS=\"%s\"", pre, kn->cflags);
158
i += sprintf(ln+i, " Mt=%d Nt=%d ldat=%d", M, N, lda);
160
/*
 * Two endings for the command: redirect output to outnam, or leave it
 * attached to the terminal; the selecting condition is not visible here.
 */
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
162
i += sprintf(ln+i, "\n");
164
fprintf(stdout, "system call:%s\n", ln);
170
fprintf(stderr, "\n%s(ID=%d) FAILS TESTER!!\n", kn->rout,kn->ID);
171
fprintf(stderr, "FAILING CALL: '%s'\n", ln);
172
/* For verb 1 or 2 the captured output is replayed on stderr */
if (verb < 3 && verb > 0)
175
fprintf(stderr, "\nOUTPUT OF system():\n");
176
sprintf(ln, "cat %s 1>&2\n", outnam);
181
fprintf(stderr, "%s(ID=%d) *PASSES* TESTER!!\n", kn->rout,kn->ID);
190
/*
 * GetResIdStr: formats an identifying suffix for kernel r1p and the given
 * problem (M, N, lda, percL1) into ln, following the layout listed below.
 * NOTE(review): the declaration of ln and the return statement are not
 * visible in this view.  Since a char pointer is returned, ln presumably has
 * static storage -- TODO confirm.  No use of mflop is visible here.
 */
static char *GetResIdStr(ATL_mvnode_t *r1p, ATL_INT M, ATL_INT N,
191
ATL_INT lda, ATL_INT percL1, int mflop)
194
* Return filename suffix that disambiguates most kernels:
195
* <ID><TA>_<M>x<N>_<lda>-<ldamul>_<MU>x<NU>_<percL1>_a<alignA>x<aX>x<aY>_<flag>
198
/*
 * The format has 13 conversions; 12 arguments are visible below, and the
 * final one (the <flag> field of the layout) is cut off in this view.
 */
sprintf(ln, "%d%c_%dx%d_%d-%d_%dx%d_%d_a%dx%dx%d_%d", r1p->ID,
199
(r1p->TA == AtlasNoTrans) ? 'N' : 'T', M, N, lda, r1p->ldamul,
200
r1p->MU, r1p->NU, percL1, r1p->alignA, r1p->alignX, r1p->alignY,
206
/*
 * TimeMVKernel: builds a `make` timing command for kernel r1p and reads the
 * resulting numbers back with ReadResultsFile.  M, N and lda are first
 * adjusted to the kernel's minimums and multiples.  Unless FORCETIME is
 * nonzero, an existing results file is consulted before anything is built.
 * NOTE(review): the system() calls, several declarations (dp, mf), the
 * branch structure and the return statements are not visible in this view;
 * the value returned is presumably the MFLOPS figure -- TODO confirm.
 */
static double TimeMVKernel
207
(int verb, /* 0: no output, 1 min output, 2: full output */
208
int FORCETIME, /* if nonzero, ignore existing timing file */
209
/* if negative, don't retain timing file */
210
ATL_mvnode_t *r1p, /* ptr to kernel structure */
211
char pre, /* precision prefix */
212
ATL_INT M, ATL_INT N, /* dimensions to time */
213
ATL_INT lda, /* stride between row elements */
214
ATL_INT percL1, /* if 0, time kernel directly with no blocking */
215
/* if non-zero, block for that % of L1 cache size */
216
int nrep, /* if >=1, # of trials, else use default (3) */
217
int mflop, /* force mflop flops in each timing interval */
218
int cflush /* if >= 0, size of cache flush area, else ignored */
221
/* NOTE(review): ln and resf are fixed-size and filled with unbounded sprintf */
char ln[2048], resf[256], *sp;
223
int i, align = pre2size(pre);
224
/* static: the temp-file name is kept across calls */
static char outnam[L_tmpnam];
225
static int FirstTime=1;
231
/*
 * NOTE(review): tmpnam() is called inside assert(); when compiled with
 * NDEBUG the whole expression is removed and outnam is never filled in.
 * Presumably guarded by FirstTime (guard not visible here).
 */
assert(tmpnam(outnam));
234
* If the file is generated, call generator to create it
238
/* Generator command: r1p->genstr with stdout and stderr sent to outnam */
i = sprintf(ln, "%s", r1p->genstr);
240
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
243
/* Failure report for the generator step, then dump its captured output */
fprintf(stderr, "ERROR, LINE %d of %s\n", __LINE__, __FILE__);
244
fprintf(stderr, "UNABLE TO GENERATE WITH COMMAND: %s\n", r1p->genstr);
248
fprintf(stderr, "\nOUTPUT OF system():\n");
249
sprintf(ln, "cat %s 1>&2\n", outnam);
257
/* Raise N and M to the kernel's minimum sizes */
N = Mmax(N, r1p->minN);
260
M = Mmax(M, r1p->minM);
264
/* With MVF_FNU, round N down to a multiple of NU, but never below NU */
if (FLAG_IS_SET(r1p->flag, MVF_FNU))
265
N = Mmax(r1p->NU, (N/r1p->NU)*r1p->NU);
266
/*
 * Round lda up to a multiple of ldamul/pre2size(pre); a zero quotient
 * leaves lda unchanged.  Presumably ldamul is in bytes -- TODO confirm.
 */
i = r1p->ldamul / pre2size(pre);
267
lda = (i) ? ((lda+i-1)/i)*i : lda;
270
/*
 * Results file: either a scratch name or a name carrying the kernel ID
 * string.  The selecting condition is not visible here; presumably it is
 * the negative-FORCETIME case described in the parameter list.
 */
sprintf(resf, "res/%cmvtmp", pre);
272
sprintf(resf, "res/%cmv%s", pre,
273
GetResIdStr(r1p, M, N, lda, percL1, mflop));
276
/* A nonzero FORCETIME skips the lookup of an existing results file */
dp = FORCETIME ? NULL : ReadResultsFile(0, nrep, resf);
280
fprintf(stdout, " %d:%s (M=%d, N=%d, lda=%d) gets %.2f MFLOPS\n",
281
r1p->ID, r1p->rout, M, N, lda, *dp);
285
/* Both AtlasNoTrans and AtlasConj select the "mvn" make targets */
sp = (r1p->TA == AtlasNoTrans || r1p->TA == AtlasConj) ? "mvn" : "mvt";
287
/*
 * Two command forms: the "time" target carries l1mul=percL1, the "ktime"
 * target does not.  The selecting condition is not visible here;
 * presumably it is percL1 being nonzero -- TODO confirm.
 */
i = sprintf(ln, "make %c%stime M=%d N=%d lda=%d l1mul=%d %srout=\"%s\"",
288
pre, sp, M, N, lda, percL1, sp, r1p->rout);
290
i = sprintf(ln, "make %c%sktime M=%d N=%d lda=%d %srout=\"%s\"",
291
pre, sp, M, N, lda, sp, r1p->rout);
293
i += sprintf(ln+i, " iflag=%d", r1p->flag);
295
/* NOTE(review): any guards on the optional arguments are not visible here */
i += sprintf(ln+i, " %s", r1p->exflags);
297
i += sprintf(ln+i, " %cMVCC=\"%s\"", pre, r1p->comp);
299
i += sprintf(ln+i, " %cMVFLAGS=\"%s\"", pre, r1p->cflags);
300
i += SprintAlignStr(pre, r1p, ln+i);
302
i += sprintf(ln+i, " flushKB=%d", cflush);
303
/* tflags is a quoted string: opened here, closed by the lone quote below */
i += sprintf(ln+i, " tflags=\"-f %s", resf);
305
i += sprintf(ln+i, " -# %d", nrep);
308
i += sprintf(ln+i, " -F %d", mflop);
309
i += sprintf(ln+i, "\"");
310
i += sprintf(ln+i, " mu=%d nu=%d", r1p->MU, r1p->NU);
312
/*
 * Two endings for the command: redirect output to outnam, or leave it
 * attached to the terminal; the selecting condition is not visible here.
 */
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
314
i += sprintf(ln+i, "\n");
318
/* Here i is printed as the error code; its assignment is not visible */
fprintf(stderr, "\nERROR %d, LINE %d OF %s\n", i, __LINE__, __FILE__);
319
fprintf(stderr, "SYSTEM CALL FAILED: %s\n", ln);
323
fprintf(stderr, "\nOUTPUT OF system():\n");
324
sprintf(ln, "cat %s 1>&2\n", outnam);
334
/*
 * Two ways of reading the results back, differing in the first argument to
 * ReadResultsFile and in how the figure is printed; the selecting
 * condition is not visible here.
 */
dp = ReadResultsFile(1, nrep, resf);
335
mf = PrintResultsFromFile(stdout, dp);
340
dp = ReadResultsFile(0, nrep, resf);
343
fprintf(stdout, " %d:%s (M=%d, N=%d, lda=%d) gets %.2f MFLOPS\n",
344
r1p->ID, r1p->rout, M, N, lda, *dp);
348
/*
 * FillInMVExtractGenStrings: builds a `make <pre>mvnext_...` or
 * `make <pre>mvtext_...` generator command from kb's TA, MU and NU (the
 * remaining arguments are cut off in this view) and stores a copy made with
 * DupString in kb->genstr.
 * NOTE(review): the declarations of i and CL, the assignment of suff, any
 * loop over a kernel list and the branch bodies are not visible here.
 */
static void FillInMVExtractGenStrings(char pre, ATL_mvnode_t *kb)
350
* Creates generator strings to match kb settings
353
char ln[4096], *suff;
355
/*
 * For precisions other than 'd' and 'c', CL is 4 for 'z' and 16 otherwise;
 * the value used for 'd' and 'c' is set on a line not visible here.
 */
if (pre != 'd' && pre != 'c')
356
CL = (pre == 'z') ? 4 : 16;
359
/* IDs outside [900000, 1000000) are singled out; the action is not visible */
if (kb->ID < 900000 || kb->ID >= 1000000)
364
/* For the GAS_x8664 assembly dialect, MU must be a multiple of CL */
if (kb->asmbits == asmNames2bitfield("GAS_x8664"))
366
assert(kb->MU%CL == 0);
378
/* Make target depends on the transpose setting; argument lists are cut off */
if (kb->TA == AtlasNoTrans)
379
i = sprintf(ln, "make %cmvnext_%s order=clmajor mu=%d nu=%d", pre,
382
i = sprintf(ln, "make %cmvtext_%s order=clmajor mu=%d nu=%d", pre,
384
/* ALIGNED is defined only when alignA and ldamul are nonzero multiples of 16 */
if (kb->alignA && kb->alignA%16==0 && kb->ldamul && kb->ldamul%16==0)
385
i += sprintf(ln+i, " genflags=\"-def ALIGNED 1\"");
389
kb->genstr = DupString(ln);
393
#endif /* end guard around atlas_mvtesttime.h */