1
#ifndef ATLAS_R1TESTTIME_H
2
#define ATLAS_R1TESTTIME_H
4
#include "atlas_r1parse.h"
5
#include "atlas_gentesttime.h"
7
static int SprintAlignStr
9
char pre, /* precision modifier */
10
ATL_r1node_t *kp, /* kernel pointer */
11
char *str /* (OUTPUT) string to print to */
14
* Prints alignment string to str, based on alignment setting.
15
* This routine forces the alignment to sizeof(TYPE) unless
16
* restrictions are enabled.
17
* RETURNS: number of chars added to str
20
const int size = (pre == 'd' || pre == 's') ? pre2size(pre)
22
int alignA, alignX, alignY, i;
24
alignA = (kp->alignA) ? kp->alignA : size;
25
alignX = (kp->alignX) ? kp->alignX : size;
26
alignY = (kp->alignY) ? kp->alignY : size;
27
if (FLAG_IS_SET(kp->flag, R1F_ALIGNX2A))
28
alignX = alignA; /* ALIGNX2A overrides alignX */
30
* If ALIGNX2A, we must force the vectors to have the same remainder when
31
* divided by the vector length. We do this by insisting they have the
32
* same modulo by ATL_Cachelen, which by definition is always a
33
* multiple of the vector length (eg, veclen=16/32 (SSE/AVX), ATL_cl=32).
35
if (FLAG_IS_SET(kp->flag, R1F_ALIGNX2A))
37
int myalign = ATL_Cachelen - size;
40
myalign = ((ATL_Cachelen-size)/alignA)*alignA;
44
if (myalign < ATL_Cachelen)
45
i = sprintf(str, " align=\"-Fa %d -Fa -%d -Fx %d -Fx -%d",
46
myalign, ATL_Cachelen, myalign, ATL_Cachelen);
48
i = sprintf(str, " align=\"-Fa %d -Fx %d", myalign, myalign);
49
if (2*alignY <= ATL_Cachelen)
50
i += sprintf(str+i, " -Fy %d -Fy -%d\"", alignY, 2*alignY);
52
i += sprintf(str+i, " -Fy %d\"", alignY);
56
if (2*alignA <= ATL_Cachelen)
57
i = sprintf(str, " align=\"-Fa %d -Fa -%d", alignA, 2*alignA);
59
i = sprintf(str, " align=\"-Fa %d ", alignA);
60
if (2*alignX <= ATL_Cachelen)
61
i += sprintf(str+i, " -Fx %d -Fx -%d", alignX, 2*alignX);
63
i += sprintf(str+i, " -Fx %d", alignX);
64
if (2*alignY <= ATL_Cachelen)
65
i += sprintf(str+i, " -Fy %d -Fy -%d\"", alignY, 2*alignY);
67
i += sprintf(str+i, " -Fy %d\"", alignY);
73
static int R1KernelFailsTest
74
(int verb, char pre, ATL_INT M, ATL_INT N, ATL_INT lda, ATL_r1node_t *kn)
79
static char outnam[L_tmpnam];
80
static int FirstTime=1;
86
assert(tmpnam(outnam));
89
* If the file is generated, call generator to create it
93
i = sprintf(ln, "%s", kn->genstr);
95
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
98
fprintf(stderr, "ERROR, LINE %d of %s\n", __LINE__, __FILE__);
99
fprintf(stderr, "UNABLE TO GENERATE WITH COMMAND: %s\n", kn->genstr);
102
fprintf(stderr, "\nOUTPUT OF system():\n");
103
sprintf(ln, "cat %s 1>&2\n", outnam);
111
assert (M >= kn->minM);
112
assert (N >= kn->minN);
113
i = sprintf(ln, "make %cr1ktest r1rout=%s", pre, kn->rout);
114
i += SprintAlignStr(pre, kn, ln+i);
115
if (FLAG_IS_SET(kn->flag, R1F_FNU))
116
i += sprintf(ln+i, " Nt=%d ", (1008/kn->NU)*kn->NU);
118
i += sprintf(ln+i, " %s", kn->exflags);
119
if (1) /* NOTE: replace with test on restrict or not! */
120
i += sprintf(ln+i, " incy=1");
122
i += sprintf(ln+i, " %cR1CC=\"%s\"", pre, kn->comp);
124
i += sprintf(ln+i, " %cR1CFLAGS=\"%s\"", pre, kn->cflags);
125
i += sprintf(ln+i, " Mt=%d Nt=%d ldat=%d", M, N, lda);
127
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
129
i += sprintf(ln+i, "\n");
131
fprintf(stdout, "system call:%s\n", ln);
137
fprintf(stderr, "\n%s(ID=%d) FAILS TESTER!!\n", kn->rout,kn->ID);
138
fprintf(stderr, "FAILING CALL: '%s'\n", ln);
139
if (verb < 3 && verb > 0)
142
fprintf(stderr, "\nOUTPUT OF system():\n");
143
sprintf(ln, "cat %s 1>&2\n", outnam);
148
fprintf(stderr, "%s(ID=%d) *PASSES* TESTER!!\n", kn->rout,kn->ID);
157
static char *GetResIdStr(ATL_r1node_t *r1p, ATL_INT M, ATL_INT N,
158
ATL_INT lda, ATL_INT percL1, int mflop)
161
* Return filename suffix that disambiguates most kernels:
162
* <ID>_<M>x<N>_<lda>-<ldamul>_<MU>x<NU>_<percL1>_a<alignA>x<aX>x<aY>_<flag>
165
sprintf(ln, "%d_%dx%d_%d-%d_%dx%d_%d_a%dx%dx%d_%d", r1p->ID,
166
M, N, lda, r1p->ldamul, r1p->MU, r1p->NU, percL1,
167
r1p->alignA, r1p->alignX, r1p->alignY, r1p->flag);
172
static double TimeR1Kernel
173
(int verb, /* 0: no output, 1: min output, 2: full output */
174
int FORCETIME, /* if nonzero, ignore existing timing file */
175
/* if negative, don't retain timing file */
176
ATL_r1node_t *r1p, /* ptr to kernel structure */
177
char pre, /* precision prefix */
178
ATL_INT M, ATL_INT N, /* dimensions to time */
179
ATL_INT lda, /* stride between row elements */
180
ATL_INT percL1, /* if 0, time kernel directly with no blocking */
181
/* if non-zero, block for that % of L1 cache size */
182
int nrep, /* if >=1, # of trials, else use default (3) */
183
int mflop, /* force mflop flops in each timing interval */
184
int cflush /* if >= 0, size of cache flush area, else ignored */
187
char ln[2048], resf[256], *sp;
189
int i, align = pre2size(pre);
190
static char outnam[L_tmpnam];
191
static int FirstTime=1;
197
assert(tmpnam(outnam));
200
* If the file is generated, call generator to create it
204
i = sprintf(ln, "%s", r1p->genstr);
206
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
209
fprintf(stderr, "ERROR, LINE %d of %s\n", __LINE__, __FILE__);
210
fprintf(stderr, "UNABLE TO GENERATE WITH COMMAND: %s\n", r1p->genstr);
214
fprintf(stderr, "\nOUTPUT OF system():\n");
215
sprintf(ln, "cat %s 1>&2\n", outnam);
223
N = Mmax(N, r1p->minN);
226
M = Mmax(M, r1p->minM);
230
if (FLAG_IS_SET(r1p->flag, R1F_FNU))
231
N = Mmax(r1p->NU, (N/r1p->NU)*r1p->NU);
232
i = r1p->ldamul / pre2size(pre);
233
lda = (i) ? ((lda+i-1)/i)*i : lda;
236
sprintf(resf, "res/%cr1tmp", pre);
238
sprintf(resf, "res/%cr1%s", pre,
239
GetResIdStr(r1p, M, N, lda, percL1, mflop));
242
dp = FORCETIME ? NULL : ReadResultsFile(0, nrep, resf);
246
fprintf(stdout, " %d:%s (M=%d, N=%d, lda=%d) gets %.2f MFLOPS\n",
247
r1p->ID, r1p->rout, M, N, lda, *dp);
252
i = sprintf(ln, "make %cr1time M=%d N=%d lda=%d l1mul=%d r1rout=\"%s\"",
253
pre, M, N, lda, percL1, r1p->rout);
255
i = sprintf(ln, "make %cr1ktime M=%d N=%d lda=%d r1rout=\"%s\"",
256
pre, M, N, lda, r1p->rout);
258
i += sprintf(ln+i, " iflag=%d", r1p->flag);
260
i += sprintf(ln+i, " %s", r1p->exflags);
262
i += sprintf(ln+i, " %cR1CC=\"%s\"", pre, r1p->comp);
264
i += sprintf(ln+i, " %cR1CFLAGS=\"%s\"", pre, r1p->cflags);
265
i += SprintAlignStr(pre, r1p, ln+i);
267
i += sprintf(ln+i, " flushKB=%d", cflush);
268
i += sprintf(ln+i, " tflags=\"-f %s", resf);
270
i += sprintf(ln+i, " -# %d", nrep);
273
i += sprintf(ln+i, " -F %d", mflop);
274
i += sprintf(ln+i, "\"");
275
i += sprintf(ln+i, " mu=%d nu=%d", r1p->MU, r1p->NU);
277
i += sprintf(ln+i, " > %s 2>&1\n", outnam);
279
i += sprintf(ln+i, "\n");
283
fprintf(stderr, "\nERROR %d, LINE %d OF %s\n", i, __LINE__, __FILE__);
284
fprintf(stderr, "SYSTEM CALL FAILED: %s\n", ln);
288
fprintf(stderr, "\nOUTPUT OF system():\n");
289
sprintf(ln, "cat %s 1>&2\n", outnam);
299
dp = ReadResultsFile(1, nrep, resf);
300
mf = PrintResultsFromFile(stdout, dp);
305
dp = ReadResultsFile(0, nrep, resf);
308
fprintf(stdout, " %d:%s (M=%d, N=%d, lda=%d) gets %.2f MFLOPS\n",
309
r1p->ID, r1p->rout, M, N, lda, *dp);
313
static void FillInR1ExtractGenStrings(char pre, ATL_r1node_t *kb)
315
* Creates generator strings to match kb settings
318
char ln[4096], *suff;
320
if (pre != 'd' && pre != 'c')
321
CL = (pre == 'z') ? 4 : 16;
324
if (kb->ID < 900000 || kb->ID >= 1000000)
329
if (kb->asmbits == asmNames2bitfield("GAS_x8664"))
331
assert(kb->MU%CL == 0);
343
i = sprintf(ln, "make %cr1ext_%s order=clmajor mu=%d nu=%d", pre,
345
if (kb->alignA && kb->alignA%16==0 && kb->ldamul && kb->ldamul%16==0)
346
i += sprintf(ln+i, " genflags=\"-def ALIGNED 1\"");
350
kb->genstr = DupString(ln);
354
#endif /* end guard around atlas_r1testtime.h */