2
* Automatically Tuned Linear Algebra Software v3.10.1
3
* Copyright (C) 2010 R. Clint Whaley
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions, and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the ATLAS group or the names of its contributers may
14
* not be used to endorse or promote products derived from this
15
* software without specific written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
21
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
* POSSIBILITY OF SUCH DAMAGE.
34
#include "atlas_misc.h"
35
#include "atlas_tst.h"
36
#include "atlas_lvl2.h"
37
#include "atlas_level1.h"
38
#include "atlas_genparse.h"
39
#include "atlas_gentesttime.h"
42
#if defined(__MINGW32__) || defined(__MINGW64__)
44
int slashdrivesub(char *ln)
46
* replaces \\c\ with c:\, returns change in string length
47
* this version required for older cygwins
50
char *sp, *lp=ln, ctmp;
54
sp = strstr(lp, "\\\\");
55
if (sp && strlen(sp) > 3)
57
if (sp[2] == 'a' || sp[2] == 'b' || sp[2] == 'c' || sp[2] == 'd' ||
58
sp[2] == 'e' || sp[2] == 'f' || sp[2] == 'g' || sp[2] == 'h')
66
/* Shift the tail of the string left by one character, starting at sp+3.
 * The assignment doubles as the loop test, so the loop ends right after
 * the terminating NUL has been copied down. */
for (lp=sp+3; *lp = lp[1]; lp++);
80
int cygdrivesub(char *ln)
82
* replaces \cygdrive\c\ with c:\, returns change in string length
83
* this version works with cygnus version 1.1.0
89
/* Repeat for as long as ln still contains the "\cygdrive\" prefix */
while(sp = strstr(ln, "\\cygdrive\\"))
96
/* Shift the tail left by nine characters, terminating NUL included */
while (*sp = sp[9]) sp++;
98
/* Result is slashdrivesub's return value less 9 characters per i.
 * NOTE(review): i is presumably the number of substitutions made above;
 * it is maintained on lines not visible here -- confirm. */
return( slashdrivesub(ln) - (i*9) );
101
void slashsub(char *ln)
103
* changes forward slash of unix to backslash of Windows
107
for (i=0; ln[i]; i++) if (ln[i] == '/') ln[i] = '\\';
112
static double GetTime
114
enum ATLAS_UPLO Uplo,/* which triangle? */
115
int verb, /* verbosity */
116
int nreps, /* number of reps to do for one timing sample */
117
size_t flushelts, /* size of area to flush to avoid cache reuse */
118
ATL_CINT N, /* matrix size */
119
int NX /* what to set ref/kernel crossover to */
122
size_t setsz, nsets, accsz, Nt;
125
TYPE *tp, *x, *y, *a;
127
const TYPE one[2] = {ATL_rone, ATL_rzero};
129
const TYPE one = ATL_rone;
134
/* accsz: N + N*(N/2) elements, i.e. one length-N vector plus about half of
 * an N x N matrix (presumably what one triangular update touches --
 * confirm).  The (size_t) cast makes the product use size_t arithmetic. */
accsz = 1*N + ((size_t)N)*(N>>1);
135
/* setsz: N + N*N elements, i.e. one length-N vector plus a full N x N matrix */
setsz = 1*N + ((size_t)N)*N;
136
/* nsets: flushelts divided by accsz, rounded up ... */
nsets = (flushelts + accsz-1)/accsz;
137
/* ... and never fewer than one set */
nsets = (nsets) ? nsets : 1;
139
tp = vp = malloc(ATL_MulBySize(Nt));
141
Mjoin(PATL,gegen)(Nt, 1, tp, Nt, N+127*37);
143
for (j=0, i=nreps; i; i--)
148
Mjoin(PATL,her)(Uplo, N, ATL_rone, x, 1, a, N);
150
Mjoin(PATL,syr)(Uplo, N, ATL_rone, x, 1, a, N);
156
t1 = (t1 - t0) / nreps;
160
static double GetTimes
162
enum ATLAS_UPLO Uplo,/* which triangle? */
163
int verb, /* verbosity */
164
int nsample, /* number of samples to take */
165
int nreps, /* number of reps to do for one timing sample */
166
size_t flushelts, /* size of area to flush to avoid cache reuse */
167
ATL_CINT N, /* matrix size */
168
int NX /* what to set ref/kernel crossover to */
174
times = malloc(nsample*sizeof(double));
176
for (i=0; i < nsample; i++)
178
times[i] = GetTime(Uplo, verb, nreps, flushelts, N, NX);
180
printf(" %d: %e\n", i, times[i]);
182
SortDoubles(nsample, times);
186
/* Choose which of the sorted samples to report.  The formula
 * (nsample>>1)+1 evaluates to nsample when nsample == 2, which is one past
 * the nsample doubles allocated for times, so that case is clamped to the
 * last valid slot; every other nsample keeps its previous choice.
 * NOTE(review): assumes i is used to index times[] on a line not visible
 * here -- confirm. */
i = (nsample > 2) ? (nsample>>1)+1 : ((nsample == 2) ? 1 : 0);
191
printf(" RETURNING TIME: %e\n", t0);
198
enum ATLAS_UPLO Uplo,/* which triangle? */
199
int verb, /* verbosity */
200
int nsample, /* number of samples to take */
201
int nreps, /* number of reps to do for one timing sample */
202
size_t flushelts, /* size of area to flush to avoid cache reuse */
203
ATL_CINT N /* matrix size */
206
double t0, tL, tN; /* 0, Last, Next */
207
double tB, tE, tM; /* beginning, end, middle timings */
211
t0 = GetTimes(Uplo, verb, nsample, nreps, flushelts, N, N);
212
printf("\n Time for N=NX=%d : %e\n", N, t0);
213
printf(" N NX %% of N=%2d\n", NX0);
214
printf("====== ==== ===========\n");
216
* Now halve NX until performance stops increasing
223
tN = GetTimes(Uplo, verb, nsample, nreps, flushelts, N, nN);
224
printf("%6d %4d %11.2f\n", N, nN, (tN/t0)*100.0);
225
if (tN > tL) break; /* stop if new time longer than last */
229
while (nN); /* stop if NX = 0 */
238
nM = nB + ((nE-nB)>>1);
239
nM = (nM>>3) << 3; /* keep mul of 8 for alignment, etc */
240
tM = GetTimes(Uplo, verb, nsample, nreps, flushelts, N, nM);
241
printf("%6d %4d %11.2f\n", N, nM, (tM/t0)*100.0);
253
if (tE < tB && tE < tM)
258
else if (tB < tE && tB < tM)
263
printf("NX selected as %d (%.2f%%)!\n", nM, (tM/t0)*100.0);
267
void PrintUsage(char *name, int ierr, char *flag)
270
fprintf(stderr, "Bad argument #%d: '%s'\n",
271
ierr, flag ? flag : "Not enough arguments");
273
fprintf(stderr, "ERROR: %s\n", flag);
275
fprintf(stderr, "USAGE: %s [flags]:\n", name);
276
fprintf(stderr, " -U <u/l>\n");
277
fprintf(stderr, " -n <N>\n");
278
fprintf(stderr, " -r <reps>\n");
279
fprintf(stderr, " -s <nsample>\n");
280
fprintf(stderr, " -v <verb>\n");
281
fprintf(stderr, " -C <flushKB>\n");
282
fprintf(stderr, " -o <outfile>\n");
283
exit(ierr ? ierr : -1);
286
int GetFlags(int nargs, char **args, enum ATLAS_UPLO *Uplo, int *verb,
287
int *nsample, int *nreps, size_t *flushelts, char **outfile)
299
for (i=1; i < nargs; i++)
301
if (args[i][0] != '-')
302
PrintUsage(args[0], i, "No '-' preceeding flag!");
307
PrintUsage(args[0], i-1, "out of flags in -s ");
308
*nsample = atoi(args[i]);
312
PrintUsage(args[0], i-1, "out of flags in -v ");
313
*verb = atoi(args[i]);
317
PrintUsage(args[0], i-1, "out of flags in -r ");
318
*nreps = atoi(args[i]);
322
PrintUsage(args[0], i-1, "out of flags in -n ");
327
/* Ran out of arguments while reading the value of -U (the triangle flag) */
PrintUsage(args[0], i-1, "out of flags in -U ");
329
*Uplo = (ch == 'l' || ch == 'L') ? AtlasLower : AtlasUpper;
333
PrintUsage(args[0], i-1, "out of flags in -C) ");
334
*flushelts = atoll(args[i])*1024;
338
/* Ran out of arguments while reading a flag value.
 * NOTE(review): this branch is taken to be -o (the only flag in the usage
 * text not handled by another visible branch) -- confirm against the case
 * label, which is not visible here. */
PrintUsage(args[0], i-1, "out of flags in -o ");
342
PrintUsage(args[0], i, args[i]);
347
*outfile = DupString(of);
348
#if defined(__MINGW32__) || defined(__MINGW64__)
350
cygdrivesub(*outfile);
355
of = malloc(sizeof(char)*32);
357
#if defined(__MINGW32__) || defined(__MINGW64__)
358
/* MinGW default output path.  The separator must be written as an escaped
 * backslash: a bare "\a" is the alert (BEL, 0x07) escape sequence and would
 * put a control character into the file name. */
sprintf(of, "res\\atlas_%ssyrNX.h", Mstr(PRE));
360
sprintf(of, "res/atlas_%ssyrNX.h", Mstr(PRE));
367
void GenIncFile(char *outfile, int NX)
370
fpout = fopen(outfile, "w");
372
fprintf(fpout, "#ifndef ATLAS_%sSYR_H\n #define ATLAS_%sSYR_H\n",
373
Mstr(PREU), Mstr(PREU));
374
fprintf(fpout, " #define ATL_S1NX %d\n#endif\n", NX);
378
int main(int nargs, char **args)
382
int N, verb, nsample, nreps, NX;
383
enum ATLAS_UPLO Uplo;
385
N = GetFlags(nargs, args, &Uplo, &verb, &nsample, &nreps, &flushelts,
387
NX = RecDoubleNX(Uplo, verb, nsample, nreps, flushelts, N);
388
GenIncFile(outfile, NX);
390
printf("\nNX=%d!\n", NX);