2
* Automatically Tuned Linear Algebra Software v3.8.4
3
* (C) Copyright 1999 R. Clint Whaley
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions, and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the ATLAS group or the names of its contributers may
14
* not be used to endorse or promote products derived from this
15
* software without specific written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
21
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
* POSSIBILITY OF SUCH DAMAGE.
31
#include "atlas_misc.h"
32
#include "atlas_lvl2.h"
33
#include "atlas_fopen.h"
35
int FAx=0, MAx=0, FAy=0, MAy=0, FAa=0, MAa=0;
43
struct FA_allocs *next;
46
struct FA_allocs *NewAlloc(size_t size, struct FA_allocs *next,
47
int align, int misalign)
49
* Allocates size allocation that is aligned to [align], but not aligned
50
* to [misalign]. Therefore, misalign > align. Align must minimally be sizeof
51
* while misalign may be 0 if we don't need to avoid a particular alignment.
58
const int malign = align >= misalign ? align : misalign;
60
n = size + align + malign;
64
cp = malloc(n + sizeof(struct FA_allocs));
66
ap = (struct FA_allocs *) (cp + n);
69
* Align to min alignment
71
ap->memA = align ? (void*) ((((size_t) cp)/align)*align + align) : cp;
73
* Misalign to misalign
77
if (((size_t)ap->memA)%misalign == 0)
78
ap->memA = ((char*)ap->memA) + align;
85
* No-align malloc/calloc/free wrappers that retain the system default behavior
87
void *NA_malloc(size_t size)
91
void *NA_calloc(size_t n, size_t size)
93
return(calloc(n, size));
95
void NA_free(void *ptr)
102
* malloc/free pair that aligns data to align, but not to misalign
104
void *FA_malloc(size_t size, int align, int misalign)
106
if ((!misalign && align <= 8) || !size)
107
return(malloc(size));
110
allocQ = NewAlloc(size, allocQ, align, misalign);
111
return(allocQ->memA);
114
void *FA_calloc(size_t n, size_t size, int align, int misalign)
122
cp = FA_malloc(tsize, align, misalign);
123
if (size == sizeof(int))
124
for (ip=(int*)cp,i=0; i < n; i++)
126
else if (size == sizeof(double))
127
for (dp=(double*)cp,i=0; i < n; i++)
130
for (i=0; i < tsize; i++)
135
void FA_free(void *ptr, int align, int misalign)
137
* Part of malloc/free pair that aligns data to align, but not to misalign
140
struct FA_allocs *ap, *prev;
143
if ((!misalign && align <= 8))
147
for (ap=allocQ; ap && ap->memA != ptr; ap = ap->next) prev = ap;
150
/* ptr is a void*; print it with %p (passing a pointer to %ld is undefined) */
fprintf(stderr, "Couldn't find mem=%p\nmemQ=\n", ptr);
151
for (ap=allocQ; ap; ap = ap->next)
152
/* dump each queue entry's aligned and raw pointers; %p requires void* args */
fprintf(stderr, "   %p, %p\n", (void *)ap->memA, (void *)ap->mem);
156
allocQ = allocQ->next;
158
prev->next = ap->next;
164
#define ATL_NoBlock(iflag_) ( ((iflag_) | 32) == (iflag_) )
168
#define test_ger(M, N, alpha, X, incX, Y, incY, A, lda) \
169
Mjoin(PATL,ger)(M, N, alpha, X, incX, Y, incY, A, lda)
171
#define test_ger(M, N, alpha, X, incX, Y, incY, A, lda) \
172
Mjoin(PATL,geru)(M, N, alpha, X, incX, Y, incY, A, lda)
175
double gercase(const int MFLOP, const int M, const int N, const SCALAR alpha,
179
const int aincX = 1, aincY = 1;
180
const int incX = 1, incY = 1;
183
const double flops = 2.0 * M * N;
185
const double flops = 8.0 * M * N;
187
double ttest, mftest, t0;
188
const int inca = lda*N SHIFT, incx = M*incX SHIFT, incy = N*incY SHIFT;
189
TYPE *a, *A, *stA, *A0, *x, *X, *X0, *stX, *y, *Y, *Y0, *stY;
191
const TYPE nalpha = -alpha;
194
const TYPE *alp = alpha;
198
nalpha[0] = -alpha[0]; nalpha[1] = alpha[1];
201
i = (ATL_DivBySize(L2SIZE)+M-1)/M;
203
X0 = X = x = FA_malloc(ATL_MulBySize(lx), FAx, MAx);
204
if (x == NULL) return(-1);
206
i = (ATL_DivBySize(L2SIZE)+N-1)/N;
208
Y0 = Y = y = FA_malloc(ATL_MulBySize(ly), FAy, MAy);
211
FA_free(x, FAx, MAx);
214
i = (ATL_DivBySize(L2SIZE)+M*N)/(M*N);
216
A0 = A = a = FA_malloc(ATL_MulBySize(la), FAa, MAa);
219
/* x was obtained with the (FAx, MAx) pair, so it must be released with the
 * same pair; FA_free uses align/misalign to choose between free() and the
 * allocation-queue lookup. */
FA_free(x, FAx, MAx);
220
FA_free(y, FAy, MAy);
226
x = X = x + (lx SHIFT);
228
else stX = x + (lx SHIFT);
232
y = Y = y + (ly SHIFT);
234
else stY = y + (ly SHIFT);
235
stA = a + (la SHIFT);
237
reps = (MFLOP * 1000000.0) / flops;
238
if (reps < 1) reps = 1;
239
Mjoin(PATL,gegen)(ly, 1, Y0, ly, M*incY);
240
Mjoin(PATL,gegen)(lx, 1, X0, lx, N*incY+127*50+77);
241
Mjoin(PATL,gegen)(la, 1, A0, la, N*M+513*7+90);
246
test_ger(M, N, alp, x, incX, y, incY, A0, lda);
255
if (alp == alpha) alp = nalpha;
259
ttest = time00() - t0;
261
if (ttest > 0.0) mftest = (reps * flops) / (1000000.0 * ttest);
264
FA_free(A0, FAa, MAa);
265
FA_free(X0, FAx, MAx);
266
FA_free(Y0, FAy, MAy);
270
void PrintUsage(char *nam)
272
fprintf(stderr, "USAGE: %s -C <case #> -l <l1mul> -F <mflop> -m <M> -n <N> -f <iflag> -a <alpha> -o <outfile>\n\n", nam);
276
void GetFlags(int nargs, char **args, char *pre, int *MFLOP,
277
int *M, int *N, TYPE *alpha, int *lda, char *outnam)
280
int i, k, cas, iflag=0;
295
*MFLOP = (ATL_nkflop) / 2000.0;
296
if (*MFLOP < 1) *MFLOP = 1;
300
*lda = *M = *N = 1000;
303
alpha[1] = ATL_rzero;
306
for (i=1; i < nargs; i++)
308
if (args[i][0] != '-') PrintUsage(args[0]);
312
cas = atoi(args[++i]);
314
case 'F': /* mflops */
340
case 'f': /* iflag */
341
iflag = atoi(args[++i]);
344
*M = atoi(args[++i]);
347
*N = atoi(args[++i]);
350
l1mul = atoi(args[++i]);
353
*alpha = atof(args[++i]);
355
alpha[1] = atof(args[++i]);
359
strcpy(outnam, args[++i]);
365
if (outnam[0] == '\0')
367
if (ATL_NoBlock(iflag))
368
sprintf(outnam, "res/%cger1_%d_0", *pre, cas);
369
else sprintf(outnam, "res/%cger1_%d_%d", *pre, cas, l1mul);
373
/* Program entry point; explicit int return type (implicit int is not valid
 * since C99). */
int main(int nargs, char **args)
376
int MFLOP, M, N, lda, cas, i;
385
GetFlags(nargs, args, &pre, &MFLOP, &M, &N, SADD alpha, &lda, fnam);
387
if (!FileExists(fnam))
389
fp = fopen(fnam, "w");
391
for (i=0; i < 3; i++)
393
mf = gercase(MFLOP, M, N, alpha, lda);
394
fprintf(stdout, " %s : %f MFLOPS\n", fnam, mf);
395
fprintf(fp, "%lf\n", mf);
401
fp = fopen(fnam, "r");
402
for (i=0; i < 3; i++) ATL_assert(fscanf(fp, " %lf", &mfs[i]) == 1);
405
mf = (mfs[0] + mfs[1] + mfs[2]) / 3.0;
406
fprintf(stdout, " %s : %.2f MFLOPS\n", fnam, mf);