2
* Automatically Tuned Linear Algebra Software v3.2
3
* (C) Copyright 1998 Jeff Horner
5
* Code contributers : Jeff Horner, R. Clint Whaley
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions, and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. The name of the University of Tennessee, the ATLAS group,
16
* or the names of its contributers may not be used to endorse
17
* or promote products derived from this software without specific
20
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE
24
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
* POSSIBILITY OF SUCH DAMAGE.
34
* =====================================================================
36
* =====================================================================
43
/*
 * Empty entry-point stubs under the alternative names ___main, __main,
 * MAIN__ and _MAIN_.
 * NOTE(review): presumably these satisfy symbols referenced by certain
 * compiler / Fortran runtimes; the conditional guarding them is not visible
 * here -- confirm against the build configuration.
 * The return type is spelled out because implicit int was removed from the
 * language in C99; each stub now returns 0 instead of an indeterminate value.
 */
int ___main(){ return 0; } int __main(){ return 0; } int MAIN__(){ return 0; } int _MAIN_(){ return 0; }
45
#define isdigit(ch_) ( ((ch_)=='0')||((ch_)=='1')||((ch_)=='2')|| \
46
((ch_)=='3')||((ch_)=='4')||((ch_)=='5')|| \
47
((ch_)=='6')||((ch_)=='7')||((ch_)=='8')|| \
54
#include "atlas_misc.h"
55
#include "atlas_tst.h"
57
* =====================================================================
58
* #define macro constants
59
* =====================================================================
61
/* One million: operation counts are divided by ( seconds * MEGA ) to report MFLOPS. */
#define MEGA 1.0e6
62
#if defined( SREAL ) || defined( SCPLX )
70
* =====================================================================
72
* =====================================================================
74
* The following mutually exclusive macros allow selecting among various
75
* BLAS implementations to test the ATLAS implementation against:
77
* USE_F77_BLAS : Fortran 77 BLAS interface,
78
* USE_L3_REFERENCE : C ATLAS reference implementation,
80
* If none of these macros is defined at compile time, the ATLAS imple-
81
* mentation is to be tested against itself, after all this is the only
82
* version we are sure to have available.
84
* By default the mono-threaded ATLAS routines are tested. To test the
85
* multi-threaded ATLAS routines, define the following macro:
86
* USE_L3_PTHREADS : multi-threaded ATLAS implementation.
90
#ifdef ATL_USEPTHREADS
91
#define USE_L3_PTHREADS
95
* =====================================================================
97
#if defined( USE_F77_BLAS ) /* Trusted BLAS version to test against */
98
#define TP3 Mjoin( PATL, f77 )
99
#elif defined( USE_L3_REFERENCE )
100
#include "atlas_reflevel3.h"
101
#define TP3 Mjoin( PATL, ref )
102
#else /* defined( USE_L3_ATLAS ) */ /* use ATLAS itself !! (default) */
103
#include "atlas_level3.h"
107
#define trusted_gemm( TA, TB, M, N, K, al, A, lA, B, lB, be, C, lC) \
108
Mjoin( TP3, gemm )( TA, TB, M, N, K, al, A, lA, B, lB, be, C, lC)
110
#define trusted_hemm( SI, UP, M, N, al, A, lA, B, lB, be, C, lC) \
111
Mjoin( TP3, hemm )( SI, UP, M, N, al, A, lA, B, lB, be, C, lC)
112
#define trusted_herk( UP, TA, N, K, al, A, lA, be, C, lC) \
113
Mjoin( TP3, herk )( UP, TA, N, K, al, A, lA, be, C, lC)
114
#define trusted_her2k( UP, TA, N, K, al, A, lA, B, lB, be, C, lC) \
115
Mjoin( TP3, her2k )( UP, TA, N, K, al, A, lA, B, lB, be, C, lC)
117
#define trusted_symm( SI, UP, M, N, al, A, lA, B, lB, be, C, lC) \
118
Mjoin( TP3, symm )( SI, UP, M, N, al, A, lA, B, lB, be, C, lC)
119
#define trusted_syrk( UP, TA, N, K, al, A, lA, be, C, lC) \
120
Mjoin( TP3, syrk )( UP, TA, N, K, al, A, lA, be, C, lC)
121
#define trusted_syr2k( UP, TA, N, K, al, A, lA, B, lB, be, C, lC) \
122
Mjoin( TP3, syr2k )( UP, TA, N, K, al, A, lA, B, lB, be, C, lC)
123
#define trusted_trmm( SI, UP, TA, DI, M, N, al, A, lA, B, lB) \
124
Mjoin( TP3, trmm )( SI, UP, TA, DI, M, N, al, A, lA, B, lB)
125
#define trusted_trsm( SI, UP, TA, DI, M, N, al, A, lA, B, lB) \
126
Mjoin( TP3, trsm )( SI, UP, TA, DI, M, N, al, A, lA, B, lB)
129
* ATLAS version of the BLAS to test.
131
#if defined( USE_L3_PTHREADS )
132
#include "atlas_pthreads.h"
133
#include "atlas_ptlvl3.h"
134
#define AP3 Mjoin( PATL, pt )
136
#include "atlas_level3.h"
140
#define test_gemm( TA, TB, M, N, K, al, A, lA, B, lB, be, C, lC) \
141
Mjoin( AP3, gemm )( TA, TB, M, N, K, al, A, lA, B, lB, be, C, lC)
143
#define test_hemm( SI, UP, M, N, al, A, lA, B, lB, be, C, lC) \
144
Mjoin( AP3, hemm )( SI, UP, M, N, al, A, lA, B, lB, be, C, lC)
145
#define test_herk( UP, TA, N, K, al, A, lA, be, C, lC) \
146
Mjoin( AP3, herk )( UP, TA, N, K, al, A, lA, be, C, lC)
147
#define test_her2k( UP, TA, N, K, al, A, lA, B, lB, be, C, lC) \
148
Mjoin( AP3, her2k )( UP, TA, N, K, al, A, lA, B, lB, be, C, lC)
150
#define test_symm( SI, UP, M, N, al, A, lA, B, lB, be, C, lC) \
151
Mjoin( AP3, symm )( SI, UP, M, N, al, A, lA, B, lB, be, C, lC)
152
#define test_syrk( UP, TA, N, K, al, A, lA, be, C, lC) \
153
Mjoin( AP3, syrk )( UP, TA, N, K, al, A, lA, be, C, lC)
154
#define test_syr2k( UP, TA, N, K, al, A, lA, B, lB, be, C, lC) \
155
Mjoin( AP3, syr2k )( UP, TA, N, K, al, A, lA, B, lB, be, C, lC)
156
#define test_trmm( SI, UP, TA, DI, M, N, al, A, lA, B, lB) \
157
Mjoin( AP3, trmm )( SI, UP, TA, DI, M, N, al, A, lA, B, lB)
158
#define test_trsm( SI, UP, TA, DI, M, N, al, A, lA, B, lB) \
159
Mjoin( AP3, trsm )( SI, UP, TA, DI, M, N, al, A, lA, B, lB)
162
* =====================================================================
164
* =====================================================================
167
/*
 * Sum of Mabs() applied to the two consecutive elements X[0] and X[1].
 * NOTE(review): presumably the real and imaginary parts of a complex scalar;
 * the conditional selecting this variant is not visible here -- confirm.
 * X is parenthesized so that pointer expressions such as ( p + 2 ) are
 * dereferenced as a whole instead of expanding to ( *p + 2 ).
 */
#define Mabs1(X) ( Mabs( *(X) ) + Mabs( *( (X) + 1 ) ) )
169
#define Mabs1(X) (Mabs(X))
173
/*
 * L2SIZE scaled by ATL_NTHREADS.
 * NOTE(review): presumably the cache-flush size for the multi-threaded
 * build; the selecting conditional is not visible here -- confirm.
 * The product is parenthesized so the macro behaves as a single operand in
 * any surrounding expression (e.g. x / LCSIZE).
 */
#define LCSIZE ( ATL_NTHREADS * L2SIZE )
175
#define LCSIZE L2SIZE
178
* =====================================================================
179
* typedef definitions
180
* =====================================================================
183
/*
 * Selector for the Level 3 routine being exercised.  Enumerators are numbered
 * consecutively starting at GEMM = 0; ALLROUTS is the last one.
 * NOTE(review): ALLROUTS presumably requests every routine -- confirm against
 * the code that consumes it.
 */
enum LVL3_ROUT
{
   GEMM = 0,
   SYMM,
   SYRK,
   SYR2K,
   TRMM,
   TRSM,
   ALLROUTS
};
186
{ GEMM=0, HEMM, HERK, HER2K, SYMM, SYRK, SYR2K, TRMM, TRSM, ALLROUTS };
189
* =====================================================================
190
* Prototypes for the testing routines
191
* =====================================================================
194
( const enum LVL3_ROUT, const int, const int,
197
( const enum ATLAS_UPLO, const int, TYPE *,
201
( const enum LVL3_ROUT, const int, const enum ATLAS_TRANS,
202
const enum ATLAS_TRANS, const int, const int,
203
const int, const SCALAR, const int, const int,
204
const SCALAR, const int, const TYPE, double *,
205
double *, double *, double * );
207
( const enum LVL3_ROUT, const int, const enum ATLAS_SIDE,
208
const enum ATLAS_UPLO, const int, const int,
209
const SCALAR, const int, const int, const SCALAR,
210
const int, const TYPE, double *, double *,
211
double *, double * );
213
( const enum LVL3_ROUT, const int, const enum ATLAS_UPLO,
214
const enum ATLAS_TRANS, const int, const int,
215
const SCALAR, const int, const int, const SCALAR,
216
const int, const TYPE, double *, double *,
217
double *, double * );
219
( const enum LVL3_ROUT, const int, const enum ATLAS_UPLO,
220
const enum ATLAS_TRANS, const int, const int,
221
const SCALAR, const int, const SCALAR, const int,
222
const TYPE, double *, double *, double *,
225
( const enum LVL3_ROUT, const int, const enum ATLAS_SIDE,
226
const enum ATLAS_UPLO, const enum ATLAS_TRANS,
227
const enum ATLAS_DIAG, const int, const int,
228
const SCALAR, const int, const int, const TYPE,
229
double *, double *, double *, double * );
231
( const enum LVL3_ROUT, const int, const enum ATLAS_SIDE,
232
const enum ATLAS_UPLO, const enum ATLAS_TRANS,
233
const enum ATLAS_DIAG, const int, const int,
234
const SCALAR, const int, const int, const TYPE,
235
double *, double *, double *, double * );
238
( const enum LVL3_ROUT, const int, const int,
239
const enum ATLAS_TRANS, const enum ATLAS_TRANS,
240
const int, const int, const int, const SCALAR,
241
const int, const int, const SCALAR, const int,
242
const TYPE, double *, double *, double *,
245
( const enum LVL3_ROUT, const int, const int,
246
const enum ATLAS_SIDE, const enum ATLAS_UPLO,
247
const int, const int, const SCALAR, const int,
248
const int, const SCALAR, const int, const TYPE,
249
double *, double *, double *, double * );
251
( const enum LVL3_ROUT, const int, const int,
252
const enum ATLAS_UPLO, const enum ATLAS_TRANS,
253
const int, const int, const SCALAR, const int,
254
const int, const SCALAR, const int, const TYPE,
255
double *, double *, double *, double * );
257
( const enum LVL3_ROUT, const int, const int,
258
const enum ATLAS_UPLO, const enum ATLAS_TRANS,
259
const int, const int, const SCALAR, const int,
260
const SCALAR, const int, const TYPE, double *,
261
double *, double *, double * );
263
( const enum LVL3_ROUT, const int, const int,
264
const enum ATLAS_SIDE, const enum ATLAS_UPLO,
265
const enum ATLAS_TRANS, const enum ATLAS_DIAG,
266
const int, const int, const SCALAR, const int,
267
const int, const TYPE, double *, double *,
268
double *, double * );
271
( const enum LVL3_ROUT, const int, const int,
272
const int, const int, const enum ATLAS_TRANS *,
273
const int, const enum ATLAS_TRANS *, int,
276
const int, const TYPE *, const int, const TYPE *,
277
const TYPE, int *, int * );
279
( const enum LVL3_ROUT, const int, const int,
280
const int, const int, const enum ATLAS_SIDE *,
281
const int, const enum ATLAS_UPLO *, int,
283
int, const int, const TYPE *, const int,
284
const TYPE *, const TYPE, int *, int * );
286
( const enum LVL3_ROUT, const int, const int,
287
const int, const int, const enum ATLAS_UPLO *,
288
const int, const enum ATLAS_TRANS *, int,
290
int, const int, const TYPE *, const int,
291
const TYPE *, const TYPE, int *, int * );
293
( const enum LVL3_ROUT, const int, const int,
294
const int, const int, const enum ATLAS_UPLO *,
295
const int, const enum ATLAS_TRANS *, int,
297
int, const int, const TYPE *, const int,
298
const TYPE *, const TYPE, int *, int * );
300
( const enum LVL3_ROUT, const int, const int,
301
const int, const int, const enum ATLAS_SIDE *,
302
const int, const enum ATLAS_UPLO *, const int,
303
const enum ATLAS_TRANS *, const int, const enum ATLAS_DIAG *,
305
int, int, const int, const TYPE *,
306
const TYPE, int *, int * );
309
( const int, const int, const int, const int,
310
const enum ATLAS_SIDE *, const int, const enum ATLAS_UPLO *,
311
const int, const enum ATLAS_TRANS *, const int,
312
const enum ATLAS_TRANS *, const int, const enum ATLAS_DIAG *,
313
const int, const int, const int, const int,
314
const int, const int, const int, const int,
315
const int, const int, const TYPE *, const int,
316
const TYPE *, const int, const enum LVL3_ROUT * );
322
( int, char **, int *, enum LVL3_ROUT **,
323
int *, int *, int *, int *,
324
enum ATLAS_SIDE **, int *, enum ATLAS_UPLO **,
325
int *, enum ATLAS_TRANS **, int *,
326
enum ATLAS_TRANS **, int *, enum ATLAS_DIAG **,
327
int *, int *, int *, int *,
328
int *, int *, int *, int *,
329
int *, int *, TYPE **, int *,
335
* =====================================================================
339
const enum LVL3_ROUT ROUT,
345
double adds = 0.0, em, en, ek, muls = 0.0;
347
* On entry, M, N, and K contain parameter values used by the Level 3
348
* BLAS. The output matrix is always M x N or N x N if symmetric, but K
349
* has different uses in different contexts. For example, in the matrix-
350
* matrix multiply routine, we have C = A * B where C is M x N, A is
351
* M x K, and B is K x N. In xSYMM, xHEMM, xTRMM, and xTRSM, K indicates
352
* whether the matrix A is applied on the left or right. If K <= 0, the
353
* matrix is applied on the left, and if K > 0, on the right.
355
if( M <= 0 ) return( 0.0 );
357
em = (double)(M); en = (double)(N); ek = (double)(K);
359
if( ROUT == GEMM ) { muls = em * ek * en; adds = em * ek * en; }
361
else if( ROUT == SYMM )
363
else if( ( ROUT == SYMM ) || ( ROUT == HEMM ) )
365
{ /* If K <= 0, assume A multiplies B on the left. */
366
if( K <= 0 ) { muls = em * em * en; adds = em * em * en; }
367
else { muls = em * en * en; adds = em * en * en; }
369
else if( ROUT == TRMM )
370
{ /* If K <= 0, assume A multiplies B on the left. */
373
muls = en * em * ( em + 1.0 ) / 2.0;
374
adds = en * em * ( em - 1.0 ) / 2.0;
378
muls = em * en * ( en + 1.0 ) / 2.0;
379
adds = em * en * ( en - 1.0 ) / 2.0;
383
else if( ROUT == SYRK )
385
else if( ( ROUT == SYRK ) || ( ROUT == HERK ) )
388
muls = ek * em * ( em + 1.0 ) / 2.0;
389
adds = ek * em * ( em + 1.0 ) / 2.0;
392
else if( ROUT == SYR2K )
394
else if( ( ROUT == SYR2K ) || ( ROUT == HER2K ) )
396
{ muls = ek * em * em; adds = ek * em * em + em; }
397
else if( ROUT == TRSM )
398
{ /* If K <= 0, assume A multiplies B on the left. */
401
muls = en * em * ( em + 1.0 ) / 2.0;
402
adds = en * em * ( em - 1.0 ) / 2.0;
406
muls = em * en * ( en + 1.0 ) / 2.0;
407
adds = em * en * ( en - 1.0 ) / 2.0;
411
return( muls + adds );
413
return( 6.0 * muls + 2.0 * adds );
419
const enum ATLAS_UPLO UPLO,
426
* Scale strictly lower (resp. upper) part of triangular matrix by 1 / N
427
* to make it diagonally dominant.
429
int i, iaij, j, jaj, lda2 = ( LDA SHIFT ),
430
ldap12 = (( LDA + 1 ) SHIFT);
435
alpha = ATL_rone / (TYPE)(N);
437
if( UPLO == AtlasUpper )
439
for( j = 0, jaj = 0; j < N; j++, jaj += lda2 )
441
for( i = 0, iaij = jaj; i < j; i++, iaij += (1 SHIFT) )
448
if( A[iaij ] >= ATL_rzero ) A[iaij ] += ATL_rone;
449
else A[iaij ] -= ATL_rone;
451
if( A[iaij+1] >= ATL_rzero ) A[iaij+1] += ATL_rone;
452
else A[iaij+1] -= ATL_rone;
458
for( j = N-1, jaj = (N-1)*ldap12; j >= 0; j--, jaj -= ldap12 )
460
if( A[jaj ] >= ATL_rzero ) A[jaj ] += ATL_rone;
461
else A[jaj ] -= ATL_rone;
463
if( A[jaj+1] >= ATL_rzero ) A[jaj+1] += ATL_rone;
464
else A[jaj+1] -= ATL_rone;
466
for( i = j+1, iaij = jaj+(1 SHIFT); i < N; i++, iaij += (1 SHIFT) )
477
* =====================================================================
479
* =====================================================================
483
const enum LVL3_ROUT ROUT,
485
const enum ATLAS_TRANS TRANSA,
486
const enum ATLAS_TRANS TRANSB,
502
double l2ret, ops, t0, ttest, ttrust;
503
TYPE normA, normB, normC, normD, resid;
504
TYPE * A = NULL, * B = NULL, * C = NULL, * C0,
506
int mA, mB, nA, nB, Aseed, Bseed, Cseed;
508
*TTRUST0 = *TTEST0 = *MFTEST0 = *MFTRUST0 = 0.0;
509
if( ( M == 0 ) || ( N == 0 ) ) { return( ATL_rzero ); }
511
if( TRANSA == AtlasNoTrans ) { mA = M; nA = K; }
512
else { mA = K; nA = M; }
513
if( TRANSB == AtlasNoTrans ) { mB = K; nB = N; }
514
else { mB = N; nB = K; }
516
ops = opbl3( ROUT, M, N, K );
518
* Allocate L2 cache space, A, B, C and C0
520
l2ret = ATL_flushcache( L2SIZE );
521
A = (TYPE *)malloc( ATL_MulBySize( LDA ) * nA );
522
B = (TYPE *)malloc( ATL_MulBySize( LDB ) * nB );
523
C = (TYPE *)malloc( ATL_MulBySize( LDC ) * N * 2 );
525
if( ( A == NULL ) || ( B == NULL ) || ( C == NULL ) )
527
l2ret = ATL_flushcache( 0 );
534
C0 = C + LDC * ( N SHIFT );
536
* Generate random operands
538
Aseed = mA * nA + 513 * 7 + 90;
539
Bseed = mB * nB + 127 * 50 + 77;
540
Cseed = M * N + 101 * 2 + 53;
542
Mjoin( PATL, gegen )( mA, nA, A, LDA, Aseed );
543
Mjoin( PATL, gegen )( mB, nB, B, LDB, Bseed );
544
Mjoin( PATL, gegen )( M, N, C, LDC, Cseed );
545
Mjoin( PATL, gegen )( M, N, C0, LDC, Cseed );
547
* Compute the norm of C for later use in testing
551
normC = Mjoin( PATL, genrm1 )( M, N, C, LDC );
552
if( Mabs1( BETA ) > ATL_rone ) normC *= Mabs1( BETA );
553
if( normC == ATL_rzero ) normC = ATL_rone;
555
else { normC = ATL_rone; }
557
* Start cold cache timing operations for the trusted routine
559
a = A; b = B; c = C0;
561
l2ret = ATL_flushcache( -1 );
563
trusted_gemm( TRANSA, TRANSB, M, N, K, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
564
ttrust = time00() - t0;
566
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
568
* Start cold cache timing operations for the tested routine
572
l2ret = ATL_flushcache( -1 );
574
test_gemm( TRANSA, TRANSB, M, N, K, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
575
ttest = time00() - t0;
577
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
579
* if timing only, I am done ... so leave.
581
l2ret = ATL_flushcache( 0 );
583
if( !( TEST ) ) { free( A ); free( B ); free( C ); return( ATL_rzero ); }
585
* else perform error check
587
normA = Mjoin( PATL, genrm1 )( mA, nA, A, LDA );
588
if( Mabs1( ALPHA ) > ATL_rone ) normA *= Mabs1( ALPHA );
589
if( normA == ATL_rzero ) normA = ATL_rone;
592
normB = Mjoin( PATL, genrm1 )( mB, nB, B, LDB );
593
if( normB == ATL_rzero ) normB = ATL_rone;
596
* Ensure the difference of the output operands is relatively tiny enough
598
normD = Mjoin( PATL, gediffnrm1 )( M, N, C, LDC, C0, LDC );
599
resid = normD / ( Mmax( normA, ATL_rone ) * Mmax( normB, ATL_rone ) *
600
Mmax( normC, ATL_rone ) * EPSILON *
601
Mmax( Mmax( M, N ), K ) );
603
if( ( resid > THRESH ) || ( resid != resid ) )
605
(void) fprintf( stderr,
606
"ERROR: resid=%f, normD=%f, normA=%f, normB=%f, normC=%f, eps=%e\n",
607
resid, normD, normA, normB, normC, EPSILON );
609
Mjoin( PATL, geprint )( "C_trusted", M, N, C0, LDC );
610
Mjoin( PATL, geprint )( "C_test", M, N, C, LDC );
621
const enum LVL3_ROUT ROUT,
623
const enum ATLAS_SIDE SIDE,
624
const enum ATLAS_UPLO UPLO,
639
double l2ret, ops, t0, ttest, ttrust;
640
TYPE normA, normB, normC, normD, resid;
641
TYPE * A = NULL, * B = NULL, * C = NULL, * C0,
643
int nA, Aseed, Bseed, Cseed;
645
*TTRUST0 = *TTEST0 = *MFTEST0 = *MFTRUST0 = 0.0;
646
if( N == 0 ) { return( ATL_rzero ); }
648
if( SIDE == AtlasLeft ) { ops = opbl3( ROUT, M, N, -1 ); nA = M; }
649
else { ops = opbl3( ROUT, M, N, 1 ); nA = N; }
651
* Allocate L2 cache space, A, B, C and C0
653
l2ret = ATL_flushcache( L2SIZE );
654
A = (TYPE *)malloc( ATL_MulBySize( LDA ) * nA );
655
B = (TYPE *)malloc( ATL_MulBySize( LDB ) * N );
656
C = (TYPE *)malloc( ATL_MulBySize( LDC ) * N * 2 );
658
if( ( A == NULL ) || ( B == NULL ) || ( C == NULL ) )
660
l2ret = ATL_flushcache( 0 );
667
C0 = C + LDC * ( N SHIFT );
669
* Generate random operands
671
Aseed = nA * nA + 513 * 7 + 90;
672
Bseed = M * N + 127 * 50 + 77;
673
Cseed = M * N + 101 * 2 + 53;
675
Mjoin( PATL, gegen )( nA, nA, A, LDA, Aseed );
676
Mjoin( PATL, gegen )( M, N, B, LDB, Bseed );
677
Mjoin( PATL, gegen )( M, N, C, LDC, Cseed );
678
Mjoin( PATL, gegen )( M, N, C0, LDC, Cseed );
680
* Compute the norm of C for later use in testing
684
normC = Mjoin( PATL, genrm1 )( M, N, C, LDC );
685
if( Mabs1( BETA ) > ATL_rone ) normC *= Mabs1( BETA );
686
if( normC == ATL_rzero ) normC = ATL_rone;
688
else { normC = ATL_rone; }
690
* Start cold cache timing operations for the trusted routine
692
a = A; b = B; c = C0;
695
l2ret = ATL_flushcache( -1 );
697
trusted_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
698
ttrust = time00() - t0;
700
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
704
l2ret = ATL_flushcache( -1 );
706
trusted_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
707
ttrust = time00() - t0;
709
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
711
else /* if( ROUT == HEMM ) */
713
l2ret = ATL_flushcache( -1 );
715
trusted_hemm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
716
ttrust = time00() - t0;
718
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
722
* Start cold cache timing operations for the tested routine
727
l2ret = ATL_flushcache( -1 );
729
test_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
730
ttest = time00() - t0;
732
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
736
l2ret = ATL_flushcache( -1 );
738
test_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
739
ttest = time00() - t0;
741
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
743
else /* if( ROUT == HEMM ) */
745
l2ret = ATL_flushcache( -1 );
747
test_hemm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
748
ttest = time00() - t0;
750
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
754
* if timing only, I am done ... so leave.
756
l2ret = ATL_flushcache( 0 );
758
if( !( TEST ) ) { free( A ); free( B ); free( C ); return( ATL_rzero ); }
760
* else perform error check
763
normA = Mjoin( PATL, synrm )( UPLO, nA, A, LDA );
765
if( ROUT == SYMM ) normA = Mjoin( PATL, synrm )( UPLO, nA, A, LDA );
766
else normA = Mjoin( PATL, henrm )( UPLO, nA, A, LDA );
768
if( Mabs1( ALPHA ) > ATL_rone ) normA *= Mabs1( ALPHA );
769
if( normA == ATL_rzero ) normA = ATL_rone;
772
normB = Mjoin( PATL, genrm1 )( M, N, B, LDB );
773
if( normB == ATL_rzero ) normB = ATL_rone;
776
* Ensure the difference of the output operands is relatively tiny enough
778
normD = Mjoin( PATL, gediffnrm1 )( M, N, C, LDC, C0, LDC );
779
resid = normD / ( Mmax( normA, ATL_rone ) * Mmax( normB, ATL_rone ) *
780
Mmax( normC, ATL_rone ) * EPSILON * Mmax( M, N ) );
782
if( ( resid > THRESH ) || ( resid != resid ) )
784
(void) fprintf( stderr,
785
"ERROR: resid=%f, normD=%f, normA=%f, normB=%f, normC=%f, eps=%e\n",
786
resid, normD, normA, normB, normC, EPSILON );
788
Mjoin( PATL, geprint )( "C_trusted", M, N, C0, LDC );
789
Mjoin( PATL, geprint )( "C_test", M, N, C, LDC );
800
const enum LVL3_ROUT ROUT,
802
const enum ATLAS_UPLO UPLO,
803
const enum ATLAS_TRANS TRANS,
818
double l2ret, ops, t0, ttest, ttrust;
819
TYPE normA, normB, normC, normD, resid;
820
TYPE * A = NULL, * B = NULL, * C = NULL, * C0,
822
int mAB, nAB, Aseed, Bseed, Cseed;
825
*TTRUST0 = *TTEST0 = *MFTEST0 = *MFTRUST0 = 0.0;
826
if( N == 0 ) { return( ATL_rzero ); }
828
if( TRANS == AtlasNoTrans )
829
{ ta = TRANS; mAB = N; nAB = K; }
831
{ ta = ( ROUT == SYR2K ? AtlasTrans : AtlasConjTrans ); mAB = K; nAB = N; }
833
ops = opbl3( ROUT, N, 0, K );
835
* Allocate L2 cache space, A, B, C and C0
837
l2ret = ATL_flushcache( L2SIZE );
838
A = (TYPE *)malloc( ATL_MulBySize( LDA ) * nAB );
839
B = (TYPE *)malloc( ATL_MulBySize( LDB ) * nAB );
840
C = (TYPE *)malloc( ATL_MulBySize( LDC ) * N * 2 );
842
if( ( A == NULL ) || ( B == NULL ) || ( C == NULL ) )
844
l2ret = ATL_flushcache( 0 );
851
C0 = C + LDC * ( N SHIFT );
853
* Generate random operands
855
Aseed = mAB * nAB + 513 * 7 + 90;
856
Bseed = mAB * nAB + 127 * 50 + 77;
857
Cseed = N * N + 101 * 2 + 53;
859
Mjoin( PATL, gegen )( mAB, nAB, A, LDA, Aseed );
860
Mjoin( PATL, gegen )( mAB, nAB, B, LDB, Bseed );
861
Mjoin( PATL, gegen )( N, N, C, LDC, Cseed );
862
Mjoin( PATL, gegen )( N, N, C0, LDC, Cseed );
864
* Compute the norm of C for later use in testing
869
normC = Mjoin( PATL, synrm )( UPLO, N, C, LDC );
870
if( Mabs1( BETA ) > ATL_rone ) normC *= Mabs1( BETA );
871
if( normC == ATL_rzero ) normC = ATL_rone;
875
normC = Mjoin( PATL, synrm )( UPLO, N, C, LDC );
876
if( Mabs1( BETA ) > ATL_rone ) normC *= Mabs1( BETA );
877
if( normC == ATL_rzero ) normC = ATL_rone;
881
normC = Mjoin( PATL, henrm )( UPLO, N, C, LDC );
882
if( Mabs( BETA[0] ) > ATL_rone ) normC *= Mabs( BETA[0] );
883
if( normC == ATL_rzero ) normC = ATL_rone;
887
else { normC = ATL_rone; }
889
* Start cold cache timing operations for the trusted routine
891
a = A; b = B; c = C0;
893
l2ret = ATL_flushcache( -1 );
895
trusted_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
896
ttrust = time00() - t0;
900
l2ret = ATL_flushcache( -1 );
902
trusted_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
903
ttrust = time00() - t0;
905
else /* if( ROUT == HER2K ) */
907
l2ret = ATL_flushcache( -1 );
909
trusted_her2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (TYPE)(BETA[0]),
911
ttrust = time00() - t0;
915
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
917
* Start cold cache timing operations for the tested routine
921
l2ret = ATL_flushcache( -1 );
923
test_syr2k( UPLO, TRANS, N, K, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
924
ttest = time00() - t0;
928
l2ret = ATL_flushcache( -1 );
930
test_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, BETA, c, LDC );
931
ttest = time00() - t0;
933
else /* if( ROUT == HER2K ) */
935
l2ret = ATL_flushcache( -1 );
937
test_her2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (TYPE)(BETA[0]),
939
ttest = time00() - t0;
943
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
945
* if timing only, I am done ... so leave.
947
l2ret = ATL_flushcache( 0 );
949
/* Timing-only run: release all three operand buffers (A, B and C) before leaving. */
if( !( TEST ) ) { free( A ); free( B ); free( C ); return( ATL_rzero ); }
951
* else perform error check
953
normA = Mjoin( PATL, genrm1 )( mAB, nAB, A, LDA );
954
if( Mabs1( ALPHA ) > ATL_rone ) normA *= Mabs1( ALPHA );
955
if( normA == ATL_rzero ) normA = ATL_rone;
958
normB = Mjoin( PATL, genrm1 )( mAB, nAB, B, LDB );
959
if( normB == ATL_rzero ) normB = ATL_rone;
962
* Ensure the difference of the output operands is relatively tiny enough
964
normD = Mjoin( PATL, gediffnrm1 )( N, N, C, LDC, C0, LDC );
965
resid = normD / ( Mmax( normC, ATL_rone ) * Mmax( normA, ATL_rone ) *
966
Mmax( normB, ATL_rone ) * EPSILON * Mmax( N, K ) );
968
if( ( resid > THRESH ) || ( resid != resid ) )
970
(void) fprintf( stderr,
971
"ERROR: resid=%f, normD=%f, normA=%f, normC=%f, eps=%e\n",
972
resid, normD, normA, normC, EPSILON );
974
Mjoin( PATL, geprint )( "C_trusted", N, N, C0, LDC );
975
Mjoin( PATL, geprint )( "C_test", N, N, C, LDC );
986
const enum LVL3_ROUT ROUT,
988
const enum ATLAS_UPLO UPLO,
989
const enum ATLAS_TRANS TRANS,
1003
double l2ret, ops, t0, ttest, ttrust;
1004
TYPE normA, normC, normD, resid;
1005
TYPE * A = NULL, * C = NULL, * C0, * a, * c;
1006
int mA, nA, Aseed, Cseed;
1007
enum ATLAS_TRANS ta;
1009
*TTRUST0 = *TTEST0 = *MFTEST0 = *MFTRUST0 = 0.0;
1010
if( N == 0 ) { return( ATL_rzero ); }
1012
if( TRANS == AtlasNoTrans )
1013
{ ta = TRANS; mA = N; nA = K; }
1015
{ ta = ( ROUT == SYRK ? AtlasTrans : AtlasConjTrans ); mA = K; nA = N; }
1017
ops = opbl3( ROUT, N, 0, K );
1019
* Allocate L2 cache space, A, C and C0
1021
l2ret = ATL_flushcache( L2SIZE );
1022
A = (TYPE *)malloc( ATL_MulBySize( LDA ) * nA );
1023
C = (TYPE *)malloc( ATL_MulBySize( LDC ) * N * 2 );
1025
if( ( A == NULL ) || ( C == NULL ) )
1027
l2ret = ATL_flushcache( 0 );
1030
return( ATL_rnone );
1033
C0 = C + LDC * ( N SHIFT );
1035
* Generate random operands
1037
Aseed = mA * nA + 513 * 7 + 90;
1038
Cseed = N * N + 101 * 2 + 53;
1040
Mjoin( PATL, gegen )( mA, nA, A, LDA, Aseed );
1041
Mjoin( PATL, gegen )( N, N, C, LDC, Cseed );
1042
Mjoin( PATL, gegen )( N, N, C0, LDC, Cseed );
1044
* Compute the norm of C for later use in testing
1049
normC = Mjoin( PATL, synrm )( UPLO, N, C, LDC );
1050
if( Mabs1( BETA ) > ATL_rone ) normC *= Mabs1( BETA );
1051
if( normC == ATL_rzero ) normC = ATL_rone;
1055
normC = Mjoin( PATL, synrm )( UPLO, N, C, LDC );
1056
if( Mabs1( BETA ) > ATL_rone ) normC *= Mabs1( BETA );
1057
if( normC == ATL_rzero ) normC = ATL_rone;
1061
normC = Mjoin( PATL, henrm )( UPLO, N, C, LDC );
1062
if( Mabs( BETA[0] ) > ATL_rone ) normC *= Mabs( BETA[0] );
1063
if( normC == ATL_rzero ) normC = ATL_rone;
1067
else { normC = ATL_rone; }
1069
* Start cold cache timing operations for the trusted routine
1073
l2ret = ATL_flushcache( -1 );
1075
trusted_syrk( UPLO, ta, N, K, ALPHA, a, LDA, BETA, c, LDC );
1076
ttrust = time00() - t0;
1080
l2ret = ATL_flushcache( -1 );
1082
trusted_syrk( UPLO, ta, N, K, ALPHA, a, LDA, BETA, c, LDC );
1083
ttrust = time00() - t0;
1085
else /* if( ROUT == HERK ) */
1087
l2ret = ATL_flushcache( -1 );
1089
trusted_herk( UPLO, ta, N, K, (TYPE)(ALPHA[0]), a, LDA, (TYPE)(BETA[0]),
1091
ttrust = time00() - t0;
1095
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
1097
* Start cold cache timing operations for the tested routine
1101
l2ret = ATL_flushcache( -1 );
1103
test_syrk( UPLO, TRANS, N, K, ALPHA, a, LDA, BETA, c, LDC );
1104
ttest = time00() - t0;
1108
l2ret = ATL_flushcache( -1 );
1110
test_syrk( UPLO, ta, N, K, ALPHA, a, LDA, BETA, c, LDC );
1111
ttest = time00() - t0;
1113
else /* if( ROUT == HERK ) */
1115
l2ret = ATL_flushcache( -1 );
1117
test_herk( UPLO, ta, N, K, (TYPE)(ALPHA[0]), a, LDA, (TYPE)(BETA[0]),
1119
ttest = time00() - t0;
1123
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
1125
* if timing only, I am done ... so leave.
1127
l2ret = ATL_flushcache( 0 );
1129
if( !( TEST ) ) { free( A ); free( C ); return( ATL_rzero ); }
1131
* else perform error check
1133
normA = Mjoin( PATL, genrm1 )( mA, nA, A, LDA );
1135
if( Mabs1( ALPHA ) > ATL_rone ) normA *= Mabs1( ALPHA );
1138
{ if( Mabs1( ALPHA ) > ATL_rone ) normA *= Mabs1( ALPHA ); }
1140
{ if( Mabs( ALPHA[0] ) > ATL_rone ) normA *= Mabs( ALPHA[0] ); }
1142
if( normA == ATL_rzero ) normA = ATL_rone;
1145
* Ensure the difference of the output operands is relatively tiny enough
1147
normD = Mjoin( PATL, gediffnrm1 )( N, N, C, LDC, C0, LDC );
1148
resid = normD / ( Mmax( normC, ATL_rone ) * Mmax( normA, ATL_rone ) *
1149
EPSILON * Mmax( N, K ) );
1151
if( ( resid > THRESH ) || ( resid != resid ) )
1153
(void) fprintf( stderr,
1154
"ERROR: resid=%f, normD=%f, normA=%f, normC=%f, eps=%e\n",
1155
resid, normD, normA, normC, EPSILON );
1157
Mjoin( PATL, geprint )( "C_trusted", N, N, C0, LDC );
1158
Mjoin( PATL, geprint )( "C_test", N, N, C, LDC );
1169
const enum LVL3_ROUT ROUT,
1171
const enum ATLAS_SIDE SIDE,
1172
const enum ATLAS_UPLO UPLO,
1173
const enum ATLAS_TRANS TRANS,
1174
const enum ATLAS_DIAG DIAG,
1187
double l2ret, ops, t0, ttest, ttrust;
1188
TYPE normA, normB, normD, resid;
1189
TYPE * A = NULL, * B = NULL, * B0, * a, * b;
1190
int nA, Aseed, Bseed;
1192
*TTRUST0 = *TTEST0 = *MFTEST0 = *MFTRUST0 = 0.0;
1193
if( ( M == 0 ) || ( N == 0 ) ) { return( ATL_rzero ); }
1195
if( SIDE == AtlasLeft ) { nA = M; ops = opbl3( ROUT, M, N, -1 ); }
1196
else { nA = N; ops = opbl3( ROUT, M, N, 1 ); }
1198
* Allocate L2 cache space, A, B and B0
1200
l2ret = ATL_flushcache( L2SIZE );
1201
A = (TYPE *)malloc( ATL_MulBySize( LDA ) * nA );
1202
B = (TYPE *)malloc( ATL_MulBySize( LDB ) * N * 2 );
1204
if( ( A == NULL ) || ( B == NULL ) )
1206
l2ret = ATL_flushcache( 0 );
1209
return( ATL_rnone );
1212
B0 = B + LDB * ( N SHIFT );
1214
* Generate random operands
1216
Aseed = nA * nA + 513 * 7 + 90;
1217
Bseed = M * N + 127 * 50 + 77;
1219
Mjoin( PATL, gegen )( nA, nA, A, LDA, Aseed );
1220
Mjoin( PATL, gegen )( M, N, B, LDB, Bseed );
1221
Mjoin( PATL, gegen )( M, N, B0, LDB, Bseed );
1223
* Compute the norm of B for later use in testing
1227
normB = Mjoin( PATL, genrm1 )( M, N, B, LDB );
1228
if( Mabs1( ALPHA ) > ATL_rone ) normB *= Mabs1( ALPHA );
1229
if( normB == ATL_rzero ) normB = ATL_rone;
1231
else { normB = ATL_rone; }
1233
* Start cold cache timing operations for the trusted routine
1237
l2ret = ATL_flushcache( -1 );
1239
trusted_trmm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
1240
ttrust = time00() - t0;
1242
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
1244
* Start cold cache timing operations for the tested routine
1248
l2ret = ATL_flushcache( -1 );
1250
test_trmm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
1251
ttest = time00() - t0;
1253
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
1255
* if timing only, I am done ... so leave.
1257
l2ret = ATL_flushcache( 0 );
1259
if( !( TEST ) ) { free( A ); free( B ); return( ATL_rzero ); }
1261
* else perform error check
1263
normA = Mjoin( PATL, trnrm1 )( UPLO, DIAG, nA, A, LDA );
1264
if( normA == ATL_rzero ) normA = ATL_rone;
1267
* Ensure the difference of the output operands is relatively tiny enough
1269
normD = Mjoin( PATL, gediffnrm1 )( M, N, B, LDB, B0, LDB );
1270
resid = normD / ( Mmax( normA, ATL_rone ) * Mmax( normB, ATL_rone ) *
1271
EPSILON * Mmax( M, N ) );
1273
if( ( resid > THRESH ) || ( resid != resid ) )
1275
(void) fprintf( stderr,
1276
"ERROR: resid=%f, normD=%f, normA=%f, normB=%f, eps=%e\n",
1277
resid, normD, normA, normB, EPSILON );
1279
Mjoin( PATL, geprint )( "B_trusted", M, N, B0, LDB );
1280
Mjoin( PATL, geprint )( "B_test", M, N, B, LDB );
1291
/*
 * Fragment of the one-shot TRSM timing and verification routine.  The line
 * carrying the function name, its braces and a number of statements are not
 * present in this listing (the bare integers are leftover line numbers), so
 * only what the remaining lines show is described here.
 *
 * Visible behaviour: zeroes the four outputs, returns ATL_rzero for an empty
 * problem, allocates A and a double-width B, fills them, calls trusted_trsm
 * and test_trsm once each after ATL_flushcache( -1 ), stores the times and
 * MFLOP rates through TTRUST0, TTEST0, MFTRUST0 and MFTEST0, and when TEST is
 * set computes a scaled residual from the two results.
 * Visible return values: ATL_rzero (empty problem, or timing only) and
 * ATL_rnone (allocation failure).
 */
const enum LVL3_ROUT ROUT,
1293
const enum ATLAS_SIDE SIDE,
1294
const enum ATLAS_UPLO UPLO,
1295
const enum ATLAS_TRANS TRANS,
1296
const enum ATLAS_DIAG DIAG,
1309
double l2ret, ops, t0, ttest, ttrust;
1310
TYPE normA, normB, normD, resid;
1311
TYPE * A = NULL, * B = NULL, * B0, * a, * b;
1312
int nA, Aseed, Bseed;
1314
/* Outputs default to zero so every early return leaves them defined. */
*TTRUST0 = *TTEST0 = *MFTEST0 = *MFTRUST0 = 0.0;
1315
if( ( M == 0 ) || ( N == 0 ) ) { return( ATL_rzero ); }
1317
/* A is generated below as nA-by-nA; nA is M for AtlasLeft, N otherwise. */
if( SIDE == AtlasLeft ) { nA = M; ops = opbl3( ROUT, M, N, -1 ); }
1318
else { nA = N; ops = opbl3( ROUT, M, N, 1 ); }
1320
* Allocate L2 cache space, A, X and X0
1322
/*
 * NOTE(review): the remnant comment above mentions X and X0; the buffers
 * actually allocated here are A and B, with B0 placed inside B further down.
 */
l2ret = ATL_flushcache( L2SIZE );
1323
A = (TYPE *)malloc( ATL_MulBySize( LDA ) * nA );
1324
/* B is sized for 2 * N columns of leading dimension LDB. */
B = (TYPE *)malloc( ATL_MulBySize( LDB ) * N * 2 );
1326
/*
 * Allocation failure path.  Only the cache release and the ATL_rnone return
 * are visible in this listing.
 */
if( ( A == NULL ) || ( B == NULL ) )
1328
l2ret = ATL_flushcache( 0 );
1331
return( ATL_rnone );
1334
/* B0 is the second half of the B allocation. */
B0 = B + LDB * ( N SHIFT );
1336
* Generate random operands
1338
Aseed = nA * nA + 513 * 7 + 90;
1339
Bseed = M * N + 127 * 50 + 77;
1341
/*
 * trddom is applied to A right after generation -- presumably to keep the
 * triangular solve well conditioned; confirm against trddom itself.
 */
Mjoin( PATL, gegen )( nA, nA, A, LDA, Aseed ); trddom( UPLO, nA, A, LDA );
1342
/*
 * B and B0 use the same seed, so both presumably start with identical
 * contents (assumes gegen is deterministic for a given seed).
 */
Mjoin( PATL, gegen )( M, N, B, LDB, Bseed );
1343
Mjoin( PATL, gegen )( M, N, B0, LDB, Bseed );
1345
* Compute the norm of B for later use in testing
1349
normB = Mjoin( PATL, genrm1 )( M, N, B, LDB );
1350
/* Scale normB by Mabs1( ALPHA ) only when that value exceeds ATL_rone. */
if( Mabs1( ALPHA ) > ATL_rone ) normB *= Mabs1( ALPHA );
1351
if( normB == ATL_rzero ) normB = ATL_rone;
1353
/* NOTE(review): the condition this else belongs to is not visible here. */
else { normB = ATL_rone; }
1355
* Start cold cache timing operations for the trusted routine
1359
l2ret = ATL_flushcache( -1 );
1361
/*
 * a and b are the operands actually passed; their assignment and the read
 * of t0 are not visible in this listing.
 */
trusted_trsm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
1362
ttrust = time00() - t0;
1364
/* Rate is ops / ( elapsed * MEGA ). */
{ *TTRUST0 = ttrust; *MFTRUST0 = ops / ( ttrust * MEGA ); }
1366
* Start cold cache timing operations for the tested routine
1370
l2ret = ATL_flushcache( -1 );
1372
test_trsm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
1373
ttest = time00() - t0;
1375
{ *TTEST0 = ttest; *MFTEST0 = ops / ( ttest * MEGA ); }
1377
* if timing only, I am done ... so leave.
1379
l2ret = ATL_flushcache( 0 );
1381
/* Timing-only run: release both buffers and return ATL_rzero. */
if( !( TEST ) ) { free( A ); free( B ); return( ATL_rzero ); }
1383
* else perform error check
1385
normA = Mjoin( PATL, trnrm1 )( UPLO, DIAG, nA, A, LDA );
1386
if( normA == ATL_rzero ) normA = ATL_rone;
1389
* Ensure the difference of the output operands is relatively tiny enough
1391
normD = Mjoin( PATL, gediffnrm1 )( M, N, B, LDB, B0, LDB );
1392
/*
 * resid = normD / ( max( normA, 1 ) * max( normB, 1 ) * EPSILON * max( M, N ) )
 */
resid = normD / ( Mmax( normA, ATL_rone ) * Mmax( normB, ATL_rone ) *
1393
EPSILON * Mmax( M, N ) );
1395
/* resid != resid holds only for NaN, so a NaN residual is reported too. */
if( ( resid > THRESH ) || ( resid != resid ) )
1397
(void) fprintf( stderr,
1398
"ERROR: resid=%f, normD=%f, normA=%f, normB=%f, eps=%e\n",
1399
resid, normD, normA, normB, EPSILON );
1401
/* Dump both matrices: B0 is labelled trusted, B is labelled test. */
Mjoin( PATL, geprint )( "B_trusted", M, N, B0, LDB );
1402
Mjoin( PATL, geprint )( "B_test", M, N, B, LDB );
1411
* =====================================================================
1413
* =====================================================================
1417
/*
 * Fragment of the GEMM case driver (presumably the gemmcase called by the
 * GEMM run loop in this file).  The function-name line, braces, preprocessor
 * conditionals and some statements are absent from this listing; the bare
 * integers are leftover line numbers.
 *
 * Visible behaviour:
 *  - when the problem needs at least MEGA * MFLOP flops, or TEST is set,
 *    gemmtst is called once and passed is derived from its residual
 *    (-1 for a negative residual, otherwise resid < THRESH);
 *  - a problem of at least MEGA * MFLOP flops returns passed right away;
 *  - otherwise three workspaces are allocated, trusted_gemm and test_gemm
 *    are each called reps times while stepping through them, and the
 *    per-call time and MFLOP rate are stored through TTRUST0, TTEST0,
 *    MFTRUST0 and MFTEST0.
 */
const enum LVL3_ROUT ROUT,
1420
const enum ATLAS_TRANS TRANSA,
1421
const enum ATLAS_TRANS TRANSB,
1437
double flops, ttrust, ttest, mftrust, mftest, t0;
1438
TYPE resid = ATL_rzero;
1440
/*
 * Two alternative declarations of bet / beta / nbeta follow (scalar form and
 * two-element form).  The conditional selecting one of them is not visible
 * in this listing.
 */
TYPE bet, beta, nbeta;
1442
TYPE *bet, beta[2], nbeta[2];
1444
TYPE * a, * stA, *b, * stB, * c, * stC, * A,
1445
* A0 = NULL, * B, * B0 = NULL, * C, * C0 = NULL;
1446
unsigned long ir, reps;
1447
int inca, incb, incc, lA, lB, lC, mA, nA, mB, nB,
1448
passed, Aseed, Bseed, Cseed;
1450
/* flops is assigned inside the condition and reused below. */
if( ( MEGA * MFLOP <= ( flops = opbl3( ROUT, M, N, K ) ) ) || ( TEST ) )
1452
resid = gemmtst( ROUT, TEST, TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB,
1453
BETA, LDC, EPSILON, TTRUST0, TTEST0, MFTRUST0, MFTEST0 );
1454
if( resid > THRESH ) (void) fprintf( stderr, " resid=%f\n", resid );
1456
/* A negative residual maps to passed = -1; otherwise pass means resid < THRESH. */
if( resid < ATL_rzero ) passed = -1;
1457
else passed = ( resid < THRESH );
1459
/* Large problems were already timed by the single gemmtst call. */
if( MEGA * MFLOP <= flops ) return( passed );
1461
/* Stored dimensions of A and B depend on the transpose settings. */
if( TRANSA == AtlasNoTrans ) { mA = M; nA = K; } else { mA = K; nA = M; }
1462
if( TRANSB == AtlasNoTrans ) { mB = K; nB = N; } else { mB = N; nB = K; }
1464
/* Distance between consecutive operand copies inside each workspace. */
inca = LDA * ( nA SHIFT );
1465
incb = LDB * ( nB SHIFT );
1466
incc = LDC * ( N SHIFT );
1468
/*
 * ( x + d - 1 ) / d rounds up, so each workspace holds the smallest whole
 * number of operand copies whose element count reaches
 * ATL_DivBySize( LCSIZE ) -- presumably LCSIZE converted from bytes to
 * elements; confirm against the macro.
 */
lA = inca * ( ( ATL_DivBySize( LCSIZE ) + mA*nA - 1 ) / ( mA * nA ) );
1469
lB = incb * ( ( ATL_DivBySize( LCSIZE ) + mB*nB - 1 ) / ( mB * nB ) );
1470
lC = incc * ( ( ATL_DivBySize( LCSIZE ) + M * N - 1 ) / ( M * N ) );
1472
A0 = (TYPE *)malloc( ATL_MulBySize( lA ) );
1473
B0 = (TYPE *)malloc( ATL_MulBySize( lB ) );
1474
C0 = (TYPE *)malloc( ATL_MulBySize( lC ) );
1476
/*
 * On any allocation failure free whatever was obtained.  The return that
 * presumably follows is not visible in this listing.
 */
if( ( A0 == NULL ) || ( B0 == NULL ) || ( C0 == NULL ) )
1478
if( A0 ) free( A0 );
1479
if( B0 ) free( B0 );
1480
if( C0 ) free( C0 );
1484
/* stA, stB and stC are the wrap-around marks tested in the timing loops. */
A = A0; stA = A0 + ( lA SHIFT );
1485
B = B0; stB = B0 + ( lB SHIFT );
1486
C = C0; stC = C0 + ( lC SHIFT );
1492
/* Two-element form: beta copies BETA, nbeta holds its negation. */
*beta = *BETA; beta [1] = BETA[1];
1493
*nbeta = -(*BETA); nbeta[1] = -BETA[1];
1496
Aseed = mA * nA + 513 * 7 + 90;
1497
Bseed = mB * nB + 127 * 50 + 77;
1498
Cseed = M * N + 101 * 2 + 53;
1500
/* Repetition count chosen so that reps * flops is about MEGA * MFLOP. */
reps = ( MEGA * MFLOP ) / flops;
1502
* Generate the random data and time the trusted routine
1504
bet = beta; a = A; b = B; c = C;
1506
/* Each whole workspace is filled as one long column. */
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
1507
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
1508
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
1511
for( ir = reps; ir; ir-- )
1513
trusted_gemm( TRANSA, TRANSB, M, N, K, ALPHA, a, LDA, b, LDB,
1514
(SCALAR)(bet), c, LDC );
1515
/* Step to the next copy and wrap at the end mark. */
a += inca; if( a == stA ) { a = A; }
1516
b += incb; if( b == stB ) { b = B; }
1518
/*
 * When c wraps, alternate between beta and nbeta.  The statement that
 * advances c is not visible in this listing.
 */
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1520
ttrust = time00() - t0;
1521
/*
 * mftrust is assigned here only for a positive elapsed time; the
 * alternative branch is not visible in this listing.
 */
if( ttrust > 0.0 ) mftrust = ( reps * flops ) / ( MEGA * ttrust );
1523
/* Report the average time per call. */
ttrust /= reps; *TTRUST0 = ttrust; *MFTRUST0 = mftrust;
1525
* Generate the random data and time the tested routine
1527
bet = beta; a = A; b = B; c = C;
1529
/* Same seeds as before, so the tested routine sees regenerated data. */
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
1530
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
1531
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
1534
for( ir = reps; ir; ir-- )
1536
test_gemm( TRANSA, TRANSB, M, N, K, ALPHA, a, LDA, b, LDB,
1537
(SCALAR)(bet), c, LDC );
1538
a += inca; if( a == stA ) { a = A; }
1539
b += incb; if( b == stB ) { b = B; }
1541
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1543
ttest = time00() - t0;
1544
if( ttest > 0.0 ) mftest = ( reps * flops ) / ( MEGA * ttest );
1546
ttest /= reps; *TTEST0 = ttest; *MFTEST0 = mftest;
1548
* release the memory and exit
1559
/*
 * Fragment of the SYMM / HEMM case driver (presumably the symmcase called by
 * the SYMM run loop in this file).  The function-name line, braces,
 * preprocessor conditionals, the continuation lines of several calls and
 * some statements are absent from this listing; the bare integers are
 * leftover line numbers.
 *
 * Visible behaviour: runs symmtst once when the problem needs at least
 * MEGA * MFLOP flops or TEST is set, returns passed immediately for such
 * large problems, and otherwise times reps calls of the trusted and tested
 * routines over workspaces of operand copies, storing the per-call time and
 * MFLOP rate through TTRUST0, TTEST0, MFTRUST0 and MFTEST0.
 */
const enum LVL3_ROUT ROUT,
1562
const enum ATLAS_SIDE SIDE,
1563
const enum ATLAS_UPLO UPLO,
1578
double flops, ttrust, ttest, mftrust, mftest, t0;
1579
TYPE resid = ATL_rzero;
1581
/*
 * Two alternative declarations of bet / beta / nbeta follow; the conditional
 * selecting one of them is not visible in this listing.
 */
TYPE bet, beta, nbeta;
1583
TYPE *bet, beta[2], nbeta[2];
1585
TYPE * a, * stA, *b, * stB, * c, * stC, * A,
1586
* A0 = NULL, * B, * B0 = NULL, * C, * C0 = NULL;
1587
unsigned long ir, reps;
1588
int inca, incb, incc, lA, lB, lC, nA, passed, Aseed,
1591
/* The last opbl3 argument is -1 for AtlasLeft and 1 otherwise. */
flops = opbl3( ROUT, M, N, ( SIDE == AtlasLeft ? -1 : 1 ) );
1593
if( ( MEGA * MFLOP <= flops ) || ( TEST ) )
1595
resid = symmtst( ROUT, TEST, SIDE, UPLO, M, N, ALPHA, LDA, LDB, BETA,
1596
LDC, EPSILON, TTRUST0, TTEST0, MFTRUST0, MFTEST0 );
1597
if( resid > THRESH ) (void) fprintf( stderr, " resid=%f\n", resid );
1599
/* A negative residual maps to passed = -1; otherwise pass means resid < THRESH. */
if( resid < ATL_rzero ) passed = -1;
1600
else passed = ( resid < THRESH );
1602
if( MEGA * MFLOP <= flops ) return( passed );
1604
/* A is generated as nA-by-nA (see Aseed below): M on the left, N on the right. */
if( SIDE == AtlasLeft ) { nA = M; } else { nA = N; }
1606
inca = LDA * ( nA SHIFT );
1607
incb = LDB * ( N SHIFT );
1608
incc = LDC * ( N SHIFT );
1610
/*
 * ( x + d - 1 ) / d rounds up: each workspace holds the smallest whole number
 * of operand copies whose element count reaches ATL_DivBySize( LCSIZE ).
 */
lA = inca * ( ( ATL_DivBySize( LCSIZE ) + nA*nA - 1 ) / ( nA * nA ) );
1611
lB = incb * ( ( ATL_DivBySize( LCSIZE ) + M * N - 1 ) / ( M * N ) );
1612
lC = incc * ( ( ATL_DivBySize( LCSIZE ) + M * N - 1 ) / ( M * N ) );
1614
A0 = (TYPE *)malloc( ATL_MulBySize( lA ) );
1615
B0 = (TYPE *)malloc( ATL_MulBySize( lB ) );
1616
C0 = (TYPE *)malloc( ATL_MulBySize( lC ) );
1618
/*
 * On any allocation failure free whatever was obtained.  The return that
 * presumably follows is not visible in this listing.
 */
if( ( A0 == NULL ) || ( B0 == NULL ) || ( C0 == NULL ) )
1620
if( A0 ) free( A0 );
1621
if( B0 ) free( B0 );
1622
if( C0 ) free( C0 );
1626
/* stA, stB and stC are the wrap-around marks tested in the timing loops. */
A = A0; stA = A0 + ( lA SHIFT );
1627
B = B0; stB = B0 + ( lB SHIFT );
1628
C = C0; stC = C0 + ( lC SHIFT );
1634
/* Two-element form: beta copies BETA, nbeta holds its negation. */
*beta = *BETA; beta [1] = BETA[1];
1635
*nbeta = -(*BETA); nbeta[1] = -BETA[1];
1638
Aseed = nA * nA + 513 * 7 + 90;
1639
Bseed = M * N + 127 * 50 + 77;
1640
Cseed = M * N + 101 * 2 + 53;
1642
/* Repetition count chosen so that reps * flops is about MEGA * MFLOP. */
reps = ( MEGA * MFLOP ) / flops;
1644
* Generate the random data and time the trusted routine
1646
bet = beta; a = A; b = B; c = C;
1648
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
1649
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
1650
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
1654
/*
 * Three timing loops follow (two calling trusted_symm, one calling
 * trusted_hemm).  The conditionals selecting among them, and the second line
 * of each call, are not visible in this listing.
 */
for( ir = reps; ir; ir-- )
1656
trusted_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1658
a += inca; if( a == stA ) { a = A; }
1659
b += incb; if( b == stB ) { b = B; }
1661
/* When c wraps, alternate between beta and nbeta. */
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1663
ttrust = time00() - t0;
1668
for( ir = reps; ir; ir-- )
1670
trusted_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1672
a += inca; if( a == stA ) { a = A; }
1673
b += incb; if( b == stB ) { b = B; }
1676
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1678
ttrust = time00() - t0;
1680
else /* if( ROUT == HEMM ) */
1683
for( ir = reps; ir; ir-- )
1685
trusted_hemm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1687
a += inca; if( a == stA ) { a = A; }
1688
b += incb; if( b == stB ) { b = B; }
1691
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1693
ttrust = time00() - t0;
1696
/*
 * mftrust is assigned here only for a positive elapsed time; the
 * alternative branch is not visible in this listing.
 */
if( ttrust > 0.0 ) mftrust = ( reps * flops ) / ( MEGA * ttrust );
1698
/* Report the average time per call. */
ttrust /= reps; *TTRUST0 = ttrust; *MFTRUST0 = mftrust;
1700
* Generate the random data and time the tested routine
1702
bet = beta; a = A; b = B; c = C;
1704
/* Same seeds as before, so the tested routine sees regenerated data. */
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
1705
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
1706
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
1710
for( ir = reps; ir; ir-- )
1712
test_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1714
a += inca; if( a == stA ) { a = A; }
1715
b += incb; if( b == stB ) { b = B; }
1717
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1719
ttest = time00() - t0;
1724
for( ir = reps; ir; ir-- )
1726
test_symm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1728
a += inca; if( a == stA ) { a = A; }
1729
b += incb; if( b == stB ) { b = B; }
1732
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1734
ttest = time00() - t0;
1736
else /* if( ROUT == HEMM ) */
1739
for( ir = reps; ir; ir-- )
1741
test_hemm( SIDE, UPLO, M, N, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1743
a += inca; if( a == stA ) { a = A; }
1744
b += incb; if( b == stB ) { b = B; }
1747
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1749
ttest = time00() - t0;
1752
if( ttest > 0.0 ) mftest = ( reps * flops ) / ( MEGA * ttest );
1754
ttest /= reps; *TTEST0 = ttest; *MFTEST0 = mftest;
1756
* release the memory and exit
1767
/*
 * Fragment of the SYR2K / HER2K case driver.  The function-name line,
 * braces, preprocessor conditionals, the continuation lines of several calls
 * and some statements are absent from this listing; the bare integers are
 * leftover line numbers.
 *
 * Visible behaviour: runs syr2ktst once when the problem needs at least
 * MEGA * MFLOP flops or TEST is set, returns passed immediately for such
 * large problems, and otherwise times reps calls of the trusted and tested
 * routines over workspaces of operand copies, storing the per-call time and
 * MFLOP rate through TTRUST0, TTEST0, MFTRUST0 and MFTEST0.
 */
const enum LVL3_ROUT ROUT,
1770
const enum ATLAS_UPLO UPLO,
1771
const enum ATLAS_TRANS TRANS,
1786
double flops, ttrust, ttest, mftrust, mftest, t0;
1787
TYPE resid = ATL_rzero;
1789
/*
 * Two alternative declarations of bet / beta / nbeta follow; the conditional
 * selecting one of them is not visible in this listing.
 */
TYPE bet, beta, nbeta;
1791
TYPE *bet, beta[2], nbeta[2];
1793
TYPE * a, * stA, *b, * stB, * c, * stC, * A,
1794
* A0 = NULL, * B, * B0 = NULL, * C, * C0 = NULL;
1795
unsigned long ir, reps;
1796
int inca, incb, incc, lA, lB, lC, mAB, nAB, passed,
1797
Aseed, Bseed, Cseed;
1798
enum ATLAS_TRANS ta;
1800
/* flops is assigned inside the condition and reused below. */
if( ( MEGA * MFLOP <= ( flops = opbl3( ROUT, N, 0, K ) ) ) || ( TEST ) )
1802
resid = syr2ktst( ROUT, TEST, UPLO, TRANS, N, K, ALPHA, LDA, LDB, BETA,
1803
LDC, EPSILON, TTRUST0, TTEST0, MFTRUST0, MFTEST0 );
1804
if( resid > THRESH ) (void) fprintf( stderr, " resid=%f\n", resid );
1806
/* A negative residual maps to passed = -1; otherwise pass means resid < THRESH. */
if( resid < ATL_rzero ) passed = -1;
1807
else passed = ( resid < THRESH );
1809
if( MEGA * MFLOP <= flops ) return( passed );
1811
/*
 * ta is the transpose flag passed on to the kernels: TRANS itself for
 * AtlasNoTrans, otherwise AtlasTrans for SYR2K and AtlasConjTrans for any
 * other ROUT.  The else keyword between the two branches is not visible in
 * this listing.
 */
if( TRANS == AtlasNoTrans )
1812
{ ta = TRANS; mAB = N; nAB = K; }
1814
{ ta = ( ROUT == SYR2K ? AtlasTrans : AtlasConjTrans ); mAB = K; nAB = N; }
1816
inca = LDA * ( nAB SHIFT );
1817
incb = LDB * ( nAB SHIFT );
1818
incc = LDC * ( N SHIFT );
1820
/*
 * ( x + d - 1 ) / d rounds up: each workspace holds the smallest whole number
 * of operand copies whose element count reaches ATL_DivBySize( LCSIZE ).
 */
lA = inca * ( ( ATL_DivBySize( LCSIZE ) + mAB*nAB - 1 ) / ( mAB * nAB ) );
1821
lB = incb * ( ( ATL_DivBySize( LCSIZE ) + mAB*nAB - 1 ) / ( mAB * nAB ) );
1822
lC = incc * ( ( ATL_DivBySize( LCSIZE ) + N * N - 1 ) / ( N * N ) );
1824
A0 = (TYPE *)malloc( ATL_MulBySize( lA ) );
1825
B0 = (TYPE *)malloc( ATL_MulBySize( lB ) );
1826
C0 = (TYPE *)malloc( ATL_MulBySize( lC ) );
1828
/*
 * On any allocation failure free whatever was obtained.  The return that
 * presumably follows is not visible in this listing.
 */
if( ( A0 == NULL ) || ( B0 == NULL ) || ( C0 == NULL ) )
1830
if( A0 ) free( A0 );
1831
if( B0 ) free( B0 );
1832
if( C0 ) free( C0 );
1836
/* stA, stB and stC are the wrap-around marks tested in the timing loops. */
A = A0; stA = A0 + ( lA SHIFT );
1837
B = B0; stB = B0 + ( lB SHIFT );
1838
C = C0; stC = C0 + ( lC SHIFT );
1844
/* Two-element form: beta copies BETA, nbeta holds its negation. */
*beta = *BETA; beta [1] = BETA[1];
1845
*nbeta = -(*BETA); nbeta[1] = -BETA[1];
1848
Aseed = mAB * nAB + 513 * 7 + 90;
1849
Bseed = mAB * nAB + 127 * 50 + 77;
1850
Cseed = N * N + 101 * 2 + 53;
1852
/* Repetition count chosen so that reps * flops is about MEGA * MFLOP. */
reps = ( MEGA * MFLOP ) / flops;
1854
* Generate the random data and time the trusted routine
1856
bet = beta; a = A; b = B; c = C;
1858
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
1859
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
1860
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
1864
/*
 * Three timing loops follow (two calling trusted_syr2k, one calling
 * trusted_her2k).  The conditionals selecting among them, and the second
 * line of each call, are not visible in this listing.
 */
for( ir = reps; ir; ir-- )
1866
trusted_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1868
a += inca; if( a == stA ) { a = A; }
1869
b += incb; if( b == stB ) { b = B; }
1871
/* When c wraps, alternate between beta and nbeta. */
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1873
ttrust = time00() - t0;
1878
for( ir = reps; ir; ir-- )
1880
trusted_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1882
a += inca; if( a == stA ) { a = A; }
1883
b += incb; if( b == stB ) { b = B; }
1886
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1888
ttrust = time00() - t0;
1890
else /* if( ROUT == HER2K ) */
1893
for( ir = reps; ir; ir-- )
1895
/* HER2K receives only the first element of bet, cast to TYPE. */
trusted_her2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (TYPE)(bet[0]),
1897
a += inca; if( a == stA ) { a = A; }
1898
b += incb; if( b == stB ) { b = B; }
1901
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1903
ttrust = time00() - t0;
1906
/*
 * mftrust is assigned here only for a positive elapsed time; the
 * alternative branch is not visible in this listing.
 */
if( ttrust > 0.0 ) mftrust = ( reps * flops ) / ( MEGA * ttrust );
1908
/* Report the average time per call. */
ttrust /= reps; *TTRUST0 = ttrust; *MFTRUST0 = mftrust;
1910
* Generate the random data and time the tested routine
1912
bet = beta; a = A; b = B; c = C;
1914
/* Same seeds as before, so the tested routine sees regenerated data. */
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
1915
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
1916
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
1920
for( ir = reps; ir; ir-- )
1922
test_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1924
a += inca; if( a == stA ) { a = A; }
1925
b += incb; if( b == stB ) { b = B; }
1927
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1929
ttest = time00() - t0;
1934
for( ir = reps; ir; ir-- )
1936
test_syr2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (SCALAR)(bet),
1938
a += inca; if( a == stA ) { a = A; }
1939
b += incb; if( b == stB ) { b = B; }
1942
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1944
ttest = time00() - t0;
1946
else /* if( ROUT == HER2K ) */
1949
for( ir = reps; ir; ir-- )
1951
test_her2k( UPLO, ta, N, K, ALPHA, a, LDA, b, LDB, (TYPE)(bet[0]),
1953
a += inca; if( a == stA ) { a = A; }
1954
b += incb; if( b == stB ) { b = B; }
1957
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
1959
ttest = time00() - t0;
1962
if( ttest > 0.0 ) mftest = ( reps * flops ) / ( MEGA * ttest );
1964
ttest /= reps; *TTEST0 = ttest; *MFTEST0 = mftest;
1966
* release the memory and exit
1977
/*
 * Fragment of the SYRK / HERK case driver.  The function-name line, braces,
 * preprocessor conditionals and some statements are absent from this
 * listing; the bare integers are leftover line numbers.
 *
 * Visible behaviour: runs syrktst once when the problem needs at least
 * MEGA * MFLOP flops or TEST is set, returns passed immediately for such
 * large problems, returns -1 when a workspace cannot be allocated, and
 * otherwise times reps calls of the trusted and tested routines over
 * workspaces of operand copies, storing the per-call time and MFLOP rate
 * through TTRUST0, TTEST0, MFTRUST0 and MFTEST0.
 */
const enum LVL3_ROUT ROUT,
1980
const enum ATLAS_UPLO UPLO,
1981
const enum ATLAS_TRANS TRANS,
1995
double flops, ttrust, ttest, mftrust, mftest, t0;
1996
TYPE resid = ATL_rzero;
1998
/*
 * Two alternative declarations of bet / beta / nbeta follow; the conditional
 * selecting one of them is not visible in this listing.
 */
TYPE bet, beta, nbeta;
2000
TYPE *bet, beta[2], nbeta[2];
2002
/* NOTE(review): the continuation of this declaration is not visible here. */
TYPE * a, * stA, * c, * stC, * A, * A0 = NULL,
2004
unsigned long ir, reps;
2005
int inca, incc, lA, lC, mA, nA, passed, Aseed, Cseed;
2006
enum ATLAS_TRANS ta;
2008
/* flops is assigned inside the condition and reused below. */
if( ( MEGA * MFLOP <= ( flops = opbl3( ROUT, N, 0, K ) ) ) || ( TEST ) )
2010
resid = syrktst( ROUT, TEST, UPLO, TRANS, N, K, ALPHA, LDA, BETA,
2011
LDC, EPSILON, TTRUST0, TTEST0, MFTRUST0, MFTEST0 );
2012
if( resid > THRESH ) (void) fprintf( stderr, " resid=%f\n", resid );
2014
/* A negative residual maps to passed = -1; otherwise pass means resid < THRESH. */
if( resid < ATL_rzero ) passed = -1;
2015
else passed = ( resid < THRESH );
2017
if( MEGA * MFLOP <= flops ) return( passed );
2019
/*
 * ta is the transpose flag passed on to the kernels: TRANS itself for
 * AtlasNoTrans, otherwise AtlasTrans for SYRK and AtlasConjTrans for any
 * other ROUT.  The else keyword between the two branches is not visible in
 * this listing.
 */
if( TRANS == AtlasNoTrans )
2020
{ ta = TRANS; mA = N; nA = K; }
2022
{ ta = ( ROUT == SYRK ? AtlasTrans : AtlasConjTrans ); mA = K; nA = N; }
2024
inca = LDA * ( nA SHIFT );
2025
incc = LDC * ( N SHIFT );
2027
/*
 * ( x + d - 1 ) / d rounds up: each workspace holds the smallest whole number
 * of operand copies whose element count reaches ATL_DivBySize( LCSIZE ).
 */
lA = inca * ( ( ATL_DivBySize( LCSIZE ) + mA*nA - 1 ) / ( mA * nA ) );
2028
lC = incc * ( ( ATL_DivBySize( LCSIZE ) + N * N - 1 ) / ( N * N ) );
2030
A0 = (TYPE *)malloc( ATL_MulBySize( lA ) );
2031
C0 = (TYPE *)malloc( ATL_MulBySize( lC ) );
2033
/* Allocation failure: free whatever was obtained and return -1. */
if( ( A0 == NULL ) || ( C0 == NULL ) )
2034
{ if( A0 ) free( A0 ); if( C0 ) free( C0 ); return( -1 ); }
2036
/* stA and stC are the wrap-around marks tested in the timing loops. */
A = A0; stA = A0 + ( lA SHIFT );
2037
C = C0; stC = C0 + ( lC SHIFT );
2043
/* Two-element form: beta copies BETA, nbeta holds its negation. */
*beta = *BETA; beta [1] = BETA[1];
2044
*nbeta = -(*BETA); nbeta[1] = -BETA[1];
2047
Aseed = mA * nA + 513 * 7 + 90;
2048
Cseed = N * N + 101 * 2 + 53;
2050
/* Repetition count chosen so that reps * flops is about MEGA * MFLOP. */
reps = ( MEGA * MFLOP ) / flops;
2052
* Generate the random data and time the trusted routine
2054
bet = beta; a = A; c = C;
2056
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
2057
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
2061
/*
 * Three timing loops follow (two calling trusted_syrk, one calling
 * trusted_herk).  The conditionals selecting among them are not visible in
 * this listing.
 */
for( ir = reps; ir; ir-- )
2063
trusted_syrk( UPLO, ta, N, K, ALPHA, a, LDA, (SCALAR)(bet), c, LDC );
2064
a += inca; if( a == stA ) { a = A; }
2066
/* When c wraps, alternate between beta and nbeta. */
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
2068
ttrust = time00() - t0;
2073
for( ir = reps; ir; ir-- )
2075
trusted_syrk( UPLO, ta, N, K, ALPHA, a, LDA, (SCALAR)(bet), c, LDC );
2076
a += inca; if( a == stA ) { a = A; }
2079
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
2081
ttrust = time00() - t0;
2083
else /* if( ROUT == HERK ) */
2086
for( ir = reps; ir; ir-- )
2088
/* HERK receives only the first elements of ALPHA and bet, cast to TYPE. */
trusted_herk( UPLO, ta, N, K, (TYPE)(ALPHA[0]), a, LDA,
2089
(TYPE)(bet[0]), c, LDC );
2090
a += inca; if( a == stA ) { a = A; }
2093
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
2095
ttrust = time00() - t0;
2098
/*
 * mftrust is assigned here only for a positive elapsed time; the
 * alternative branch is not visible in this listing.
 */
if( ttrust > 0.0 ) mftrust = ( reps * flops ) / ( MEGA * ttrust );
2100
/* Report the average time per call. */
ttrust /= reps; *TTRUST0 = ttrust; *MFTRUST0 = mftrust;
2102
* Generate the random data and time the tested routine
2104
bet = beta; a = A; c = C;
2106
/* Same seeds as before, so the tested routine sees regenerated data. */
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
2107
Mjoin( PATL, gegen )( lC, 1, C0, lC, Cseed );
2111
for( ir = reps; ir; ir-- )
2113
test_syrk( UPLO, ta, N, K, ALPHA, a, LDA, (SCALAR)(bet), c, LDC );
2114
a += inca; if( a == stA ) { a = A; }
2116
if( c == stC ) { c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
2118
ttest = time00() - t0;
2123
for( ir = reps; ir; ir-- )
2125
test_syrk( UPLO, ta, N, K, ALPHA, a, LDA, (SCALAR)(bet), c, LDC );
2126
a += inca; if( a == stA ) { a = A; }
2129
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
2131
ttest = time00() - t0;
2133
else /* if( ROUT == HERK ) */
2136
for( ir = reps; ir; ir-- )
2138
test_herk( UPLO, ta, N, K, (TYPE)(ALPHA[0]), a, LDA,
2139
(TYPE)(bet[0]), c, LDC );
2140
a += inca; if( a == stA ) { a = A; }
2143
{ c = C; if( bet == beta ) bet = nbeta; else bet = beta; }
2145
ttest = time00() - t0;
2148
if( ttest > 0.0 ) mftest = ( reps * flops ) / ( MEGA * ttest );
2150
ttest /= reps; *TTEST0 = ttest; *MFTEST0 = mftest;
2152
* release the memory and exit
2162
/*
 * Fragment of the TRMM / TRSM case driver.  The function-name line, braces,
 * the conditionals choosing between the TRMM and TRSM paths, the tail of two
 * calls and some statements are absent from this listing; the bare integers
 * are leftover line numbers.
 *
 * Visible behaviour: runs trmmtst or trsmtst once when the problem needs at
 * least MEGA * MFLOP flops or TEST is set, returns passed immediately for
 * such large problems, returns -1 when a workspace cannot be allocated, and
 * otherwise times reps calls of the trusted and tested routines over
 * workspaces of operand copies, storing the per-call time and MFLOP rate
 * through TTRUST0, TTEST0, MFTRUST0 and MFTEST0.
 */
const enum LVL3_ROUT ROUT,
2165
const enum ATLAS_SIDE SIDE,
2166
const enum ATLAS_UPLO UPLO,
2167
const enum ATLAS_TRANS TRANS,
2168
const enum ATLAS_DIAG DIAG,
2181
double flops, ttrust, ttest, mftrust, mftest, t0;
2182
TYPE resid = ATL_rzero;
2183
/* NOTE(review): the continuation of this declaration is not visible here. */
TYPE * a, * stA, * b, * stB, * A, * A0 = NULL,
2185
unsigned long ir, reps;
2186
int inca, incb, lA, lB, nA, passed, Aseed, Bseed;
2188
/* The last opbl3 argument is -1 for AtlasLeft and 1 otherwise. */
flops = opbl3( ROUT, M, N, ( SIDE == AtlasLeft ? -1 : 1 ) );
2190
if( ( MEGA * MFLOP <= flops ) || ( TEST ) )
2194
/*
 * Two alternative single-shot calls follow; the conditional selecting one
 * and the final line of each call are not visible in this listing.
 */
resid = trmmtst( ROUT, TEST, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA,
2195
LDA, LDB, EPSILON, TTRUST0, TTEST0, MFTRUST0,
2200
resid = trsmtst( ROUT, TEST, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA,
2201
LDA, LDB, EPSILON, TTRUST0, TTEST0, MFTRUST0,
2204
if( resid > THRESH ) (void) fprintf( stderr, " resid=%f\n", resid );
2206
/* A negative residual maps to passed = -1; otherwise pass means resid < THRESH. */
if( resid < ATL_rzero ) passed = -1;
2207
else passed = ( resid < THRESH );
2209
if( MEGA * MFLOP <= flops ) return( passed );
2211
/* nA is M for AtlasLeft and N otherwise. */
if( SIDE == AtlasLeft ) { nA = M; } else { nA = N; }
2213
inca = LDA * ( nA SHIFT );
2214
incb = LDB * ( N SHIFT );
2216
/*
 * ( x + d - 1 ) / d rounds up: each workspace holds the smallest whole number
 * of operand copies whose element count reaches ATL_DivBySize( LCSIZE ).
 */
lA = inca * ( ( ATL_DivBySize( LCSIZE ) + nA*nA - 1 ) / ( nA * nA ) );
2217
lB = incb * ( ( ATL_DivBySize( LCSIZE ) + M * N - 1 ) / ( M * N ) );
2219
A0 = (TYPE *)malloc( ATL_MulBySize( lA ) );
2220
B0 = (TYPE *)malloc( ATL_MulBySize( lB ) );
2222
/* Allocation failure: free whatever was obtained and return -1. */
if( ( A0 == NULL ) || ( B0 == NULL ) )
2223
{ if( A0 ) free( A0 ); if( B0 ) free( B0 ); return( -1 ); }
2225
/* stA and stB are the wrap-around marks tested in the timing loops. */
A = A0; stA = A0 + ( lA SHIFT );
2226
B = B0; stB = B0 + ( lB SHIFT );
2228
Aseed = nA * nA + 513 * 7 + 90;
2229
Bseed = M * N + 101 * 2 + 53;
2231
/* Repetition count chosen so that reps * flops is about MEGA * MFLOP. */
reps = ( MEGA * MFLOP ) / flops;
2233
* Generate the random data and time the trusted routine
2237
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
2238
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
2243
for( ir = reps; ir; ir-- )
2245
trusted_trmm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
2246
/* Step to the next copy and wrap at the end mark. */
a += inca; if( a == stA ) { a = A; }
2247
b += incb; if( b == stB ) { b = B; }
2249
ttrust = time00() - t0;
2251
else /* if( ROUT == TRSM ) */
2253
/*
 * For the TRSM path trddom is applied to every copy of A in the workspace
 * before the timed loop; a is then reset to the first copy.
 */
do { trddom( UPLO, nA, a, LDA ); a += inca; } while( a != stA ); a = A;
2256
for( ir = reps; ir; ir-- )
2258
trusted_trsm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
2259
a += inca; if( a == stA ) { a = A; }
2260
b += incb; if( b == stB ) { b = B; }
2262
ttrust = time00() - t0;
2264
/*
 * mftrust is assigned here only for a positive elapsed time; the
 * alternative branch is not visible in this listing.
 */
if( ttrust > 0.0 ) mftrust = ( reps * flops ) / ( MEGA * ttrust );
2266
/* Report the average time per call. */
ttrust /= reps; *TTRUST0 = ttrust; *MFTRUST0 = mftrust;
2268
* Generate the random data and time the tested routine
2272
/* Same seeds as before, so the tested routine sees regenerated data. */
Mjoin( PATL, gegen )( lA, 1, A0, lA, Aseed );
2273
Mjoin( PATL, gegen )( lB, 1, B0, lB, Bseed );
2278
for( ir = reps; ir; ir-- )
2280
test_trmm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
2281
a += inca; if( a == stA ) { a = A; }
2282
b += incb; if( b == stB ) { b = B; }
2284
ttest = time00() - t0;
2286
else /* if( ROUT == TRSM ) */
2288
do { trddom( UPLO, nA, a, LDA ); a += inca; } while( a != stA ); a = A;
2291
for( ir = reps; ir; ir-- )
2293
test_trsm( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, a, LDA, b, LDB );
2294
a += inca; if( a == stA ) { a = A; }
2295
b += incb; if( b == stB ) { b = B; }
2297
ttest = time00() - t0;
2299
if( ttest > 0.0 ) mftest = ( reps * flops ) / ( MEGA * ttest );
2301
ttest /= reps; *TTEST0 = ttest; *MFTEST0 = mftest;
2303
* release the memory and exit
2311
* =====================================================================
2313
* =====================================================================
2317
/*
 * Fragment of the GEMM run loop.  The function-name line, braces,
 * preprocessor conditionals and some statements are absent from this
 * listing; the bare integers are leftover line numbers.
 *
 * Visible behaviour: prints a table header, then iterates over the M, N and
 * K ranges, every TRANSA / TRANSB entry and every ALPHA / BETA entry, calls
 * gemmcase for each combination, counts passes through NPASSED and prints
 * one row for the trusted routine and one for the tested routine.
 */
const enum LVL3_ROUT ROUT,
2322
const enum ATLAS_TRANS * TRANSA,
2324
const enum ATLAS_TRANS * TRANSB,
2335
const TYPE * ALPHAS,
2343
double t0, mftrust, mftest, ttrust, ttest;
2344
char * pass, * form;
2345
int al, be, ires, k, kk, lda, ldb, ldc, m, mm, n,
2346
nn, ta, tb, ksame=0, msame=0;
2347
char ctrana, ctranb;
2349
/*
 * M0 == -1 (resp. K0 == -1) makes M (resp. K) follow N: the range collapses
 * to the single value NN so its loop body runs once per N, and msame / ksame
 * make the body use nn instead.
 */
if( M0 == -1 ) { msame = 1; M0 = MN = MINC = NN; }
2350
if( K0 == -1 ) { ksame = 1; K0 = KN = KINC = NN; }
2353
/*
 * Two header / format variants follow: one with single ALPHA and BETA
 * columns, one with two numbers for each.  The conditional selecting one is
 * not visible in this listing.
 */
(void) fprintf( stdout, "\n%s%s%s\n",
2354
"--------------------------------- ", "GEMM",
2355
" ----------------------------------" );
2356
(void) fprintf( stdout, "%s%s",
2357
"TST# A B M N K ALPHA LDA LDB BETA",
2358
" LDC TIME MFLOP SpUp TEST\n" );
2359
(void) fprintf( stdout, "%s%s",
2360
"==== = = ==== ==== ==== ===== ==== ==== =====",
2361
" ==== ===== ===== ==== =====\n" );
2362
form = "%4d %c %c %4d %4d %4d %5.1f %4d %4d %5.1f %4d %5.2f %5.1f %4.2f %5s\n";
2364
(void) fprintf( stdout, "\n%s%s%s\n",
2365
"----------------------------------- ", "GEMM",
2366
" ---------------------------------------" );
2367
(void) fprintf( stdout, "%s%s",
2368
"TST# A B M N K ALPHA LDA LDB",
2369
" BETA LDC TIME MFLOP SpUp TEST\n" );
2370
(void) fprintf( stdout, "%s%s",
2371
"==== = = ==== ==== ==== ==== ==== ==== ====",
2372
" ==== ==== ==== ==== ===== ==== =====\n" );
2374
"%4d %c %c %4d %4d %4d %4.1f %4.1f %4d %4d %4.1f %4.1f %4d %4.1f %5.1f %4.2f %5s\n";
2377
for( mm = M0; mm <= MN; mm += MINC )
2379
for( nn = N0; nn <= NN; nn += NINC )
2381
n = nn; if( msame ) { m = nn; } else { m = mm; }
2382
/* Leading dimension: tight ( max( 1, m ) ) when LDA_IS_M, else the range maximum. */
if( LDA_IS_M ) ldc = Mmax( 1, m ); else ldc = MN;
2384
for( kk = K0; kk <= KN; kk += KINC )
2386
if( ksame ) { k = nn; } else { k = kk; }
2388
for( ta = 0; ta < NTRANA; ta++ )
2390
if( TRANSA[ta] == AtlasNoTrans ) ctrana = 'N';
2391
else if( TRANSA[ta] == AtlasTrans ) ctrana = 'T';
2394
/* lda follows the row count of A as stored: m when not transposed, k otherwise. */
if( TRANSA[ta] == AtlasNoTrans )
2395
{ if( LDA_IS_M ) lda = Mmax( 1, m ); else lda = MN; }
2397
{ if( LDA_IS_M ) lda = Mmax( 1, k ); else lda = KN; }
2399
for( tb = 0; tb < NTRANB; tb++ )
2401
if( TRANSB[tb] == AtlasNoTrans ) ctranb = 'N';
2402
else if( TRANSB[tb] == AtlasTrans ) ctranb = 'T';
2405
/* ldb follows the row count of B as stored: k when not transposed, n otherwise. */
if( TRANSB[tb] == AtlasNoTrans )
2406
{ if( LDA_IS_M ) ldb = Mmax( 1, k ); else ldb = KN; }
2408
{ if( LDA_IS_M ) ldb = Mmax( 1, n ); else ldb = NN; }
2410
for( al = 0; al < NALPHA; al++ )
2412
for( be = 0; be < NBETA; be++ )
2415
/*
 * Two alternative calls follow: one passes ALPHAS[al] / BETAS[be] by value,
 * the other passes pointers to element pairs.  The conditional selecting
 * one is not visible in this listing.
 */
ires = gemmcase( ROUT, TEST, MFLOP, TRANSA[ta],
2416
TRANSB[tb], m, n, k, ALPHAS[al], lda,
2417
ldb, BETAS[be], ldc, EPSILON, &ttrust,
2418
&ttest, &mftrust, &mftest );
2420
ires = gemmcase( ROUT, TEST, MFLOP, TRANSA[ta],
2421
TRANSB[tb], m, n, k, ALPHAS+2*al, lda,
2422
ldb, BETAS+2*be, ldc, EPSILON, &ttrust,
2423
&ttest, &mftrust, &mftest );
2425
/* Result label: SKIP when not testing, NoMEM for a negative ires, else PASS or FAIL. */
if( !( TEST ) ) pass = "SKIP ";
2426
else if( ires < 0 ) pass = "NoMEM";
2427
else if( ires ) pass = "PASS ";
2428
else pass = "FAIL ";
2430
if( ires > 0 ) (*NPASSED)++;
2432
/*
 * t0 is reused as the speed-up ratio mftest / mftrust; the branch taken when
 * either rate is not positive is not visible in this listing.
 */
if( ( mftrust > 0.0 ) && ( mftest > 0.0 ) )
2433
t0 = mftest / mftrust;
2436
/* NOTE(review): the final line of the next two calls is not visible here. */
(void) fprintf( stdout, form, *NTESTS, ctrana, ctranb,
2437
m, n, k, ALPHAS[al], lda, ldb,
2438
BETAS[be], ldc, ttrust, mftrust, 1.0,
2440
(void) fprintf( stdout, form, *NTESTS, ctrana, ctranb,
2441
m, n, k, ALPHAS[al], lda, ldb,
2442
BETAS[be], ldc, ttest, mftest, t0,
2445
/* Two-number variant: trusted row first, then the tested row with its label. */
(void) fprintf( stdout, form, *NTESTS, ctrana, ctranb,
2446
m, n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2447
lda, ldb, BETAS [2*be], BETAS [2*be+1],
2448
ldc, ttrust, mftrust, 1.0, "-----" );
2449
(void) fprintf( stdout, form, *NTESTS, ctrana, ctranb,
2450
m, n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2451
lda, ldb, BETAS [2*be], BETAS [2*be+1],
2452
ldc, ttest, mftest, t0, pass );
2466
/*
 * Fragment of the SYMM / HEMM run loop.  The function-name line, braces,
 * preprocessor conditionals, the tail of several statements and some whole
 * statements are absent from this listing; the bare integers are leftover
 * line numbers.
 *
 * Visible behaviour: prints a table header, then iterates over the M and N
 * ranges, every SIDES / UPLOS entry and every ALPHA / BETA entry, calls
 * symmcase for each combination, counts passes through NPASSED and prints
 * one row for the trusted routine and one for the tested routine.
 */
const enum LVL3_ROUT ROUT,
2471
const enum ATLAS_SIDE * SIDES,
2473
const enum ATLAS_UPLO * UPLOS,
2481
const TYPE * ALPHAS,
2489
double t0, mftrust, mftest, ttrust, ttest;
2490
char * pass, * form;
2491
/* NOTE(review): the continuation of this declaration is not visible here. */
int al, be, ires, lda, ldb, ldc, m, msame=0, mm, n,
2495
/*
 * M0 == -1 makes M follow N: the M range collapses to the single value NN
 * and msame makes the loop body use nn for m.
 */
if( M0 == -1 ) { msame = 1; M0 = MN = MINC = NN; }
2498
/*
 * Three header / format variants follow (two titled SYMM, one titled HEMM).
 * The conditionals selecting one are not visible in this listing.
 */
(void) fprintf( stdout, "\n%s%s%s\n",
2499
"-------------------------------- ", "SYMM",
2500
" --------------------------------" );
2501
(void) fprintf( stdout, "%s%s",
2502
"TST# S U M N ALPHA LDA LDB BETA LDC",
2503
" TIME MFLOP SpUp TEST\n" );
2504
(void) fprintf( stdout, "%s%s",
2505
"==== = = ==== ==== ===== ==== ==== ===== ====",
2506
" ====== ===== ===== =====\n" );
2507
form = "%4d %c %c %4d %4d %5.1f %4d %4d %5.1f %4d %6.2f %5.1f %5.2f %5s\n";
2511
(void) fprintf( stdout, "\n%s%s%s\n",
2512
"------------------------------------ ", "SYMM",
2513
" ------------------------------------" );
2514
(void) fprintf( stdout, "%s%s",
2515
"TST# S U M N ALPHA LDA LDB BETA",
2516
" LDC TIME MFLOP SpUp TEST\n" );
2517
(void) fprintf( stdout, "%s%s",
2518
"==== = = ==== ==== ==== ==== ==== ==== ==== ====",
2519
" ==== ====== ===== ===== =====\n" );
2521
"%4d %c %c %4d %4d %4.1f %4.1f %4d %4d %4.1f %4.1f %4d %6.2f %5.1f %5.2f %5s\n";
2525
(void) fprintf( stdout, "\n%s%s%s\n",
2526
"------------------------------------ ", "HEMM",
2527
" ------------------------------------" );
2528
(void) fprintf( stdout, "%s%s",
2529
"TST# S U M N ALPHA LDA LDB BETA",
2530
" LDC TIME MFLOP SpUp TEST\n" );
2531
(void) fprintf( stdout, "%s%s",
2532
"==== = = ==== ==== ==== ==== ==== ==== ==== ====",
2533
" ==== ====== ===== ===== =====\n" );
2535
"%4d %c %c %4d %4d %4.1f %4.1f %4d %4d %4.1f %4.1f %4d %6.2f %5.1f %5.2f %5s\n";
2538
for( mm = M0; mm <= MN; mm += MINC )
2540
for( nn = N0; nn <= NN; nn += NINC )
2542
n = nn; if( msame ) { m = nn; } else { m = mm; }
2543
/* ldb and ldc are tight ( max( 1, m ) ) when LDA_IS_M, else the M range maximum. */
if( LDA_IS_M ) { ldb = ldc = Mmax( 1, m ); } else { ldb = ldc = MN; }
2545
for( si = 0; si < NSIDE; si++ )
2547
/* lda follows m on the left side and n on the right side. */
if( SIDES[si] == AtlasLeft )
2548
{ cside = 'L'; if( LDA_IS_M ) lda = Mmax( 1, m ); else lda = MN; }
2550
{ cside = 'R'; if( LDA_IS_M ) lda = Mmax( 1, n ); else lda = NN; }
2552
for( up = 0; up < NUPLO; up++ )
2554
if( UPLOS[up] == AtlasLower ) cuplo = 'L';
2557
for( al = 0; al < NALPHA; al++ )
2559
for( be = 0; be < NBETA; be++ )
2562
/*
 * Two alternative calls follow: one passes ALPHAS[al] / BETAS[be] by value,
 * the other passes pointers to element pairs.  The conditional selecting
 * one and the final line of each call are not visible in this listing.
 */
ires = symmcase( ROUT, TEST, MFLOP, SIDES[si], UPLOS[up],
2563
m, n, ALPHAS[al], lda, ldb, BETAS[be],
2564
ldc, EPSILON, &ttrust, &ttest, &mftrust,
2567
ires = symmcase( ROUT, TEST, MFLOP, SIDES[si], UPLOS[up],
2568
m, n, ALPHAS+2*al, lda, ldb, BETAS+2*be,
2569
ldc, EPSILON, &ttrust, &ttest, &mftrust,
2572
/* Result label: SKIP when not testing, NoMEM for a negative ires, else PASS or FAIL. */
if( !( TEST ) ) pass = "SKIP ";
2573
else if( ires < 0 ) pass = "NoMEM";
2574
else if( ires ) pass = "PASS ";
2575
else pass = "FAIL ";
2577
if( ires > 0 ) (*NPASSED)++;
2579
/*
 * t0 is reused as the speed-up ratio mftest / mftrust; the branch taken when
 * either rate is not positive is not visible in this listing.
 */
if( ( mftrust > 0.0 ) && ( mftest > 0.0 ) )
2580
t0 = mftest / mftrust;
2583
/* Single-number variant: trusted row first, then the tested row with its label. */
(void) fprintf( stdout, form, *NTESTS, cside, cuplo, m, n,
2584
ALPHAS[al], lda, ldb, BETAS[be], ldc,
2585
ttrust, mftrust, 1.0, "-----" );
2586
(void) fprintf( stdout, form, *NTESTS, cside, cuplo, m, n,
2587
ALPHAS[al], lda, ldb, BETAS[be], ldc,
2588
ttest, mftest, t0, pass );
2590
/* Two-number variant of the same two rows. */
(void) fprintf( stdout, form, *NTESTS, cside, cuplo, m, n,
2591
ALPHAS[2*al], ALPHAS[2*al+1], lda, ldb,
2592
BETAS[2*be], BETAS[2*be+1], ldc, ttrust,
2593
mftrust, 1.0, "-----" );
2594
(void) fprintf( stdout, form, *NTESTS, cside, cuplo, m, n,
2595
ALPHAS[2*al], ALPHAS[2*al+1], lda, ldb,
2596
BETAS[2*be], BETAS[2*be+1], ldc, ttest,
2610
/*
 * NOTE(review): this span looks like the parameter list and body of the
 * SYR2K / HER2K test driver (presumably Runsyr2kCase, which RunCases calls
 * with a matching argument list).  The line carrying the function name, the
 * braces and any preprocessor conditionals are not visible in this excerpt,
 * and the bare numbers between statements appear to be line-number residue.
 *
 * Visible behaviour: print a table header, sweep N, K, UPLO, TRANS, ALPHA
 * and BETA, call syr2kcase() for each combination and print two rows per
 * case: one with ttrust/mftrust and a speedup of 1.0, one with ttest/mftest,
 * the computed speedup t0 and the status string.
 */
const enum LVL3_ROUT ROUT,
2615
const enum ATLAS_UPLO * UPLOS,
2617
const enum ATLAS_TRANS * TRANS,
2625
const TYPE * ALPHAS,
2633
double t0, mftrust, mftest, ttrust, ttest;
2634
char * pass, * form;
2635
int al, be, ires, k, kk, ksame=0, lda, ldb, ldc, n,
2639
/*
 * K0 == -1 means "use K = N": the kk loop is collapsed to a single pass and
 * k is taken from nn inside it.  KINC is set to 1 (not NN) so that the
 * single-pass loop still terminates when NN is 0; for NN > 0 the loop runs
 * exactly once either way.
 */
if( K0 == -1 ) { ksame = 1; K0 = KN = NN; KINC = 1; }
2642
/*
 * Three header/format variants follow (two titled SYR2K, one HER2K).
 * NOTE(review): presumably selected by build-time conditionals that are not
 * visible here; the variants differ in how many ALPHA/BETA columns they print.
 */
(void) fprintf( stdout, "\n%s%s%s\n",
2643
"-------------------------------- ", "SYR2K",
2644
" --------------------------------" );
2645
(void) fprintf( stdout, "%s%s",
2646
"TST# U T N K ALPHA LDA LDB BETA LDC TIME",
2647
" MFLOP SpUp TEST\n" );
2648
(void) fprintf( stdout, "%s%s",
2649
"==== = = ==== ==== ===== ==== ==== ===== ==== ======",
2650
" ====== ===== =====\n" );
2651
form = "%4d %c %c %4d %4d %5.1f %4d %4d %5.1f %4d %6.2f %6.1f %5.2f %5s\n";
2655
(void) fprintf( stdout, "\n%s%s%s\n",
2656
"------------------------------------ ", "SYR2K",
2657
" ------------------------------------" );
2658
(void) fprintf( stdout, "%s%s",
2659
"TST# U T N K ALPHA LDA LDB BETA",
2660
" LDC TIME MFLOP SpUp TEST\n" );
2661
(void) fprintf( stdout, "%s%s",
2662
"==== = = ==== ==== ==== ==== ==== ==== ==== ====",
2663
" ==== ====== ====== ===== =====\n" );
2665
"%4d %c %c %4d %4d %4.1f %4.1f %4d %4d %4.1f %4.1f %4d %6.2f %6.1f %5.2f %5s\n";
2669
(void) fprintf( stdout, "\n%s%s%s\n",
2670
"----------------------------------- ", "HER2K",
2671
" -----------------------------------" );
2672
(void) fprintf( stdout, "%s%s",
2673
"TST# U T N K ALPHA LDA LDB BETA",
2674
" LDC TIME MFLOP SpUp TEST\n" );
2675
(void) fprintf( stdout, "%s%s",
2676
"==== = = ==== ==== ===== ===== ==== ==== =====",
2677
" ==== ====== ====== ===== =====\n" );
2679
"%4d %c %c %4d %4d %5.1f %5.1f %4d %4d %5.1f %4d %6.2f %6.1f %5.2f %5s\n";
2682
/* Outer sweep over N; ldc is max(1, n) with LDA_IS_M, otherwise NN. */
for( nn = N0; nn <= NN; nn += NINC )
2684
n = nn; if( LDA_IS_M ) ldc = Mmax( 1, n ); else ldc = NN;
2686
for( kk = K0; kk <= KN; kk += KINC )
2688
if( ksame ) { k = nn; } else { k = kk; }
2690
for( up = 0; up < NUPLO; up++ )
2692
if( UPLOS[up] == AtlasUpper ) cuplo = 'U';
2695
for( ta = 0; ta < NTRAN; ta++ )
2698
/*
 * lda and ldb are set together: from n (or NN) in the NoTrans test, from
 * k (or KN) in the line that follows it.  NOTE(review): the same pair
 * appears three times, presumably once per build variant; the else keywords
 * and the ctran assignments are not visible here.
 */
if( TRANS[ta] == AtlasNoTrans )
2701
if( LDA_IS_M ) lda = ldb = n; else lda = ldb = NN;
2706
if( LDA_IS_M ) lda = ldb = k; else lda = ldb = KN;
2711
if( TRANS[ta] == AtlasNoTrans )
2714
if( LDA_IS_M ) lda = ldb = n; else lda = ldb = NN;
2719
if( LDA_IS_M ) lda = ldb = k; else lda = ldb = KN;
2724
if( TRANS[ta] == AtlasNoTrans )
2727
if( LDA_IS_M ) lda = ldb = n; else lda = ldb = NN;
2732
if( LDA_IS_M ) lda = ldb = k; else lda = ldb = KN;
2736
for( al = 0; al < NALPHA; al++ )
2738
for( be = 0; be < NBETA; be++ )
2741
/*
 * Two call forms: scalars passed by value (ALPHAS[al], BETAS[be]) or as
 * pointers to element 2*al / 2*be.  The closing argument lines of both
 * calls are not visible in this excerpt.
 */
ires = syr2kcase( ROUT, TEST, MFLOP, UPLOS[up], TRANS[ta],
2742
n, k, ALPHAS[al], lda, ldb, BETAS[be],
2743
ldc, EPSILON, &ttrust, &ttest, &mftrust,
2746
ires = syr2kcase( ROUT, TEST, MFLOP, UPLOS[up], TRANS[ta],
2747
n, k, ALPHAS+2*al, lda, ldb, BETAS+2*be,
2748
ldc, EPSILON, &ttrust, &ttest, &mftrust,
2751
/* Status column: SKIP when TEST is off, NoMEM for ires < 0, PASS for a
   non-zero ires, FAIL for zero. */
if( !( TEST ) ) pass = "SKIP ";
2752
else if( ires < 0 ) pass = "NoMEM";
2753
else if( ires ) pass = "PASS ";
2754
else pass = "FAIL ";
2756
if( ires > 0 ) (*NPASSED)++;
2758
/* Speedup is mftest / mftrust, computed only when both rates are positive
   (the fallback branch is not visible here). */
if( ( mftrust > 0.0 ) && ( mftest > 0.0 ) )
2759
t0 = mftest / mftrust;
2762
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran, n, k,
2763
ALPHAS[al], lda, ldb, BETAS[be], ldc,
2764
ttrust, mftrust, 1.0, "-----" );
2765
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran, n, k,
2766
ALPHAS[al], lda, ldb, BETAS[be], ldc,
2767
ttest, mftest, t0, pass );
2771
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2772
n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2773
lda, ldb, BETAS [2*be], BETAS [2*be+1],
2774
ldc, ttrust, mftrust, 1.0, "-----" );
2775
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2776
n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2777
lda, ldb, BETAS [2*be], BETAS [2*be+1],
2778
ldc, ttest, mftest, t0, pass );
2782
/* This last pair prints both ALPHA components but only BETAS[2*be]. */
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2783
n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2784
lda, ldb, BETAS [2*be], ldc, ttrust,
2785
mftrust, 1.0, "-----" );
2786
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2787
n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2788
lda, ldb, BETAS [2*be], ldc, ttest,
2803
/*
 * NOTE(review): this span looks like the parameter list and body of the
 * SYRK / HERK test driver (presumably RunsyrkCase, which RunCases calls with
 * a matching argument list).  The line carrying the function name, the
 * braces and any preprocessor conditionals are not visible in this excerpt,
 * and the bare numbers between statements appear to be line-number residue.
 *
 * Visible behaviour: print a table header, sweep N, K, UPLO, TRANS, ALPHA
 * and BETA, call syrkcase() for each combination and print two rows per
 * case: one with ttrust/mftrust and a speedup of 1.0, one with ttest/mftest,
 * the computed speedup t0 and the status string.
 */
const enum LVL3_ROUT ROUT,
2808
const enum ATLAS_UPLO * UPLOS,
2810
const enum ATLAS_TRANS * TRANS,
2818
const TYPE * ALPHAS,
2826
double t0, mftrust, mftest, ttrust, ttest;
2827
char * pass, * form;
2828
int al, be, ires, k, kk, ksame=0, lda, ldc, n, nn,
2832
/*
 * K0 == -1 means "use K = N": the kk loop is collapsed to a single pass and
 * k is taken from nn inside it.  KINC is set to 1 (not NN) so that the
 * single-pass loop still terminates when NN is 0; for NN > 0 the loop runs
 * exactly once either way.
 */
if( K0 == -1 ) { ksame = 1; K0 = KN = NN; KINC = 1; }
2835
/*
 * Three header/format variants follow (two titled SYRK, one HERK).
 * NOTE(review): presumably selected by build-time conditionals that are not
 * visible here; the variants differ in how many ALPHA/BETA columns they print.
 */
(void) fprintf( stdout, "\n%s%s%s\n",
2836
"------------------------------ ", "SYRK",
2837
" ------------------------------" );
2838
(void) fprintf( stdout, "%s%s",
2839
"TST# U T N K ALPHA LDA BETA LDC TIME",
2840
" MFLOP SpUp TEST\n" );
2841
(void) fprintf( stdout, "%s%s",
2842
"==== = = ==== ==== ===== ==== ===== ==== ======",
2843
" ====== ===== =====\n" );
2844
form = "%4d %c %c %4d %4d %5.1f %4d %5.1f %4d %6.2f %6.1f %5.2f %5s\n";
2848
(void) fprintf( stdout, "\n%s%s%s\n",
2849
"------------------------------------ ", "SYRK",
2850
" ------------------------------------" );
2851
(void) fprintf( stdout, "%s%s",
2852
"TST# U T N K ALPHA LDA BETA",
2853
" LDC TIME MFLOP SpUp TEST\n" );
2854
(void) fprintf( stdout, "%s%s",
2855
"==== = = ==== ==== ===== ===== ==== ===== =====",
2856
" ==== ====== ====== ===== =====\n" );
2858
"%4d %c %c %4d %4d %5.1f %5.1f %4d %5.1f %5.1f %4d %6.2f %6.1f %5.2f %5s\n";
2862
(void) fprintf( stdout, "\n%s%s%s\n",
2863
"------------------------------ ", "HERK",
2864
" ------------------------------" );
2865
(void) fprintf( stdout, "%s%s",
2866
"TST# U T N K ALPHA LDA BETA",
2867
" LDC TIME MFLOP SpUp TEST\n" );
2868
(void) fprintf( stdout, "%s%s",
2869
"==== = = ==== ==== ===== ==== =====",
2870
" ==== ====== ====== ===== =====\n" );
2871
form = "%4d %c %c %4d %4d %5.1f %4d %5.1f %4d %6.2f %6.1f %5.2f %5s\n";
2874
/* Outer sweep over N; ldc is max(1, n) with LDA_IS_M, otherwise NN. */
for( nn = N0; nn <= NN; nn += NINC )
2876
n = nn; if( LDA_IS_M ) ldc = Mmax( 1, n ); else ldc = NN;
2878
for( kk = K0; kk <= KN; kk += KINC )
2880
if( ksame ) { k = nn; } else { k = kk; }
2882
for( up = 0; up < NUPLO; up++ )
2884
if( UPLOS[up] == AtlasUpper ) cuplo = 'U';
2887
for( ta = 0; ta < NTRAN; ta++ )
2890
/*
 * The NoTrans line sets ctran = 'N' and takes lda from n (or NN); the line
 * after it sets 'T' (or 'C' in the last group) and takes lda from k (or KN).
 * NOTE(review): the pair appears three times, presumably once per build
 * variant; the else keywords are not visible here.
 */
if( TRANS[ta] == AtlasNoTrans )
2891
{ ctran = 'N'; if( LDA_IS_M ) lda = n; else lda = NN; }
2893
{ ctran = 'T'; if( LDA_IS_M ) lda = k; else lda = KN; }
2897
if( TRANS[ta] == AtlasNoTrans )
2898
{ ctran = 'N'; if( LDA_IS_M ) lda = n; else lda = NN; }
2900
{ ctran = 'T'; if( LDA_IS_M ) lda = k; else lda = KN; }
2904
if( TRANS[ta] == AtlasNoTrans )
2905
{ ctran = 'N'; if( LDA_IS_M ) lda = n; else lda = NN; }
2907
{ ctran = 'C'; if( LDA_IS_M ) lda = k; else lda = KN; }
2910
for( al = 0; al < NALPHA; al++ )
2912
for( be = 0; be < NBETA; be++ )
2915
/*
 * Two call forms: scalars passed by value (ALPHAS[al], BETAS[be]) or as
 * pointers to element 2*al / 2*be.  The closing argument lines of both
 * calls are not visible in this excerpt.
 */
ires = syrkcase( ROUT, TEST, MFLOP, UPLOS[up], TRANS[ta],
2916
n, k, ALPHAS[al], lda, BETAS[be], ldc,
2917
EPSILON, &ttrust, &ttest, &mftrust,
2920
ires = syrkcase( ROUT, TEST, MFLOP, UPLOS[up], TRANS[ta],
2921
n, k, ALPHAS+2*al, lda, BETAS+2*be, ldc,
2922
EPSILON, &ttrust, &ttest, &mftrust,
2925
/* Status column: SKIP when TEST is off, NoMEM for ires < 0, PASS for a
   non-zero ires, FAIL for zero. */
if( !( TEST ) ) pass = "SKIP ";
2926
else if( ires < 0 ) pass = "NoMEM";
2927
else if( ires ) pass = "PASS ";
2928
else pass = "FAIL ";
2930
if( ires > 0 ) (*NPASSED)++;
2932
/* Speedup is mftest / mftrust, computed only when both rates are positive
   (the fallback branch is not visible here). */
if( ( mftrust > 0.0 ) && ( mftest > 0.0 ) )
2933
t0 = mftest / mftrust;
2936
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran, n, k,
2937
ALPHAS[al], lda, BETAS[be], ldc, ttrust,
2938
mftrust, 1.0, "-----" );
2939
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran, n, k,
2940
ALPHAS[al], lda, BETAS[be], ldc, ttest,
2945
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2946
n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2947
lda, BETAS [2*be], BETAS [2*be+1],
2948
ldc, ttrust, mftrust, 1.0, "-----" );
2949
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2950
n, k, ALPHAS[2*al], ALPHAS[2*al+1],
2951
lda, BETAS [2*be], BETAS [2*be+1],
2952
ldc, ttest, mftest, t0, pass );
2956
/* This last pair prints only ALPHAS[2*al] and BETAS[2*be]. */
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2957
n, k, ALPHAS[2*al], lda, BETAS[2*be],
2958
ldc, ttrust, mftrust, 1.0, "-----" );
2959
(void) fprintf( stdout, form, *NTESTS, cuplo, ctran,
2960
n, k, ALPHAS[2*al], lda, BETAS[2*be],
2961
ldc, ttest, mftest, t0, pass );
2975
/*
 * NOTE(review): this span looks like the parameter list and body of the
 * TRMM / TRSM test driver (presumably RuntrxmCase, which RunCases calls with
 * a matching argument list).  The line carrying the function name, the
 * braces and any preprocessor conditionals are not visible in this excerpt,
 * and the bare numbers between statements appear to be line-number residue.
 *
 * Visible behaviour: print a table header, sweep M, N, SIDE, UPLO, TRANS,
 * DIAG and ALPHA, call trxmcase() for each combination and print two rows
 * per case: one with ttrust/mftrust and a speedup of 1.0, one with
 * ttest/mftest, the computed speedup t0 and the status string.
 */
const enum LVL3_ROUT ROUT,
2980
const enum ATLAS_SIDE * SIDES,
2982
const enum ATLAS_UPLO * UPLOS,
2984
const enum ATLAS_TRANS * TRANS,
2986
const enum ATLAS_DIAG * DIAGS,
2994
const TYPE * ALPHAS,
3000
double t0, mftrust, mftest, ttrust, ttest;
3001
char * pass, * form;
3002
int al, si, up, ta, di, ires, lda, ldb, m, mm,
3004
char cside, ctran, cdiag, cuplo;
3006
/*
 * M0 == -1 means "use M = N": the mm loop is collapsed to a single pass and
 * m is taken from nn inside it.  MINC is set to 1 (not NN) so that the
 * single-pass loop still terminates when NN is 0; for NN > 0 the loop runs
 * exactly once either way.
 */
if( M0 == -1 ) { msame = 1; M0 = MN = NN; MINC = 1; }
3010
/*
 * Two header layouts follow, each with a TRMM and a TRSM title.
 * NOTE(review): which title and layout is printed presumably depends on ROUT
 * and on build-time conditionals that are not visible here; the second
 * layout has two ALPHA columns.
 */
(void) fprintf( stdout, "\n%s%s%s\n",
3011
"---------------------------", " TRMM ",
3012
"----------------------------" );
3014
(void) fprintf( stdout, "\n%s%s%s\n",
3015
"---------------------------", " TRSM ",
3016
"----------------------------" );
3017
(void) fprintf( stdout, "%s%s",
3018
"TST# S U T D M N ALPHA LDA LDB TIME",
3019
" MFLOP SpUp TEST\n" );
3020
(void) fprintf( stdout, "%s%s",
3021
"==== = = = = ==== ==== ===== ==== ==== =====",
3022
" ===== ==== =====\n" );
3023
form = "%4d %c %c %c %c %4d %4d %5.1f %4d %4d %5.2f %5.1f %4.2f %5s\n";
3026
(void) fprintf( stdout, "\n%s%s%s\n",
3027
"------------------------------", " TRMM ",
3028
"-------------------------------" );
3030
(void) fprintf( stdout, "\n%s%s%s\n",
3031
"------------------------------", " TRSM ",
3032
"-------------------------------" );
3033
(void) fprintf( stdout, "%s%s",
3034
"TST# S U T D M N ALPHA LDA LDB TIME",
3035
" MFLOP SpUp TEST\n" );
3036
(void) fprintf( stdout, "%s%s",
3037
"==== = = = = ==== ==== ===== ===== ==== ==== =====",
3038
" ===== ==== =====\n" );
3039
form = "%4d %c %c %c %c %4d %4d %5.1f %5.1f %4d %4d %5.2f %5.1f %4.2f %5s\n";
3041
for( mm = M0; mm <= MN; mm += MINC )
3043
for( nn = N0; nn <= NN; nn += NINC )
3045
n = nn; if( msame ) { m = nn; } else { m = mm; }
3046
/* ldb is max(1, m) with LDA_IS_M, otherwise MN. */
if( LDA_IS_M ) { ldb = Mmax( 1, m ); } else { ldb = MN; }
3048
for( si = 0; si < NSIDE; si++ )
3050
/* lda follows m (or MN) for SIDE = Left and n (or NN) in the 'R' line;
   the else keyword between the two is not visible here. */
if( SIDES[si] == AtlasLeft )
3051
{ cside = 'L'; if( LDA_IS_M ) lda = Mmax( 1, m ); else lda = MN; }
3053
{ cside = 'R'; if( LDA_IS_M ) lda = Mmax( 1, n ); else lda = NN; }
3055
for( up = 0; up < NUPLO; up++ )
3057
if( UPLOS[up] == AtlasUpper ) cuplo = 'U';
3060
for( ta = 0; ta < NTRAN; ta++ )
3062
if( TRANS[ta] == AtlasNoTrans ) ctran = 'N';
3063
else if( TRANS[ta] == AtlasTrans ) ctran = 'T';
3066
for( di = 0; di < NDIAG; di++ )
3068
if( DIAGS[di] == AtlasUnit ) cdiag = 'U';
3071
for( al = 0; al < NALPHA; al++ )
3074
/*
 * Two call forms: ALPHA passed by value (ALPHAS[al]) or as a pointer to
 * element 2*al.
 */
ires = trxmcase( ROUT, TEST, MFLOP, SIDES[si],
3075
UPLOS[up], TRANS[ta], DIAGS[di],
3076
m, n, ALPHAS[al], lda, ldb, EPSILON,
3077
&ttrust, &ttest, &mftrust, &mftest );
3079
ires = trxmcase( ROUT, TEST, MFLOP, SIDES[si],
3080
UPLOS[up], TRANS[ta], DIAGS[di],
3081
m, n, ALPHAS+2*al, lda, ldb, EPSILON,
3082
&ttrust, &ttest, &mftrust, &mftest );
3084
/* Status column: SKIP when TEST is off, NoMEM for ires < 0, PASS for a
   non-zero ires, FAIL for zero. */
if( !( TEST ) ) pass = "SKIP ";
3085
else if( ires < 0 ) pass = "NoMEM";
3086
else if( ires ) pass = "PASS ";
3087
else pass = "FAIL ";
3089
if( ires > 0 ) (*NPASSED)++;
3091
/* Speedup is mftest / mftrust, computed only when both rates are positive
   (the fallback branch is not visible here). */
if( ( mftrust > 0.0 ) && ( mftest > 0.0 ) )
3092
t0 = mftest / mftrust;
3095
(void) fprintf( stdout, form, *NTESTS, cside, cuplo,
3096
ctran, cdiag, m, n, ALPHAS[al], lda,
3097
ldb, ttrust, mftrust, 1.0, "-----" );
3098
(void) fprintf( stdout, form, *NTESTS, cside, cuplo,
3099
ctran, cdiag, m, n, ALPHAS[al], lda,
3100
ldb, ttest, mftest, t0, pass );
3102
(void) fprintf( stdout, form, *NTESTS, cside, cuplo,
3103
ctran, cdiag, m, n, ALPHAS[2*al],
3104
ALPHAS[2*al+1], lda, ldb, ttrust,
3105
mftrust, 1.0, "-----" );
3106
(void) fprintf( stdout, form, *NTESTS, cside, cuplo,
3107
ctran, cdiag, m, n, ALPHAS[2*al],
3108
ALPHAS[2*al+1], lda, ldb, ttest,
3127
/*
 * NOTE(review): this span looks like the parameter list and body of the
 * top-level dispatcher (presumably RunCases, which main() calls).  The line
 * carrying the function name, the braces and any preprocessor conditionals
 * are not visible in this excerpt, and the bare numbers between statements
 * appear to be line-number residue.
 *
 * Visible behaviour: obtain the machine epsilon, then for each entry of
 * ROUTS call the matching Run*Case driver, passing &np and &ntests so the
 * drivers can update the pass and test counters, and finally print a
 * summary line.
 */
const enum ATLAS_SIDE * SIDES,
3129
const enum ATLAS_UPLO * UPLOS,
3131
const enum ATLAS_TRANS * TRANSA,
3133
const enum ATLAS_TRANS * TRANSB,
3135
const enum ATLAS_DIAG * DIAGS,
3146
const TYPE * ALPHAS,
3150
const enum LVL3_ROUT * ROUTS
3154
int ro, ntests=0, np=0;
3156
eps = Mjoin( PATL, epsilon )();
3158
for( ro = 0; ro < NROUT; ro++ )
3160
if( ROUTS[ro] == GEMM )
3162
RungemmCase( ROUTS[ro], TEST, MFLOP, LDA_IS_M, NTRANA, TRANSA, NTRANB,
3163
TRANSB, M0, MN, MINC, N0, NN, NINC, K0, KN, KINC, NALPHA,
3164
ALPHAS, NBETA, BETAS, eps, &np, &ntests );
3167
/*
 * Each of the next three dispatch groups shows two alternative conditions:
 * one testing the symmetric routine only, one also accepting the Hermitian
 * counterpart.  NOTE(review): presumably only one of each pair is compiled,
 * chosen by a conditional that is not visible here.
 */
else if( ROUTS[ro] == SYMM )
3169
else if( ( ROUTS[ro] == SYMM ) || ( ROUTS[ro] == HEMM ) )
3172
RunsymmCase( ROUTS[ro], TEST, MFLOP, LDA_IS_M, NSIDE, SIDES, NUPLO,
3173
UPLOS, M0, MN, MINC, N0, NN, NINC, NALPHA, ALPHAS, NBETA,
3174
BETAS, eps, &np, &ntests );
3177
else if( ROUTS[ro] == SYRK )
3179
else if( ( ROUTS[ro] == SYRK ) || ( ROUTS[ro] == HERK ) )
3182
/* The rank-k and rank-2k drivers receive TRANSA as their TRANS list. */
RunsyrkCase( ROUTS[ro], TEST, MFLOP, LDA_IS_M, NUPLO, UPLOS, NTRANA,
3183
TRANSA, N0, NN, NINC, K0, KN, KINC, NALPHA, ALPHAS, NBETA,
3184
BETAS, eps, &np, &ntests );
3187
else if( ROUTS[ro] == SYR2K )
3189
else if( ( ROUTS[ro] == SYR2K ) || ( ROUTS[ro] == HER2K ) )
3192
Runsyr2kCase( ROUTS[ro], TEST, MFLOP, LDA_IS_M, NUPLO, UPLOS, NTRANA,
3193
TRANSA, N0, NN, NINC, K0, KN, KINC, NALPHA, ALPHAS,
3194
NBETA, BETAS, eps, &np, &ntests );
3196
/* TRMM and TRSM share one driver; it takes no BETA or K arguments. */
else if( ( ROUTS[ro] == TRMM ) || ( ROUTS[ro] == TRSM ) )
3198
RuntrxmCase( ROUTS[ro], TEST, MFLOP, LDA_IS_M, NSIDE, SIDES, NUPLO,
3199
UPLOS, NTRANA, TRANSA, NDIAG, DIAGS, M0, MN, MINC, N0,
3200
NN, NINC, NALPHA, ALPHAS, eps, &np, &ntests );
3205
/* Two alternative summary lines; NOTE(review): the condition choosing
   between them is not visible here (presumably whether TEST is enabled). */
(void) fprintf( stdout, "\n%d tests run, %d passed\n\n", ntests, np );
3207
(void) fprintf( stdout, "\n%d tests run, all skipped\n\n", ntests );
3210
* =====================================================================
3212
* =====================================================================
3214
void PrintUsage( char * nam )
3216
(void) fprintf( stderr, "ATLAS usage:\n" );
3217
(void) fprintf( stderr, " %s [-options ...]\n\n", nam );
3218
(void) fprintf( stderr, "where options include:\n" );
3220
(void) fprintf( stderr, " -h " );
3221
(void) fprintf( stderr, ". print this message \n" );
3223
(void) fprintf( stderr, " -R <rout> " );
3224
(void) fprintf( stderr, ". select one or all routines to test. \n" );
3225
(void) fprintf( stderr, " " );
3227
(void) fprintf( stderr, " rout must be in {all, gemm,symm,syrk, \n" );
3228
(void) fprintf( stderr, " " );
3229
(void) fprintf( stderr, " syr2k,trmm,trsm}. \n" );
3231
(void) fprintf( stderr, " rout must be in {all, gemm,hemm,herk, \n" );
3232
(void) fprintf( stderr, " " );
3233
(void) fprintf( stderr, " her2k,symm,syr2k,syrk,trmm,trsm}. \n" );
3235
(void) fprintf( stderr, " " );
3236
(void) fprintf( stderr, " Default: -R gemm. Ex: -R trmm \n" );
3238
(void) fprintf( stderr, " -R <nrout> <rout1> ... <routN> " );
3239
(void) fprintf( stderr, ". same as above for more than one rout- \n" );
3240
(void) fprintf( stderr, " " );
3241
(void) fprintf( stderr, " tine. Ex: -R 3 genn trmm symm \n" );
3243
(void) fprintf( stderr, " -S <nside> L/R " );
3244
(void) fprintf( stderr, ". select values for the SIDE parameter. \n" );
3245
(void) fprintf( stderr, " " );
3246
(void) fprintf( stderr, " Default: -S 1 L. Ex: -S 2 L R \n" );
3248
(void) fprintf( stderr, " -U <nuplo> L/U " );
3249
(void) fprintf( stderr, ". select values for the UPLO parameter. \n" );
3250
(void) fprintf( stderr, " " );
3251
(void) fprintf( stderr, " Default: -U 1 L. Ex: -U 2 L U \n" );
3253
(void) fprintf( stderr, " -A <ntrans> N/T/C " );
3254
(void) fprintf( stderr, ". select values of the TRANSA parameter.\n" );
3255
(void) fprintf( stderr, " " );
3256
(void) fprintf( stderr, " Default: -A 1 n. Ex: -A 2 N T \n" );
3258
(void) fprintf( stderr, " -B <ntrans> N/T/C " );
3259
(void) fprintf( stderr, ". select values of the TRANSB parameter.\n" );
3260
(void) fprintf( stderr, " " );
3261
(void) fprintf( stderr, " Default: -B 1 N. Ex: -B 2 N T \n" );
3263
(void) fprintf( stderr, " -D <ndiags> N/U " );
3264
(void) fprintf( stderr, ". select values for the DIAG parameter. \n" );
3265
(void) fprintf( stderr, " " );
3266
(void) fprintf( stderr, " Default: -D 1 N. Ex: -Diag 2 N U \n" );
3268
(void) fprintf( stderr, " -m <m> " );
3269
(void) fprintf( stderr, ". select one value for the parameter M. \n" );
3270
(void) fprintf( stderr, " " );
3271
(void) fprintf( stderr, " Ex: -m 100 \n" );
3273
(void) fprintf( stderr, " -n <n> " );
3274
(void) fprintf( stderr, ". same as above for the parameter N. \n" );
3276
(void) fprintf( stderr, " -k <k> " );
3277
(void) fprintf( stderr, ". same as above for the parameter K. \n" );
3279
(void) fprintf( stderr, " -M <m1> <mN> <minc> " );
3280
(void) fprintf( stderr, ". select the values of M, from m1 to mN \n" );
3281
(void) fprintf( stderr, " " );
3282
(void) fprintf( stderr, " by increment of minc. m1 > 0. \n" );
3283
(void) fprintf( stderr, " " );
3284
(void) fprintf( stderr, " Ex: -M 100 1000 100 \n" );
3286
(void) fprintf( stderr, " -N <n1> <nN> <ninc> " );
3287
(void) fprintf( stderr, ". same as above for the values of N. \n" );
3289
(void) fprintf( stderr, " -K <k1> <kN> <kinc> " );
3290
(void) fprintf( stderr, ". same as above for the values of K. \n" );
3293
(void) fprintf( stderr, " -a <nalphas> <a1> ... <aN> " );
3294
(void) fprintf( stderr, ". select the values of ALPHA. Default: \n" );
3295
(void) fprintf( stderr, " " );
3296
(void) fprintf( stderr, " -a 1 1.0. Ex: -a 3 -1.0 0.0 1.0 \n" );
3297
(void) fprintf( stderr, " -b <nbetas> <beta1> ... <betaN> " );
3298
(void) fprintf( stderr, ". same as above for the parameter BETA. \n" );
3300
(void) fprintf( stderr, " -a <nalphas> <a1r> <a1i> ... <aNi> " );
3301
(void) fprintf( stderr, ". select the values of ALPHA, where a1r \n" );
3302
(void) fprintf( stderr, " " );
3303
(void) fprintf( stderr, " and a1i are the real and imaginary \n" );
3304
(void) fprintf( stderr, " " );
3305
(void) fprintf( stderr, " parts of a1. Default: -a 1 1.0 0.0 \n" );
3306
(void) fprintf( stderr, " -b <nbetas> <b1r> <b1i> ... <bNi> " );
3307
(void) fprintf( stderr, ". same as above for the parameter BETA. \n" );
3310
(void) fprintf( stderr, " -d " );
3311
(void) fprintf( stderr, ". use smallest possible leading dimen- \n" );
3312
(void) fprintf( stderr, " " );
3313
(void) fprintf( stderr, " sion for the array A. \n" );
3314
(void) fprintf( stderr, " " );
3315
(void) fprintf( stderr, " Default: max( mN, nN, kN ). \n" );
3317
(void) fprintf( stderr, " -T <0/1> " );
3318
(void) fprintf( stderr, ". disable/enable computational check. \n" );
3319
(void) fprintf( stderr, " " );
3320
(void) fprintf( stderr, " Default: -T 1 \n" );
3322
(void) fprintf( stderr, " -F <mflops> " );
3323
(void) fprintf( stderr, ". perform at least mflops per measure. \n" );
3325
(void) fprintf( stderr, "\n" );
3334
/*
 * NOTE(review): this span looks like the parameter list and body of the
 * command-line parser (presumably GetFlags, which main() calls with matching
 * out-parameters).  The line carrying the function name, the braces, the
 * `case` labels and several statements (for example the assignments to `ch`)
 * are not visible in this excerpt, and the bare numbers between statements
 * appear to be line-number residue.  Option letters mentioned in the
 * comments below are inferred from the help text printed by PrintUsage.
 *
 * Visible behaviour: initialise every output to a default or to a -1
 * "not set" marker, walk ARGS consuming one option and its values per
 * iteration, call PrintUsage() on anything malformed, and allocate the
 * option arrays with malloc (main() frees them).  The trailing section
 * allocates one-element defaults; the conditions guarding it are not visible.
 */
enum LVL3_ROUT ** ROUTS,
3339
enum ATLAS_SIDE ** SIDES,
3341
enum ATLAS_UPLO ** UPLOS,
3343
enum ATLAS_TRANS ** TRANSA,
3345
enum ATLAS_TRANS ** TRANSB,
3347
enum ATLAS_DIAG ** DIAGS,
3368
*NROUT = -1; /* No routine to be tested */
3369
*TEST = 1; /* Enable the testing */
3370
*LDA_IS_M = 0; /* Leading dimension chosen as max testing value */
3371
*MFLOP = 0; /* smallest number of flops possible */
3372
/* Default bandwidth is 1 */
/* NOTE(review): the "bandwidth" remark above matches none of the visible
   assignments and may be stale.  The -1 values below mark "not set". */
3374
*NSIDE = *NUPLO = *NTRANA = *NTRANB = *NDIAG = -1;
3375
*M0 = *N0 = *K0 = -1;
3376
*NALPHA = *NBETA = -1;
3378
/* No increment clause: i advances as each option and its values are
   consumed. */
for( i = 1; i < NARGS; )
3380
/* Every option must start with '-'; the switch looks only at the character
   that follows it. */
if( ARGS[i][0] != '-' ) PrintUsage( ARGS[0] );
3382
switch( ARGS[i++][1] )
3385
PrintUsage( ARGS[0] );
3388
/* Presumably -T: integer stored in *TEST. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3389
*TEST = atoi( ARGS[i++] );
3395
/* Presumably -F: integer stored in *MFLOP; negative values are rejected. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3396
*MFLOP = atoi( ARGS[i++] );
3397
if( *MFLOP < 0 ) PrintUsage( ARGS[0] );
3401
/* Presumably -S: a positive count, then that many L/R letters (either
   case). */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3402
*NSIDE = atoi( ARGS[i++] );
3403
if( *NSIDE <= 0 ) PrintUsage( ARGS[0] );
3404
*SIDES = (enum ATLAS_SIDE *)malloc( *NSIDE *
3405
sizeof( enum ATLAS_SIDE ) );
3406
ATL_assert( *SIDES );
3407
for( j = 0; j != *NSIDE; j++)
3409
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3411
if( ch == 'l' || ch == 'L' ) (*SIDES)[j] = AtlasLeft;
3412
else if( ch == 'r' || ch == 'R' ) (*SIDES)[j] = AtlasRight;
3413
else PrintUsage( ARGS[0] );
3417
/* Presumably -U: a positive count, then that many U/L letters. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3418
*NUPLO = atoi( ARGS[i++] );
3419
if( *NUPLO <= 0 ) PrintUsage( ARGS[0] );
3420
*UPLOS = (enum ATLAS_UPLO *)malloc( *NUPLO *
3421
sizeof( enum ATLAS_UPLO ) );
3422
ATL_assert( *UPLOS );
3423
for( j = 0; j != *NUPLO; j++)
3425
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3427
if( ch == 'u' || ch == 'U' ) (*UPLOS)[j] = AtlasUpper;
3428
else if( ch == 'l' || ch == 'L' ) (*UPLOS)[j] = AtlasLower;
3429
else PrintUsage( ARGS[0] );
3433
/* Presumably -A: a positive count, then that many N/T/C letters for
   TRANSA. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3434
*NTRANA = atoi(ARGS[i++]);
3435
if( *NTRANA <= 0 ) PrintUsage( ARGS[0] );
3436
*TRANSA = (enum ATLAS_TRANS *)malloc( *NTRANA *
3437
sizeof( enum ATLAS_TRANS ) );
3438
ATL_assert( *TRANSA );
3439
for( j = 0; j != *NTRANA; j++ )
3441
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3443
if( ch == 'n' || ch == 'N' ) (*TRANSA)[j] = AtlasNoTrans;
3444
else if( ch == 't' || ch == 'T' ) (*TRANSA)[j] = AtlasTrans;
3445
else if( ch == 'c' || ch == 'C' ) (*TRANSA)[j] = AtlasConjTrans;
3446
else PrintUsage( ARGS[0] );
3450
/* Presumably -B: same as above for TRANSB. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3451
*NTRANB = atoi(ARGS[i++]);
3452
if( *NTRANB <= 0 ) PrintUsage( ARGS[0] );
3453
*TRANSB = (enum ATLAS_TRANS *)malloc( *NTRANB *
3454
sizeof( enum ATLAS_TRANS ) );
3455
ATL_assert( *TRANSB );
3456
for( j = 0; j != *NTRANB; j++ )
3458
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3460
if( ch == 'n' || ch == 'N' ) (*TRANSB)[j] = AtlasNoTrans;
3461
else if( ch == 't' || ch == 'T' ) (*TRANSB)[j] = AtlasTrans;
3462
else if( ch == 'c' || ch == 'C' ) (*TRANSB)[j] = AtlasConjTrans;
3463
else PrintUsage( ARGS[0] );
3467
/* Presumably -D: a positive count, then that many U/N letters. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3468
*NDIAG = atoi( ARGS[i++] );
3469
if( *NDIAG <= 0 ) PrintUsage( ARGS[0] );
3470
*DIAGS = (enum ATLAS_DIAG *)malloc( *NDIAG *
3471
sizeof( enum ATLAS_DIAG ) );
3472
ATL_assert( *DIAGS );
3473
for( j = 0; j != *NDIAG; j++ )
3475
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3477
if( ch == 'u' || ch == 'U' ) (*DIAGS)[j] = AtlasUnit;
3478
else if( ch == 'n' || ch == 'N' ) (*DIAGS)[j] = AtlasNonUnit;
3479
else PrintUsage( ARGS[0] );
3484
/* Presumably -M: start, stop and increment for M.  Start and stop may be 0;
   the increment must be positive. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3485
*M0 = atoi( ARGS[i++] );
3486
if( *M0 < 0 ) PrintUsage( ARGS[0] );
3487
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3488
*MN = atoi( ARGS[i++] );
3489
if( *MN < 0 ) PrintUsage( ARGS[0] );
3490
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3491
*MINC = atoi( ARGS[i++] );
3492
if( *MINC <= 0 ) PrintUsage( ARGS[0] );
3495
/* Presumably -m: a single M value.  NOTE(review): this form rejects 0
   (<= 0), whereas the single-value N and K forms below accept 0 (< 0). */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3496
*M0 = *MN = atoi( ARGS[i++] ); *MINC = 1;
3497
if( *M0 <= 0 ) PrintUsage( ARGS[0] );
3500
/* Presumably -N: start, stop and increment for N. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3501
*N0 = atoi( ARGS[i++] );
3502
if( *N0 < 0 ) PrintUsage( ARGS[0] );
3503
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3504
*NN = atoi( ARGS[i++] );
3505
if( *NN < 0 ) PrintUsage( ARGS[0] );
3506
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3507
*NINC = atoi( ARGS[i++] );
3508
if( *NINC <= 0 ) PrintUsage( ARGS[0] );
3511
/* Presumably -n: a single N value (0 is accepted). */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3512
*N0 = *NN = atoi( ARGS[i++] ); *NINC = 1;
3513
if( *N0 < 0 ) PrintUsage( ARGS[0] );
3516
/* Presumably -K: start, stop and increment for K. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3517
*K0 = atoi( ARGS[i++] );
3518
if( *K0 < 0 ) PrintUsage( ARGS[0] );
3519
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3520
*KN = atoi( ARGS[i++] );
3521
if( *KN < 0 ) PrintUsage( ARGS[0] );
3522
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3523
*KINC = atoi( ARGS[i++] );
3524
if( *KINC <= 0 ) PrintUsage( ARGS[0] );
3527
/* Presumably -k: a single K value (0 is accepted). */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3528
*K0 = *KN = atoi( ARGS[i++] ); *KINC = 1;
3529
if( *K0 < 0 ) PrintUsage( ARGS[0] );
3533
/* Presumably -a: a positive count, then (*NALPHA SHIFT) floating-point
   values.  SHIFT is a macro defined outside this excerpt. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3534
*NALPHA = atoi( ARGS[i++] );
3535
if( *NALPHA <= 0 ) PrintUsage( ARGS[0] );
3536
*ALPHAS = (TYPE *)malloc( ATL_MulBySize( *NALPHA ) );
3537
ATL_assert( *ALPHAS );
3538
for( j = 0; j < (*NALPHA SHIFT); j++ )
3540
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3541
(*ALPHAS)[j] = (TYPE)atof( ARGS[i++] );
3545
/* Presumably -b: same as above for BETA. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3546
*NBETA = atoi( ARGS[i++] );
3547
if( *NBETA <= 0 ) PrintUsage( ARGS[0] );
3548
*BETAS = (TYPE *)malloc( ATL_MulBySize( *NBETA ) );
3549
ATL_assert( *BETAS );
3550
for( j = 0; j < (*NBETA SHIFT); j++ )
3552
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3553
(*BETAS)[j] = (TYPE)atof( ARGS[i++] );
3558
/* Presumably -R: either the word "all"/"ALL", or an optional count followed
   by routine names. */
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3560
if( ( strcmp( ARGS[i], "ALL" ) == 0 ) ||
3561
( strcmp( ARGS[i], "all" ) == 0 ) )
3568
*ROUTS = (enum LVL3_ROUT *)malloc( (*NROUT) *
3569
sizeof( enum LVL3_ROUT ) );
3570
ATL_assert( *ROUTS );
3572
/* "all": indices 0-5 hold the routines named below; indices 6-8 add the
   Hermitian ones (the assignment of *NROUT is not visible here). */
(*ROUTS)[ 0] = GEMM; (*ROUTS)[ 1] = SYMM; (*ROUTS)[ 2] = SYR2K;
3573
(*ROUTS)[ 3] = SYRK; (*ROUTS)[ 4] = TRMM; (*ROUTS)[ 5] = TRSM;
3575
(*ROUTS)[ 6] = HEMM; (*ROUTS)[ 7] = HERK; (*ROUTS)[ 8] = HER2K;
3581
/* A leading digit means an explicit count; otherwise exactly one routine
   name follows.  NOTE(review): unlike the other counts, no <= 0 check sits
   between this atoi and the malloc, so "-R 0" requests a zero-byte block. */
if( isdigit( *ARGS[i] ) ) { *NROUT = atoi( ARGS[i++] ); }
3582
else { *NROUT = 1; }
3583
*ROUTS = (enum LVL3_ROUT *)malloc( (*NROUT) *
3584
sizeof( enum LVL3_ROUT ) );
3585
ATL_assert( *ROUTS );
3587
for( j = 0; j < *NROUT; j++ )
3589
if( ARGS[i] == NULL ) PrintUsage( ARGS[0] );
3591
/* Names are matched exactly in all-upper or all-lower case; anything else
   reaches the final PrintUsage.  Most of the assignments that follow each
   comparison are not visible here. */
if( ( strcmp( ARGS[i], "GEMM" ) == 0 ) ||
3592
( strcmp( ARGS[i], "gemm" ) == 0 ) )
3594
else if( ( strcmp( ARGS[i], "SYMM" ) == 0 ) ||
3595
( strcmp( ARGS[i], "symm" ) == 0 ) )
3597
else if( ( strcmp( ARGS[i], "SYR2K" ) == 0 ) ||
3598
( strcmp( ARGS[i], "syr2k" ) == 0 ) )
3599
(*ROUTS)[j] = SYR2K;
3600
else if( ( strcmp( ARGS[i], "SYRK" ) == 0 ) ||
3601
( strcmp( ARGS[i], "syrk" ) == 0 ) )
3603
else if( ( strcmp( ARGS[i], "TRMM" ) == 0 ) ||
3604
( strcmp( ARGS[i], "trmm" ) == 0 ) )
3606
else if( ( strcmp( ARGS[i], "TRSM" ) == 0 ) ||
3607
( strcmp( ARGS[i], "trsm" ) == 0 ) )
3610
else if( ( strcmp( ARGS[i], "HEMM" ) == 0 ) ||
3611
( strcmp( ARGS[i], "hemm" ) == 0 ) )
3613
else if( ( strcmp( ARGS[i], "HER2K" ) == 0 ) ||
3614
( strcmp( ARGS[i], "her2k" ) == 0 ) )
3615
(*ROUTS)[j] = HER2K;
3616
else if( ( strcmp( ARGS[i], "HERK" ) == 0 ) ||
3617
( strcmp( ARGS[i], "herk" ) == 0 ) )
3620
else PrintUsage( ARGS[0] );
3626
PrintUsage( ARGS[0] );
3631
* Finish setting up defaults if the user has not selected
3636
/* Defaults: each block below allocates a one-element array and stores the
   default value shown.  NOTE(review): the guarding tests (presumably against
   the -1 markers set at the top) are not visible here. */
*ROUTS = (enum LVL3_ROUT *)malloc( sizeof( enum LVL3_ROUT ) );
3637
ATL_assert( *ROUTS );
3644
*SIDES = (enum ATLAS_SIDE *)malloc( sizeof( enum ATLAS_SIDE ) );
3645
ATL_assert( *SIDES );
3646
(*SIDES)[0] = AtlasLeft;
3651
*UPLOS = (enum ATLAS_UPLO *)malloc( sizeof( enum ATLAS_UPLO ) );
3652
ATL_assert( *UPLOS );
3653
(*UPLOS)[0] = AtlasLower;
3658
*TRANSA = (enum ATLAS_TRANS *)malloc( sizeof( enum ATLAS_TRANS ) );
3659
ATL_assert( *TRANSA );
3660
(*TRANSA)[0] = AtlasNoTrans;
3665
*TRANSB = (enum ATLAS_TRANS *)malloc( sizeof( enum ATLAS_TRANS ) );
3666
ATL_assert( *TRANSB );
3667
(*TRANSB)[0] = AtlasNoTrans;
3672
*DIAGS = (enum ATLAS_DIAG *)malloc( sizeof( enum ATLAS_DIAG ) );
3673
ATL_assert( *DIAGS );
3674
(*DIAGS)[0] = AtlasNonUnit;
3677
/* Only N gets a default range here; no visible line replaces the -1 in M0
   or K0, and the Run*Case drivers test M0 == -1 / K0 == -1 to reuse N. */
if( *N0 == -1 ) { *N0 = 100; *NN = 1000; *NINC = 100; }
3682
/* Default ALPHA and BETA: either the single value ATL_rone, or the pair
   (ATL_rone, ATL_rzero); which form applies is not visible here. */
*ALPHAS = (TYPE *)malloc( ATL_MulBySize( 1 ) );
3683
ATL_assert( *ALPHAS );
3685
(*ALPHAS)[0] = ATL_rone;
3687
(*ALPHAS)[0] = ATL_rone;
3688
(*ALPHAS)[1] = ATL_rzero;
3694
*BETAS = (TYPE *)malloc( ATL_MulBySize( 1 ) );
3695
ATL_assert( *BETAS );
3697
(*BETAS)[0] = ATL_rone;
3699
(*BETAS)[0] = ATL_rone;
3700
(*BETAS)[1] = ATL_rzero;
3705
int main( int NARGS, char **ARGS )
3707
int kinc, kstart, kstop, ldaism, mflopmin, minc,
3708
mstart, mstop, ninc, nstart, nstop, nalpha,
3709
nbeta, ndiag, nrout, nside, ntrana, ntranb,
3712
TYPE * alphas = NULL, * betas = NULL;
3713
enum LVL3_ROUT * routs = NULL;
3714
enum ATLAS_SIDE * sides = NULL;
3715
enum ATLAS_UPLO * uplos = NULL;
3716
enum ATLAS_TRANS * transa = NULL, * transb = NULL;
3717
enum ATLAS_DIAG * diags = NULL;
3719
GetFlags( NARGS, ARGS, &nrout, &routs, &test, &ldaism, &mflopmin, &nside,
3720
&sides, &nuplo, &uplos, &ntrana, &transa, &ntranb, &transb,
3721
&ndiag, &diags, &mstart, &mstop, &minc, &nstart, &nstop, &ninc,
3722
&kstart, &kstop, &kinc, &nalpha, &alphas, &nbeta, &betas );
3723
RunCases( test, mflopmin, ldaism, nside, sides, nuplo, uplos, ntrana, transa,
3724
ntranb, transb, ndiag, diags, mstart, mstop, minc, nstart, nstop,
3725
ninc, kstart, kstop, kinc, nalpha, alphas, nbeta, betas, nrout,
3728
if( sides ) free( sides );
3729
if( uplos ) free( uplos );
3730
if( transa ) free( transa );
3731
if( transb ) free( transb );
3732
if( diags ) free( diags );
3733
if( alphas ) free( alphas );
3734
if( betas ) free( betas );
3735
if( routs ) free( routs );