1
/* Fast GEMM routine for Alpha 21164/21264 */
2
/* on Linux, Digital UNIX */
3
/* by Kazushige Goto <goto@statabo.rim.or.jp> */
5
* Modified by R. Clint Whaley (rwhaley@cs.utk.edu) on 03/24/00
6
* for inclusion in ATLAS.
8
* (1) Added support for ATLAS's data type macros
9
* (2) Changed routine name macros to match ATLAS's naming scheme
10
* and got rid of unused naming macros
11
* (3) Got rid of unused prototypes
12
* To see all changes, the author's original tarfile is in :
13
* libgemm-20000228.tar.bz
22
#if defined(DREAL) || defined(DCPLX)
26
#elif defined(SREAL) || defined(SCPLX)
34
/* Map the generic real-GEMM kernel names onto the ATLU_dgotogemm_* symbols,
   one per _tt/_tn/_nt/_nn suffix.
   NOTE(review): the "d" in the symbol names presumably denotes double
   precision; the directive that guards these lines is not fully visible
   here -- confirm. */
#define GEMM_TT ATLU_dgotogemm_tt
35
#define GEMM_TN ATLU_dgotogemm_tn
36
#define GEMM_NT ATLU_dgotogemm_nt
37
#define GEMM_NN ATLU_dgotogemm_nn
39
/* Map the generic complex-GEMM kernel names onto the ATLU_zgotogemm_* symbols.
   All nine two-letter suffixes over {c, t, n} are covered.
   NOTE(review): the "z" presumably denotes double-precision complex, and the
   suffix letters presumably give the operation applied to each input matrix
   -- confirm against the kernel sources. */
#define ZGEMM_CC ATLU_zgotogemm_cc
40
#define ZGEMM_CT ATLU_zgotogemm_ct
41
#define ZGEMM_CN ATLU_zgotogemm_cn
42
#define ZGEMM_TC ATLU_zgotogemm_tc
43
#define ZGEMM_TT ATLU_zgotogemm_tt
44
#define ZGEMM_TN ATLU_zgotogemm_tn
45
#define ZGEMM_NC ATLU_zgotogemm_nc
46
#define ZGEMM_NT ATLU_zgotogemm_nt
47
#define ZGEMM_NN ATLU_zgotogemm_nn
59
/* Generic names for the d/z flavours of the parameter type and the thread
   entry: GEMM_* resolves to the dgemm_* identifier, ZGEMM_* to the zgemm_*
   one.
   NOTE(review): the definitions of dgemm_param_t, zgemm_param_t, dgemm_thread
   and zgemm_thread are not visible in this chunk -- confirm what they are. */
#define GEMM_PARAM_T dgemm_param_t
60
#define ZGEMM_PARAM_T zgemm_param_t
61
#define GEMM_THREAD dgemm_thread
62
#define ZGEMM_THREAD zgemm_thread
66
/* Map the generic real-GEMM kernel names onto the ATLU_sgotogemm_* symbols,
   one per _tt/_tn/_nt/_nn suffix.
   NOTE(review): the "s" in the symbol names presumably denotes single
   precision; the directive that guards these lines is not fully visible
   here -- confirm. */
#define GEMM_TT ATLU_sgotogemm_tt
67
#define GEMM_TN ATLU_sgotogemm_tn
68
#define GEMM_NT ATLU_sgotogemm_nt
69
#define GEMM_NN ATLU_sgotogemm_nn
71
/* Map the generic complex-GEMM kernel names onto the ATLU_cgotogemm_* symbols.
   All nine two-letter suffixes over {c, t, n} are covered.  The macro names
   keep the ZGEMM_ prefix even though the targets are the "c" symbols.
   NOTE(review): the "c" presumably denotes single-precision complex, and the
   suffix letters presumably give the operation applied to each input matrix
   -- confirm against the kernel sources. */
#define ZGEMM_CC ATLU_cgotogemm_cc
72
#define ZGEMM_CT ATLU_cgotogemm_ct
73
#define ZGEMM_CN ATLU_cgotogemm_cn
74
#define ZGEMM_TC ATLU_cgotogemm_tc
75
#define ZGEMM_TT ATLU_cgotogemm_tt
76
#define ZGEMM_TN ATLU_cgotogemm_tn
77
#define ZGEMM_NC ATLU_cgotogemm_nc
78
#define ZGEMM_NT ATLU_cgotogemm_nt
79
#define ZGEMM_NN ATLU_cgotogemm_nn
91
/* Generic names for the s/c flavours of the parameter type and the thread
   entry: GEMM_* resolves to the sgemm_* identifier, ZGEMM_* to the cgemm_*
   one.
   NOTE(review): the definitions of sgemm_param_t, cgemm_param_t, sgemm_thread
   and cgemm_thread are not visible in this chunk -- confirm what they are. */
#define GEMM_PARAM_T sgemm_param_t
92
#define ZGEMM_PARAM_T cgemm_param_t
93
#define GEMM_THREAD sgemm_thread
94
#define ZGEMM_THREAD cgemm_thread
100
/* One of several alternative WH64 definitions in this file (the directives
   that choose between them are not visible here).  This variant emits a raw
   32-bit word with .long: 0x18 in bits 31..26, 0x1f in bits 25..21, the
   register number `a` in bits 20..16, and 0xf800 in the low bits.
   NOTE(review): presumably the hand-assembled encoding of the wh64
   instruction for assemblers that lack the mnemonic -- confirm. */
#define WH64(a) .long (0x18<<26 | 0x1f << 21 | (a)<<16 | 0xf800)
102
/* One of several alternative WH64 definitions in this file (the directives
   that choose between them are not visible here).  This variant expands to
   the wh64 mnemonic itself; `$##a` pastes `$` onto the argument so that a
   register number such as 4 becomes the operand ($4). */
#define WH64(a) wh64 ($##a)
105
/* One of several alternative WH64 definitions in this file (the directives
   that choose between them are not visible here).  This variant expands to a
   load, `lds $f31, 0($a)`, instead of wh64.
   NOTE(review): presumably a substitute for targets without wh64, relying on
   a load into $f31 being discarded -- confirm. */
#define WH64(a) lds $f31, 0($##a)
108
/* for x >= 0, split the address x into x_h and x_l
110
x = x_h * 65536 + x_l
111
where x_l in [-32768, 32767] that is [-(1<<15), (1<<15)-1]
112
the formula actually used is
113
x + (1<<15) = ( x_h<<16 ) + ( x_l + (1<<15) )
115
/* High part of the split of x: (x + 0x8000) >> 16, i.e. x rounded to the
   nearest multiple of 65536 and expressed in units of 65536. */
#define Address_H(x) (((x) + 0x8000) >> 16)
116
/* Low part of the split of x: whatever is left after subtracting
   Address_H(x) * 65536.  For x >= 0 the result lies in [-32768, 32767]. */
#define Address_L(x) ((x) - ((Address_H(x)) << 16))
120
/* Smaller of a and b.  Every use of an argument is parenthesized so that
   operands containing low-precedence operators (&, |, ?:, ...) group as the
   caller wrote them.  Arguments are still evaluated more than once, so do
   not pass expressions with side effects. */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
121
/* Larger of a and b.  Every use of an argument is parenthesized so that
   operands containing low-precedence operators (&, |, ?:, ...) group as the
   caller wrote them.  Arguments are still evaluated more than once, so do
   not pass expressions with side effects. */
#define MAX(a,b) ((a) < (b) ? (b) : (a))
123
/* Error-reporting routine defined outside this file.
   NOTE(review): the names given to the first and last parameters assume the
   usual Fortran-callable XERBLA convention (routine name plus its hidden
   string length) -- confirm against the actual definition. */
void xerbla_(char *srname, int *info, long srname_len);
125
/* Prototype of the real GEMM kernel named by GEMM_TT.  Parameter names mirror
   the ZGEMM_* prototypes in this file (same count and order), except that
   alpha is passed by value here.
   NOTE(review): presumably the parameters play the same roles as in those
   prototypes -- confirm against the kernel source. */
int GEMM_TT(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda,
            FLOAT *b, int ldb,
            FLOAT *c, int ldc,
            FLOAT *buffer);
127
/* Prototype of the real GEMM kernel named by GEMM_TN.  Parameter names mirror
   the ZGEMM_* prototypes in this file (same count and order), except that
   alpha is passed by value here.
   NOTE(review): presumably the parameters play the same roles as in those
   prototypes -- confirm against the kernel source. */
int GEMM_TN(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda,
            FLOAT *b, int ldb,
            FLOAT *c, int ldc,
            FLOAT *buffer);
129
/* Prototype of the real GEMM kernel named by GEMM_NT.  Parameter names mirror
   the ZGEMM_* prototypes in this file (same count and order), except that
   alpha is passed by value here.
   NOTE(review): presumably the parameters play the same roles as in those
   prototypes -- confirm against the kernel source. */
int GEMM_NT(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda,
            FLOAT *b, int ldb,
            FLOAT *c, int ldc,
            FLOAT *buffer);
131
/* Prototype of the real GEMM kernel named by GEMM_NN.  Parameter names mirror
   the ZGEMM_* prototypes in this file (same count and order), except that
   alpha is passed by value here.
   NOTE(review): presumably the parameters play the same roles as in those
   prototypes -- confirm against the kernel source. */
int GEMM_NN(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda,
            FLOAT *b, int ldb,
            FLOAT *c, int ldc,
            FLOAT *buffer);
134
/* Prototype of the complex GEMM kernel named by ZGEMM_CC.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _CC suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_CC(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
136
/* Prototype of the complex GEMM kernel named by ZGEMM_CT.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _CT suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_CT(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
138
/* Prototype of the complex GEMM kernel named by ZGEMM_CN.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _CN suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_CN(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
140
/* Prototype of the complex GEMM kernel named by ZGEMM_TC.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _TC suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_TC(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
142
/* Prototype of the complex GEMM kernel named by ZGEMM_TT.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _TT suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_TT(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
144
/* Prototype of the complex GEMM kernel named by ZGEMM_TN.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _TN suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_TN(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
146
/* Prototype of the complex GEMM kernel named by ZGEMM_NC.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _NC suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_NC(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
148
/* Prototype of the complex GEMM kernel named by ZGEMM_NT.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _NT suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_NT(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
150
/* Prototype of the complex GEMM kernel named by ZGEMM_NN.  Unlike the GEMM_*
   prototypes in this file, alpha is passed by pointer.
   NOTE(review): the meaning of the int return value and of the _NN suffix is
   not visible here -- confirm against the kernel source. */
int ZGEMM_NN(int m, int n, int k,
             FLOAT *alpha,
             FLOAT *a, int lda,
             FLOAT *b, int ldb,
             FLOAT *c, int ldc,
             FLOAT *buffer);
153
/* the info number used to differentiate abnormal exit from Goto's code */