2
* Automatically Tuned Linear Algebra Software v3.2
3
* (C) Copyright 1999 R. Clint Whaley
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions, and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the University of Tennessee, the ATLAS group,
14
* or the names of its contributors may not be used to endorse
15
* or promote products derived from this software without specific
* prior written permission.
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE
22
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
* POSSIBILITY OF SUCH DAMAGE.
31
#include "atlas_misc.h"
35
/*
 * geadd variant whose visible body only forwards (M, N, beta, C, ldc) to the
 * routine named by joining PATL, gescal and BNM.  alpha, A and lda are
 * accepted but not read by the visible statement.
 * NOTE(review): presumably the alpha == 0 specialization of
 * C <- alpha*A + beta*C, where only the scaling of C remains; the
 * preprocessor guard selecting this definition is not visible here -- confirm.
 * NOTE(review): the bare numbers, the missing braces and the undelimited
 * "C <- ..." line look like extraction damage -- restore from the original.
 */
void Mjoin(Mjoin(Mjoin(PATL,geadd),NM),BNM)
36
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
37
const SCALAR beta, TYPE *C, const int ldc)
39
* C <- alpha*A + beta*C
42
Mjoin(PATL,Mjoin(gescal,BNM))(M, N, beta, C, ldc);
47
/*
 * geadd variant whose visible body only forwards (M, N, alpha, A, lda, C, ldc)
 * to the routine named by joining PATL, gemove and NM.  beta is accepted but
 * not read by the visible statement.
 * NOTE(review): presumably the beta == 0 specialization of
 * C <- alpha*A + beta*C, where C is simply overwritten from A; the
 * preprocessor guard selecting this definition is not visible here -- confirm.
 * NOTE(review): the bare numbers, the missing braces and the undelimited
 * "C <- ..." line look like extraction damage -- restore from the original.
 */
void Mjoin(Mjoin(Mjoin(PATL,geadd),NM),BNM)
48
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
49
const SCALAR beta, TYPE *C, const int ldc)
51
* C <- alpha*A + beta*C
54
Mjoin(PATL,Mjoin(gemove,NM))(M, N, alpha, A, lda, C, ldc);
60
/*
 * geadd variant that updates C element by element, working on two columns
 * per outer-loop pass: (a0, c0) start at A and C, (a1, c1) at the following
 * column (A + lda, C + ldc).  The outer loop runs N >> 1 times and advances
 * all four pointers by two columns (lda << 1 / ldc << 1); the inner loops
 * index rows 0 .. M-1.
 * NOTE(review): several alternative update statements appear back to back
 * (with and without the alpha multiply, and a "+=" form).  They are presumably
 * selected by preprocessor conditionals on the alpha/beta case that are not
 * visible here -- confirm against the original file.
 * NOTE(review): the declarations of i and j, all braces and the comment
 * delimiters are also missing from this view, and bare numbers are
 * interleaved; this looks like extraction damage.
 */
void Mjoin(Mjoin(Mjoin(PATL,geadd),NM),BNM)
61
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
62
const SCALAR beta, TYPE *C, const int ldc)
64
* C <- alpha*A + beta*C
67
/* n = number of column pairs; incA/incC = stride of two columns */
const int n = N >> 1, incA = lda << 1, incC = ldc << 1;
68
const TYPE *a0 = A, *a1 = A + lda;
70
TYPE *c0 = C, *c1 = C + ldc;
72
/* one pass per pair of columns */
for (j=n; j; j--, c0 += incC, c1 += incC, a0 += incA, a1 += incA)
74
for (i=0; i != M; i++)
94
/* form without the alpha multiply */
c0[i] = beta*c0[i] + a0[i];
95
c1[i] = beta*c1[i] + a1[i];
97
/* form with both alpha and beta applied */
c0[i] = beta*c0[i] + alpha*a0[i];
98
c1[i] = beta*c1[i] + alpha*a1[i];
105
/*
 * Second row loop touches only c0/a0.
 * NOTE(review): presumably the leftover column when N is odd; its guard
 * is not visible -- confirm.
 */
for (i=0; i != M; i++)
117
c0[i] += alpha*a0[i];
121
c0[i] = beta*c0[i] + a0[i];
123
c0[i] = beta*c0[i] + alpha*a0[i];
129
#elif (defined(ALPHA0) && defined(BETA0))
130
/*
 * Built when ALPHA0 and BETA0 are both defined (see the #elif just above).
 * Forwards to the geadd_a0_b0 routine whose name is prefixed with ATL_ and
 * UPR, passing M, lda and ldc doubled (<< 1) and only the first element of
 * alpha and beta (*alpha, *beta).
 * NOTE(review): the doubling presumably reflects two TYPE values per complex
 * element, so the real-precision kernel walks the same storage -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_a0_b0)
131
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
132
const SCALAR beta, TYPE *C, const int ldc)
134
Mjoin(Mjoin(ATL_,UPR),geadd_a0_b0)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
136
#elif (defined(ALPHA0) && defined(BETA1))
137
/*
 * Built when ALPHA0 and BETA1 are both defined (see the #elif just above).
 * Forwards to the geadd_a0_b1 routine whose name is prefixed with ATL_ and
 * UPR, passing M, lda and ldc doubled (<< 1) and only the first element of
 * alpha and beta (*alpha, *beta).
 * NOTE(review): the doubling presumably reflects two TYPE values per complex
 * element, so the real-precision kernel walks the same storage -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_a0_b1)
138
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
139
const SCALAR beta, TYPE *C, const int ldc)
141
Mjoin(Mjoin(ATL_,UPR),geadd_a0_b1)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
143
#elif (defined(ALPHA0) && defined(BETAXI0))
144
/*
 * Built when ALPHA0 and BETAXI0 are both defined (see the #elif just above).
 * Forwards to the geadd_a0_bX routine (note: bX, not bXi0) whose name is
 * prefixed with ATL_ and UPR, passing M, lda and ldc doubled (<< 1) and only
 * the first element of alpha and beta (*alpha, *beta).
 * NOTE(review): "Xi0" presumably means the imaginary part of beta is zero,
 * which would make dropping beta[1] safe -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_a0_bXi0)
145
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
146
const SCALAR beta, TYPE *C, const int ldc)
148
Mjoin(Mjoin(ATL_,UPR),geadd_a0_bX)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
150
#elif (defined(ALPHA1) && defined(BETA0))
151
/*
 * Built when ALPHA1 and BETA0 are both defined (see the #elif just above).
 * Forwards to the geadd_a1_b0 routine whose name is prefixed with ATL_ and
 * UPR, passing M, lda and ldc doubled (<< 1) and only the first element of
 * alpha and beta (*alpha, *beta).
 * NOTE(review): the doubling presumably reflects two TYPE values per complex
 * element, so the real-precision kernel walks the same storage -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_a1_b0)
152
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
153
const SCALAR beta, TYPE *C, const int ldc)
155
Mjoin(Mjoin(ATL_,UPR),geadd_a1_b0)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
157
#elif (defined(ALPHA1) && defined(BETA1))
158
/*
 * Built when ALPHA1 and BETA1 are both defined (see the #elif just above).
 * Forwards to the geadd_a1_b1 routine whose name is prefixed with ATL_ and
 * UPR, passing M, lda and ldc doubled (<< 1) and only the first element of
 * alpha and beta (*alpha, *beta).
 * NOTE(review): the doubling presumably reflects two TYPE values per complex
 * element, so the real-precision kernel walks the same storage -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_a1_b1)
159
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
160
const SCALAR beta, TYPE *C, const int ldc)
162
Mjoin(Mjoin(ATL_,UPR),geadd_a1_b1)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
164
#elif (defined(ALPHA1) && defined(BETAXI0))
165
/*
 * Built when ALPHA1 and BETAXI0 are both defined (see the #elif just above).
 * Forwards to the geadd_a1_bX routine (note: bX, not bXi0) whose name is
 * prefixed with ATL_ and UPR, passing M, lda and ldc doubled (<< 1) and only
 * the first element of alpha and beta (*alpha, *beta).
 * NOTE(review): "Xi0" presumably means the imaginary part of beta is zero,
 * which would make dropping beta[1] safe -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_a1_bXi0)
166
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
167
const SCALAR beta, TYPE *C, const int ldc)
169
Mjoin(Mjoin(ATL_,UPR),geadd_a1_bX)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
171
#elif (defined(ALPHAXI0) && defined(BETA0))
172
/*
 * Built when ALPHAXI0 and BETA0 are both defined (see the #elif just above).
 * Forwards to the geadd_aX_b0 routine (note: aX, not aXi0) whose name is
 * prefixed with ATL_ and UPR, passing M, lda and ldc doubled (<< 1) and only
 * the first element of alpha and beta (*alpha, *beta).
 * NOTE(review): "Xi0" presumably means the imaginary part of alpha is zero,
 * which would make dropping alpha[1] safe -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_aXi0_b0)
173
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
174
const SCALAR beta, TYPE *C, const int ldc)
176
Mjoin(Mjoin(ATL_,UPR),geadd_aX_b0)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
178
#elif (defined(ALPHAXI0) && defined(BETA1))
179
/*
 * Built when ALPHAXI0 and BETA1 are both defined (see the #elif just above).
 * Forwards to the geadd_aX_b1 routine (note: aX, not aXi0) whose name is
 * prefixed with ATL_ and UPR, passing M, lda and ldc doubled (<< 1) and only
 * the first element of alpha and beta (*alpha, *beta).
 * NOTE(review): "Xi0" presumably means the imaginary part of alpha is zero,
 * which would make dropping alpha[1] safe -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_aXi0_b1)
180
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
181
const SCALAR beta, TYPE *C, const int ldc)
183
Mjoin(Mjoin(ATL_,UPR),geadd_aX_b1)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
185
#elif (defined(ALPHAXI0) && defined(BETAXI0))
186
/*
 * Built when ALPHAXI0 and BETAXI0 are both defined (see the #elif just above).
 * Forwards to the geadd_aX_bX routine (note: aX/bX, not aXi0/bXi0) whose name
 * is prefixed with ATL_ and UPR, passing M, lda and ldc doubled (<< 1) and
 * only the first element of alpha and beta (*alpha, *beta).
 * NOTE(review): "Xi0" presumably means the imaginary parts of alpha and beta
 * are zero, which would make dropping alpha[1] and beta[1] safe -- confirm.
 * NOTE(review): braces are missing and bare numbers are interleaved; this
 * looks like extraction damage.
 */
void Mjoin(PATL,geadd_aXi0_bXi0)
187
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
188
const SCALAR beta, TYPE *C, const int ldc)
190
Mjoin(Mjoin(ATL_,UPR),geadd_aX_bX)(M<<1, N, *alpha, A, lda<<1, *beta, C, ldc<<1);
193
void Mjoin(Mjoin(Mjoin(PATL,geadd),NM),BNM)
194
(const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,
195
const SCALAR beta, TYPE *C, const int ldc)
197
* C <- alpha*A + beta*C
200
const int incA = (lda-M)<<1, incC = (ldc-M)<<1;
202
const register TYPE ralpha = *alpha, ialpha = alpha[1];
203
const register TYPE rbeta = *beta, ibeta = beta[1];
204
register TYPE cr, ci, ar, ai, t0;
206
for (j=N; j; j--, A += incA, C += incC)
208
for (i=M; i; i--, A += 2, C += 2)
216
cr = cr * rbeta - ci * ibeta;
217
ci = t0 * ibeta + ci * rbeta;
226
ar = ar * ralpha - ai * ialpha;
227
ai = t0 * ialpha + ai * ralpha;