2
/* ---------------------------------------------------------------------
4
* -- Automatically Tuned Linear Algebra Software (ATLAS)
5
* (C) Copyright 2000 All Rights Reserved
7
* -- ATLAS routine -- Version 3.2 -- December 25, 2000
9
* Author : Antoine P. Petitet
10
* Originally developed at the University of Tennessee,
11
* Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA.
13
* ---------------------------------------------------------------------
15
* -- Copyright notice and Licensing terms:
17
* Redistribution and use in source and binary forms, with or without
18
* modification, are permitted provided that the following conditions are met:
21
* 1. Redistributions of source code must retain the above copyright
22
* notice, this list of conditions and the following disclaimer.
23
* 2. Redistributions in binary form must reproduce the above copyright
24
* notice, this list of conditions, and the following disclaimer in
25
* the documentation and/or other materials provided with the distribution.
27
* 3. The name of the University, the ATLAS group, or the names of its
28
* contributors may not be used to endorse or promote products deri-
29
* ved from this software without specific written permission.
33
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
36
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
37
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
38
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
39
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
40
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
41
* RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN-
42
* CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
43
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45
* ---------------------------------------------------------------------
*/
51
#include "atlas_ptmisc.h"
52
#include "atlas_ptlvl3.h"
53
#include "atlas_ptlevel3.h"
57
/*
 * Parameter list and body of the recursive task-tree builder for the
 * threaded symmetric rank-k update.
 *
 * NOTE(review): presumably this is ATL_Ssyrk itself (the body recurses
 * into ATL_Ssyrk with the same leading arguments). The line carrying the
 * function name is not visible in this excerpt, and neither are several
 * continuation lines, braces and `else` keywords; confirm every remark
 * below against the pristine source.
 *
 * Visible behavior: ATL_tzsplit selects a split kind. For each of the four
 * split kinds handled below, two child subtrees are built with ATL_Ssyrk
 * and/or ATL_Sgemm and joined by ATL_init_node( NODE, left, right, ... ).
 * In the remaining case a PT_SYRK_ARGS_T is heap-allocated, filled with
 * the problem description and attached to a node that has no children.
 */
/* PLVL3: forwarded to every child call; its syrk0 / herk0 members supply
 * the routine attached to a childless node. */
const PT_LVL3_TYPE_T * PLVL3,
58
/* NODE: number of the node being built; the children are numbered
 * ( NODE << 1 ) + 1 and ( NODE << 1 ) + 2. */
const unsigned int NODE,
59
/* NTHREADS: thread count handed to ATL_tzsplit; a value below 1 makes the
 * routine return the NULL tree. */
const unsigned int NTHREADS,
60
/* ATTR: forwarded unchanged to the child calls. */
pthread_attr_t * ATTR,
62
/* UPLO: every split case distinguishes AtlasLower from the other value. */
const enum ATLAS_UPLO UPLO,
63
/* TRANS: AtlasNoTrans selects different operand offsets than the other
 * setting(s). */
const enum ATLAS_TRANS TRANS,
64
/* TGEMM: forwarded to ATL_Ssyrk / ATL_Sgemm; TGEMM == AtlasTrans selects
 * PLVL3->syrk0 for the childless node, anything else PLVL3->herk0. */
const enum ATLAS_TRANS TGEMM,
78
* .. Local Variables ..
80
/* left / right: child subtrees; tree: the result, NULL until a node is
 * created. */
PT_TREE_T left, right, tree = NULL;
81
/* Argument record for the childless node; allocated only in that case. */
PT_SYRK_ARGS_T * a_syrk = NULL;
83
DIM_TZSPLIT_T split = AtlasTzNoSplit;
84
/* next: base for the child node numbers. nt1 / nt2: passed by address to
 * ATL_tzsplit, then used as the thread counts of the left / right child. */
unsigned int next = ( NODE << 1 ), nt1=0, nt2=0;
85
/* incA / incC: offsets handed to Mvpt3 for A and C. mnk1 / mnk2: sizes of
 * the two parts of the split dimension -- presumably filled in by
 * ATL_tzsplit on a continuation line that is not visible here. */
int incA, incC, mnk1=0, mnk2=0;
87
* .. Executable Statements ..
90
/* No thread for this subtree: return tree, which is still NULL here. */
if( NTHREADS < 1 ) return( tree );
93
/* Decide how to split the problem. The call continues on a line that is
 * not visible in this excerpt. */
split = ATL_tzsplit( UPLO, NTHREADS, M, N, K, NB, &nt1, &nt2,
96
/* Split kind AtlasTzSplitMrow: one child is a SYRK, the other a GEMM. */
if( split == AtlasTzSplitMrow )
98
/* Lower: left = SYRK with mnk1 in the M position, right = GEMM whose first
 * dimension is mnk2. */
if( UPLO == AtlasLower )
100
if( TRANS == AtlasNoTrans )
102
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS,
103
TGEMM, mnk1, N, K, L, ALPHA, A, LDA, BETA,
105
right = ATL_Sgemm( PLVL3, next+2, nt2, ATTR, NB, AtlasNoTrans,
106
TGEMM, mnk2, N+K, L, ALPHA,
107
Mvpt3( A, K + mnk1, size ), LDA,
108
Mvpt3( A, - N, size ), LDA, BETA,
109
Mvpt3( C, K + mnk1, size ), LDC );
113
/* NOTE(review): presumably the alternative to TRANS == AtlasNoTrans; the
 * `else` line is not visible in this excerpt. */
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS,
114
TGEMM, mnk1, N, K, L, ALPHA, A, LDA, BETA,
116
right = ATL_Sgemm( PLVL3, next+2, nt2, ATTR, NB, TGEMM,
117
AtlasNoTrans, mnk2, N+K, L, ALPHA,
118
Mvpt3( A, ( N + K + mnk1 ) * LDA,
119
size ), LDA, A, LDA, BETA,
120
Mvpt3( C, K + mnk1, size ), LDC );
125
/* NOTE(review): presumably the branch for UPLO other than AtlasLower
 * (`else` not visible): left = GEMM whose first dimension is mnk1,
 * right = SYRK with mnk2 in the M position. */
if( TRANS == AtlasNoTrans )
127
left = ATL_Sgemm( PLVL3, next+1, nt1, ATTR, NB, AtlasNoTrans,
128
TGEMM, mnk1, N+K, L, ALPHA, A, LDA,
129
Mvpt3( A, M, size ), LDA, BETA,
131
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS,
132
TGEMM, mnk2, N, K, L, ALPHA,
133
Mvpt3( A, mnk1, size ), LDA, BETA,
134
Mvpt3( C, mnk1, size ), LDC );
138
left = ATL_Sgemm( PLVL3, next+1, nt1, ATTR, NB, TGEMM,
139
AtlasNoTrans, mnk1, N+K, L, ALPHA,
140
Mvpt3( A, - M * LDA, size ), LDA,
141
A, LDA, BETA, C, LDC );
142
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS,
143
TGEMM, mnk2, N, K, L, ALPHA, A, LDA, BETA,
144
Mvpt3( C, mnk1, size ), LDC );
148
/* Join the two children under this node; the four trailing arguments are
 * NULL whenever the node has children. */
tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL );
150
/* Split kind AtlasTzSplitKrow: both children are SYRK calls, with mnk1 and
 * mnk2 in the K position. */
else if( split == AtlasTzSplitKrow )
152
if( UPLO == AtlasLower )
154
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM,
155
0, N, mnk1, L, ALPHA, A, LDA, BETA, C, LDC );
156
incA = ( TRANS == AtlasNoTrans ? mnk1 : 0 );
158
/* NOTE(review): the assignment of incC for this branch is not visible in
 * this excerpt. */
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM,
159
M, N + mnk1, mnk2, L, ALPHA, Mvpt3( A, incA, size ),
160
LDA, BETA, Mvpt3( C, incC, size ), LDC );
164
/* NOTE(review): presumably the branch for UPLO other than AtlasLower
 * (`else` not visible). */
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM, M,
165
N + mnk2, mnk1, L, ALPHA, A, LDA, BETA, C, LDC );
166
incA = ( TRANS == AtlasNoTrans ? mnk1 : mnk1 * LDA );
167
incC = mnk1 * ( LDC + 1 );
168
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM, 0,
169
N, mnk2, L, ALPHA, Mvpt3( A, incA, size ),
170
LDA, BETA, Mvpt3( C, incC, size ), LDC );
172
tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL );
174
/* Split kind AtlasTzSplitKcol: again two SYRK children with mnk1 and mnk2
 * in the K position, but with different M / N arguments and offsets. */
else if( split == AtlasTzSplitKcol )
176
if( UPLO == AtlasLower )
178
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM,
179
M + mnk2, N, mnk1, L, ALPHA, A, LDA, BETA,
181
incA = ( TRANS == AtlasNoTrans ? mnk1 : ( N + mnk1 ) * LDA );
182
incC = mnk1 + ( N + mnk1 ) * LDC;
183
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM,
184
M, 0, mnk2, L, ALPHA, Mvpt3( A, incA,
185
size ), LDA, BETA, Mvpt3( C, incC,
190
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM,
191
M, 0, mnk1, L, ALPHA, A, LDA, BETA, C, LDC );
192
incA = ( TRANS == AtlasNoTrans ? 0 : mnk1 * LDA );
194
/* NOTE(review): the assignment of incC for this branch is not visible in
 * this excerpt. */
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM,
195
M + mnk1, N, mnk2, L, ALPHA, Mvpt3( A,
196
incA, size ), LDA, BETA, Mvpt3( C, incC,
199
tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL );
201
/* Split kind AtlasTzSplitNcol: one GEMM child and one SYRK child; the
 * part sizes mnk1 / mnk2 sit in the N position of the SYRK call and in
 * the second dimension of the GEMM call. */
else if( split == AtlasTzSplitNcol )
203
/* Lower: left = GEMM (mnk1), right = SYRK (mnk2). */
if( UPLO == AtlasLower )
205
if( TRANS == AtlasNoTrans )
208
/* NOTE(review): incA and incC used in this branch are assigned on lines
 * that are not visible in this excerpt. */
left = ATL_Sgemm( PLVL3, next+1, nt1, ATTR, NB, AtlasNoTrans,
209
TGEMM, M + K, mnk1, L, ALPHA, A, LDA,
210
Mvpt3( A, incA, size ), LDA, BETA,
213
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS,
214
TGEMM, M, mnk2, K, L, ALPHA, A, LDA, BETA,
215
Mvpt3( C, incC, size ), LDC );
219
incA = ( N + K ) * LDA;
220
left = ATL_Sgemm( PLVL3, next+1, nt1, ATTR, NB, TGEMM,
221
AtlasNoTrans, M + K, mnk1, L, ALPHA,
222
Mvpt3( A, incA, size ), LDA,
223
A, LDA, BETA, C, LDC );
226
right = ATL_Ssyrk( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS,
227
TGEMM, M, mnk2, K, L, ALPHA,
228
Mvpt3( A, incA, size ), LDA, BETA,
229
Mvpt3( C, incC, size ), LDC );
234
/* NOTE(review): presumably the branch for UPLO other than AtlasLower
 * (`else` not visible): left = SYRK (mnk1), right = GEMM (mnk2). */
left = ATL_Ssyrk( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM,
235
M, mnk1, K, L, ALPHA, A, LDA, BETA, C, LDC );
236
incC = ( K + mnk1 ) * LDC;
238
if( TRANS == AtlasNoTrans )
241
right = ATL_Sgemm( PLVL3, next+2, nt2, ATTR, NB, AtlasNoTrans,
242
TGEMM, M + K, mnk2, L, ALPHA, A, LDA,
243
Mvpt3( A, incA, size ), LDA, BETA,
244
Mvpt3( C, incC, size ), LDC );
248
incA = ( K + mnk1 ) * LDA;
249
right = ATL_Sgemm( PLVL3, next+2, nt2, ATTR, NB, TGEMM,
250
AtlasNoTrans, M + K, mnk2, L, ALPHA,
251
Mvpt3( A, -M * LDA, size ), LDA,
252
Mvpt3( A, incA, size ), LDA, BETA,
253
Mvpt3( C, incC, size ), LDC );
256
tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL );
260
/* Remaining case (presumably the final `else`, i.e. no split): record the
 * whole problem in a heap-allocated PT_SYRK_ARGS_T. The pointer is handed
 * to ATL_init_node below; where it is released is not visible here. */
a_syrk = (PT_SYRK_ARGS_T *)malloc( sizeof( PT_SYRK_ARGS_T ) );
261
/* The allocation result is checked with ATL_assert before any field is
 * written. */
ATL_assert( a_syrk != NULL );
262
a_syrk->up = UPLO; a_syrk->tr = TRANS; a_syrk->m = M;
263
a_syrk->n = N; a_syrk->k = K; a_syrk->l = L;
264
a_syrk->al = ALPHA; a_syrk->a = A; a_syrk->la = LDA;
265
a_syrk->be = BETA; a_syrk->c = C; a_syrk->lc = LDC;
267
/* Childless node: both child slots are NULL; the routine is syrk0 when
 * TGEMM == AtlasTrans and herk0 otherwise, with a_syrk as its argument. */
tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL,
268
( TGEMM == AtlasTrans ? PLVL3->syrk0 :
269
PLVL3->herk0 ), (void *)(a_syrk) );