2
/* ---------------------------------------------------------------------
4
* -- Automatically Tuned Linear Algebra Software (ATLAS)
5
* (C) Copyright 2000 All Rights Reserved
7
* -- ATLAS routine -- Version 3.2 -- December 25, 2000
9
* Author : Antoine P. Petitet
10
* Originally developed at the University of Tennessee,
11
* Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA.
13
* ---------------------------------------------------------------------
15
* -- Copyright notice and Licensing terms:
17
* Redistribution and use in source and binary forms, with or without
18
* modification, are permitted provided that the following conditions are met:
21
* 1. Redistributions of source code must retain the above copyright
22
* notice, this list of conditions and the following disclaimer.
23
* 2. Redistributions in binary form must reproduce the above copyright
24
* notice, this list of conditions, and the following disclaimer in
25
* the documentation and/or other materials provided with the distribution.
27
* 3. The name of the University, the ATLAS group, or the names of its
28
* contributors may not be used to endorse or promote products deri-
29
* ved from this software without specific written permission.
33
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
36
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
37
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
38
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
39
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
40
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
41
* RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN-
42
* CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
43
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45
* ---------------------------------------------------------------------
51
#include "atlas_ptmisc.h"
52
#include "atlas_ptlvl3.h"
53
#include "atlas_ptlevel3.h"
57
const PT_LVL3_TYPE_T * PLVL3,
58
const unsigned int NODE,
59
const unsigned int NTHREADS,
60
pthread_attr_t * ATTR,
62
const enum ATLAS_SIDE SIDE,
63
const enum ATLAS_UPLO UPLO,
64
const enum ATLAS_TRANS TRANS,
65
const enum ATLAS_DIAG DIAG,
76
* .. Local Variables ..
79
PT_TREE_T left, right, root = NULL;
80
PT_TRMM_ARGS_T * a_mm = NULL;
82
unsigned int next = ( NODE << 1 ), nt1, nt2;
83
int incA, incB, m1, m2, n1, n2;
85
PT_TREE_T left, right, tree = NULL;
86
PT_TRMM_ARGS_T * a_mm = NULL;
88
DIM_1DSPLIT_T splitM, splitN;
90
unsigned int next = ( NODE << 1 ), ntm1, ntm2, ntn1, ntn2;
91
int incB, m1, m2, n1, n2;
94
* .. Executable Statements ..
97
if( NTHREADS < 1 ) return( tree );
101
if( SIDE == AtlasLeft )
106
ATL_gesplit( NTHREADS, M, NB, &nt1, &nt2, &m1, &m2 );
107
incA = m1 + m1 * LDA;
109
root = PLVL3->pttrmm( NTHREADS, ATTR, SIDE, UPLO, TRANS, DIAG,
110
m2, N, ALPHA, Mvpt3( A, incA, size ), LDA,
111
Mvpt3( B, incB, size ), LDB );
112
ATL_join_tree ( root );
113
ATL_free_tree ( root );
114
root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, AtlasNoTrans,
116
Mvpt3( A, m1, size ), LDA,
118
Mvpt3( B, m1, size ), LDB );
119
ATL_join_tree ( root );
120
ATL_free_tree ( root );
121
root = PLVL3->pttrmm( NTHREADS, ATTR, SIDE, UPLO, TRANS, DIAG,
122
m1, N, ALPHA, A, LDA, B, LDB );
123
ATL_join_tree ( root );
124
ATL_free_tree ( root );
130
ATL_gesplit( NTHREADS, N, NB, &nt1, &nt2, &n1, &n2 );
131
left = ATL_Strmm( PLVL3, next+1, nt1, ATTR, NB, SIDE, UPLO, TRANS,
132
DIAG, M, n1, ALPHA, A, LDA, B, LDB );
134
right = ATL_Strmm( PLVL3, next+2, nt2, ATTR, NB, SIDE, UPLO, TRANS,
135
DIAG, M, n2, ALPHA, A, LDA, Mvpt3( B, incB, size ),
139
splitN = ATL_1dsplit( NTHREADS, N, NB, &ntn1, &ntn2, &n1, &n2, &rn );
141
if( splitN == Atlas1dSplit )
143
left = ATL_Strmm( PLVL3, next+1, ntn1, ATTR, NB, SIDE, UPLO, TRANS,
144
DIAG, M, n1, ALPHA, A, LDA, B, LDB );
146
right = ATL_Strmm( PLVL3, next+2, ntn2, ATTR, NB, SIDE, UPLO, TRANS,
147
DIAG, M, n2, ALPHA, A, LDA, Mvpt3( B, incB,
149
tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL );
153
a_mm = (PT_TRMM_ARGS_T *)malloc( sizeof( PT_TRMM_ARGS_T ) );
154
ATL_assert( a_mm != NULL );
155
a_mm->si = SIDE; a_mm->up = UPLO; a_mm->tr = TRANS; a_mm->di = DIAG;
156
a_mm->m = M; a_mm->n = N; a_mm->al = ALPHA;
157
a_mm->a = A; a_mm->la = LDA; a_mm->b = B; a_mm->lb = LDB;
158
tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, PLVL3->trmm0,
165
splitM = ATL_1dsplit( NTHREADS, M, NB, &ntm1, &ntm2, &m1, &m2, &rm );
167
if( splitM == Atlas1dSplit )
169
left = ATL_Strmm( PLVL3, next+1, ntm1, ATTR, NB, SIDE, UPLO, TRANS,
170
DIAG, m1, N, ALPHA, A, LDA, B, LDB );
172
right = ATL_Strmm( PLVL3, next+2, ntm2, ATTR, NB, SIDE, UPLO, TRANS,
173
DIAG, m2, N, ALPHA, A, LDA, Mvpt3( B, incB,
175
tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL );
179
a_mm = (PT_TRMM_ARGS_T *)malloc( sizeof( PT_TRMM_ARGS_T ) );
180
ATL_assert( a_mm != NULL );
181
a_mm->si = SIDE; a_mm->up = UPLO; a_mm->tr = TRANS; a_mm->di = DIAG;
182
a_mm->m = M; a_mm->n = N; a_mm->al = ALPHA;
183
a_mm->a = A; a_mm->la = LDA; a_mm->b = B; a_mm->lb = LDB;
184
tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, PLVL3->trmm0,