1
/* ---------------------------------------------------------------------
3
* -- Automatically Tuned Linear Algebra Software (ATLAS)
4
* (C) Copyright 2000 All Rights Reserved
6
* -- ATLAS routine -- Version 3.0 -- April 1, 2000
8
* -- Suggestions, comments, bugs reports should be sent to the follo-
9
* wing e-mail address: atlas@cs.utk.edu
11
* Author : Antoine P. Petitet
12
* Contributor(s) : R. Clint Whaley
13
* University of Tennessee - Innovative Computing Laboratory
14
* Knoxville TN, 37996-1301, USA.
16
* ---------------------------------------------------------------------
18
* -- Copyright notice and Licensing terms:
20
* Redistribution and use in source and binary forms, with or without
21
* modification, are permitted provided that the following conditions
24
* 1. Redistributions of source code must retain the above copyright
25
* notice, this list of conditions and the following disclaimer.
26
* 2. Redistributions in binary form must reproduce the above copyright
27
* notice, this list of conditions, and the following disclaimer in
28
* the documentation and/or other materials provided with the distri-
30
* 3. The name of the University, the ATLAS group, or the names of its
31
* contributors may not be used to endorse or promote products deri-
32
* ved from this software without specific written permission.
36
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
37
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
39
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
40
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
41
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
42
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
43
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
44
* RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN-
45
* CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
46
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
* ---------------------------------------------------------------------
53
#include "atlas_misc.h"
56
#include "atlas_level1.h"
57
#include "atlas_kernel2.h"
58
#include "atlas_reflvl2.h"
59
#include "atlas_lvl2.h"
61
void Mjoin( PATL, tpsvLC )
63
const enum ATLAS_DIAG DIAG,
64
const int N, /* N > 0 assumed */
74
* Mjoin( PATL, tpsvLC ) solves the following triangular system of equations
78
* where b and x are n-element vectors and A is an n by n unit or non-
79
* unit, lower triangular matrix supplied in packed form.
81
* No test for singularity or near-singularity is included in this
82
* routine. Such tests must be performed before calling this routine.
84
* This is a blocked version of the algorithm. For a more detailed des-
85
* cription of the arguments of this function, see the reference imple-
86
* mentation in the ATLAS/src/blas/reference directory.
88
* ---------------------------------------------------------------------
91
* .. Local Variables ..
94
#define none ATL_rnone
97
TYPE none[2] = { ATL_rnone, ATL_rzero },
98
one [2] = { ATL_rone, ATL_rzero };
102
int incX, lda = LDA, mb, mb1, n, nb;
105
int incX, lda = LDA, m, mb, nb, nb1;
107
void (*tpsv0)( const int, const TYPE *, const int, TYPE * );
109
* .. Executable Statements ..
112
ATL_GetPartMVN( A, N, &mb, &nb );
114
if( DIAG == AtlasNonUnit ) tpsv0 = Mjoin( PATL, tpsvLCN );
115
else tpsv0 = Mjoin( PATL, tpsvLCU );
118
mb1 = N - ( ( N - 1 ) / mb ) * mb;
119
incX = (mb SHIFT); x0 = X; X += (mb1 SHIFT); A0 = (TYPE *)(A);
121
tpsv0( mb1, A, lda, x0 ); MLpnext( mb1, A, lda );
123
for( n = mb1; n < N; n += mb, X += incX )
125
Mjoin( PATL, gpmv )( AtlasLower, AtlasConj, mb, n, none,
126
A0 + (n SHIFT), LDA, x0, 1, one, X, 1 );
127
tpsv0( mb, A, lda, X ); MLpnext( mb, A, lda );
130
nb1 = N - ( ( N - 1 ) / nb ) * nb; incX = (nb SHIFT); x0 = X + (nb SHIFT);
132
for( m = N - nb; m > 0; m -= nb, x0 += incX, X += incX )
134
tpsv0( nb, A, lda, X );
135
Mjoin( PATL, gpmv )( AtlasLower, AtlasConj, m, nb, none,
136
A + incX, lda, X, 1, one, x0, 1 );
137
MLpnext( nb, A, lda );
139
tpsv0( nb1, A, lda, X );
142
* End of Mjoin( PATL, tpsvLC )