2
* Automatically Tuned Linear Algebra Software v3.2
3
* (C) Copyright 1999 R. Clint Whaley
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions, and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the University of Tennessee, the ATLAS group,
14
* or the names of its contributers may not be used to endorse
15
* or promote products derived from this software without specific
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE
22
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
* POSSIBILITY OF SUCH DAMAGE.
32
#include "atlas_misc.h"
33
#include "atlas_lvl3.h"
34
#include "atlas_level3.h"
35
#include "atlas_level1.h"
36
#include "atlas_lapack.h"
39
#define ATL_CplxInv(in, out) Mjoin(PATL,cplxinvert)(1, in, 1, out, 1);
42
int ATL_getrfR(const int M, const int N, TYPE *A, const int lda, int *ipiv)
44
* Row-major factorization of form
46
* where P is a column-permutation matrix, L is lower triangular (lower
47
* trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
48
* trapazoidal if M < N). This is the recursive Level 3 BLAS version.
51
const int MN = Mmin(M, N);
52
int Nup, Ndown, i, ierr=0;
54
const TYPE one[2] = {ATL_rone, ATL_rzero};
55
const TYPE none[2] = {ATL_rnone, ATL_rzero};
59
#define none ATL_rnone
68
if (Nup > NB) Nup = ATL_MulByNB(ATL_DivByNB(Nup));
71
i = ATL_getrfR(Nup, N, A, lda, ipiv);
72
if (i) if (!ierr) ierr = i;
73
Ar = A + (Nup * lda SHIFT);
75
An = Ar + (Nup SHIFT);
77
ATL_laswp(Ndown, Ar, lda, 0, Nup, ipiv, 1); /* apply pivots */
78
cblas_trsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans,
79
CblasUnit, Ndown, Nup, one, A, lda, Ar, lda);
80
cblas_gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, Ndown, N-Nup, Nup,
81
none, Ar, lda, Ac, lda, one, An, lda);
83
i = ATL_getrfR(Ndown, N-Nup, An, lda, ipiv+Nup);
84
if (i) if (!ierr) ierr = Nup + i;
85
for (i=Nup; i != MN; i++) ipiv[i] += Nup;
86
ATL_laswp(Nup, A, lda, Nup, MN, ipiv, 1); /* apply pivots */
90
*ipiv = i = cblas_iamax(N, A, 1);
95
cblas_scal(N, ATL_rone/tmp, A, 1);
104
if (tmp[0] != ATL_rzero || tmp[1] != ATL_rzero)
106
ATL_CplxInv(tmp, inv);
107
cblas_scal(N, inv, A, 1);