2
* Automatically Tuned Linear Algebra Software v3.10.1
3
* Copyright (C) 2009 Siju Samuel
5
* Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions, and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. The name of the ATLAS group or the names of its contributers may
16
* not be used to endorse or promote products derived from this
17
* software without specific written permission.
19
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
23
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
* POSSIBILITY OF SUCH DAMAGE.
33
#include "atlas_misc.h"
35
#include "atlas_ptalias_lapack.h"
36
#include "atlas_lapack.h"
37
#include "atlas_lvl3.h"
38
#include "atlas_qrrmeth.h"
40
#ifdef ATL_USEPTHREADS
41
#include "atlas_threads.h"
42
#include "atlas_taffinity.h"
43
#include "atlas_tcacheedge.h"
45
#include "atlas_cacheedge.h"
49
int ATL_gerqr(ATL_CINT M, ATL_CINT N, TYPE *A, ATL_CINT LDA, TYPE *TAU,
50
TYPE *ws_RQ2, TYPE *ws_T, ATL_CINT LDT,
51
TYPE *WORKM, const int buildT)
53
int top, bottom, buildT_temp;
55
int I, INFO, IINFO, lbuilt, rbuilt, method;
56
int LDA2 = LDA SHIFT; /* for complex LDA *2 */
57
int LDT2 = LDT SHIFT; /* for complex LDT *2 */
58
ATL_CINT minMN = Mmin(M, N);
61
TYPE ONE[2] = {ATL_rone, ATL_rzero};
66
if (M < 1 || N < 1) return(0); /* Nothing to do. */
67
METHOD(method, N, M, LDA); /* Find the method. */
68
#if !defined(ATL_USEPTHREADS)
69
if (method == 2 || method == 3) method=1; /* Don't PCA if no affinity. */
72
switch(method) /* Based on method; */
74
case 0: /* RECURSION. */
77
* Choose a smart recursive column partitioning based on M:
79
if (minMN >= NB+NB) /* big prob, put remainder on right */
81
topMN = ATL_MulByNB(ATL_DivByNB(minMN>>1));
82
bottom = minMN - topMN;
85
else /* small prob, keep M mult of MU (MU more critical than NU) */
87
bottom = ((minMN>>1)/ATL_mmMU)*ATL_mmMU;
88
topMN = minMN - bottom;
92
if (top==0 || bottom==0) /* If too small for that, */
94
bottom = (minMN>>1); /* Stop trying to be clever. */
95
topMN = minMN - bottom;
99
/*----------------------------------------------------------------------*/
100
/* On the bottom half, we use the same workspaces. */
101
/* Because we know we have a top hand side we must always */
102
/* build T, so we can multiply by Q before doing the top side. */
103
/*----------------------------------------------------------------------*/
104
ATL_gerqr(bottom,N, (A+(top SHIFT)), LDA, (TAU+(topMN SHIFT)), ws_RQ2,
105
( ws_T+(topMN SHIFT)+topMN*LDT2), LDT, WORKM, 1);
107
/*----------------------------------------------------------------------*/
108
/* Now we must adjust the top hand side according to our T. */
109
/* We must apply H' */
110
/*----------------------------------------------------------------------*/
112
ATL_larfb(CblasRight, CblasNoTrans, LABackward, LARowStore,
113
top, N, bottom, (A +(top SHIFT)) , LDA,
114
(ws_T+(topMN SHIFT)+topMN*LDT2), LDT, A, LDA, WORKM, M);
116
/*----------------------------------------------------------------------*/
117
/* On the top half, */
118
/*----------------------------------------------------------------------*/
119
ATL_gerqr(top, N-bottom,(A), LDA, (TAU),
121
( ws_T), LDT, WORKM, buildT);
123
/*----------------------------------------------------------------------*/
124
/* If we build T, the bottom side must be completely built, and */
125
/* the top side should be partially built. We need to fill in */
126
/* the lower left hand block, 'bottom' rows by 'top' columns. */
127
/* The formula is -T2 * (Y2 * Y1^T) * T1. */
128
/* The routine is in ATL_larft.c. */
129
/*----------------------------------------------------------------------*/
133
ATL_larft_block(LABackward, LARowStore,
134
N, minMN, minMN-bottom, bottom,
135
A+((M -minMN) SHIFT), LDA, ws_T, LDT);
140
case 1: /* SERIAL (single core mode) */
143
Mjoin(PATL,gemoveT)(N, minMN, ONE, A+((M-minMN) SHIFT),LDA,WORKM,N);
144
ATL_geql2(N, minMN, WORKM, N, TAU, ws_RQ2);
145
Mjoin(PATL,gemoveT)(minMN,N,ONE,WORKM, N, A+((M-minMN) SHIFT), LDA);
146
// make conjugate of TAU
148
Mjoin(PATLU,scal)(minMN, ATL_rnone, TAU+1, 2);
153
ATL_gerq2(minMN, N, A+((M -minMN) SHIFT) , LDA, TAU, ws_RQ2);
156
if (buildT || M > minMN)
158
ATL_larft(LABackward, LARowStore, N, minMN,
159
A+((M -minMN) SHIFT), LDA, TAU, ws_T, LDT);
161
break; /* END CASE */
163
#if defined(ATL_USEPTHREADS) /* Cases 2 & 3 only for parallel. */
164
case 2: /* PCA COPY */
165
case 3: /* PCA NOCOPY (but does not exist for RQ) */
166
if (buildT || (M > minMN) )
172
buildT_temp = buildT;
175
/* Here minMN, N are reversed */
176
ATL_tgerq2(N, minMN, A+((M-minMN) SHIFT), LDA, TAU, ws_RQ2,
177
ws_T, LDT, WORKM, buildT_temp, 1);
178
break; /* END CASE */
179
#endif /* defined(ATL_USEPTHREADS) */
180
} /* END SWITCH on method */
182
/* Common code for cases Serial, PCA COPY, PCA NOCOPY */
185
ATL_larfb(CblasRight, CblasNoTrans,
186
LABackward, LARowStore, M-minMN, N, minMN, A+((M -minMN) SHIFT),
187
LDA, (ws_T), LDT, A, LDA, WORKM, M);
191
} /* END ATL_gerqr */