2
* Automatically Tuned Linear Algebra Software v3.10.1
3
* Copyright (C) 2009 Siju Samuel
5
* Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions, and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. The name of the ATLAS group or the names of its contributers may
16
* not be used to endorse or promote products derived from this
17
* software without specific written permission.
19
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
23
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
* POSSIBILITY OF SUCH DAMAGE.
33
#include "atlas_misc.h"
35
#include "atlas_ptalias_lapack.h"
36
#include "atlas_lapack.h"
37
#include "atlas_lvl3.h"
38
#include "atlas_qrrmeth.h"
40
#ifdef ATL_USEPTHREADS
41
#include "atlas_threads.h"
42
#include "atlas_taffinity.h"
43
#include "atlas_tcacheedge.h"
45
#include "atlas_cacheedge.h"
49
int ATL_geqlr(ATL_CINT M, ATL_CINT N, TYPE *A, ATL_CINT LDA, TYPE *TAU,
50
TYPE *ws_QL2, TYPE *ws_T, ATL_CINT LDT,
51
TYPE *WORKM, const int buildT)
54
* This is a recursive implementation of ATL_geqlf.c; it performs a QR
55
* factorization of a panel (M > N) with a bottom level of ATL_geql2. The
56
* recursion is on columns only; it divides by 2 until it reaches a
57
* stopping point; at which time it calls ATL_geql2 to complete a sub-panel,
58
* ATL_larft and ATL_larfb to propagate the results, etc.
61
* int ATL_geqlr(int M, int N, TYPE *A, int LDA, TYPE *TAU,
62
* TYPE *ws_QL2, TYPE *ws_T, int LDT,
63
* TYPE *WORKM, int buildT)
64
* NOTE : ATL_geql2.c will get compiled to four precisions
65
* single precision real, double precision real
66
* single precision complex, double precision complex
70
* ATL_geqlr computes a QR factorization of a real M-by-N matrix A:
77
* The number of rows of the matrix A. M >= 0.
80
* The number of columns of the matrix A. N >= 0.
82
* A (input/output) array, dimension (LDA,N)
83
* On entry, the M-by-N matrix A.
84
* On exit, the elements on and above the diagonal of the array
85
* contain the min(M,N)-by-N upper trapezoidal matrix R (R is
86
* upper triangular if m >= n); the elements below the diagonal,
87
* with the array TAU, represent the orthogonal matrix Q as a
88
* product of min(m,n) elementary reflectors (see Further
92
* The leading dimension of the array A. LDA >= max(1,M).
95
* TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
96
* The scalar factors of the elementary reflectors (see Further
99
* ws_QL2 (workspace) workspace for geql2 factorization. To be allocated
100
* with space of max(M,N)
102
* ws_T (input/output). Is the size of T matrix. To be allocated
103
* with a space of min(M,N) X min(M,N). If buildT flag is true,
104
* T is computed and populated as output. If buildT is false,
105
* T must not be used as output
107
* LDT (input) INTEGER
108
* The leading dimension of the array T. LDT >= max(1,min(M,N)).
110
* WORKM (workspace) Work space matrix, N rows by
111
* minMN columns; the amount used by larfb.
113
* buildT If non-zero, dgeqrr will build in ws_T the complete T necessary
114
* for the original panel it is passed;
115
* such that Q= I - transpose(Y) * T * Y.
116
* If zero, ws_T will contain only those elements of T necessary to
117
* complete the panel.
122
int I, INFO, IINFO, lbuilt, rbuilt, method;
123
int LDA2 = LDA SHIFT; /* for complex LDA *2 */
124
int LDT2 = LDT SHIFT; /* for complex LDT *2 */
125
ATL_CINT minMN = Mmin(M, N);
127
if (M < 1 || N < 1) return(0); /* Nothing to do. */
128
METHOD(method, M, N, LDA); /* Find the method. */
129
#if !defined(ATL_USEPTHREADS)
130
if (method == 2 || method == 3) method=1; /* Don't PCA if no affinity. */
133
switch(method) /* Based on method; */
135
case 0: /* RECURSION. */
139
* Choose a smart recursive column partitioning based on N:
140
* The mapping from this routs dim:GEMM is : right:M, left:N, K:M-left
141
* For big probs, max M & K by making left small & a mult of NB.
142
* For small problems, make M a mult of MU (many x86 have NU=1 anyway!).
145
if (minMN >= NB+NB) /* big prob, put remainder on right. */
147
leftMN = ATL_MulByNB(ATL_DivByNB(minMN>>1));
148
right = minMN - leftMN;
151
else /* small prob, keep M mult of MU (MU more critical than NU) */
153
right = ((minMN>>1)/ATL_mmMU)*ATL_mmMU;
154
leftMN = minMN - right;
158
if (left==0 || right==0) /* Stop trying to be fancy. */
161
leftMN = minMN - right;
165
/*----------------------------------------------------------------------*/
166
/* On the right half, we use the same workspaces. */
167
/* Because we know we have a left hand side we must always */
168
/* build T, so we can multiply by Q before doing the left side. */
169
/* Build T @ T[left,left]. */
170
/*----------------------------------------------------------------------*/
171
ATL_geqlr(M, right, (A+(left*LDA2)), LDA, (TAU+(leftMN SHIFT)), ws_QL2,
172
(ws_T+(leftMN SHIFT)+leftMN*LDT2), LDT, WORKM, 1);
174
/*----------------------------------------------------------------------*/
175
/* Now we must adjust the left hand side according to our T. */
176
/* We must apply H' */
177
/*----------------------------------------------------------------------*/
179
ATL_larfb(CblasLeft, CblasTrans,
180
LABackward, LAColumnStore,
181
M, left, right, (A +(left*LDA2)) , LDA,
182
(ws_T+(leftMN SHIFT)+leftMN*LDT2),
183
LDT, A, LDA, WORKM, N);
185
/*----------------------------------------------------------------------*/
186
/* On the left half, we must adjust all pointers. */
187
/*----------------------------------------------------------------------*/
188
ATL_geqlr(M-right, left, (A), LDA, (TAU), ws_QL2, ( ws_T),
191
/*----------------------------------------------------------------------*/
192
/* If we build T, the right side must be completely built, and */
193
/* the left side should be partially built. We need to fill in */
194
/* the lower left hand block, 'right' rows by 'left' columns. */
195
/* The formula is -T2 * (Y2^T * Y1) * T1. */
196
/* The routine is in ATL_larft.c. */
197
/*----------------------------------------------------------------------*/
201
ATL_larft_block(LABackward, LAColumnStore,
202
M, minMN, minMN-right, right,
203
(A+((N-minMN)*LDA2)) , LDA,
207
return(0); /* END CASE RECURSION */
210
case 1: /* SERIAL. */
212
ATL_geql2(M, minMN, A+((N-minMN)*LDA2), LDA, TAU, ws_QL2);
214
if (buildT || (N > minMN) )
216
ATL_larft(LABackward, LAColumnStore, M, minMN, A+((N-minMN)*LDA2),
217
LDA, TAU, ws_T, LDT);
219
break; /* END CASE (Update T is after switch). */
221
#if defined(ATL_USEPTHREADS) /* Cases 2 & 3 only for parallel. */
222
case 2: /* PCA COPY (last two parameters: BuildT, Copy) */
223
ATL_tgeql2(M, minMN, A+((N-minMN)*LDA2), LDA, TAU, ws_QL2,
224
ws_T, LDT, WORKM, 1, 1);
225
break; /* END CASE (Update T is after switch). */
227
case 3: /* PCA NOCOPY (last two parameters: BuildT, Copy) */
228
ATL_tgeql2(M, minMN, A+((N-minMN)*LDA2), LDA, TAU, ws_QL2,
229
ws_T, LDT, WORKM, 1, 0);
230
break; /* END CASE (Update T is after switch). */
231
#endif /* defined(ATL_USEPTHREADS) */
232
} /* END SWITCH on method. */
235
* For cases Serial, PCA_Copy, PCA NoCopy, we must update T.
236
* Adjust remainder matrix according to T: apply H' , if N > minMN
240
ATL_larfb(CblasLeft, CblasTrans, LABackward, LAColumnStore,
241
M, N-minMN, minMN, (A+((N-minMN)*LDA2)) , LDA, (ws_T),
242
LDT, A, LDA, WORKM, N);
246
} /* END ATL_geqlr */