2
* Automatically Tuned Linear Algebra Software v3.10.1
3
* Copyright (C) 2009 Siju Samuel
5
* Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions, and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. The name of the ATLAS group or the names of its contributers may
16
* not be used to endorse or promote products derived from this
17
* software without specific written permission.
19
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
23
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
* POSSIBILITY OF SUCH DAMAGE.
35
#include "atlas_lapack.h"
51
(const enum CBLAS_SIDE SIDE, const enum CBLAS_TRANSPOSE TRANS,
52
ATL_CINT M, ATL_CINT N, ATL_CINT K, TYPE *A, ATL_CINT lda,
53
const TYPE *TAU, TYPE *C, ATL_CINT ldc, TYPE *WORK, ATL_CINT LWORK)
55
* This is the C translation of the standard LAPACK Fortran routine:
56
* SUBROUTINE ATL_ormqr( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
60
* int ATL_ormqr(const enum CBLAS_SIDE SIDE SIDE,
61
* const enum CBLAS_TRANSPOSE TRANS, ATL_CINT M, ATL_CINT N,
62
* ATL_CINT K, TYPE * A, ATL_CINT lda,TYPE * TAU, TYPE * C, ATL_CINT ldc,
63
* TYPE * WORK, ATL_CINT LWORK)
65
* NOTE : ATL_ormqr.c will get compiled to four precisions
66
* single precision real, double precision real
67
* single precision complex, double precision complex
74
* ATL_ormqr overwrites the general real M-by-N matrix C with
76
* SIDE = 'L' SIDE = 'R'
77
* TRANS = 'N': Q * C C * Q
78
* TRANS = 'T': Q**T * C C * Q**T
81
* a real orthogonal matrix defined as the product of k
82
* elementary reflectors
84
* Q = H(1) H(2) . . . H(k)
87
* a complex unitary matrix defined as a product of k
88
* elementary reflectors
90
* Q = H(1) H(2) . . . H(k)
92
* as returned by ATLL_geqrf.c. Q is of order M if SIDE = 'L' and of order N
98
* SIDE (input) CHARACTER*1
99
* = 'L': apply Q or Q**T from the Left;
100
* = 'R': apply Q or Q**T from the Right.
102
* TRANS (input) CHARACTER*1
103
* = 'N': No transpose, apply Q;
104
* = 'T': Transpose, apply Q**T.
107
* The number of rows of the matrix C. M >= 0.
110
* The number of columns of the matrix C. N >= 0.
113
* The number of elementary reflectors whose product defines
115
* If SIDE = 'L', M >= K >= 0;
116
* if SIDE = 'R', N >= K >= 0.
118
* A (input) array, dimension (LDA,K)
119
* The i-th column must contain the vector which defines the
120
* elementary reflector H(i), for i = 1,2,...,k, as returned by
121
* DGEQRF in the first k columns of its array argument A.
122
* A is modified by the routine but restored on exit.
124
* lda (input) INTEGER
125
* The leading dimension of the array A.
126
* If SIDE = 'L', LDA >= max(1,M);
127
* if SIDE = 'R', LDA >= max(1,N).
129
* TAU (input) array, dimension (K)
130
* TAU(i) must contain the scalar factor of the elementary
131
* reflector H(i), as returned by ATL_geqrf.c.
133
* C (input/output) array, dimension (LDC,N)
134
* On entry, the M-by-N matrix C.
135
* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
137
* ldc (input) INTEGER
138
* The leading dimension of the array C. LDC >= max(1,M).
140
* WORK (workspace/output) array, dimension (MAX(1,LWORK))
141
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
143
* LWORK (input) INTEGER
144
* The dimension of the array WORK.
145
* If SIDE = 'L', LWORK >= max(1,N);
146
* if SIDE = 'R', LWORK >= max(1,M).
147
* For optimum performance LWORK >= N*NB if SIDE = 'L', and
148
* LWORK >= M*NB if SIDE = 'R', where NB is the optimal
151
* If LWORK = -1, then a workspace query is assumed; the routine
152
* only calculates the optimal size of the WORK array, returns
153
* this value as the first entry of the WORK array, and no error
154
* message related to LWORK is issued by XERBLA.
156
* INFO (output) INTEGER
157
* = 0: successful exit
158
* < 0: if INFO = -i, the i-th argument had an illegal value
161
ATL_CINT minMN = Mmin(M, N), maxMN = Mmax(M, N);
162
ATL_INT n, nb, j, ib, mi, ni, ic, jc ;
163
TYPE *ws_QR2, *ws_T, *ws_larfb; /* Workspace for QR2,T, larfb */
166
nb = clapack_ilaenv(LAIS_OPT_NB, LAormqr, MYOPT+LARight+LAUpper, M, N, K,-1);
169
* If it is a workspace query, return the size of work required.
170
* wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2
174
if(SIDE == CblasLeft)
176
*WORK = ( N*nb + nb*nb + maxMN ) ;
180
*WORK = ( M*nb + nb*nb + maxMN ) ;
184
else if (M < 1 || N < 1) /* quick return if no work to do */
187
* If the user gives us too little space, see if we can allocate it ourselves
191
if(SIDE == CblasLeft)
193
if (LWORK < (N*nb + nb*nb + maxMN))
195
vp = malloc(ATL_MulBySize(N*nb + nb*nb + maxMN) + ATL_Cachelen);
198
WORK = ATL_AlignPtr(vp);
203
if (LWORK < (M*nb + nb*nb + maxMN))
205
vp = malloc(ATL_MulBySize(M*nb + nb*nb + maxMN) + ATL_Cachelen);
208
WORK = ATL_AlignPtr(vp);
210
} /* if CblasRight */
214
* Assign workspace areas for ATL_larft, ATL_geqr2, ATL_larfb
217
ws_T = WORK; /* T at begining of work */
218
ws_QR2 = WORK +(nb SHIFT)*nb; /* After T Work space */
219
ws_larfb = ws_QR2 + (maxMN SHIFT); /* After workspace for T and QR2 */
222
if (SIDE == CblasLeft)
224
if ( TRANS == CblasNoTrans )
231
for (j; j >= 0; j = j - nb)
239
* Form the triangular factor of the block reflector
240
* H = H(i) H(i+1) . . . H(i+ib-1)
242
ATL_larft(LAForward, LAColumnStore, M-j, ib, A+(j SHIFT)*(lda+1),
243
lda, TAU+(j SHIFT), ws_T, ib);
245
* H or H' is applied to C(i:m,1:n)
247
ATL_larfb(SIDE, TRANS, LAForward, LAColumnStore,
248
(M-j), N, ib, A+(j SHIFT)*(lda+1), lda, ws_T, ib,
249
C+(j SHIFT), ldc, ws_larfb, N);
254
for (j = 0 ; j < K; j = j + nb)
258
* Form the triangular factor of the block reflector
259
* H = H(i) H(i+1) . . . H(i+ib-1)
261
ATL_larft(LAForward, LAColumnStore, M-j, ib, A+(j SHIFT)*(lda+1),
262
lda, TAU+(j SHIFT), ws_T, ib);
264
* H or H' is applied to C(i:m,1:n)
266
ATL_larfb(SIDE, TRANS, LAForward, LAColumnStore,
267
(M-j), N, ib, A+(j SHIFT)*(lda+1), lda, ws_T, ib,
268
C+(j SHIFT), ldc, ws_larfb, N);
274
if ( TRANS == CblasNoTrans )
276
for (j = 0 ; j < K; j = j + nb)
280
* Form the triangular factor of the block reflector
281
* H = H(i) H(i+1) . . . H(i+ib-1)
283
ATL_larft(LAForward, LAColumnStore, N-j, ib, A+(j SHIFT)*(lda+1),
284
lda, TAU+(j SHIFT), ws_T, ib);
286
* H or H' is applied to C(1:m,i:n)
288
ATL_larfb(SIDE, TRANS, LAForward, LAColumnStore,
289
M, N-j, ib, A+(j SHIFT)*(lda+1), lda, ws_T, ib,
290
C+((j SHIFT)*ldc), ldc, ws_larfb, M);
300
for (j; j >= 0; j = j - nb)
309
* Form the triangular factor of the block reflector
310
* H = H(i) H(i+1) . . . H(i+ib-1)
312
ATL_larft(LAForward, LAColumnStore, N-j, ib, A+(j SHIFT)*(lda+1),
313
lda, TAU+(j SHIFT), ws_T, ib);
315
* H or H' is applied to C(1:m,i:n)
318
ATL_larfb(SIDE, TRANS, LAForward, LAColumnStore,
319
M, N-j , ib, A+(j SHIFT)*(lda+1), lda, ws_T, ib,
320
C+((j SHIFT)*ldc) , ldc, ws_larfb, M);
323
} /* Cblas Tran on Right */
329
} /* END ATL_ormqr */