4
#include "atlas_lapack.h"
20
(const enum CBLAS_SIDE SIDE, const enum CBLAS_TRANSPOSE TRANS,
21
ATL_CINT M, ATL_CINT N, ATL_CINT K, TYPE *A, ATL_CINT lda,
22
const TYPE * TAU, TYPE *C, ATL_CINT ldc, TYPE *WORK, ATL_CINT LWORK)
24
* This is the C translation of the standard LAPACK Fortran routine:
25
* SUBROUTINE ATL_ormql( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
29
* int ATL_ormql(const enum CBLAS_SIDE SIDE SIDE,
30
* const enum CBLAS_TRANSPOSE TRANS, ATL_CINT M, ATL_CINT N,
31
* ATL_CINT K, TYPE * A, ATL_CINT lda,TYPE * TAU, TYPE * C, ATL_CINT ldc,
32
* TYPE * WORK, ATL_CINT LWORK)
34
* NOTE : ATL_ormql.c will get compiled to four precisions
35
* single precision real, double precision real
36
* single precision complex, double precision complex
42
* ATL_ormql overwrites the general real M-by-N matrix C with
44
* SIDE = 'L' SIDE = 'R'
45
* TRANS = 'N': Q * C C * Q
46
* TRANS = 'T': Q**T * C C * Q**T
49
* a real orthogonal matrix defined as the product of k
50
* elementary reflectors
52
* Q = H(k) . . . H(2) H(1)
55
* a complex unitary matrix defined as a product of k
56
* elementary reflectors
58
* Q = H(k) . . . H(2) H(1)
60
* as returned by ATLL_geqrf.c. Q is of order M if SIDE = 'L' and of order N
66
* SIDE (input) CHARACTER*1
67
* = 'L': apply Q or Q**T from the Left;
68
* = 'R': apply Q or Q**T from the Right.
70
* TRANS (input) CHARACTER*1
71
* = 'N': No transpose, apply Q;
72
* = 'T': Transpose, apply Q**T.
75
* The number of rows of the matrix C. M >= 0.
78
* The number of columns of the matrix C. N >= 0.
81
* The number of elementary reflectors whose product defines
83
* If SIDE = 'L', M >= K >= 0;
84
* if SIDE = 'R', N >= K >= 0.
86
* A (input) array, dimension (LDA,K)
87
* The i-th column must contain the vector which defines the
88
* elementary reflector H(i), for i = 1,2,...,k, as returned by
89
* DGEQRF in the first k columns of its array argument A.
90
* A is modified by the routine but restored on exit.
93
* The leading dimension of the array A.
94
* If SIDE = 'L', LDA >= max(1,M);
95
* if SIDE = 'R', LDA >= max(1,N).
97
* TAU (input) array, dimension (K)
98
* TAU(i) must contain the scalar factor of the elementary
99
* reflector H(i), as returned by ATL_geqrf.c.
101
* C (input/output) array, dimension (LDC,N)
102
* On entry, the M-by-N matrix C.
103
* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
105
* ldc (input) INTEGER
106
* The leading dimension of the array C. LDC >= max(1,M).
108
* WORK (workspace/output) array, dimension (MAX(1,LWORK))
109
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
111
* LWORK (input) INTEGER
112
* The dimension of the array WORK.
113
* If SIDE = 'L', LWORK >= max(1,N);
114
* if SIDE = 'R', LWORK >= max(1,M).
115
* For optimum performance LWORK >= N*NB if SIDE = 'L', and
116
* LWORK >= M*NB if SIDE = 'R', where NB is the optimal
119
* If LWORK = -1, then a workspace query is assumed; the routine
120
* only calculates the optimal size of the WORK array, returns
121
* this value as the first entry of the WORK array, and no error
122
* message related to LWORK is issued by XERBLA.
124
* INFO (output) INTEGER
125
* = 0: successful exit
126
* < 0: if INFO = -i, the i-th argument had an illegal value
130
ATL_CINT minMN = Mmin(M, N), maxMN = Mmax(M, N);
131
ATL_INT n, nb, j, ib, mi, ni, ic, jc ;
132
TYPE *ws_QL2, *ws_T, *ws_larfb; /* Workspace for QR2,T, larfb */
135
nb = clapack_ilaenv(LAIS_OPT_NB, LAormqr, MYOPT+LARight+LALower, M, N,K,-1);
138
* If it is a workspace query, return the size of work required.
139
* wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2
143
if(SIDE == CblasLeft)
145
*WORK = ( N*nb + nb*nb + maxMN ) ;
149
*WORK = ( M*nb + nb*nb + maxMN ) ;
153
else if (M < 1 || N < 1) /* quick return if no work to do */
156
* If the user gives us too little space, see if we can allocate it ourselves
160
if(SIDE == CblasLeft)
162
if (LWORK < (N*nb + nb*nb + maxMN))
164
vp = malloc(ATL_MulBySize(N*nb + nb*nb + maxMN) + ATL_Cachelen);
167
WORK = ATL_AlignPtr(vp);
172
if (LWORK < (M*nb + nb*nb + maxMN))
174
vp = malloc(ATL_MulBySize(M*nb + nb*nb + maxMN) + ATL_Cachelen);
177
WORK = ATL_AlignPtr(vp);
179
} /* if CblasRight */
183
* Assign workspace areas for ATL_larft, ATL_geql2, ATL_larfb
186
ws_T = WORK; /* T at begining of work */
187
ws_QL2 = WORK +(nb SHIFT)*nb; /* After T Work space */
188
ws_larfb = ws_QL2 + (maxMN SHIFT); /* After workspace for T and QR2 */
191
if (SIDE == CblasLeft)
193
if (TRANS == CblasNoTrans)
195
for (j=0 ; j < K ; j = j + nb)
199
* Form the triangular factor of the block reflector
200
* H = H(i+ib-1) . . . H(i+1) H(i)
202
ATL_larft(LABackward, LAColumnStore, M-K+j+ib, ib,
203
A+(j SHIFT)*lda , lda, TAU+(j SHIFT),
206
* H or H' is applied to C(1:m-k+i+ib-1,1:n)
210
ATL_larfb(SIDE, TRANS, LABackward, LAColumnStore,
211
mi, N, ib, A+(j SHIFT)*lda, lda, ws_T, ib,
212
C, ldc, ws_larfb, N);
223
for (j; j >= 0; j = j - nb)
231
* Form the triangular factor of the block reflector
232
* H = H(i+ib-1) . . . H(i+1) H(i)
234
ATL_larft(LABackward, LAColumnStore, M-K+j+ib, ib,
235
A+(j SHIFT)*(lda), lda, TAU+(j SHIFT), ws_T, ib);
237
* H or H' is applied to C(1:m-k+i+ib-1,1:n)
241
ATL_larfb(SIDE, TRANS, LABackward, LAColumnStore,
242
mi, N, ib, A+(j SHIFT)*(lda), lda, ws_T, ib,
243
C, ldc, ws_larfb, N);
246
} /* cblasLeft main logic */
249
if ( TRANS == CblasNoTrans )
256
for (j; j >= 0; j = j - nb)
265
* Form the triangular factor of the block reflector
266
* H = H(i+ib-1) . . . H(i+1) H(i)
268
ATL_larft(LABackward, LAColumnStore, N-K+j+ib, ib,
269
A+(j SHIFT)*(lda), lda, TAU+(j SHIFT), ws_T, ib);
271
* H or H' is applied to C(1:m-k+i+ib-1,1:n)
275
ATL_larfb(SIDE, TRANS, LABackward, LAColumnStore,
276
M, ni, ib, A+(j SHIFT)*(lda), lda, ws_T, ib,
277
C, ldc, ws_larfb, M);
280
} /* Cblas Tran on Right */
283
for (j = 0 ; j < K; j = j + nb)
287
* Form the triangular factor of the block reflector
288
* H = H(i+ib-1) . . . H(i+1) H(i)
290
ATL_larft(LABackward, LAColumnStore, N-K+j+ib, ib,
291
A+(j SHIFT)*lda , lda, TAU+(j SHIFT),
294
* H or H' is applied to C(1:m-k+i+ib-1,1:n)
297
ATL_larfb(SIDE, TRANS, LABackward, LAColumnStore,
298
M, ni, ib, A+(j SHIFT)*lda, lda, ws_T, ib,
299
C, ldc, ws_larfb, M);
307
} /* END ATL_ormql */