1
SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2
$ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
3
$ NV, WV, LDWV, WORK, LWORK )
5
* -- LAPACK auxiliary routine (version 3.2.1) --
6
* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
9
* .. Scalar Arguments ..
10
INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11
$ LDZ, LWORK, N, ND, NH, NS, NV, NW
14
* .. Array Arguments ..
15
COMPLEX*16 H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
16
$ WORK( * ), WV( LDWV, * ), Z( LDZ, * )
19
* ******************************************************************
20
* Aggressive early deflation:
22
* This subroutine accepts as input an upper Hessenberg matrix
23
* H and performs an unitary similarity transformation
24
* designed to detect and deflate fully converged eigenvalues from
25
* a trailing principal submatrix. On output H has been over-
26
* written by a new Hessenberg matrix that is a perturbation of
27
* an unitary similarity transformation of H. It is to be
28
* hoped that the final version of H has many zero subdiagonal
31
* ******************************************************************
32
* WANTT (input) LOGICAL
33
* If .TRUE., then the Hessenberg matrix H is fully updated
34
* so that the triangular Schur factor may be
35
* computed (in cooperation with the calling subroutine).
36
* If .FALSE., then only enough of H is updated to preserve
39
* WANTZ (input) LOGICAL
40
* If .TRUE., then the unitary matrix Z is updated so
41
* so that the unitary Schur factor may be computed
42
* (in cooperation with the calling subroutine).
43
* If .FALSE., then Z is not referenced.
46
* The order of the matrix H and (if WANTZ is .TRUE.) the
47
* order of the unitary matrix Z.
49
* KTOP (input) INTEGER
50
* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
51
* KBOT and KTOP together determine an isolated block
52
* along the diagonal of the Hessenberg matrix.
54
* KBOT (input) INTEGER
55
* It is assumed without a check that either
56
* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
57
* determine an isolated block along the diagonal of the
61
* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
63
* H (input/output) COMPLEX*16 array, dimension (LDH,N)
64
* On input the initial N-by-N section of H stores the
65
* Hessenberg matrix undergoing aggressive early deflation.
66
* On output H has been transformed by a unitary
67
* similarity transformation, perturbed, and the returned
68
* to Hessenberg form that (it is to be hoped) has some
69
* zero subdiagonal entries.
72
* Leading dimension of H just as declared in the calling
73
* subroutine. N .LE. LDH
75
* ILOZ (input) INTEGER
76
* IHIZ (input) INTEGER
77
* Specify the rows of Z to which transformations must be
78
* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
80
* Z (input/output) COMPLEX*16 array, dimension (LDZ,N)
81
* IF WANTZ is .TRUE., then on output, the unitary
82
* similarity transformation mentioned above has been
83
* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
84
* If WANTZ is .FALSE., then Z is unreferenced.
87
* The leading dimension of Z just as declared in the
88
* calling subroutine. 1 .LE. LDZ.
91
* The number of unconverged (ie approximate) eigenvalues
92
* returned in SR and SI that may be used as shifts by the
96
* The number of converged eigenvalues uncovered by this
99
* SH (output) COMPLEX*16 array, dimension KBOT
100
* On output, approximate eigenvalues that may
101
* be used for shifts are stored in SH(KBOT-ND-NS+1)
102
* through SR(KBOT-ND). Converged eigenvalues are
103
* stored in SH(KBOT-ND+1) through SH(KBOT).
105
* V (workspace) COMPLEX*16 array, dimension (LDV,NW)
106
* An NW-by-NW work array.
108
* LDV (input) integer scalar
109
* The leading dimension of V just as declared in the
110
* calling subroutine. NW .LE. LDV
112
* NH (input) integer scalar
113
* The number of columns of T. NH.GE.NW.
115
* T (workspace) COMPLEX*16 array, dimension (LDT,NW)
117
* LDT (input) integer
118
* The leading dimension of T just as declared in the
119
* calling subroutine. NW .LE. LDT
122
* The number of rows of work array WV available for
123
* workspace. NV.GE.NW.
125
* WV (workspace) COMPLEX*16 array, dimension (LDWV,NW)
127
* LDWV (input) integer
128
* The leading dimension of W just as declared in the
129
* calling subroutine. NW .LE. LDV
131
* WORK (workspace) COMPLEX*16 array, dimension LWORK.
132
* On exit, WORK(1) is set to an estimate of the optimal value
133
* of LWORK for the given values of N, NW, KTOP and KBOT.
135
* LWORK (input) integer
136
* The dimension of the work array WORK. LWORK = 2*NW
137
* suffices, but greater efficiency may result from larger
140
* If LWORK = -1, then a workspace query is assumed; ZLAQR3
141
* only estimates the optimal workspace size for the given
142
* values of N, NW, KTOP and KBOT. The estimate is returned
143
* in WORK(1). No error message related to LWORK is issued
144
* by XERBLA. Neither H nor Z are accessed.
146
* ================================================================
147
* Based on contributions by
148
* Karen Braman and Ralph Byers, Department of Mathematics,
149
* University of Kansas, USA
151
* ================================================================
154
PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
155
$ ONE = ( 1.0d0, 0.0d0 ) )
156
DOUBLE PRECISION RZERO, RONE
157
PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 )
159
* .. Local Scalars ..
160
COMPLEX*16 BETA, CDUM, S, TAU
161
DOUBLE PRECISION FOO, SAFMAX, SAFMIN, SMLNUM, ULP
162
INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
163
$ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
166
* .. External Functions ..
167
DOUBLE PRECISION DLAMCH
169
EXTERNAL DLAMCH, ILAENV
171
* .. External Subroutines ..
172
EXTERNAL DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
173
$ ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
175
* .. Intrinsic Functions ..
176
INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
178
* .. Statement Functions ..
179
DOUBLE PRECISION CABS1
181
* .. Statement Function definitions ..
182
CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
184
* .. Executable Statements ..
186
* ==== Estimate optimal workspace. ====
188
JW = MIN( NW, KBOT-KTOP+1 )
193
* ==== Workspace query call to ZGEHRD ====
195
CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196
LWK1 = INT( WORK( 1 ) )
198
* ==== Workspace query call to ZUNMHR ====
200
CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
202
LWK2 = INT( WORK( 1 ) )
204
* ==== Workspace query call to ZLAQR4 ====
206
CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V,
207
$ LDV, WORK, -1, INFQR )
208
LWK3 = INT( WORK( 1 ) )
210
* ==== Optimal workspace ====
212
LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
215
* ==== Quick return in case of workspace query. ====
217
IF( LWORK.EQ.-1 ) THEN
218
WORK( 1 ) = DCMPLX( LWKOPT, 0 )
222
* ==== Nothing to do ...
223
* ... for an empty active block ... ====
229
* ... nor for an empty deflation window. ====
233
* ==== Machine constants ====
235
SAFMIN = DLAMCH( 'SAFE MINIMUM' )
236
SAFMAX = RONE / SAFMIN
237
CALL DLABAD( SAFMIN, SAFMAX )
238
ULP = DLAMCH( 'PRECISION' )
239
SMLNUM = SAFMIN*( DBLE( N ) / ULP )
241
* ==== Setup deflation window ====
243
JW = MIN( NW, KBOT-KTOP+1 )
244
KWTOP = KBOT - JW + 1
245
IF( KWTOP.EQ.KTOP ) THEN
248
S = H( KWTOP, KWTOP-1 )
251
IF( KBOT.EQ.KWTOP ) THEN
253
* ==== 1-by-1 deflation window: not much to do ====
255
SH( KWTOP ) = H( KWTOP, KWTOP )
258
IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
263
$ H( KWTOP, KWTOP-1 ) = ZERO
269
* ==== Convert to spike-triangular form. (In case of a
270
* . rare QR failure, this routine continues to do
271
* . aggressive early deflation using that part of
272
* . the deflation window that converged using INFQR
273
* . here and there to keep track.) ====
275
CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
276
CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
278
CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
279
NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK )
280
IF( JW.GT.NMIN ) THEN
281
CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
282
$ JW, V, LDV, WORK, LWORK, INFQR )
284
CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
285
$ JW, V, LDV, INFQR )
288
* ==== Deflation detection loop ====
292
DO 10 KNT = INFQR + 1, JW
294
* ==== Small spike tip deflation test ====
296
FOO = CABS1( T( NS, NS ) )
299
IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
302
* ==== One more converged eigenvalue ====
307
* ==== One undeflatable eigenvalue. Move it up out of the
308
* . way. (ZTREXC can not fail in this case.) ====
311
CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
316
* ==== Return to Hessenberg form ====
323
* ==== sorting the diagonal of T improves accuracy for
324
* . graded matrices. ====
326
DO 30 I = INFQR + 1, NS
329
IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
334
$ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
338
* ==== Restore shift/eigenvalue array from T ====
340
DO 40 I = INFQR + 1, JW
341
SH( KWTOP+I-1 ) = T( I, I )
345
IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
346
IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
348
* ==== Reflect spike back into lower triangle ====
350
CALL ZCOPY( NS, V, LDV, WORK, 1 )
352
WORK( I ) = DCONJG( WORK( I ) )
355
CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
358
CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
360
CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
362
CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
364
CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
367
CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
371
* ==== Copy updated reduced window into place ====
374
$ H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
375
CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
376
CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
379
* ==== Accumulate orthogonal matrix in order update
380
* . H and Z, if requested. ====
382
IF( NS.GT.1 .AND. S.NE.ZERO )
383
$ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
384
$ WORK( JW+1 ), LWORK-JW, INFO )
386
* ==== Update vertical slab in H ====
393
DO 60 KROW = LTOP, KWTOP - 1, NV
394
KLN = MIN( NV, KWTOP-KROW )
395
CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
396
$ LDH, V, LDV, ZERO, WV, LDWV )
397
CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
400
* ==== Update horizontal slab in H ====
403
DO 70 KCOL = KBOT + 1, N, NH
404
KLN = MIN( NH, N-KCOL+1 )
405
CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
406
$ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
407
CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
412
* ==== Update vertical slab in Z ====
415
DO 80 KROW = ILOZ, IHIZ, NV
416
KLN = MIN( NV, IHIZ-KROW+1 )
417
CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
418
$ LDZ, V, LDV, ZERO, WV, LDWV )
419
CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
425
* ==== Return the number of deflations ... ====
429
* ==== ... and the number of shifts. (Subtracting
430
* . INFQR from the spike length takes care
431
* . of the case of a rare QR failure while
432
* . calculating eigenvalues of the deflation
437
* ==== Return optimal workspace. ====
439
WORK( 1 ) = DCMPLX( LWKOPT, 0 )
441
* ==== End of ZLAQR3 ====