1
c-----------------------------------------------------------------------
7
c Given the Arnoldi factorization
9
c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
11
c apply NP shifts implicitly resulting in
13
c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
15
c where Q is an orthogonal matrix of order KEV+NP. Q is the product of
16
c rotations resulting from the NP bulge chasing sweeps. The updated Arnoldi
17
c factorization becomes:
19
c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
23
c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, WORKD )
27
c Problem size, i.e. dimension of matrix A.
29
c KEV Integer. (INPUT)
30
c INPUT: KEV+NP is the size of the input matrix H.
31
c OUTPUT: KEV is the size of the updated matrix HNEW.
34
c Number of implicit shifts to be applied.
36
c SHIFT Real array of length NP. (INPUT)
37
c The shifts to be applied.
39
c V Real N by (KEV+NP) array. (INPUT/OUTPUT)
40
c INPUT: V contains the current KEV+NP Arnoldi vectors.
41
c OUTPUT: VNEW = V(1:n,1:KEV); the updated Arnoldi vectors
42
c are in the first KEV columns of V.
44
c LDV Integer. (INPUT)
45
c Leading dimension of V exactly as declared in the calling
48
c H Real (KEV+NP) by 2 array. (INPUT/OUTPUT)
49
c INPUT: H contains the symmetric tridiagonal matrix of the
50
c Arnoldi factorization with the subdiagonal in the 1st column
51
c starting at H(2,1) and the main diagonal in the 2nd column.
52
c OUTPUT: H contains the updated tridiagonal matrix in the
53
c KEV leading submatrix.
55
c LDH Integer. (INPUT)
56
c Leading dimension of H exactly as declared in the calling
59
c RESID Real array of length (N). (INPUT/OUTPUT)
60
c INPUT: RESID contains the the residual vector r_{k+p}.
61
c OUTPUT: RESID is the updated residual vector rnew_{k}.
63
c Q Real KEV+NP by KEV+NP work array. (WORKSPACE)
64
c Work array used to accumulate the rotations during the bulge
67
c LDQ Integer. (INPUT)
68
c Leading dimension of Q exactly as declared in the calling
71
c WORKD Real work array of length 2*N. (WORKSPACE)
72
c Distributed array used in the application of the accumulated
73
c orthogonal matrix Q.
77
c-----------------------------------------------------------------------
85
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
86
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
88
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
89
c Restarted Arnoldi Iteration", Rice University Technical Report
90
c TR95-13, Department of Computational and Applied Mathematics.
93
c ivout ARPACK utility routine that prints integers.
94
c second ARPACK utility routine for timing.
95
c svout ARPACK utility routine that prints vectors.
96
c slamch LAPACK routine that determines machine constants.
97
c slartg LAPACK Givens rotation construction routine.
98
c slacpy LAPACK matrix copy routine.
99
c slaset LAPACK matrix initialization routine.
100
c sgemv Level 2 BLAS routine for matrix vector multiplication.
101
c saxpy Level 1 BLAS that computes a vector triad.
102
c scopy Level 1 BLAS that copies one vector to another.
103
c sscal Level 1 BLAS that scales a vector.
106
c Danny Sorensen Phuong Vu
107
c Richard Lehoucq CRPC / Rice University
108
c Dept. of Computational & Houston, Texas
109
c Applied Mathematics
114
c 12/16/93: Version ' 2.4'
116
c\SCCS Information: @(#)
117
c FILE: sapps.F SID: 2.6 DATE OF SID: 3/28/97 RELEASE: 2
120
c 1. In this version, each shift is applied to all the subblocks of
121
c the tridiagonal matrix H and not just to the submatrix that it
122
c comes from. This routine assumes that the subdiagonal elements
123
c of H that are stored in h(1:kev+np,1) are nonegative upon input
124
c and enforce this condition upon output. This version incorporates
125
c deflation. See code for documentation.
129
c-----------------------------------------------------------------------
132
& ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, workd )
134
c %----------------------------------------------------%
135
c | Include files for debugging and timing information |
136
c %----------------------------------------------------%
141
c %------------------%
142
c | Scalar Arguments |
143
c %------------------%
145
integer kev, ldh, ldq, ldv, n, np
147
c %-----------------%
148
c | Array Arguments |
149
c %-----------------%
152
& h(ldh,2), q(ldq,kev+np), resid(n), shift(np),
153
& v(ldv,kev+np), workd(2*n)
161
parameter (one = 1.0E+0, zero = 0.0E+0)
167
integer i, iend, istart, itop, j, jj, kplusp, msglvl
170
& a1, a2, a3, a4, big, c, epsmch, f, g, r, s
174
c %----------------------%
175
c | External Subroutines |
176
c %----------------------%
178
external saxpy, scopy, sscal, slacpy, slartg, slaset, svout,
179
& ivout, second, sgemv
181
c %--------------------%
182
c | External Functions |
183
c %--------------------%
189
c %----------------------%
190
c | Intrinsics Functions |
191
c %----------------------%
199
data first / .true. /
201
c %-----------------------%
202
c | Executable Statements |
203
c %-----------------------%
206
epsmch = slamch('Epsilon-Machine')
211
c %-------------------------------%
212
c | Initialize timing statistics |
213
c | & message level for debugging |
214
c %-------------------------------%
221
c %----------------------------------------------%
222
c | Initialize Q to the identity matrix of order |
223
c | kplusp used to accumulate the rotations. |
224
c %----------------------------------------------%
226
call slaset ('All', kplusp, kplusp, zero, one, q, ldq)
228
c %----------------------------------------------%
229
c | Quick return if there are no shifts to apply |
230
c %----------------------------------------------%
232
if (np .eq. 0) go to 9000
234
c %----------------------------------------------------------%
235
c | Apply the np shifts implicitly. Apply each shift to the |
236
c | whole matrix and not just to the submatrix from which it |
238
c %----------------------------------------------------------%
244
c %----------------------------------------------------------%
245
c | Check for splitting and deflation. Currently we consider |
246
c | an off-diagonal element h(i+1,1) negligible if |
247
c | h(i+1,1) .le. epsmch*( |h(i,2)| + |h(i+1,2)| ) |
248
c | for i=1:KEV+NP-1. |
249
c | If above condition tests true then we set h(i+1,1) = 0. |
250
c | Note that h(1:KEV+NP,1) are assumed to be non negative. |
251
c %----------------------------------------------------------%
255
c %------------------------------------------------%
256
c | The following loop exits early if we encounter |
257
c | a negligible off diagonal element. |
258
c %------------------------------------------------%
260
do 30 i = istart, kplusp-1
261
big = abs(h(i,2)) + abs(h(i+1,2))
262
if (h(i+1,1) .le. epsmch*big) then
263
if (msglvl .gt. 0) then
264
call ivout (logfil, 1, i, ndigit,
265
& '_sapps: deflation at row/column no.')
266
call ivout (logfil, 1, jj, ndigit,
267
& '_sapps: occured before shift number.')
268
call svout (logfil, 1, h(i+1,1), ndigit,
269
& '_sapps: the corresponding off diagonal element')
279
if (istart .lt. iend) then
281
c %--------------------------------------------------------%
282
c | Construct the plane rotation G'(istart,istart+1,theta) |
283
c | that attempts to drive h(istart+1,1) to zero. |
284
c %--------------------------------------------------------%
286
f = h(istart,2) - shift(jj)
288
call slartg (f, g, c, s, r)
290
c %-------------------------------------------------------%
291
c | Apply rotation to the left and right of H; |
292
c | H <- G' * H * G, where G = G(istart,istart+1,theta). |
293
c | This will create a "bulge". |
294
c %-------------------------------------------------------%
296
a1 = c*h(istart,2) + s*h(istart+1,1)
297
a2 = c*h(istart+1,1) + s*h(istart+1,2)
298
a4 = c*h(istart+1,2) - s*h(istart+1,1)
299
a3 = c*h(istart+1,1) - s*h(istart,2)
300
h(istart,2) = c*a1 + s*a2
301
h(istart+1,2) = c*a4 - s*a3
302
h(istart+1,1) = c*a3 + s*a4
304
c %----------------------------------------------------%
305
c | Accumulate the rotation in the matrix Q; Q <- Q*G |
306
c %----------------------------------------------------%
308
do 60 j = 1, min(istart+jj,kplusp)
309
a1 = c*q(j,istart) + s*q(j,istart+1)
310
q(j,istart+1) = - s*q(j,istart) + c*q(j,istart+1)
315
c %----------------------------------------------%
316
c | The following loop chases the bulge created. |
317
c | Note that the previous rotation may also be |
318
c | done within the following loop. But it is |
319
c | kept separate to make the distinction among |
320
c | the bulge chasing sweeps and the first plane |
321
c | rotation designed to drive h(istart+1,1) to |
323
c %----------------------------------------------%
325
do 70 i = istart+1, iend-1
327
c %----------------------------------------------%
328
c | Construct the plane rotation G'(i,i+1,theta) |
329
c | that zeros the i-th bulge that was created |
330
c | by G(i-1,i,theta). g represents the bulge. |
331
c %----------------------------------------------%
336
c %----------------------------------%
337
c | Final update with G(i-1,i,theta) |
338
c %----------------------------------%
340
h(i+1,1) = c*h(i+1,1)
341
call slartg (f, g, c, s, r)
343
c %-------------------------------------------%
344
c | The following ensures that h(1:iend-1,1), |
345
c | the first iend-2 off diagonal of elements |
346
c | H, remain non negative. |
347
c %-------------------------------------------%
349
if (r .lt. zero) then
355
c %--------------------------------------------%
356
c | Apply rotation to the left and right of H; |
357
c | H <- G * H * G', where G = G(i,i+1,theta) |
358
c %--------------------------------------------%
362
a1 = c*h(i,2) + s*h(i+1,1)
363
a2 = c*h(i+1,1) + s*h(i+1,2)
364
a3 = c*h(i+1,1) - s*h(i,2)
365
a4 = c*h(i+1,2) - s*h(i+1,1)
368
h(i+1,2) = c*a4 - s*a3
369
h(i+1,1) = c*a3 + s*a4
371
c %----------------------------------------------------%
372
c | Accumulate the rotation in the matrix Q; Q <- Q*G |
373
c %----------------------------------------------------%
375
do 50 j = 1, min( i+jj, kplusp )
376
a1 = c*q(j,i) + s*q(j,i+1)
377
q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
385
c %--------------------------%
386
c | Update the block pointer |
387
c %--------------------------%
391
c %------------------------------------------%
392
c | Make sure that h(iend,1) is non-negative |
393
c | If not then set h(iend,1) <-- -h(iend,1) |
394
c | and negate the last column of Q. |
395
c | We have effectively carried out a |
396
c | similarity on transformation H |
397
c %------------------------------------------%
399
if (h(iend,1) .lt. zero) then
400
h(iend,1) = -h(iend,1)
401
call sscal(kplusp, -one, q(1,iend), 1)
404
c %--------------------------------------------------------%
405
c | Apply the same shift to the next block if there is any |
406
c %--------------------------------------------------------%
408
if (iend .lt. kplusp) go to 20
410
c %-----------------------------------------------------%
411
c | Check if we can increase the the start of the block |
412
c %-----------------------------------------------------%
414
do 80 i = itop, kplusp-1
415
if (h(i+1,1) .gt. zero) go to 90
419
c %-----------------------------------%
420
c | Finished applying the jj-th shift |
421
c %-----------------------------------%
425
c %------------------------------------------%
426
c | All shifts have been applied. Check for |
427
c | more possible deflation that might occur |
428
c | after the last shift is applied. |
429
c %------------------------------------------%
431
do 100 i = itop, kplusp-1
432
big = abs(h(i,2)) + abs(h(i+1,2))
433
if (h(i+1,1) .le. epsmch*big) then
434
if (msglvl .gt. 0) then
435
call ivout (logfil, 1, i, ndigit,
436
& '_sapps: deflation at row/column no.')
437
call svout (logfil, 1, h(i+1,1), ndigit,
438
& '_sapps: the corresponding off diagonal element')
444
c %-------------------------------------------------%
445
c | Compute the (kev+1)-st column of (V*Q) and |
446
c | temporarily store the result in WORKD(N+1:2*N). |
447
c | This is not necessary if h(kev+1,1) = 0. |
448
c %-------------------------------------------------%
450
if ( h(kev+1,1) .gt. zero )
451
& call sgemv ('N', n, kplusp, one, v, ldv,
452
& q(1,kev+1), 1, zero, workd(n+1), 1)
454
c %-------------------------------------------------------%
455
c | Compute column 1 to kev of (V*Q) in backward order |
456
c | taking advantage that Q is an upper triangular matrix |
457
c | with lower bandwidth np. |
458
c | Place results in v(:,kplusp-kev:kplusp) temporarily. |
459
c %-------------------------------------------------------%
462
call sgemv ('N', n, kplusp-i+1, one, v, ldv,
463
& q(1,kev-i+1), 1, zero, workd, 1)
464
call scopy (n, workd, 1, v(1,kplusp-i+1), 1)
467
c %-------------------------------------------------%
468
c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
469
c %-------------------------------------------------%
471
call slacpy ('All', n, kev, v(1,np+1), ldv, v, ldv)
473
c %--------------------------------------------%
474
c | Copy the (kev+1)-st column of (V*Q) in the |
475
c | appropriate place if h(kev+1,1) .ne. zero. |
476
c %--------------------------------------------%
478
if ( h(kev+1,1) .gt. zero )
479
& call scopy (n, workd(n+1), 1, v(1,kev+1), 1)
481
c %-------------------------------------%
482
c | Update the residual vector: |
483
c | r <- sigmak*r + betak*v(:,kev+1) |
485
c | sigmak = (e_{kev+p}'*Q)*e_{kev} |
486
c | betak = e_{kev+1}'*H*e_{kev} |
487
c %-------------------------------------%
489
call sscal (n, q(kplusp,kev), resid, 1)
490
if (h(kev+1,1) .gt. zero)
491
& call saxpy (n, h(kev+1,1), v(1,kev+1), 1, resid, 1)
493
if (msglvl .gt. 1) then
494
call svout (logfil, 1, q(kplusp,kev), ndigit,
495
& '_sapps: sigmak of the updated residual vector')
496
call svout (logfil, 1, h(kev+1,1), ndigit,
497
& '_sapps: betak of the updated residual vector')
498
call svout (logfil, kev, h(1,2), ndigit,
499
& '_sapps: updated main diagonal of H for next iteration')
501
call svout (logfil, kev-1, h(2,1), ndigit,
502
& '_sapps: updated sub diagonal of H for next iteration')
507
tsapps = tsapps + (t1 - t0)