2
SUBROUTINE DBCG (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
3
+ MSOLVE, MTSOLV, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z,
4
+ P, RR, ZZ, PP, DZ, RWORK, IWORK)
5
C***BEGIN PROLOGUE DBCG
6
C***PURPOSE Preconditioned BiConjugate Gradient Sparse Ax = b Solver.
7
C Routine to solve a Non-Symmetric linear system Ax = b
8
C using the Preconditioned BiConjugate Gradient method.
9
C***LIBRARY SLATEC (SLAP)
10
C***CATEGORY D2A4, D2B4
11
C***TYPE DOUBLE PRECISION (SBCG-S, DBCG-D)
12
C***KEYWORDS BICONJUGATE GRADIENT, ITERATIVE PRECONDITION,
13
C NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
14
C***AUTHOR Greenbaum, Anne, (Courant Institute)
15
C Seager, Mark K., (LLNL)
16
C Lawrence Livermore National Laboratory
18
C Livermore, CA 94550 (510) 423-3141
23
C INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
24
C INTEGER ITER, IERR, IUNIT, IWORK(USER DEFINED)
25
C DOUBLE PRECISION B(N), X(N), A(NELT), TOL, ERR, R(N), Z(N), P(N)
26
C DOUBLE PRECISION RR(N), ZZ(N), PP(N), DZ(N)
27
C DOUBLE PRECISION RWORK(USER DEFINED)
28
C EXTERNAL MATVEC, MTTVEC, MSOLVE, MTSOLV
30
C CALL DBCG(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
31
C $ MSOLVE, MTSOLV, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT,
32
C $ R, Z, P, RR, ZZ, PP, DZ, RWORK, IWORK)
36
C Order of the Matrix.
37
C B :IN Double Precision B(N).
38
C Right-hand side vector.
39
C X :INOUT Double Precision X(N).
40
C On input X is your initial guess for solution vector.
41
C On output X is the final approximate solution.
43
C Number of Non-Zeros stored in A.
44
C IA :IN Integer IA(NELT).
45
C JA :IN Integer JA(NELT).
46
C A :IN Double Precision A(NELT).
47
C These arrays contain the matrix data structure for A.
48
C It could take any form. See "Description", below, for more
51
C Flag to indicate symmetric storage format.
52
C If ISYM=0, all non-zero entries of the matrix are stored.
53
C If ISYM=1, the matrix is symmetric, and only the upper
54
C or lower triangle of the matrix is stored.
55
C MATVEC :EXT External.
56
C Name of a routine which performs the matrix vector multiply
57
C operation Y = A*X given A and X. The name of the MATVEC
58
C routine must be declared external in the calling program.
59
C The calling sequence of MATVEC is:
60
C CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
61
C Where N is the number of unknowns, Y is the product A*X upon
62
C return, X is an input vector. NELT, IA, JA, A and ISYM
63
C define the SLAP matrix data structure: see Description,below.
64
C MTTVEC :EXT External.
65
C Name of a routine which performs the matrix transpose vector
66
C multiply y = A'*X given A and X (where ' denotes transpose).
67
C The name of the MTTVEC routine must be declared external in
68
C the calling program. The calling sequence to MTTVEC is the
69
C same as that for MTTVEC, viz.:
70
C CALL MTTVEC( N, X, Y, NELT, IA, JA, A, ISYM )
71
C Where N is the number of unknowns, Y is the product A'*X
72
C upon return, X is an input vector. NELT, IA, JA, A and ISYM
73
C define the SLAP matrix data structure: see Description,below.
74
C MSOLVE :EXT External.
75
C Name of a routine which solves a linear system MZ = R for Z
76
C given R with the preconditioning matrix M (M is supplied via
77
C RWORK and IWORK arrays). The name of the MSOLVE routine
78
C must be declared external in the calling program. The
79
C calling sequence of MSOLVE is:
80
C CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
81
C Where N is the number of unknowns, R is the right-hand side
82
C vector, and Z is the solution upon return. NELT, IA, JA, A
83
C and ISYM define the SLAP matrix data structure: see
84
C Description, below. RWORK is a double precision array that
85
C can be used to pass necessary preconditioning information and/
86
C or workspace to MSOLVE. IWORK is an integer work array for
87
C the same purpose as RWORK.
88
C MTSOLV :EXT External.
89
C Name of a routine which solves a linear system M'ZZ = RR for
90
C ZZ given RR with the preconditioning matrix M (M is supplied
91
C via RWORK and IWORK arrays). The name of the MTSOLV routine
92
C must be declared external in the calling program. The call-
93
C ing sequence to MTSOLV is:
94
C CALL MTSOLV(N, RR, ZZ, NELT, IA, JA, A, ISYM, RWORK, IWORK)
95
C Where N is the number of unknowns, RR is the right-hand side
96
C vector, and ZZ is the solution upon return. NELT, IA, JA, A
97
C and ISYM define the SLAP matrix data structure: see
98
C Description, below. RWORK is a double precision array that
99
C can be used to pass necessary preconditioning information and/
100
C or workspace to MTSOLV. IWORK is an integer work array for
101
C the same purpose as RWORK.
103
C Flag to indicate type of convergence criterion.
104
C If ITOL=1, iteration stops when the 2-norm of the residual
105
C divided by the 2-norm of the right-hand side is less than TOL.
106
C If ITOL=2, iteration stops when the 2-norm of M-inv times the
107
C residual divided by the 2-norm of M-inv times the right hand
108
C side is less than TOL, where M-inv is the inverse of the
110
C ITOL=11 is often useful for checking and comparing different
111
C routines. For this case, the user must supply the "exact"
112
C solution or a very accurate approximation (one with an error
113
C much less than TOL) through a common block,
114
C COMMON /DSLBLK/ SOLN( )
115
C If ITOL=11, iteration stops when the 2-norm of the difference
116
C between the iterative approximation and the user-supplied
117
C solution divided by the 2-norm of the user-supplied solution
118
C is less than TOL. Note that this requires the user to set up
119
C the "COMMON /DSLBLK/ SOLN(LENGTH)" in the calling routine.
120
C The routine with this declaration should be loaded before the
121
C stop test so that the correct length is used by the loader.
122
C This procedure is not standard Fortran and may not work
123
C correctly on your system (although it has worked on every
124
C system the authors have tried). If ITOL is not 11 then this
125
C common block is indeed standard Fortran.
126
C TOL :INOUT Double Precision.
127
C Convergence criterion, as described above. (Reset if IERR=4.)
129
C Maximum number of iterations.
131
C Number of iterations required to reach convergence, or
132
C ITMAX+1 if convergence criterion could not be achieved in
134
C ERR :OUT Double Precision.
135
C Error estimate of error in final approximate solution, as
139
C IERR = 0 => All went well.
140
C IERR = 1 => Insufficient space allocated for WORK or IWORK.
141
C IERR = 2 => Method failed to converge in ITMAX steps.
142
C IERR = 3 => Error in user input.
143
C Check input values of N, ITOL.
144
C IERR = 4 => User error tolerance set too tight.
145
C Reset to 500*D1MACH(3). Iteration proceeded.
146
C IERR = 5 => Preconditioning matrix, M, is not positive
147
C definite. (r,z) < 0.
148
C IERR = 6 => Matrix A is not positive definite. (p,Ap) < 0.
150
C Unit number on which to write the error at each iteration,
151
C if this is desired for monitoring convergence. If unit
152
C number is 0, no writing will occur.
153
C R :WORK Double Precision R(N).
154
C Z :WORK Double Precision Z(N).
155
C P :WORK Double Precision P(N).
156
C RR :WORK Double Precision RR(N).
157
C ZZ :WORK Double Precision ZZ(N).
158
C PP :WORK Double Precision PP(N).
159
C DZ :WORK Double Precision DZ(N).
160
C Double Precision arrays used for workspace.
161
C RWORK :WORK Double Precision RWORK(USER DEFINED).
162
C Double Precision array that can be used for workspace in
164
C IWORK :WORK Integer IWORK(USER DEFINED).
165
C Integer array that can be used for workspace in MSOLVE
169
C This routine does not care what matrix data structure is used
170
C for A and M. It simply calls MATVEC, MTTVEC, MSOLVE, MTSOLV
171
C routines, with arguments as above. The user could write any
172
C type of structure, and appropriate MATVEC, MSOLVE, MTTVEC,
173
C and MTSOLV routines. It is assumed that A is stored in the
174
C IA, JA, A arrays in some fashion and that M (or INV(M)) is
175
C stored in IWORK and RWORK in some fashion. The SLAP
176
C routines DSDBCG and DSLUBC are examples of this procedure.
178
C Two examples of matrix data structures are the: 1) SLAP
179
C Triad format and 2) SLAP Column format.
181
C =================== S L A P Triad format ===================
182
C In this format only the non-zeros are stored. They may
183
C appear in *ANY* order. The user supplies three arrays of
184
C length NELT, where NELT is the number of non-zeros in the
185
C matrix: (IA(NELT), JA(NELT), A(NELT)). For each non-zero
186
C the user puts the row and column index of that matrix
187
C element in the IA and JA arrays. The value of the non-zero
188
C matrix element is placed in the corresponding location of
189
C the A array. This is an extremely easy data structure to
190
C generate. On the other hand it is not too efficient on
191
C vector computers for the iterative solution of linear
192
C systems. Hence, SLAP changes this input data structure to
193
C the SLAP Column format for the iteration (but does not
196
C Here is an example of the SLAP Triad storage format for a
197
C 5x5 Matrix. Recall that the entries may appear in any order.
199
C 5x5 Matrix SLAP Triad format for 5x5 matrix on left.
200
C 1 2 3 4 5 6 7 8 9 10 11
201
C |11 12 0 0 15| A: 51 12 11 33 15 53 55 22 35 44 21
202
C |21 22 0 0 0| IA: 5 1 1 3 1 5 5 2 3 4 2
203
C | 0 0 33 0 35| JA: 1 2 1 3 5 3 5 2 5 4 1
207
C =================== S L A P Column format ==================
209
C In this format the non-zeros are stored counting down
210
C columns (except for the diagonal entry, which must appear
211
C first in each "column") and are stored in the double pre-
212
C cision array A. In other words, for each column in the
213
C matrix first put the diagonal entry in A. Then put in the
214
C other non-zero elements going down the column (except the
215
C diagonal) in order. The IA array holds the row index for
216
C each non-zero. The JA array holds the offsets into the IA,
217
C A arrays for the beginning of each column. That is,
218
C IA(JA(ICOL)),A(JA(ICOL)) are the first elements of the ICOL-
219
C th column in IA and A, and IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)
220
C are the last elements of the ICOL-th column. Note that we
221
C always have JA(N+1)=NELT+1, where N is the number of columns
222
C in the matrix and NELT is the number of non-zeros in the
225
C Here is an example of the SLAP Column storage format for a
226
C 5x5 Matrix (in the A and IA arrays '|' denotes the end of a
229
C 5x5 Matrix SLAP Column format for 5x5 matrix on left.
230
C 1 2 3 4 5 6 7 8 9 10 11
231
C |11 12 0 0 15| A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
232
C |21 22 0 0 0| IA: 1 2 5 | 2 1 | 3 5 | 4 | 5 1 3
233
C | 0 0 33 0 35| JA: 1 4 6 8 9 12
238
C This routine will attempt to write to the Fortran logical output
239
C unit IUNIT, if IUNIT .ne. 0. Thus, the user must make sure that
240
C this logical unit is attached to a file or terminal before calling
241
C this routine with a non-zero value for IUNIT. This routine does
242
C not check for the validity of a non-zero IUNIT unit number.
244
C***SEE ALSO DSDBCG, DSLUBC
245
C***REFERENCES 1. Mark K. Seager, A SLAP for the Masses, in
246
C G. F. Carey, Ed., Parallel Supercomputing: Methods,
247
C Algorithms and Applications, Wiley, 1989, pp.135-155.
248
C***ROUTINES CALLED D1MACH, DAXPY, DCOPY, DDOT, ISDBCG
249
C***REVISION HISTORY (YYMMDD)
250
C 890404 DATE WRITTEN
251
C 890404 Previous REVISION DATE
252
C 890915 Made changes requested at July 1989 CML Meeting. (MKS)
253
C 890921 Removed TeX from comments. (FNF)
254
C 890922 Numerous changes to prologue to make closer to SLATEC
256
C 890929 Numerous changes to reduce SP/DP differences. (FNF)
257
C 891004 Added new reference.
258
C 910411 Prologue converted to Version 4.0 format. (BAB)
259
C 910502 Removed MATVEC, MTTVEC, MSOLVE, MTSOLV from ROUTINES
261
C 920407 COMMON BLOCK renamed DSLBLK. (WRB)
262
C 920511 Added complete declaration section. (WRB)
263
C 920929 Corrected format of reference. (FNF)
264
C 921019 Changed 500.0 to 500 to reduce SP/DP differences. (FNF)
265
C 921113 Corrected C***CATEGORY line. (FNF)
266
C***END PROLOGUE DBCG
267
C .. Scalar Arguments ..
268
DOUBLE PRECISION ERR, TOL
269
INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
270
C .. Array Arguments ..
271
DOUBLE PRECISION A(NELT), B(N), DZ(N), P(N), PP(N), R(N), RR(N),
272
+ RWORK(*), X(N), Z(N), ZZ(N)
273
INTEGER IA(NELT), IWORK(*), JA(NELT)
274
C .. Subroutine Arguments ..
275
EXTERNAL MATVEC, MSOLVE, MTSOLV, MTTVEC
276
C .. Local Scalars ..
277
DOUBLE PRECISION AK, AKDEN, BK, BKDEN, BKNUM, BNRM, FUZZ, SOLNRM,
280
C .. External Functions ..
281
DOUBLE PRECISION D1MACH, DDOT
283
EXTERNAL D1MACH, DDOT, ISDBCG
284
C .. External Subroutines ..
285
EXTERNAL DAXPY, DCOPY
286
C .. Intrinsic Functions ..
288
C***FIRST EXECUTABLE STATEMENT DBCG
290
C Check some of the input data.
301
IF( TOL.LT.TOLMIN ) THEN
306
C Calculate initial residual and pseudo-residual, and check
307
C stopping criterion.
308
CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
313
CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
314
CALL MTSOLV(N, RR, ZZ, NELT, IA, JA, A, ISYM, RWORK, IWORK)
316
IF( ISDBCG(N, B, X, NELT, IA, JA, A, ISYM, MSOLVE, ITOL, TOL,
317
$ ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, RR, ZZ, PP,
318
$ DZ, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
320
IF( IERR.NE.0 ) RETURN
322
C ***** iteration loop *****
327
C Calculate coefficient BK and direction vectors P and PP.
328
BKNUM = DDOT(N, Z, 1, RR, 1)
329
IF( ABS(BKNUM).LE.FUZZ ) THEN
334
CALL DCOPY(N, Z, 1, P, 1)
335
CALL DCOPY(N, ZZ, 1, PP, 1)
339
P(I) = Z(I) + BK*P(I)
340
PP(I) = ZZ(I) + BK*PP(I)
345
C Calculate coefficient AK, new iterate X, new residuals R and
346
C RR, and new pseudo-residuals Z and ZZ.
347
CALL MATVEC(N, P, Z, NELT, IA, JA, A, ISYM)
348
AKDEN = DDOT(N, PP, 1, Z, 1)
350
IF( ABS(AKDEN).LE.FUZZ ) THEN
354
CALL DAXPY(N, AK, P, 1, X, 1)
355
CALL DAXPY(N, -AK, Z, 1, R, 1)
356
CALL MTTVEC(N, PP, ZZ, NELT, IA, JA, A, ISYM)
357
CALL DAXPY(N, -AK, ZZ, 1, RR, 1)
358
CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
359
CALL MTSOLV(N, RR, ZZ, NELT, IA, JA, A, ISYM, RWORK, IWORK)
361
C check stopping criterion.
362
IF( ISDBCG(N, B, X, NELT, IA, JA, A, ISYM, MSOLVE, ITOL, TOL,
363
$ ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, RR, ZZ,
364
$ PP, DZ, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
369
C ***** end of loop *****
371
C stopping criterion not satisfied.
376
C------------- LAST LINE OF DBCG FOLLOWS ----------------------------