1
/*BHEADER**********************************************************************
2
* Copyright (c) 2006 The Regents of the University of California.
3
* Produced at the Lawrence Livermore National Laboratory.
4
* Written by the HYPRE team. UCRL-CODE-222953.
7
* This file is part of HYPRE (see http://www.llnl.gov/CASC/hypre/).
8
* Please see the COPYRIGHT_and_LICENSE file for the copyright notice,
9
* disclaimer, contact information and the GNU Lesser General Public License.
11
* HYPRE is free software; you can redistribute it and/or modify it under the
12
* terms of the GNU General Public License (as published by the Free Software
13
* Foundation) version 2.1 dated February 1999.
15
* HYPRE is distributed in the hope that it will be useful, but WITHOUT ANY
16
* WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS
17
* FOR A PARTICULAR PURPOSE. See the terms and conditions of the GNU General
18
* Public License for more details.
20
* You should have received a copy of the GNU Lesser General Public License
21
* along with this program; if not, write to the Free Software Foundation,
22
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
***********************************************************************EHEADER*/
32
* -- SuperLU routine (version 2.0) --
33
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
34
* and Lawrence Berkeley National Lab.
37
* Changes made to this file addressing issue regarding calls to
38
* blas/lapack functions (Dec 2003 at LLNL)
42
* History: Modified from lapack routine DGERFS
46
#include "superlu_util.h"
49
dgsrfs(char *trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
50
int *perm_r, int *perm_c, char *equed, double *R, double *C,
51
SuperMatrix *B, SuperMatrix *X,
52
double *ferr, double *berr, int *info)
58
* DGSRFS improves the computed solution to a system of linear
59
* equations and provides error bounds and backward error estimates for
62
* If equilibration was performed, the system becomes:
63
* (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
65
* See supermatrix.h for the definition of 'SuperMatrix' structure.
71
* Specifies the form of the system of equations:
72
* = 'N': A * X = B (No transpose)
73
* = 'T': A**T * X = B (Transpose)
74
* = 'C': A**H * X = B (Conjugate transpose = Transpose)
76
* A (input) SuperMatrix*
77
* The original matrix A in the system, or the scaled A if
78
* equilibration was done. The type of A can be:
79
* Stype = NC, Dtype = D_D, Mtype = GE.
81
* L (input) SuperMatrix*
82
* The factor L from the factorization Pr*A*Pc=L*U. Use
83
* compressed row subscripts storage for supernodes,
84
* i.e., L has types: Stype = SC, Dtype = D_D, Mtype = TRLU.
86
* U (input) SuperMatrix*
87
* The factor U from the factorization Pr*A*Pc=L*U as computed by
88
* dgstrf(). Use column-wise storage scheme,
89
* i.e., U has types: Stype = NC, Dtype = D_D, Mtype = TRU.
91
* perm_r (input) int*, dimension (A->nrow)
92
* Row permutation vector, which defines the permutation matrix Pr;
93
* perm_r[i] = j means row i of A is in position j in Pr*A.
95
* perm_c (input) int*, dimension (A->ncol)
96
* Column permutation vector, which defines the
97
* permutation matrix Pc; perm_c[i] = j means column i of A is
98
* in position j in A*Pc.
100
* equed (input) Specifies the form of equilibration that was done.
101
* = 'N': No equilibration.
102
* = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
103
* = 'C': Column equilibration, i.e., A was postmultiplied by
105
* = 'B': Both row and column equilibration, i.e., A was replaced
106
* by diag(R)*A*diag(C).
108
* R (input) double*, dimension (A->nrow)
109
* The row scale factors for A.
110
* If equed = 'R' or 'B', A is premultiplied by diag(R).
111
* If equed = 'N' or 'C', R is not accessed.
113
* C (input) double*, dimension (A->ncol)
114
* The column scale factors for A.
115
* If equed = 'C' or 'B', A is postmultiplied by diag(C).
116
* If equed = 'N' or 'R', C is not accessed.
118
* B (input) SuperMatrix*
119
* B has types: Stype = DN, Dtype = D_D, Mtype = GE.
120
* The right hand side matrix B.
121
* if equed = 'R' or 'B', B is premultiplied by diag(R).
123
* X (input/output) SuperMatrix*
124
* X has types: Stype = DN, Dtype = D_D, Mtype = GE.
125
* On entry, the solution matrix X, as computed by dgstrs().
126
* On exit, the improved solution matrix X.
127
* if *equed = 'C' or 'B', X should be premultiplied by diag(C)
128
* in order to obtain the solution to the original system.
130
* FERR (output) double*, dimension (B->ncol)
131
* The estimated forward error bound for each solution vector
132
* X(j) (the j-th column of the solution matrix X).
133
* If XTRUE is the true solution corresponding to X(j), FERR(j)
134
* is an estimated upper bound for the magnitude of the largest
135
* element in (X(j) - XTRUE) divided by the magnitude of the
136
* largest element in X(j). The estimate is as reliable as
137
* the estimate for RCOND, and is almost always a slight
138
* overestimate of the true error.
140
* BERR (output) double*, dimension (B->ncol)
141
* The componentwise relative backward error of each solution
142
* vector X(j) (i.e., the smallest relative change in
143
* any element of A or B that makes X(j) an exact solution).
146
* = 0: successful exit
147
* < 0: if INFO = -i, the i-th argument had an illegal value
149
* Internal Parameters
150
* ===================
152
* ITMAX is the maximum number of steps of iterative refinement.
158
/* Table of constant values */
163
/* Local variables */
167
DNformat *Bstore, *Xstore, *Bjcol_store;
168
double *Bmat, *Xmat, *Bptr, *Xptr;
171
int i, j, k, irow, nz, count, notran, rowequ, colequ;
173
double s, xk, lstres, eps, safmin;
178
extern double hypre_F90_NAME_BLAS(dlamch,DLAMCH)(char *);
179
extern int dlacon_(int *, double *, double *, int *, double *, int *);
181
extern int SCOPY(int *, double *, int *, double *, int *);
182
extern int SSAXPY(int *, double *, double *, int *, double *, int *);
184
extern int hypre_F90_NAME_BLAS(dcopy,DCOPY)(int *, double *, int *, double *, int *);
185
extern int hypre_F90_NAME_BLAS(daxpy,DAXPY)(int *, double *, double *, int *, double *, int *);
189
Aval = Astore->nzval;
192
Bmat = Bstore->nzval;
193
Xmat = Xstore->nzval;
198
/* Test the input parameters */
200
notran = superlu_lsame(trans, "N");
201
if ( !notran && !superlu_lsame(trans, "T") && !superlu_lsame(trans, "C"))
203
else if ( A->nrow != A->ncol || A->nrow < 0 ||
204
A->Stype != NC || A->Dtype != D_D || A->Mtype != GE )
206
else if ( L->nrow != L->ncol || L->nrow < 0 ||
207
L->Stype != SC || L->Dtype != D_D || L->Mtype != TRLU )
209
else if ( U->nrow != U->ncol || U->nrow < 0 ||
210
U->Stype != NC || U->Dtype != D_D || U->Mtype != TRU )
212
else if ( ldb < MAX(0, A->nrow) ||
213
B->Stype != DN || B->Dtype != D_D || B->Mtype != GE )
215
else if ( ldx < MAX(0, A->nrow) ||
216
X->Stype != DN || X->Dtype != D_D || X->Mtype != GE )
220
superlu_xerbla("dgsrfs", &i);
224
/* Quick return if possible */
225
if ( A->nrow == 0 || nrhs == 0) {
226
for (j = 0; j < nrhs; ++j) {
233
rowequ = superlu_lsame(equed, "R") || superlu_lsame(equed, "B");
234
colequ = superlu_lsame(equed, "C") || superlu_lsame(equed, "B");
236
/* Allocate working space */
237
work = doubleMalloc(2*A->nrow);
238
rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
239
iwork = intMalloc(2*A->nrow);
240
if ( !work || !rwork || !iwork )
241
ABORT("Malloc fails for work/rwork/iwork.");
244
*(unsigned char *)transt = 'T';
246
*(unsigned char *)transt = 'N';
249
/* NZ = maximum number of nonzero elements in each row of A, plus 1 */
251
eps = hypre_F90_NAME_BLAS(dlamch,DLAMCH)("Epsilon");
252
safmin = hypre_F90_NAME_BLAS(dlamch,DLAMCH)("Safe minimum");
256
/* Compute the number of nonzeros in each row (or column) of A */
257
for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
259
for (k = 0; k < A->ncol; ++k)
260
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
261
++iwork[Astore->rowind[i]];
263
for (k = 0; k < A->ncol; ++k)
264
iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
267
/* Copy one column of RHS B into Bjcol. */
268
Bjcol.Stype = B->Stype;
269
Bjcol.Dtype = B->Dtype;
270
Bjcol.Mtype = B->Mtype;
271
Bjcol.nrow = B->nrow;
273
Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
274
if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
275
Bjcol_store = Bjcol.Store;
276
Bjcol_store->lda = ldb;
277
Bjcol_store->nzval = work; /* address aliasing */
279
/* Do for each right hand side ... */
280
for (j = 0; j < nrhs; ++j) {
286
while (1) { /* Loop until stopping criterion is satisfied. */
288
/* Compute residual R = B - op(A) * X,
289
where op(A) = A, A**T, or A**H, depending on TRANS. */
292
SCOPY(&A->nrow, Bptr, &ione, work, &ione);
294
hypre_F90_NAME_BLAS(dcopy,DCOPY)(&A->nrow, Bptr, &ione, work, &ione);
296
sp_dgemv(trans, ndone, A, Xptr, ione, done, work, ione);
298
/* Compute componentwise relative backward error from formula
299
max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
300
where abs(Z) is the componentwise absolute value of the matrix
301
or vector Z. If the i-th component of the denominator is less
302
than SAFE2, then SAFE1 is added to the i-th component of the
303
numerator and denominator before dividing. */
305
for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
307
/* Compute abs(op(A))*abs(X) + abs(B). */
309
for (k = 0; k < A->ncol; ++k) {
310
xk = fabs( Xptr[k] );
311
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
312
rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
315
for (k = 0; k < A->ncol; ++k) {
317
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
318
irow = Astore->rowind[i];
319
s += fabs(Aval[i]) * fabs(Xptr[irow]);
325
for (i = 0; i < A->nrow; ++i) {
326
if (rwork[i] > safe2)
327
s = MAX( s, fabs(work[i]) / rwork[i] );
329
s = MAX( s, (fabs(work[i]) + safe1) /
330
(rwork[i] + safe1) );
334
/* Test stopping criterion. Continue iterating if
335
1) The residual BERR(J) is larger than machine epsilon, and
336
2) BERR(J) decreased by at least a factor of 2 during the
338
3) At most ITMAX iterations tried. */
340
if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
341
/* Update solution and try again. */
342
dgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info);
345
SAXPY(&A->nrow, &done, work, &ione,
346
&Xmat[j*ldx], &ione);
348
hypre_F90_NAME_BLAS(daxpy,DAXPY)(&A->nrow, &done, work, &ione,
349
&Xmat[j*ldx], &ione);
359
/* Bound error from formula:
360
norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*
361
( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
363
norm(Z) is the magnitude of the largest component of Z
364
inv(op(A)) is the inverse of op(A)
365
abs(Z) is the componentwise absolute value of the matrix or
367
NZ is the maximum number of nonzeros in any row of A, plus 1
368
EPS is machine epsilon
370
The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
371
is incremented by SAFE1 if the i-th component of
372
abs(op(A))*abs(X) + abs(B) is less than SAFE2.
374
Use DLACON to estimate the infinity-norm of the matrix
375
inv(op(A)) * diag(W),
376
where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
378
for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
380
/* Compute abs(op(A))*abs(X) + abs(B). */
382
for (k = 0; k < A->ncol; ++k) {
383
xk = fabs( Xptr[k] );
384
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
385
rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
388
for (k = 0; k < A->ncol; ++k) {
390
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
391
irow = Astore->rowind[i];
392
xk = fabs( Xptr[irow] );
393
s += fabs(Aval[i]) * xk;
399
for (i = 0; i < A->nrow; ++i)
400
if (rwork[i] > safe2)
401
rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
403
rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
408
dlacon_(&A->nrow, &work[A->nrow], work,
409
&iwork[A->nrow], &ferr[j], &kase);
410
if (kase == 0) break;
413
/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
414
if ( notran && colequ )
415
for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
416
else if ( !notran && rowequ )
417
for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
419
dgstrs (transt, L, U, perm_r, perm_c, &Bjcol, info);
421
for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
423
/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
424
for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
426
dgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info);
428
if ( notran && colequ )
429
for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
430
else if ( !notran && rowequ )
431
for (i = 0; i < A->ncol; ++i) work[i] *= R[i];
434
} while ( kase != 0 );
437
/* Normalize error. */
439
if ( notran && colequ ) {
440
for (i = 0; i < A->nrow; ++i)
441
lstres = MAX( lstres, C[i] * fabs( Xptr[i]) );
442
} else if ( !notran && rowequ ) {
443
for (i = 0; i < A->nrow; ++i)
444
lstres = MAX( lstres, R[i] * fabs( Xptr[i]) );
446
for (i = 0; i < A->nrow; ++i)
447
lstres = MAX( lstres, fabs( Xptr[i]) );
452
} /* for each RHS j ... */
457
SUPERLU_FREE(Bjcol.Store);