4
* -- SuperLU routine (version 2.0) --
5
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
6
* and Lawrence Berkeley National Lab.
12
* History: Modified from lapack routine CGERFS
19
cgsrfs(char *trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
20
int *perm_r, int *perm_c, char *equed, float *R, float *C,
21
SuperMatrix *B, SuperMatrix *X,
22
float *ferr, float *berr, int *info)
28
* CGSRFS improves the computed solution to a system of linear
29
* equations and provides error bounds and backward error estimates for
32
* If equilibration was performed, the system becomes:
33
* (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
35
* See supermatrix.h for the definition of 'SuperMatrix' structure.
41
* Specifies the form of the system of equations:
42
* = 'N': A * X = B (No transpose)
43
* = 'T': A**T * X = B (Transpose)
44
* = 'C': A**H * X = B (Conjugate transpose = Transpose)
46
* A (input) SuperMatrix*
47
* The original matrix A in the system, or the scaled A if
48
* equilibration was done. The type of A can be:
49
* Stype = NC, Dtype = C_, Mtype = GE.
51
* L (input) SuperMatrix*
52
* The factor L from the factorization Pr*A*Pc=L*U. Use
53
* compressed row subscripts storage for supernodes,
54
* i.e., L has types: Stype = SC, Dtype = C_, Mtype = TRLU.
56
* U (input) SuperMatrix*
57
* The factor U from the factorization Pr*A*Pc=L*U as computed by
58
* cgstrf(). Use column-wise storage scheme,
59
* i.e., U has types: Stype = NC, Dtype = C_, Mtype = TRU.
61
* perm_r (input) int*, dimension (A->nrow)
62
* Row permutation vector, which defines the permutation matrix Pr;
63
* perm_r[i] = j means row i of A is in position j in Pr*A.
65
* perm_c (input) int*, dimension (A->ncol)
66
* Column permutation vector, which defines the
67
* permutation matrix Pc; perm_c[i] = j means column i of A is
68
* in position j in A*Pc.
70
* equed (input) Specifies the form of equilibration that was done.
71
* = 'N': No equilibration.
72
* = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
73
* = 'C': Column equilibration, i.e., A was postmultiplied by
75
* = 'B': Both row and column equilibration, i.e., A was replaced
76
* by diag(R)*A*diag(C).
78
* R (input) float*, dimension (A->nrow)
79
* The row scale factors for A.
80
* If equed = 'R' or 'B', A is premultiplied by diag(R).
81
* If equed = 'N' or 'C', R is not accessed.
83
* C (input) float*, dimension (A->ncol)
84
* The column scale factors for A.
85
* If equed = 'C' or 'B', A is postmultiplied by diag(C).
86
* If equed = 'N' or 'R', C is not accessed.
88
* B (input) SuperMatrix*
89
* B has types: Stype = DN, Dtype = C_, Mtype = GE.
90
* The right hand side matrix B.
91
* if equed = 'R' or 'B', B is premultiplied by diag(R).
93
* X (input/output) SuperMatrix*
94
* X has types: Stype = DN, Dtype = C_, Mtype = GE.
95
* On entry, the solution matrix X, as computed by cgstrs().
96
* On exit, the improved solution matrix X.
97
* if *equed = 'C' or 'B', X should be premultiplied by diag(C)
98
* in order to obtain the solution to the original system.
100
* FERR (output) float*, dimension (B->ncol)
101
* The estimated forward error bound for each solution vector
102
* X(j) (the j-th column of the solution matrix X).
103
* If XTRUE is the true solution corresponding to X(j), FERR(j)
104
* is an estimated upper bound for the magnitude of the largest
105
* element in (X(j) - XTRUE) divided by the magnitude of the
106
* largest element in X(j). The estimate is as reliable as
107
* the estimate for RCOND, and is almost always a slight
108
* overestimate of the true error.
110
* BERR (output) float*, dimension (B->ncol)
111
* The componentwise relative backward error of each solution
112
* vector X(j) (i.e., the smallest relative change in
113
* any element of A or B that makes X(j) an exact solution).
116
* = 0: successful exit
117
* < 0: if INFO = -i, the i-th argument had an illegal value
119
* Internal Parameters
120
* ===================
122
* ITMAX is the maximum number of steps of iterative refinement.
128
/* Table of constant values */
130
complex ndone = {-1., 0.};
131
complex done = {1., 0.};
133
/* Local variables */
137
DNformat *Bstore, *Xstore, *Bjcol_store;
138
complex *Bmat, *Xmat, *Bptr, *Xptr;
141
int i, j, k, irow, nz, count, notran, rowequ, colequ;
143
float s, xk, lstres, eps, safmin;
148
extern double slamch_(char *);
149
extern int clacon_(int *, complex *, complex *, float *, int *);
151
extern int CCOPY(int *, complex *, int *, complex *, int *);
152
extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
154
extern int ccopy_(int *, complex *, int *, complex *, int *);
155
extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
159
Aval = Astore->nzval;
162
Bmat = Bstore->nzval;
163
Xmat = Xstore->nzval;
168
/* Test the input parameters */
170
notran = lsame_(trans, "N");
171
if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -1;
172
else if ( A->nrow != A->ncol || A->nrow < 0 ||
173
A->Stype != NC || A->Dtype != C_ || A->Mtype != GE )
175
else if ( L->nrow != L->ncol || L->nrow < 0 ||
176
L->Stype != SC || L->Dtype != C_ || L->Mtype != TRLU )
178
else if ( U->nrow != U->ncol || U->nrow < 0 ||
179
U->Stype != NC || U->Dtype != C_ || U->Mtype != TRU )
181
else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
182
B->Stype != DN || B->Dtype != C_ || B->Mtype != GE )
184
else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
185
X->Stype != DN || X->Dtype != C_ || X->Mtype != GE )
189
xerbla_("cgsrfs", &i);
193
/* Quick return if possible */
194
if ( A->nrow == 0 || nrhs == 0) {
195
for (j = 0; j < nrhs; ++j) {
202
rowequ = lsame_(equed, "R") || lsame_(equed, "B");
203
colequ = lsame_(equed, "C") || lsame_(equed, "B");
205
/* Allocate working space */
206
work = complexMalloc(2*A->nrow);
207
rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
208
iwork = intMalloc(A->nrow);
209
if ( !work || !rwork || !iwork )
210
ABORT("Malloc fails for work/rwork/iwork.");
213
*(unsigned char *)transt = 'T';
215
*(unsigned char *)transt = 'N';
218
/* NZ = maximum number of nonzero elements in each row of A, plus 1 */
220
eps = slamch_("Epsilon");
221
safmin = slamch_("Safe minimum");
225
/* Compute the number of nonzeros in each row (or column) of A */
226
for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
228
for (k = 0; k < A->ncol; ++k)
229
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
230
++iwork[Astore->rowind[i]];
232
for (k = 0; k < A->ncol; ++k)
233
iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
236
/* Copy one column of RHS B into Bjcol. */
237
Bjcol.Stype = B->Stype;
238
Bjcol.Dtype = B->Dtype;
239
Bjcol.Mtype = B->Mtype;
240
Bjcol.nrow = B->nrow;
242
Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
243
if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
244
Bjcol_store = Bjcol.Store;
245
Bjcol_store->lda = ldb;
246
Bjcol_store->nzval = work; /* address aliasing */
248
/* Do for each right hand side ... */
249
for (j = 0; j < nrhs; ++j) {
255
while (1) { /* Loop until stopping criterion is satisfied. */
257
/* Compute residual R = B - op(A) * X,
258
where op(A) = A, A**T, or A**H, depending on TRANS. */
261
CCOPY(&A->nrow, Bptr, &ione, work, &ione);
263
ccopy_(&A->nrow, Bptr, &ione, work, &ione);
265
sp_cgemv(trans, ndone, A, Xptr, ione, done, work, ione);
267
/* Compute componentwise relative backward error from formula
268
max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
269
where abs(Z) is the componentwise absolute value of the matrix
270
or vector Z. If the i-th component of the denominator is less
271
than SAFE2, then SAFE1 is added to the i-th component of the
272
numerator and denominator before dividing. */
274
for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
276
/* Compute abs(op(A))*abs(X) + abs(B). */
278
for (k = 0; k < A->ncol; ++k) {
279
xk = c_abs1( &Xptr[k] );
280
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
281
rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
284
for (k = 0; k < A->ncol; ++k) {
286
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
287
irow = Astore->rowind[i];
288
s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
294
for (i = 0; i < A->nrow; ++i) {
295
if (rwork[i] > safe2)
296
s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
298
s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) /
299
(rwork[i] + safe1) );
303
/* Test stopping criterion. Continue iterating if
304
1) The residual BERR(J) is larger than machine epsilon, and
305
2) BERR(J) decreased by at least a factor of 2 during the
307
3) At most ITMAX iterations tried. */
309
if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
310
/* Update solution and try again. */
311
cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info);
314
CAXPY(&A->nrow, &done, work, &ione,
315
&Xmat[j*ldx], &ione);
317
caxpy_(&A->nrow, &done, work, &ione,
318
&Xmat[j*ldx], &ione);
328
/* Bound error from formula:
329
norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*
330
( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
332
norm(Z) is the magnitude of the largest component of Z
333
inv(op(A)) is the inverse of op(A)
334
abs(Z) is the componentwise absolute value of the matrix or
336
NZ is the maximum number of nonzeros in any row of A, plus 1
337
EPS is machine epsilon
339
The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
340
is incremented by SAFE1 if the i-th component of
341
abs(op(A))*abs(X) + abs(B) is less than SAFE2.
343
Use CLACON to estimate the infinity-norm of the matrix
344
inv(op(A)) * diag(W),
345
where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
347
for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
349
/* Compute abs(op(A))*abs(X) + abs(B). */
351
for (k = 0; k < A->ncol; ++k) {
352
xk = c_abs1( &Xptr[k] );
353
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
354
rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
357
for (k = 0; k < A->ncol; ++k) {
359
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
360
irow = Astore->rowind[i];
361
xk = c_abs1( &Xptr[irow] );
362
s += c_abs1(&Aval[i]) * xk;
368
for (i = 0; i < A->nrow; ++i)
369
if (rwork[i] > safe2)
370
rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
372
rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
376
clacon_(&A->nrow, &work[A->nrow], work,
378
if (kase == 0) break;
381
/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
382
if ( notran && colequ )
383
for (i = 0; i < A->ncol; ++i) {
384
cs_mult(&work[i], &work[i], C[i]);
386
else if ( !notran && rowequ )
387
for (i = 0; i < A->nrow; ++i) {
388
cs_mult(&work[i], &work[i], R[i]);
391
cgstrs (transt, L, U, perm_r, perm_c, &Bjcol, info);
393
for (i = 0; i < A->nrow; ++i) {
394
cs_mult(&work[i], &work[i], rwork[i]);
397
/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
398
for (i = 0; i < A->nrow; ++i) {
399
cs_mult(&work[i], &work[i], rwork[i]);
402
cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info);
404
if ( notran && colequ )
405
for (i = 0; i < A->ncol; ++i) {
406
cs_mult(&work[i], &work[i], C[i]);
408
else if ( !notran && rowequ )
409
for (i = 0; i < A->ncol; ++i) {
410
cs_mult(&work[i], &work[i], R[i]);
414
} while ( kase != 0 );
416
/* Normalize error. */
418
if ( notran && colequ ) {
419
for (i = 0; i < A->nrow; ++i)
420
lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
421
} else if ( !notran && rowequ ) {
422
for (i = 0; i < A->nrow; ++i)
423
lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
425
for (i = 0; i < A->nrow; ++i)
426
lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
431
} /* for each RHS j ... */
436
SUPERLU_FREE(Bjcol.Store);