3
* -- SuperLU routine (version 3.0) --
4
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
5
* and Lawrence Berkeley National Lab.
11
* History: Modified from lapack routine SGERFS
17
sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
18
int *perm_c, int *perm_r, char *equed, float *R, float *C,
19
SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
20
SuperLUStat_t *stat, int *info)
26
* SGSRFS improves the computed solution to a system of linear
27
* equations and provides error bounds and backward error estimates for
30
* If equilibration was performed, the system becomes:
31
* (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
33
* See supermatrix.h for the definition of 'SuperMatrix' structure.
38
* trans (input) trans_t
39
* Specifies the form of the system of equations:
40
* = NOTRANS: A * X = B (No transpose)
41
* = TRANS: A'* X = B (Transpose)
42
* = CONJ: A**H * X = B (Conjugate transpose)
44
* A (input) SuperMatrix*
45
* The original matrix A in the system, or the scaled A if
46
* equilibration was done. The type of A can be:
47
* Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_GE.
49
* L (input) SuperMatrix*
50
* The factor L from the factorization Pr*A*Pc=L*U. Use
51
* compressed row subscripts storage for supernodes,
52
* i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
54
* U (input) SuperMatrix*
55
* The factor U from the factorization Pr*A*Pc=L*U as computed by
56
* sgstrf(). Use column-wise storage scheme,
57
* i.e., U has types: Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
59
* perm_c (input) int*, dimension (A->ncol)
60
* Column permutation vector, which defines the
61
* permutation matrix Pc; perm_c[i] = j means column i of A is
62
* in position j in A*Pc.
64
* perm_r (input) int*, dimension (A->nrow)
65
* Row permutation vector, which defines the permutation matrix Pr;
66
* perm_r[i] = j means row i of A is in position j in Pr*A.
68
* equed (input) Specifies the form of equilibration that was done.
69
* = 'N': No equilibration.
70
* = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
71
* = 'C': Column equilibration, i.e., A was postmultiplied by
73
* = 'B': Both row and column equilibration, i.e., A was replaced
74
* by diag(R)*A*diag(C).
76
* R (input) float*, dimension (A->nrow)
77
* The row scale factors for A.
78
* If equed = 'R' or 'B', A is premultiplied by diag(R).
79
* If equed = 'N' or 'C', R is not accessed.
81
* C (input) float*, dimension (A->ncol)
82
* The column scale factors for A.
83
* If equed = 'C' or 'B', A is postmultiplied by diag(C).
84
* If equed = 'N' or 'R', C is not accessed.
86
* B (input) SuperMatrix*
87
* B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
88
* The right hand side matrix B.
89
* if equed = 'R' or 'B', B is premultiplied by diag(R).
91
* X (input/output) SuperMatrix*
92
* X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
93
* On entry, the solution matrix X, as computed by sgstrs().
94
* On exit, the improved solution matrix X.
95
* if *equed = 'C' or 'B', X should be premultiplied by diag(C)
96
* in order to obtain the solution to the original system.
98
* FERR (output) float*, dimension (B->ncol)
99
* The estimated forward error bound for each solution vector
100
* X(j) (the j-th column of the solution matrix X).
101
* If XTRUE is the true solution corresponding to X(j), FERR(j)
102
* is an estimated upper bound for the magnitude of the largest
103
* element in (X(j) - XTRUE) divided by the magnitude of the
104
* largest element in X(j). The estimate is as reliable as
105
* the estimate for RCOND, and is almost always a slight
106
* overestimate of the true error.
108
* BERR (output) float*, dimension (B->ncol)
109
* The componentwise relative backward error of each solution
110
* vector X(j) (i.e., the smallest relative change in
111
* any element of A or B that makes X(j) an exact solution).
113
* stat (output) SuperLUStat_t*
114
* Record the statistics on runtime and floating-point operation count.
115
* See util.h for the definition of 'SuperLUStat_t'.
118
* = 0: successful exit
119
* < 0: if INFO = -i, the i-th argument had an illegal value
121
* Internal Parameters
122
* ===================
124
* ITMAX is the maximum number of steps of iterative refinement.
130
/* Table of constant values */
135
/* Local variables */
139
DNformat *Bstore, *Xstore, *Bjcol_store;
140
float *Bmat, *Xmat, *Bptr, *Xptr;
143
int i, j, k, irow, nz, count, notran, rowequ, colequ;
145
float s, xk, lstres, eps, safmin;
151
extern double slamch_(char *);
152
extern int slacon_(int *, float *, float *, int *, float *, int *);
154
extern int SCOPY(int *, float *, int *, float *, int *);
155
extern int SSAXPY(int *, float *, float *, int *, float *, int *);
157
extern int scopy_(int *, float *, int *, float *, int *);
158
extern int saxpy_(int *, float *, float *, int *, float *, int *);
162
Aval = Astore->nzval;
165
Bmat = Bstore->nzval;
166
Xmat = Xstore->nzval;
171
/* Test the input parameters */
173
notran = (trans == NOTRANS);
174
if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
175
else if ( A->nrow != A->ncol || A->nrow < 0 ||
176
A->Stype != SLU_NC || A->Dtype != SLU_S || A->Mtype != SLU_GE )
178
else if ( L->nrow != L->ncol || L->nrow < 0 ||
179
L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
181
else if ( U->nrow != U->ncol || U->nrow < 0 ||
182
U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
184
else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
185
B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
187
else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
188
X->Stype != SLU_DN || X->Dtype != SLU_S || X->Mtype != SLU_GE )
192
xerbla_("sgsrfs", &i);
196
/* Quick return if possible */
197
if ( A->nrow == 0 || nrhs == 0) {
198
for (j = 0; j < nrhs; ++j) {
205
rowequ = lsame_(equed, "R") || lsame_(equed, "B");
206
colequ = lsame_(equed, "C") || lsame_(equed, "B");
208
/* Allocate working space */
209
work = floatMalloc(2*A->nrow);
210
rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
211
iwork = intMalloc(2*A->nrow);
212
if ( !work || !rwork || !iwork )
213
ABORT("Malloc fails for work/rwork/iwork.");
216
*(unsigned char *)transc = 'N';
219
*(unsigned char *)transc = 'T';
223
/* NZ = maximum number of nonzero elements in each row of A, plus 1 */
225
eps = slamch_("Epsilon");
226
safmin = slamch_("Safe minimum");
230
/* Compute the number of nonzeros in each row (or column) of A */
231
for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
233
for (k = 0; k < A->ncol; ++k)
234
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
235
++iwork[Astore->rowind[i]];
237
for (k = 0; k < A->ncol; ++k)
238
iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
241
/* Copy one column of RHS B into Bjcol. */
242
Bjcol.Stype = B->Stype;
243
Bjcol.Dtype = B->Dtype;
244
Bjcol.Mtype = B->Mtype;
245
Bjcol.nrow = B->nrow;
247
Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
248
if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
249
Bjcol_store = Bjcol.Store;
250
Bjcol_store->lda = ldb;
251
Bjcol_store->nzval = work; /* address aliasing */
253
/* Do for each right hand side ... */
254
for (j = 0; j < nrhs; ++j) {
260
while (1) { /* Loop until stopping criterion is satisfied. */
262
/* Compute residual R = B - op(A) * X,
263
where op(A) = A, A**T, or A**H, depending on TRANS. */
266
SCOPY(&A->nrow, Bptr, &ione, work, &ione);
268
scopy_(&A->nrow, Bptr, &ione, work, &ione);
270
sp_sgemv(transc, ndone, A, Xptr, ione, done, work, ione);
272
/* Compute componentwise relative backward error from formula
273
max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
274
where abs(Z) is the componentwise absolute value of the matrix
275
or vector Z. If the i-th component of the denominator is less
276
than SAFE2, then SAFE1 is added to the i-th component of the
277
numerator and denominator before dividing. */
279
for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
281
/* Compute abs(op(A))*abs(X) + abs(B). */
283
for (k = 0; k < A->ncol; ++k) {
284
xk = fabs( Xptr[k] );
285
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
286
rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
289
for (k = 0; k < A->ncol; ++k) {
291
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
292
irow = Astore->rowind[i];
293
s += fabs(Aval[i]) * fabs(Xptr[irow]);
299
for (i = 0; i < A->nrow; ++i) {
300
if (rwork[i] > safe2)
301
s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] );
303
s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) /
304
(rwork[i] + safe1) );
308
/* Test stopping criterion. Continue iterating if
309
1) The residual BERR(J) is larger than machine epsilon, and
310
2) BERR(J) decreased by at least a factor of 2 during the
312
3) At most ITMAX iterations tried. */
314
if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
315
/* Update solution and try again. */
316
sgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
319
SAXPY(&A->nrow, &done, work, &ione,
320
&Xmat[j*ldx], &ione);
322
saxpy_(&A->nrow, &done, work, &ione,
323
&Xmat[j*ldx], &ione);
333
stat->RefineSteps = count;
335
/* Bound error from formula:
336
norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*
337
( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
339
norm(Z) is the magnitude of the largest component of Z
340
inv(op(A)) is the inverse of op(A)
341
abs(Z) is the componentwise absolute value of the matrix or
343
NZ is the maximum number of nonzeros in any row of A, plus 1
344
EPS is machine epsilon
346
The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
347
is incremented by SAFE1 if the i-th component of
348
abs(op(A))*abs(X) + abs(B) is less than SAFE2.
350
Use SLACON to estimate the infinity-norm of the matrix
351
inv(op(A)) * diag(W),
352
where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
354
for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
356
/* Compute abs(op(A))*abs(X) + abs(B). */
358
for (k = 0; k < A->ncol; ++k) {
359
xk = fabs( Xptr[k] );
360
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
361
rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
364
for (k = 0; k < A->ncol; ++k) {
366
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
367
irow = Astore->rowind[i];
368
xk = fabs( Xptr[irow] );
369
s += fabs(Aval[i]) * xk;
375
for (i = 0; i < A->nrow; ++i)
376
if (rwork[i] > safe2)
377
rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
379
rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
384
slacon_(&A->nrow, &work[A->nrow], work,
385
&iwork[A->nrow], &ferr[j], &kase);
386
if (kase == 0) break;
389
/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
390
if ( notran && colequ )
391
for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
392
else if ( !notran && rowequ )
393
for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
395
sgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
397
for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
399
/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
400
for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
402
sgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
404
if ( notran && colequ )
405
for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
406
else if ( !notran && rowequ )
407
for (i = 0; i < A->ncol; ++i) work[i] *= R[i];
410
} while ( kase != 0 );
413
/* Normalize error. */
415
if ( notran && colequ ) {
416
for (i = 0; i < A->nrow; ++i)
417
lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) );
418
} else if ( !notran && rowequ ) {
419
for (i = 0; i < A->nrow; ++i)
420
lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) );
422
for (i = 0; i < A->nrow; ++i)
423
lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) );
428
} /* for each RHS j ... */
433
SUPERLU_FREE(Bjcol.Store);