3
* -- SuperLU routine (version 3.0) --
4
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
5
* and Lawrence Berkeley National Lab.
12
cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
13
int *etree, char *equed, float *R, float *C,
14
SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
15
SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth,
16
float *rcond, float *ferr, float *berr,
17
mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
23
* CGSSVX solves the system of linear equations A*X=B or A'*X=B, using
24
* the LU factorization from cgstrf(). Error bounds on the solution and
25
* a condition estimate are also provided. It performs the following steps:
27
* 1. If A is stored column-wise (A->Stype = SLU_NC):
29
* 1.1. If options->Equil = YES, scaling factors are computed to
30
* equilibrate the system:
31
* options->Trans = NOTRANS:
32
* diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
33
* options->Trans = TRANS:
34
* (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
35
* options->Trans = CONJ:
36
* (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
37
* Whether or not the system will be equilibrated depends on the
38
* scaling of the matrix A, but if equilibration is used, A is
39
* overwritten by diag(R)*A*diag(C) and B by diag(R)*B
40
* (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
43
* 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
44
* matrix that usually preserves sparsity.
45
* For more details of this step, see sp_preorder.c.
47
* 1.3. If options->Fact != FACTORED, the LU decomposition is used to
48
* factor the matrix A (after equilibration if options->Equil = YES)
49
* as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
51
* 1.4. Compute the reciprocal pivot growth factor.
53
* 1.5. If some U(i,i) = 0, so that U is exactly singular, then the
54
* routine returns with info = i. Otherwise, the factored form of
55
* A is used to estimate the condition number of the matrix A. If
56
* the reciprocal of the condition number is less than machine
57
* precision, info = A->ncol+1 is returned as a warning, but the
58
* routine still goes on to solve for X and computes error bounds
61
* 1.6. The system of equations is solved for X using the factored form
64
* 1.7. If options->IterRefine != NOREFINE, iterative refinement is
65
* applied to improve the computed solution matrix and calculate
66
* error bounds and backward error estimates for it.
68
* 1.8. If equilibration was used, the matrix X is premultiplied by
69
* diag(C) (if options->Trans = NOTRANS) or diag(R)
70
* (if options->Trans = TRANS or CONJ) so that it solves the
71
* original system before equilibration.
73
* 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
74
* to the transpose of A:
76
* 2.1. If options->Equil = YES, scaling factors are computed to
77
* equilibrate the system:
78
* options->Trans = NOTRANS:
79
* diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
80
* options->Trans = TRANS:
81
* (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
82
* options->Trans = CONJ:
83
* (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
84
* Whether or not the system will be equilibrated depends on the
85
* scaling of the matrix A, but if equilibration is used, A' is
86
* overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
87
* (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
89
* 2.2. Permute columns of transpose(A) (rows of A),
90
* forming transpose(A)*Pc, where Pc is a permutation matrix that
91
* usually preserves sparsity.
92
* For more details of this step, see sp_preorder.c.
94
* 2.3. If options->Fact != FACTORED, the LU decomposition is used to
95
* factor the transpose(A) (after equilibration if
96
* options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
97
* permutation Pr determined by partial pivoting.
99
* 2.4. Compute the reciprocal pivot growth factor.
101
* 2.5. If some U(i,i) = 0, so that U is exactly singular, then the
102
* routine returns with info = i. Otherwise, the factored form
103
* of transpose(A) is used to estimate the condition number of the
104
* matrix A. If the reciprocal of the condition number
105
* is less than machine precision, info = A->nrow+1 is returned as
106
* a warning, but the routine still goes on to solve for X and
107
* computes error bounds as described below.
109
* 2.6. The system of equations is solved for X using the factored form
112
* 2.7. If options->IterRefine != NOREFINE, iterative refinement is
113
* applied to improve the computed solution matrix and calculate
114
* error bounds and backward error estimates for it.
116
* 2.8. If equilibration was used, the matrix X is premultiplied by
117
* diag(C) (if options->Trans = NOTRANS) or diag(R)
118
* (if options->Trans = TRANS or CONJ) so that it solves the
119
* original system before equilibration.
121
* See supermatrix.h for the definition of 'SuperMatrix' structure.
126
* options (input) superlu_options_t*
127
* The structure defines the input parameters to control
128
* how the LU decomposition will be performed and how the
129
* system will be solved.
131
* A (input/output) SuperMatrix*
132
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
133
* of the linear equations is A->nrow. Currently, the type of A can be:
134
* Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
135
* In the future, more general A may be handled.
137
* On entry, If options->Fact = FACTORED and equed is not 'N',
138
* then A must have been equilibrated by the scaling factors in
140
* On exit, A is not modified if options->Equil = NO, or if
141
* options->Equil = YES but equed = 'N' on exit.
142
* Otherwise, if options->Equil = YES and equed is not 'N',
143
* A is scaled as follows:
144
* If A->Stype = SLU_NC:
145
* equed = 'R': A := diag(R) * A
146
* equed = 'C': A := A * diag(C)
147
* equed = 'B': A := diag(R) * A * diag(C).
148
* If A->Stype = SLU_NR:
149
* equed = 'R': transpose(A) := diag(R) * transpose(A)
150
* equed = 'C': transpose(A) := transpose(A) * diag(C)
151
* equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
153
* perm_c (input/output) int*
154
* If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
155
* which defines the permutation matrix Pc; perm_c[i] = j means
156
* column i of A is in position j in A*Pc.
157
* On exit, perm_c may be overwritten by the product of the input
158
* perm_c and a permutation that postorders the elimination tree
159
* of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
160
* is already in postorder.
162
* If A->Stype = SLU_NR, column permutation vector of size A->nrow,
163
* which describes permutation of columns of transpose(A)
164
* (rows of A) as described above.
166
* perm_r (input/output) int*
167
* If A->Stype = SLU_NC, row permutation vector of size A->nrow,
168
* which defines the permutation matrix Pr, and is determined
169
* by partial pivoting. perm_r[i] = j means row i of A is in
170
* position j in Pr*A.
172
* If A->Stype = SLU_NR, permutation vector of size A->ncol, which
173
* determines permutation of rows of transpose(A)
174
* (columns of A) as described above.
176
* If options->Fact = SamePattern_SameRowPerm, the pivoting routine
177
* will try to use the input perm_r, unless a certain threshold
178
* criterion is violated. In that case, perm_r is overwritten by a
179
* new permutation determined by partial pivoting or diagonal
180
* threshold pivoting.
181
* Otherwise, perm_r is output argument.
183
* etree (input/output) int*, dimension (A->ncol)
184
* Elimination tree of Pc'*A'*A*Pc.
185
* If options->Fact != FACTORED and options->Fact != DOFACT,
186
* etree is an input argument, otherwise it is an output argument.
187
* Note: etree is a vector of parent pointers for a forest whose
188
* vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
190
* equed (input/output) char*
191
* Specifies the form of equilibration that was done.
192
* = 'N': No equilibration.
193
* = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
194
* = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
195
* = 'B': Both row and column equilibration, i.e., A was replaced
196
* by diag(R)*A*diag(C).
197
* If options->Fact = FACTORED, equed is an input argument,
198
* otherwise it is an output argument.
200
* R (input/output) float*, dimension (A->nrow)
201
* The row scale factors for A or transpose(A).
202
* If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
203
* (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
204
* If equed = 'N' or 'C', R is not accessed.
205
* If options->Fact = FACTORED, R is an input argument,
206
* otherwise, R is output.
207
* If options->zFact = FACTORED and equed = 'R' or 'B', each element
208
* of R must be positive.
210
* C (input/output) float*, dimension (A->ncol)
211
* The column scale factors for A or transpose(A).
212
* If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
213
* (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
214
* If equed = 'N' or 'R', C is not accessed.
215
* If options->Fact = FACTORED, C is an input argument,
216
* otherwise, C is output.
217
* If options->Fact = FACTORED and equed = 'C' or 'B', each element
218
* of C must be positive.
220
* L (output) SuperMatrix*
221
* The factor L from the factorization
222
* Pr*A*Pc=L*U (if A->Stype SLU_= NC) or
223
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
224
* Uses compressed row subscripts storage for supernodes, i.e.,
225
* L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
227
* U (output) SuperMatrix*
228
* The factor U from the factorization
229
* Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
230
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
231
* Uses column-wise storage scheme, i.e., U has types:
232
* Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
234
* work (workspace/output) void*, size (lwork) (in bytes)
235
* User supplied workspace, should be large enough
236
* to hold data structures for factors L and U.
237
* On exit, if fact is not 'F', L and U point to this array.
240
* Specifies the size of work array in bytes.
241
* = 0: allocate space internally by system malloc;
242
* > 0: use user-supplied work array of length lwork in bytes,
243
* returns error if space runs out.
244
* = -1: the routine guesses the amount of space needed without
245
* performing the factorization, and returns it in
246
* mem_usage->total_needed; no other side effects.
248
* See argument 'mem_usage' for memory usage statistics.
250
* B (input/output) SuperMatrix*
251
* B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
252
* On entry, the right hand side matrix.
253
* If B->ncol = 0, only LU decomposition is performed, the triangular
256
* if equed = 'N', B is not modified; otherwise
257
* if A->Stype = SLU_NC:
258
* if options->Trans = NOTRANS and equed = 'R' or 'B',
259
* B is overwritten by diag(R)*B;
260
* if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
261
* B is overwritten by diag(C)*B;
262
* if A->Stype = SLU_NR:
263
* if options->Trans = NOTRANS and equed = 'C' or 'B',
264
* B is overwritten by diag(C)*B;
265
* if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
266
* B is overwritten by diag(R)*B.
268
* X (output) SuperMatrix*
269
* X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
270
* If info = 0 or info = A->ncol+1, X contains the solution matrix
271
* to the original system of equations. Note that A and B are modified
272
* on exit if equed is not 'N', and the solution to the equilibrated
273
* system is inv(diag(C))*X if options->Trans = NOTRANS and
274
* equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
275
* and equed = 'R' or 'B'.
277
* recip_pivot_growth (output) float*
278
* The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
279
* The infinity norm is used. If recip_pivot_growth is much less
280
* than 1, the stability of the LU factorization could be poor.
282
* rcond (output) float*
283
* The estimate of the reciprocal condition number of the matrix A
284
* after equilibration (if done). If rcond is less than the machine
285
* precision (in particular, if rcond = 0), the matrix is singular
286
* to working precision. This condition is indicated by a return
289
* FERR (output) float*, dimension (B->ncol)
290
* The estimated forward error bound for each solution vector
291
* X(j) (the j-th column of the solution matrix X).
292
* If XTRUE is the true solution corresponding to X(j), FERR(j)
293
* is an estimated upper bound for the magnitude of the largest
294
* element in (X(j) - XTRUE) divided by the magnitude of the
295
* largest element in X(j). The estimate is as reliable as
296
* the estimate for RCOND, and is almost always a slight
297
* overestimate of the true error.
298
* If options->IterRefine = NOREFINE, ferr = 1.0.
300
* BERR (output) float*, dimension (B->ncol)
301
* The componentwise relative backward error of each solution
302
* vector X(j) (i.e., the smallest relative change in
303
* any element of A or B that makes X(j) an exact solution).
304
* If options->IterRefine = NOREFINE, berr = 1.0.
306
* mem_usage (output) mem_usage_t*
307
* Record the memory usage statistics, consisting of following fields:
309
* The amount of space used in bytes for L\U data structures.
310
* - total_needed (float)
311
* The amount of space needed in bytes to perform factorization.
313
* The number of memory expansions during the LU factorization.
315
* stat (output) SuperLUStat_t*
316
* Record the statistics on runtime and floating-point operation count.
317
* See util.h for the definition of 'SuperLUStat_t'.
320
* = 0: successful exit
321
* < 0: if info = -i, the i-th argument had an illegal value
322
* > 0: if info = i, and i is
323
* <= A->ncol: U(i,i) is exactly zero. The factorization has
324
* been completed, but the factor U is exactly
325
* singular, so the solution and error bounds
326
* could not be computed.
327
* = A->ncol+1: U is nonsingular, but RCOND is less than machine
328
* precision, meaning that the matrix is singular to
329
* working precision. Nevertheless, the solution and
330
* error bounds are computed because there are a number
331
* of situations where the computed solution can be more
332
* accurate than the value of RCOND would suggest.
333
* > A->ncol+1: number of bytes allocated when memory allocation
334
* failure occurred, plus A->ncol.
338
DNformat *Bstore, *Xstore;
339
complex *Bmat, *Xmat;
341
SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
342
SuperMatrix AC; /* Matrix postmultiplied by Pc */
343
int colequ, equil, nofact, notran, rowequ, permc_spec;
347
float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
348
int relax, panel_size;
349
float diag_pivot_thresh, drop_tol;
350
double t0; /* temporary time */
353
/* External functions */
354
extern float clangs(char *, SuperMatrix *);
355
extern double slamch_(char *);
359
Bmat = Bstore->nzval;
360
Xmat = Xstore->nzval;
366
nofact = (options->Fact != FACTORED);
367
equil = (options->Equil == YES);
368
notran = (options->Trans == NOTRANS);
370
*(unsigned char *)equed = 'N';
374
rowequ = lsame_(equed, "R") || lsame_(equed, "B");
375
colequ = lsame_(equed, "C") || lsame_(equed, "B");
376
smlnum = slamch_("Safe minimum");
377
bignum = 1. / smlnum;
381
printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
382
options->Fact, options->Trans, *equed);
385
/* Test the input parameters */
386
if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
387
options->Fact != SamePattern_SameRowPerm &&
388
!notran && options->Trans != TRANS && options->Trans != CONJ &&
389
!equil && options->Equil != NO)
391
else if ( A->nrow != A->ncol || A->nrow < 0 ||
392
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
393
A->Dtype != SLU_C || A->Mtype != SLU_GE )
395
else if (options->Fact == FACTORED &&
396
!(rowequ || colequ || lsame_(equed, "N")))
402
for (j = 0; j < A->nrow; ++j) {
403
rcmin = SUPERLU_MIN(rcmin, R[j]);
404
rcmax = SUPERLU_MAX(rcmax, R[j]);
406
if (rcmin <= 0.) *info = -7;
407
else if ( A->nrow > 0)
408
rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
411
if (colequ && *info == 0) {
414
for (j = 0; j < A->nrow; ++j) {
415
rcmin = SUPERLU_MIN(rcmin, C[j]);
416
rcmax = SUPERLU_MAX(rcmax, C[j]);
418
if (rcmin <= 0.) *info = -8;
419
else if (A->nrow > 0)
420
colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
424
if ( lwork < -1 ) *info = -12;
425
else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
426
B->Stype != SLU_DN || B->Dtype != SLU_C ||
429
else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
430
(B->ncol != 0 && B->ncol != X->ncol) ||
431
X->Stype != SLU_DN ||
432
X->Dtype != SLU_C || X->Mtype != SLU_GE )
438
xerbla_("cgssvx", &i);
442
/* Initialization for factor parameters */
443
panel_size = sp_ienv(1);
445
diag_pivot_thresh = options->DiagPivotThresh;
450
/* Convert A to SLU_NC format when necessary. */
451
if ( A->Stype == SLU_NR ) {
452
NRformat *Astore = A->Store;
453
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
454
cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
455
Astore->nzval, Astore->colind, Astore->rowptr,
456
SLU_NC, A->Dtype, A->Mtype);
457
if ( notran ) { /* Reverse the transpose argument. */
464
} else { /* A->Stype == SLU_NC */
465
trant = options->Trans;
469
if ( nofact && equil ) {
470
t0 = SuperLU_timer_();
471
/* Compute row and column scalings to equilibrate the matrix A. */
472
cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
475
/* Equilibrate matrix A. */
476
claqgs(AA, R, C, rowcnd, colcnd, amax, equed);
477
rowequ = lsame_(equed, "R") || lsame_(equed, "B");
478
colequ = lsame_(equed, "C") || lsame_(equed, "B");
480
utime[EQUIL] = SuperLU_timer_() - t0;
484
/* Scale the right hand side if equilibration was performed. */
487
for (j = 0; j < nrhs; ++j)
488
for (i = 0; i < A->nrow; ++i) {
489
cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
492
} else if ( colequ ) {
493
for (j = 0; j < nrhs; ++j)
494
for (i = 0; i < A->nrow; ++i) {
495
cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
502
t0 = SuperLU_timer_();
504
* Gnet column permutation vector perm_c[], according to permc_spec:
505
* permc_spec = NATURAL: natural ordering
506
* permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
507
* permc_spec = MMD_ATA: minimum degree on structure of A'*A
508
* permc_spec = COLAMD: approximate minimum degree column ordering
509
* permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
511
permc_spec = options->ColPerm;
512
if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
513
get_perm_c(permc_spec, AA, perm_c);
514
utime[COLPERM] = SuperLU_timer_() - t0;
516
t0 = SuperLU_timer_();
517
sp_preorder(options, AA, perm_c, etree, &AC);
518
utime[ETREE] = SuperLU_timer_() - t0;
520
/* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
521
relax, panel_size, sp_ienv(3), sp_ienv(4));
524
/* Compute the LU factorization of A*Pc. */
525
t0 = SuperLU_timer_();
526
cgstrf(options, &AC, drop_tol, relax, panel_size,
527
etree, work, lwork, perm_c, perm_r, L, U, stat, info);
528
utime[FACT] = SuperLU_timer_() - t0;
531
mem_usage->total_needed = *info - A->ncol;
536
if ( options->PivotGrowth ) {
538
if ( *info <= A->ncol ) {
539
/* Compute the reciprocal pivot growth factor of the leading
540
rank-deficient *info columns of A. */
541
*recip_pivot_growth = cPivotGrowth(*info, AA, perm_c, L, U);
546
/* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
547
*recip_pivot_growth = cPivotGrowth(A->ncol, AA, perm_c, L, U);
550
if ( options->ConditionNumber ) {
551
/* Estimate the reciprocal of the condition number of A. */
552
t0 = SuperLU_timer_();
554
*(unsigned char *)norm = '1';
556
*(unsigned char *)norm = 'I';
558
anorm = clangs(norm, AA);
559
cgscon(norm, L, U, anorm, rcond, stat, info);
560
utime[RCOND] = SuperLU_timer_() - t0;
564
/* Compute the solution matrix X. */
565
for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
566
for (i = 0; i < B->nrow; i++)
567
Xmat[i + j*ldx] = Bmat[i + j*ldb];
569
t0 = SuperLU_timer_();
570
cgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
571
utime[SOLVE] = SuperLU_timer_() - t0;
573
/* Use iterative refinement to improve the computed solution and compute
574
error bounds and backward error estimates for it. */
575
t0 = SuperLU_timer_();
576
if ( options->IterRefine != NOREFINE ) {
577
cgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
578
X, ferr, berr, stat, info);
580
for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
582
utime[REFINE] = SuperLU_timer_() - t0;
584
/* Transform the solution matrix X to a solution of the original system. */
587
for (j = 0; j < nrhs; ++j)
588
for (i = 0; i < A->nrow; ++i) {
589
cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
592
} else if ( rowequ ) {
593
for (j = 0; j < nrhs; ++j)
594
for (i = 0; i < A->nrow; ++i) {
595
cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
598
} /* end if nrhs > 0 */
600
if ( options->ConditionNumber ) {
601
/* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
602
if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
606
cQuerySpace(L, U, mem_usage);
607
Destroy_CompCol_Permuted(&AC);
609
if ( A->Stype == SLU_NR ) {
610
Destroy_SuperMatrix_Store(AA);