3
* -- SuperLU routine (version 3.0) --
4
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
5
* and Lawrence Berkeley National Lab.
10
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
12
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
13
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
15
Permission is hereby granted to use or copy this program for any
16
purpose, provided the above notices are retained on all copies.
17
Permission to modify the code and to distribute modified code is
18
granted, provided the above notices are retained, and a notice that
19
the code was modified is included with the above copyright notice.
28
void dusolve(int, int, double*, double*);
29
void dlsolve(int, int, double*, double*);
30
void dmatvec(int, int, int, double*, double*, double*);
34
dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
35
int *perm_c, int *perm_r, SuperMatrix *B,
36
SuperLUStat_t *stat, int *info)
42
* DGSTRS solves a system of linear equations A*X=B or A'*X=B
43
* with A sparse and B dense, using the LU factorization computed by
46
* See supermatrix.h for the definition of 'SuperMatrix' structure.
51
* trans (input) trans_t
52
* Specifies the form of the system of equations:
53
* = NOTRANS: A * X = B (No transpose)
54
* = TRANS: A'* X = B (Transpose)
55
* = CONJ: A**H * X = B (Conjugate transpose)
57
* L (input) SuperMatrix*
58
* The factor L from the factorization Pr*A*Pc=L*U as computed by
59
* dgstrf(). Use compressed row subscripts storage for supernodes,
60
* i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
62
* U (input) SuperMatrix*
63
* The factor U from the factorization Pr*A*Pc=L*U as computed by
64
* dgstrf(). Use column-wise storage scheme, i.e., U has types:
65
* Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
67
* perm_c (input) int*, dimension (L->ncol)
68
* Column permutation vector, which defines the
69
* permutation matrix Pc; perm_c[i] = j means column i of A is
70
* in position j in A*Pc.
72
* perm_r (input) int*, dimension (L->nrow)
73
* Row permutation vector, which defines the permutation matrix Pr;
74
* perm_r[i] = j means row i of A is in position j in Pr*A.
76
* B (input/output) SuperMatrix*
77
* B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
78
* On entry, the right hand side matrix.
79
* On exit, the solution matrix if info = 0;
81
* stat (output) SuperLUStat_t*
82
* Record the statistics on runtime and floating-point operation count.
83
* See util.h for the definition of 'SuperLUStat_t'.
86
* = 0: successful exit
87
* < 0: if info = -i, the i-th argument had an illegal value
91
_fcd ftcs1, ftcs2, ftcs3, ftcs4;
93
int incx = 1, incy = 1;
94
#ifdef USE_VENDOR_BLAS
95
double alpha = 1.0, beta = 1.0;
103
int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
104
int i, j, k, iptr, jcol, n, ldb, nrhs;
105
double *work, *rhs_work, *soln;
109
/* Test input parameters ... */
114
if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
115
else if ( L->nrow != L->ncol || L->nrow < 0 ||
116
L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
118
else if ( U->nrow != U->ncol || U->nrow < 0 ||
119
U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
121
else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
122
B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
126
xerbla_("dgstrs", &i);
131
work = doubleCalloc(n * nrhs);
132
if ( !work ) ABORT("Malloc fails for local work[].");
133
soln = doubleMalloc(n);
134
if ( !soln ) ABORT("Malloc fails for local soln[].");
136
Bmat = Bstore->nzval;
138
Lval = Lstore->nzval;
140
Uval = Ustore->nzval;
143
if ( trans == NOTRANS ) {
144
/* Permute right hand sides to form Pr*B */
145
for (i = 0; i < nrhs; i++) {
146
rhs_work = &Bmat[i*ldb];
147
for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
148
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
151
/* Forward solve PLy=Pb. */
152
for (k = 0; k <= Lstore->nsuper; k++) {
153
fsupc = L_FST_SUPC(k);
154
istart = L_SUB_START(fsupc);
155
nsupr = L_SUB_START(fsupc+1) - istart;
156
nsupc = L_FST_SUPC(k+1) - fsupc;
157
nrow = nsupr - nsupc;
159
solve_ops += nsupc * (nsupc - 1) * nrhs;
160
solve_ops += 2 * nrow * nsupc * nrhs;
163
for (j = 0; j < nrhs; j++) {
164
rhs_work = &Bmat[j*ldb];
165
luptr = L_NZ_START(fsupc);
166
for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
169
rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
173
luptr = L_NZ_START(fsupc);
174
#ifdef USE_VENDOR_BLAS
176
ftcs1 = _cptofcd("L", strlen("L"));
177
ftcs2 = _cptofcd("N", strlen("N"));
178
ftcs3 = _cptofcd("U", strlen("U"));
179
STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
180
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
182
SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
183
&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
184
&beta, &work[0], &n );
186
dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
187
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
189
dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
190
&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
191
&beta, &work[0], &n );
193
for (j = 0; j < nrhs; j++) {
194
rhs_work = &Bmat[j*ldb];
195
work_col = &work[j*n];
196
iptr = istart + nsupc;
197
for (i = 0; i < nrow; i++) {
199
rhs_work[irow] -= work_col[i]; /* Scatter */
205
for (j = 0; j < nrhs; j++) {
206
rhs_work = &Bmat[j*ldb];
207
dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
208
dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
209
&rhs_work[fsupc], &work[0] );
211
iptr = istart + nsupc;
212
for (i = 0; i < nrow; i++) {
214
rhs_work[irow] -= work[i];
224
printf("After L-solve: y=\n");
225
dprint_soln(n, nrhs, Bmat);
231
for (k = Lstore->nsuper; k >= 0; k--) {
232
fsupc = L_FST_SUPC(k);
233
istart = L_SUB_START(fsupc);
234
nsupr = L_SUB_START(fsupc+1) - istart;
235
nsupc = L_FST_SUPC(k+1) - fsupc;
236
luptr = L_NZ_START(fsupc);
238
solve_ops += nsupc * (nsupc + 1) * nrhs;
242
for (j = 0; j < nrhs; j++) {
243
rhs_work[fsupc] /= Lval[luptr];
247
#ifdef USE_VENDOR_BLAS
249
ftcs1 = _cptofcd("L", strlen("L"));
250
ftcs2 = _cptofcd("U", strlen("U"));
251
ftcs3 = _cptofcd("N", strlen("N"));
252
STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
253
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
255
dtrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
256
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
259
for (j = 0; j < nrhs; j++)
260
dusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
264
for (j = 0; j < nrhs; ++j) {
265
rhs_work = &Bmat[j*ldb];
266
for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
267
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
268
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
270
rhs_work[irow] -= rhs_work[jcol] * Uval[i];
278
printf("After U-solve: x=\n");
279
dprint_soln(n, nrhs, Bmat);
282
/* Compute the final solution X := Pc*X. */
283
for (i = 0; i < nrhs; i++) {
284
rhs_work = &Bmat[i*ldb];
285
for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
286
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
289
stat->ops[SOLVE] = solve_ops;
291
} else { /* Solve A'*X=B */
292
/* Permute right hand sides to form Pc'*B. */
293
for (i = 0; i < nrhs; i++) {
294
rhs_work = &Bmat[i*ldb];
295
for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
296
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
299
stat->ops[SOLVE] = 0;
301
for (k = 0; k < nrhs; ++k) {
303
/* Multiply by inv(U'). */
304
sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
306
/* Multiply by inv(L'). */
307
sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
311
/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
312
for (i = 0; i < nrhs; i++) {
313
rhs_work = &Bmat[i*ldb];
314
for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
315
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
325
* Diagnostic print of the solution vector
328
dprint_soln(int n, int nrhs, double *soln)
332
for (i = 0; i < n; i++)
333
printf("\t%d: %.4f\n", i, soln[i]);