4
* -- SuperLU routine (version 2.0) --
5
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
6
* and Lawrence Berkeley National Lab.
11
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
13
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
16
Permission is hereby granted to use or copy this program for any
17
purpose, provided the above notices are retained on all copies.
18
Permission to modify the code and to distribute modified code is
19
granted, provided the above notices are retained, and a notice that
20
the code was modified is included with the above copyright notice.
30
void dusolve(int, int, double*, double*);
31
void dlsolve(int, int, double*, double*);
32
void dmatvec(int, int, int, double*, double*, double*);
36
dgstrsL(char *trans, SuperMatrix *L, int *perm_r, SuperMatrix *B, int *info)
42
* DGSTRSL only performs the L-solve using the LU factorization computed
45
* See supermatrix.h for the definition of 'SuperMatrix' structure.
51
* Specifies the form of the system of equations:
52
* = 'N': A * X = B (No transpose)
53
* = 'T': A'* X = B (Transpose)
54
* = 'C': A**H * X = B (Conjugate transpose)
56
* L (input) SuperMatrix*
57
* The factor L from the factorization Pr*A*Pc=L*U as computed by
58
* dgstrf(). Use compressed row subscripts storage for supernodes,
59
* i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
61
* U (input) SuperMatrix*
62
* The factor U from the factorization Pr*A*Pc=L*U as computed by
63
* dgstrf(). Use column-wise storage scheme, i.e., U has types:
64
* Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
66
* perm_r (input) int*, dimension (L->nrow)
67
* Row permutation vector, which defines the permutation matrix Pr;
68
* perm_r[i] = j means row i of A is in position j in Pr*A.
70
* B (input/output) SuperMatrix*
71
* B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
72
* On entry, the right hand side matrix.
73
* On exit, the solution matrix if info = 0;
76
* = 0: successful exit
77
* < 0: if info = -i, the i-th argument had an illegal value
81
_fcd ftcs1, ftcs2, ftcs3, ftcs4;
83
int incx = 1, incy = 1;
84
double alpha = 1.0, beta = 1.0;
90
int fsupc, nsupr, nsupc, luptr, istart, irow;
91
int i, j, k, iptr, jcol, n, ldb, nrhs;
92
double *work, *work_col, *rhs_work, *soln;
94
extern SuperLUStat_t SuperLUStat;
97
/* Test input parameters ... */
102
notran = lsame_(trans, "N");
103
if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C") ) *info = -1;
104
else if ( L->nrow != L->ncol || L->nrow < 0 ||
105
L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
107
else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
108
B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
112
xerbla_("dgstrsL", &i);
117
work = doubleCalloc(n * nrhs);
118
if ( !work ) ABORT("Malloc fails for local work[].");
119
soln = doubleMalloc(n);
120
if ( !soln ) ABORT("Malloc fails for local soln[].");
122
Bmat = Bstore->nzval;
124
Lval = Lstore->nzval;
128
/* Permute right hand sides to form Pr*B */
129
for (i = 0; i < nrhs; i++) {
130
rhs_work = &Bmat[i*ldb];
131
for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
132
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
135
/* Forward solve PLy=Pb. */
136
for (k = 0; k <= Lstore->nsuper; k++) {
137
fsupc = L_FST_SUPC(k);
138
istart = L_SUB_START(fsupc);
139
nsupr = L_SUB_START(fsupc+1) - istart;
140
nsupc = L_FST_SUPC(k+1) - fsupc;
141
nrow = nsupr - nsupc;
143
solve_ops += nsupc * (nsupc - 1) * nrhs;
144
solve_ops += 2 * nrow * nsupc * nrhs;
147
for (j = 0; j < nrhs; j++) {
148
rhs_work = &Bmat[j*ldb];
149
luptr = L_NZ_START(fsupc);
150
for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
153
rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
157
luptr = L_NZ_START(fsupc);
158
#ifdef USE_VENDOR_BLAS
160
ftcs1 = _cptofcd("L", strlen("L"));
161
ftcs2 = _cptofcd("N", strlen("N"));
162
ftcs3 = _cptofcd("U", strlen("U"));
163
STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
164
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
166
SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
167
&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
168
&beta, &work[0], &n );
170
dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
171
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
173
dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
174
&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
175
&beta, &work[0], &n );
177
for (j = 0; j < nrhs; j++) {
178
rhs_work = &Bmat[j*ldb];
179
work_col = &work[j*n];
180
iptr = istart + nsupc;
181
for (i = 0; i < nrow; i++) {
183
rhs_work[irow] -= work_col[i]; /* Scatter */
189
for (j = 0; j < nrhs; j++) {
190
rhs_work = &Bmat[j*ldb];
191
dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
192
dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
193
&rhs_work[fsupc], &work[0] );
195
iptr = istart + nsupc;
196
for (i = 0; i < nrow; i++) {
198
rhs_work[irow] -= work[i];
208
printf("After L-solve: y=\n");
209
dprint_soln(n, nrhs, Bmat);
212
SuperLUStat.ops[SOLVE] = solve_ops;
215
printf("Transposed solve not implemented.\n");
224
* Diagnostic print of the solution vector
227
dprint_soln(int n, int nrhs, double *soln)
231
for (i = 0; i < n; i++)
232
printf("\t%d: %.4f\n", i, soln[i]);