4
* -- SuperLU routine (version 2.0) --
5
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
6
* and Lawrence Berkeley National Lab.
11
* File name: sp_blas2.c
12
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
22
void susolve(int, int, float*, float*);
23
void slsolve(int, int, float*, float*);
24
void smatvec(int, int, int, float*, float*, float*);
28
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
29
SuperMatrix *U, float *x, int *info)
35
* sp_strsv() solves one of the systems of equations
36
* A*x = b, or A'*x = b,
37
* where b and x are n element vectors and A is a sparse unit , or
38
* non-unit, upper or lower triangular matrix.
39
* No test for singularity or near-singularity is included in this
40
* routine. Such tests must be performed before calling this routine.
45
* uplo - (input) char*
46
* On entry, uplo specifies whether the matrix is an upper or
47
* lower triangular matrix as follows:
48
* uplo = 'U' or 'u' A is an upper triangular matrix.
49
* uplo = 'L' or 'l' A is a lower triangular matrix.
51
* trans - (input) char*
52
* On entry, trans specifies the equations to be solved as
54
* trans = 'N' or 'n' A*x = b.
55
* trans = 'T' or 't' A'*x = b.
56
* trans = 'C' or 'c' A'*x = b.
58
* diag - (input) char*
59
* On entry, diag specifies whether or not A is unit
60
* triangular as follows:
61
* diag = 'U' or 'u' A is assumed to be unit triangular.
62
* diag = 'N' or 'n' A is not assumed to be unit
65
* L - (input) SuperMatrix*
66
* The factor L from the factorization Pr*A*Pc=L*U. Use
67
* compressed row subscripts storage for supernodes,
68
* i.e., L has types: Stype = SC, Dtype = S_, Mtype = TRLU.
70
* U - (input) SuperMatrix*
71
* The factor U from the factorization Pr*A*Pc=L*U.
72
* U has types: Stype = NC, Dtype = S_, Mtype = TRU.
74
* x - (input/output) float*
75
* Before entry, the incremented array X must contain the n
76
* element right-hand side vector b. On exit, X is overwritten
77
* with the solution vector x.
79
* info - (output) int*
80
* If *info = -i, the i-th argument had an illegal value.
84
_fcd ftcs1 = _cptofcd("L", strlen("L")),
85
ftcs2 = _cptofcd("N", strlen("N")),
86
ftcs3 = _cptofcd("U", strlen("U"));
91
int incx = 1, incy = 1;
92
float alpha = 1.0, beta = 1.0;
94
int fsupc, nsupr, nsupc, luptr, istart, irow;
98
extern SuperLUStat_t SuperLUStat;
100
/* Test the input parameters */
102
if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
103
else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
104
else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
105
else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
106
else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
109
xerbla_("sp_strsv", &i);
114
Lval = Lstore->nzval;
116
Uval = Ustore->nzval;
119
if ( !(work = floatCalloc(L->nrow)) )
120
ABORT("Malloc fails for work in sp_strsv().");
122
if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
124
if ( lsame_(uplo, "L") ) {
125
/* Form x := inv(L)*x */
126
if ( L->nrow == 0 ) return 0; /* Quick return */
128
for (k = 0; k <= Lstore->nsuper; k++) {
129
fsupc = L_FST_SUPC(k);
130
istart = L_SUB_START(fsupc);
131
nsupr = L_SUB_START(fsupc+1) - istart;
132
nsupc = L_FST_SUPC(k+1) - fsupc;
133
luptr = L_NZ_START(fsupc);
134
nrow = nsupr - nsupc;
136
solve_ops += nsupc * (nsupc - 1);
137
solve_ops += 2 * nrow * nsupc;
140
for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
143
x[irow] -= x[fsupc] * Lval[luptr];
146
#ifdef USE_VENDOR_BLAS
148
STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
151
SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
152
&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
154
strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
157
sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
158
&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
161
slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
163
smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
164
&x[fsupc], &work[0] );
167
iptr = istart + nsupc;
168
for (i = 0; i < nrow; ++i, ++iptr) {
170
x[irow] -= work[i]; /* Scatter */
178
/* Form x := inv(U)*x */
180
if ( U->nrow == 0 ) return 0; /* Quick return */
182
for (k = Lstore->nsuper; k >= 0; k--) {
183
fsupc = L_FST_SUPC(k);
184
nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
185
nsupc = L_FST_SUPC(k+1) - fsupc;
186
luptr = L_NZ_START(fsupc);
188
solve_ops += nsupc * (nsupc + 1);
191
x[fsupc] /= Lval[luptr];
192
for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
194
x[irow] -= x[fsupc] * Uval[i];
197
#ifdef USE_VENDOR_BLAS
199
STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
202
strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
206
susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
209
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
210
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
211
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
214
x[irow] -= x[jcol] * Uval[i];
221
} else { /* Form x := inv(A')*x */
223
if ( lsame_(uplo, "L") ) {
224
/* Form x := inv(L')*x */
225
if ( L->nrow == 0 ) return 0; /* Quick return */
227
for (k = Lstore->nsuper; k >= 0; --k) {
228
fsupc = L_FST_SUPC(k);
229
istart = L_SUB_START(fsupc);
230
nsupr = L_SUB_START(fsupc+1) - istart;
231
nsupc = L_FST_SUPC(k+1) - fsupc;
232
luptr = L_NZ_START(fsupc);
234
solve_ops += 2 * (nsupr - nsupc) * nsupc;
236
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
237
iptr = istart + nsupc;
238
for (i = L_NZ_START(jcol) + nsupc;
239
i < L_NZ_START(jcol+1); i++) {
241
x[jcol] -= x[irow] * Lval[i];
247
solve_ops += nsupc * (nsupc - 1);
249
ftcs1 = _cptofcd("L", strlen("L"));
250
ftcs2 = _cptofcd("T", strlen("T"));
251
ftcs3 = _cptofcd("U", strlen("U"));
252
STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
255
strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
261
/* Form x := inv(U')*x */
262
if ( U->nrow == 0 ) return 0; /* Quick return */
264
for (k = 0; k <= Lstore->nsuper; k++) {
265
fsupc = L_FST_SUPC(k);
266
nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
267
nsupc = L_FST_SUPC(k+1) - fsupc;
268
luptr = L_NZ_START(fsupc);
270
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
271
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
272
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
274
x[jcol] -= x[irow] * Uval[i];
278
solve_ops += nsupc * (nsupc + 1);
281
x[fsupc] /= Lval[luptr];
284
ftcs1 = _cptofcd("U", strlen("U"));
285
ftcs2 = _cptofcd("T", strlen("T"));
286
ftcs3 = _cptofcd("N", strlen("N"));
287
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
290
strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
298
SuperLUStat.ops[SOLVE] += solve_ops;
307
sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
308
int incx, float beta, float *y, int incy)
313
sp_sgemv() performs one of the matrix-vector operations
314
y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
315
where alpha and beta are scalars, x and y are vectors and A is a
316
sparse A->nrow by A->ncol matrix.
321
TRANS - (input) char*
322
On entry, TRANS specifies the operation to be performed as
324
TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
325
TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
326
TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
328
ALPHA - (input) float
329
On entry, ALPHA specifies the scalar alpha.
331
A - (input) SuperMatrix*
332
Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
333
Currently, the type of A can be:
334
Stype = NC or NCP; Dtype = S_; Mtype = GE.
335
In the future, more general A can be handled.
337
X - (input) float*, array of DIMENSION at least
338
( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
340
( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
341
Before entry, the incremented array X must contain the
345
On entry, INCX specifies the increment for the elements of
346
X. INCX must not be zero.
349
On entry, BETA specifies the scalar beta. When BETA is
350
supplied as zero then Y need not be set on input.
352
Y - (output) float*, array of DIMENSION at least
353
( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
355
( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
356
Before entry with BETA non-zero, the incremented array Y
357
must contain the vector y. On exit, Y is overwritten by the
361
On entry, INCY specifies the increment for the elements of
362
Y. INCY must not be zero.
364
==== Sparse Level 2 Blas routine.
367
/* Local variables */
372
int lenx, leny, i, j, irow;
373
int iy, jx, jy, kx, ky;
376
notran = lsame_(trans, "N");
378
Aval = Astore->nzval;
380
/* Test the input parameters */
382
if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
383
else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
384
else if (incx == 0) info = 5;
385
else if (incy == 0) info = 8;
387
xerbla_("sp_sgemv ", &info);
391
/* Quick return if possible. */
392
if (A->nrow == 0 || A->ncol == 0 || alpha == 0. && beta == 1.)
395
/* Set LENX and LENY, the lengths of the vectors x and y, and set
396
up the start points in X and Y. */
397
if (lsame_(trans, "N")) {
404
if (incx > 0) kx = 0;
405
else kx = - (lenx - 1) * incx;
406
if (incy > 0) ky = 0;
407
else ky = - (leny - 1) * incy;
409
/* Start the operations. In this version the elements of A are
410
accessed sequentially with one pass through A. */
411
/* First form y := beta*y. */
415
for (i = 0; i < leny; ++i) y[i] = 0.;
417
for (i = 0; i < leny; ++i) y[i] = beta * y[i];
421
for (i = 0; i < leny; ++i) {
426
for (i = 0; i < leny; ++i) {
427
y[iy] = beta * y[iy];
433
if (alpha == 0.) return 0;
436
/* Form y := alpha*A*x + y. */
439
for (j = 0; j < A->ncol; ++j) {
441
temp = alpha * x[jx];
442
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
443
irow = Astore->rowind[i];
444
y[irow] += temp * Aval[i];
450
ABORT("Not implemented.");
453
/* Form y := alpha*A'*x + y. */
456
for (j = 0; j < A->ncol; ++j) {
458
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
459
irow = Astore->rowind[i];
460
temp += Aval[i] * x[irow];
462
y[jy] += alpha * temp;
466
ABORT("Not implemented.");