4
* -- SuperLU routine (version 2.0) --
5
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
6
* and Lawrence Berkeley National Lab.
11
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
13
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
16
Permission is hereby granted to use or copy this program for any
17
purpose, provided the above notices are retained on all copies.
18
Permission to modify the code and to distribute modified code is
19
granted, provided the above notices are retained, and a notice that
20
the code was modified is included with the above copyright notice.
31
void dusolve(int, int, double*, double*);
32
void dlsolve(int, int, double*, double*);
33
void dmatvec(int, int, int, double*, double*, double*);
37
/* Return value: 0 - successful return
38
* > 0 - number of bytes allocated when run out of space
42
const int jcol, /* in */
43
const int nseg, /* in */
44
double *dense, /* in */
45
double *tempv, /* working array */
48
int fpanelc, /* in -- first column in the current panel */
49
GlobalLU_t *Glu /* modified */
55
* Performs numeric block updates (sup-col) in topological order.
56
* It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
57
* Special processing on the supernodal portion of L\U[*,j]
61
_fcd ftcs1 = _cptofcd("L", strlen("L")),
62
ftcs2 = _cptofcd("N", strlen("N")),
63
ftcs3 = _cptofcd("U", strlen("U"));
65
int incx = 1, incy = 1;
68
/* krep = representative of current k-th supernode
69
* fsupc = first supernodal column
70
* nsupc = no of columns in supernode
71
* nsupr = no of rows in supernode (used as leading dimension)
72
* luptr = location of supernodal LU-block in storage
73
* kfnz = first nonz in the k-th supernodal segment
74
* no_zeros = no of leading zeros in a supernodal U-segment
76
double ukj, ukj1, ukj2;
77
int luptr, luptr1, luptr2;
78
int fsupc, nsupc, nsupr, segsze;
79
int nrow; /* No of rows in the matrix of matrix-vector */
80
int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
81
register int lptr, kfnz, isub, irow, i;
82
register int no_zeros, new_next;
84
int fst_col; /* First column within small LU update */
85
int d_fsupc; /* Distance between the first column of the current
86
panel and the first column of the current snode. */
97
extern SuperLUStat_t SuperLUStat;
98
flops_t *ops = SuperLUStat.ops;
105
xlusup = Glu->xlusup;
106
nzlumax = Glu->nzlumax;
108
jsupno = supno[jcol];
111
* For each nonz supernode segment of U[*,j] in topological order
114
for (ksub = 0; ksub < nseg; ksub++) {
118
ksupno = supno[krep];
119
if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
121
fsupc = xsup[ksupno];
122
fst_col = SUPERLU_MAX ( fsupc, fpanelc );
124
/* Distance from the current supernode to the current panel;
125
d_fsupc=0 if fsupc > fpanelc. */
126
d_fsupc = fst_col - fsupc;
128
luptr = xlusup[fst_col] + d_fsupc;
129
lptr = xlsub[fsupc] + d_fsupc;
132
kfnz = SUPERLU_MAX ( kfnz, fpanelc );
134
segsze = krep - kfnz + 1;
135
nsupc = krep - fst_col + 1;
136
nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
137
nrow = nsupr - d_fsupc - nsupc;
138
krep_ind = lptr + nsupc - 1;
140
ops[TRSV] += segsze * (segsze - 1);
141
ops[GEMV] += 2 * nrow * segsze;
145
* Case 1: Update U-segment of size 1 -- col-col update
148
ukj = dense[lsub[krep_ind]];
149
luptr += nsupr*(nsupc-1) + nsupc;
151
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
153
dense[irow] -= ukj*lusup[luptr];
157
} else if ( segsze <= 3 ) {
158
ukj = dense[lsub[krep_ind]];
159
luptr += nsupr*(nsupc-1) + nsupc-1;
160
ukj1 = dense[lsub[krep_ind - 1]];
161
luptr1 = luptr - nsupr;
163
if ( segsze == 2 ) { /* Case 2: 2cols-col update */
164
ukj -= ukj1 * lusup[luptr1];
165
dense[lsub[krep_ind]] = ukj;
166
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
170
dense[irow] -= ( ukj*lusup[luptr]
171
+ ukj1*lusup[luptr1] );
173
} else { /* Case 3: 3cols-col update */
174
ukj2 = dense[lsub[krep_ind - 2]];
175
luptr2 = luptr1 - nsupr;
176
ukj1 -= ukj2 * lusup[luptr2-1];
177
ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
178
dense[lsub[krep_ind]] = ukj;
179
dense[lsub[krep_ind-1]] = ukj1;
180
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
185
dense[irow] -= ( ukj*lusup[luptr]
186
+ ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
194
* Case: sup-col update
195
* Perform a triangular solve and block update,
196
* then scatter the result of sup-col update to dense
199
no_zeros = kfnz - fst_col;
201
/* Copy U[*,j] segment from dense[*] to tempv[*] */
202
isub = lptr + no_zeros;
203
for (i = 0; i < segsze; i++) {
205
tempv[i] = dense[irow];
209
/* Dense triangular solve -- start effective triangle */
210
luptr += nsupr * no_zeros + no_zeros;
212
#ifdef USE_VENDOR_BLAS
214
STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
215
&nsupr, tempv, &incx );
217
dtrsv_( "L", "N", "U", &segsze, &lusup[luptr],
218
&nsupr, tempv, &incx );
220
luptr += segsze; /* Dense matrix-vector */
221
tempv1 = &tempv[segsze];
225
SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
226
&nsupr, tempv, &incx, &beta, tempv1, &incy );
228
dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
229
&nsupr, tempv, &incx, &beta, tempv1, &incy );
232
dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
234
luptr += segsze; /* Dense matrix-vector */
235
tempv1 = &tempv[segsze];
236
dmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
240
/* Scatter tempv[] into SPA dense[] as a temporary storage */
241
isub = lptr + no_zeros;
242
for (i = 0; i < segsze; i++) {
244
dense[irow] = tempv[i];
249
/* Scatter tempv1[] into SPA dense[] */
250
for (i = 0; i < nrow; i++) {
252
dense[irow] -= tempv1[i];
258
} /* if jsupno ... */
260
} /* for each segment... */
263
* Process the supernodal portion of L\U[*,j]
265
nextlu = xlusup[jcol];
266
fsupc = xsup[jsupno];
268
/* Copy the SPA dense into L\U[*,j] */
269
new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
270
while ( new_next > nzlumax ) {
271
if (mem_error = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
277
for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
279
lusup[nextlu] = dense[irow];
284
xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
286
/* For more updates within the panel (also within the current supernode),
287
* should start from the first column of the panel, or the first column
288
* of the supernode, whichever is bigger. There are 2 cases:
289
* 1) fsupc < fpanelc, then fst_col := fpanelc
290
* 2) fsupc >= fpanelc, then fst_col := fsupc
292
fst_col = SUPERLU_MAX ( fsupc, fpanelc );
294
if ( fst_col < jcol ) {
296
/* Distance between the current supernode and the current panel.
297
d_fsupc=0 if fsupc >= fpanelc. */
298
d_fsupc = fst_col - fsupc;
300
lptr = xlsub[fsupc] + d_fsupc;
301
luptr = xlusup[fst_col] + d_fsupc;
302
nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
303
nsupc = jcol - fst_col; /* Excluding jcol */
304
nrow = nsupr - d_fsupc - nsupc;
306
/* Points to the beginning of jcol in snode L\U(jsupno) */
307
ufirst = xlusup[jcol] + d_fsupc;
309
ops[TRSV] += nsupc * (nsupc - 1);
310
ops[GEMV] += 2 * nrow * nsupc;
312
#ifdef USE_VENDOR_BLAS
314
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
315
&nsupr, &lusup[ufirst], &incx );
317
dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr],
318
&nsupr, &lusup[ufirst], &incx );
321
alpha = none; beta = one; /* y := beta*y + alpha*A*x */
324
SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
325
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
327
dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
328
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
331
dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
333
dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
334
&lusup[ufirst], tempv );
336
/* Copy updates from tempv[*] into lusup[*] */
337
isub = ufirst + nsupc;
338
for (i = 0; i < nrow; i++) {
339
lusup[isub] -= tempv[i];
347
} /* if fst_col < jcol ... */