4
* -- SuperLU routine (version 2.0) --
5
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
6
* and Lawrence Berkeley National Lab.
11
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
13
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
16
Permission is hereby granted to use or copy this program for any
17
purpose, provided the above notices are retained on all copies.
18
Permission to modify the code and to distribute modified code is
19
granted, provided the above notices are retained, and a notice that
20
the code was modified is included with the above copyright notice.
31
void zusolve(int, int, doublecomplex*, doublecomplex*);
32
void zlsolve(int, int, doublecomplex*, doublecomplex*);
33
void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
37
/* Return value: 0 - successful return
38
* > 0 - number of bytes allocated when run out of space
42
const int jcol, /* in */
43
const int nseg, /* in */
44
doublecomplex *dense, /* in */
45
doublecomplex *tempv, /* working array */
48
int fpanelc, /* in -- first column in the current panel */
49
GlobalLU_t *Glu /* modified */
55
* Performs numeric block updates (sup-col) in topological order.
56
* It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
57
* Special processing on the supernodal portion of L\U[*,j]
61
_fcd ftcs1 = _cptofcd("L", strlen("L")),
62
ftcs2 = _cptofcd("N", strlen("N")),
63
ftcs3 = _cptofcd("U", strlen("U"));
65
int incx = 1, incy = 1;
66
doublecomplex alpha, beta;
68
/* krep = representative of current k-th supernode
69
* fsupc = first supernodal column
70
* nsupc = no of columns in supernode
71
* nsupr = no of rows in supernode (used as leading dimension)
72
* luptr = location of supernodal LU-block in storage
73
* kfnz = first nonz in the k-th supernodal segment
74
* no_zeros = no of leading zeros in a supernodal U-segment
76
doublecomplex ukj, ukj1, ukj2;
77
int luptr, luptr1, luptr2;
78
int fsupc, nsupc, nsupr, segsze;
79
int nrow; /* No of rows in the matrix of matrix-vector */
80
int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
81
register int lptr, kfnz, isub, irow, i;
82
register int no_zeros, new_next;
84
int fst_col; /* First column within small LU update */
85
int d_fsupc; /* Distance between the first column of the current
86
panel and the first column of the current snode. */
92
doublecomplex *tempv1;
93
doublecomplex zero = {0.0, 0.0};
94
doublecomplex one = {1.0, 0.0};
95
doublecomplex none = {-1.0, 0.0};
96
doublecomplex comp_temp, comp_temp1;
98
extern SuperLUStat_t SuperLUStat;
99
flops_t *ops = SuperLUStat.ops;
106
xlusup = Glu->xlusup;
107
nzlumax = Glu->nzlumax;
109
jsupno = supno[jcol];
112
* For each nonz supernode segment of U[*,j] in topological order
115
for (ksub = 0; ksub < nseg; ksub++) {
119
ksupno = supno[krep];
120
if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
122
fsupc = xsup[ksupno];
123
fst_col = SUPERLU_MAX ( fsupc, fpanelc );
125
/* Distance from the current supernode to the current panel;
126
d_fsupc=0 if fsupc > fpanelc. */
127
d_fsupc = fst_col - fsupc;
129
luptr = xlusup[fst_col] + d_fsupc;
130
lptr = xlsub[fsupc] + d_fsupc;
133
kfnz = SUPERLU_MAX ( kfnz, fpanelc );
135
segsze = krep - kfnz + 1;
136
nsupc = krep - fst_col + 1;
137
nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
138
nrow = nsupr - d_fsupc - nsupc;
139
krep_ind = lptr + nsupc - 1;
141
ops[TRSV] += 4 * segsze * (segsze - 1);
142
ops[GEMV] += 8 * nrow * segsze;
147
* Case 1: Update U-segment of size 1 -- col-col update
150
ukj = dense[lsub[krep_ind]];
151
luptr += nsupr*(nsupc-1) + nsupc;
153
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
155
zz_mult(&comp_temp, &ukj, &lusup[luptr]);
156
z_sub(&dense[irow], &dense[irow], &comp_temp);
160
} else if ( segsze <= 3 ) {
161
ukj = dense[lsub[krep_ind]];
162
luptr += nsupr*(nsupc-1) + nsupc-1;
163
ukj1 = dense[lsub[krep_ind - 1]];
164
luptr1 = luptr - nsupr;
166
if ( segsze == 2 ) { /* Case 2: 2cols-col update */
167
zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
168
z_sub(&ukj, &ukj, &comp_temp);
169
dense[lsub[krep_ind]] = ukj;
170
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
174
zz_mult(&comp_temp, &ukj, &lusup[luptr]);
175
zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
176
z_add(&comp_temp, &comp_temp, &comp_temp1);
177
z_sub(&dense[irow], &dense[irow], &comp_temp);
179
} else { /* Case 3: 3cols-col update */
180
ukj2 = dense[lsub[krep_ind - 2]];
181
luptr2 = luptr1 - nsupr;
182
zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
183
z_sub(&ukj1, &ukj1, &comp_temp);
185
zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
186
zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
187
z_add(&comp_temp, &comp_temp, &comp_temp1);
188
z_sub(&ukj, &ukj, &comp_temp);
190
dense[lsub[krep_ind]] = ukj;
191
dense[lsub[krep_ind-1]] = ukj1;
192
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
197
zz_mult(&comp_temp, &ukj, &lusup[luptr]);
198
zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
199
z_add(&comp_temp, &comp_temp, &comp_temp1);
200
zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
201
z_add(&comp_temp, &comp_temp, &comp_temp1);
202
z_sub(&dense[irow], &dense[irow], &comp_temp);
209
* Case: sup-col update
210
* Perform a triangular solve and block update,
211
* then scatter the result of sup-col update to dense
214
no_zeros = kfnz - fst_col;
216
/* Copy U[*,j] segment from dense[*] to tempv[*] */
217
isub = lptr + no_zeros;
218
for (i = 0; i < segsze; i++) {
220
tempv[i] = dense[irow];
224
/* Dense triangular solve -- start effective triangle */
225
luptr += nsupr * no_zeros + no_zeros;
227
#ifdef USE_VENDOR_BLAS
229
CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
230
&nsupr, tempv, &incx );
232
ztrsv_( "L", "N", "U", &segsze, &lusup[luptr],
233
&nsupr, tempv, &incx );
235
luptr += segsze; /* Dense matrix-vector */
236
tempv1 = &tempv[segsze];
240
CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
241
&nsupr, tempv, &incx, &beta, tempv1, &incy );
243
zgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
244
&nsupr, tempv, &incx, &beta, tempv1, &incy );
247
zlsolve ( nsupr, segsze, &lusup[luptr], tempv );
249
luptr += segsze; /* Dense matrix-vector */
250
tempv1 = &tempv[segsze];
251
zmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
255
/* Scatter tempv[] into SPA dense[] as a temporary storage */
256
isub = lptr + no_zeros;
257
for (i = 0; i < segsze; i++) {
259
dense[irow] = tempv[i];
264
/* Scatter tempv1[] into SPA dense[] */
265
for (i = 0; i < nrow; i++) {
267
z_sub(&dense[irow], &dense[irow], &tempv1[i]);
273
} /* if jsupno ... */
275
} /* for each segment... */
278
* Process the supernodal portion of L\U[*,j]
280
nextlu = xlusup[jcol];
281
fsupc = xsup[jsupno];
283
/* Copy the SPA dense into L\U[*,j] */
284
new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
285
while ( new_next > nzlumax ) {
286
if (mem_error = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
292
for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
294
lusup[nextlu] = dense[irow];
299
xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
301
/* For more updates within the panel (also within the current supernode),
302
* should start from the first column of the panel, or the first column
303
* of the supernode, whichever is bigger. There are 2 cases:
304
* 1) fsupc < fpanelc, then fst_col := fpanelc
305
* 2) fsupc >= fpanelc, then fst_col := fsupc
307
fst_col = SUPERLU_MAX ( fsupc, fpanelc );
309
if ( fst_col < jcol ) {
311
/* Distance between the current supernode and the current panel.
312
d_fsupc=0 if fsupc >= fpanelc. */
313
d_fsupc = fst_col - fsupc;
315
lptr = xlsub[fsupc] + d_fsupc;
316
luptr = xlusup[fst_col] + d_fsupc;
317
nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
318
nsupc = jcol - fst_col; /* Excluding jcol */
319
nrow = nsupr - d_fsupc - nsupc;
321
/* Points to the beginning of jcol in snode L\U(jsupno) */
322
ufirst = xlusup[jcol] + d_fsupc;
324
ops[TRSV] += 4 * nsupc * (nsupc - 1);
325
ops[GEMV] += 8 * nrow * nsupc;
327
#ifdef USE_VENDOR_BLAS
329
CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
330
&nsupr, &lusup[ufirst], &incx );
332
ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr],
333
&nsupr, &lusup[ufirst], &incx );
336
alpha = none; beta = one; /* y := beta*y + alpha*A*x */
339
CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
340
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
342
zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
343
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
346
zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
348
zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
349
&lusup[ufirst], tempv );
351
/* Copy updates from tempv[*] into lusup[*] */
352
isub = ufirst + nsupc;
353
for (i = 0; i < nrow; i++) {
354
z_sub(&lusup[isub], &lusup[isub], &tempv[i]);
362
} /* if fst_col < jcol ... */