3
* -- SuperLU routine (version 3.0) --
4
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
5
* and Lawrence Berkeley National Lab.
10
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
12
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
13
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
15
Permission is hereby granted to use or copy this program for any
16
purpose, provided the above notices are retained on all copies.
17
Permission to modify the code and to distribute modified code is
18
granted, provided the above notices are retained, and a notice that
19
the code was modified is included with the above copyright notice.
26
cCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
27
complex *nzval, int *rowind, int *colptr,
28
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
37
A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
38
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
41
Astore->nzval = nzval;
42
Astore->rowind = rowind;
43
Astore->colptr = colptr;
47
cCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
48
complex *nzval, int *colind, int *rowptr,
49
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
58
A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
59
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
62
Astore->nzval = nzval;
63
Astore->colind = colind;
64
Astore->rowptr = rowptr;
67
/* Copy matrix A into matrix B. */
69
cCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
71
NCformat *Astore, *Bstore;
78
B->ncol = ncol = A->ncol;
79
Astore = (NCformat *) A->Store;
80
Bstore = (NCformat *) B->Store;
81
Bstore->nnz = nnz = Astore->nnz;
82
for (i = 0; i < nnz; ++i)
83
((complex *)Bstore->nzval)[i] = ((complex *)Astore->nzval)[i];
84
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
85
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
90
cCreate_Dense_Matrix(SuperMatrix *X, int m, int n, complex *x, int ldx,
91
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
100
X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
101
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
102
Xstore = (DNformat *) X->Store;
104
Xstore->nzval = (complex *) x;
108
cCopy_Dense_Matrix(int M, int N, complex *X, int ldx,
116
* Copies a two-dimensional matrix X to another matrix Y.
120
for (j = 0; j < N; ++j)
121
for (i = 0; i < M; ++i)
122
Y[i + j*ldy] = X[i + j*ldx];
126
cCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
127
complex *nzval, int *nzval_colptr, int *rowind,
128
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
129
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
138
L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
139
if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
142
Lstore->nsuper = col_to_sup[n];
143
Lstore->nzval = nzval;
144
Lstore->nzval_colptr = nzval_colptr;
145
Lstore->rowind = rowind;
146
Lstore->rowind_colptr = rowind_colptr;
147
Lstore->col_to_sup = col_to_sup;
148
Lstore->sup_to_col = sup_to_col;
154
* Convert a row compressed storage into a column compressed storage.
157
cCompRow_to_CompCol(int m, int n, int nnz,
158
complex *a, int *colind, int *rowptr,
159
complex **at, int **rowind, int **colptr)
161
register int i, j, col, relpos;
164
/* Allocate storage for another copy of the matrix. */
165
*at = (complex *) complexMalloc(nnz);
166
*rowind = (int *) intMalloc(nnz);
167
*colptr = (int *) intMalloc(n+1);
168
marker = (int *) intCalloc(n);
170
/* Get counts of each column of A, and set up column pointers */
171
for (i = 0; i < m; ++i)
172
for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
174
for (j = 0; j < n; ++j) {
175
(*colptr)[j+1] = (*colptr)[j] + marker[j];
176
marker[j] = (*colptr)[j];
179
/* Transfer the matrix into the compressed column storage. */
180
for (i = 0; i < m; ++i) {
181
for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
183
relpos = marker[col];
184
(*rowind)[relpos] = i;
185
(*at)[relpos] = a[j];
190
SUPERLU_FREE(marker);
195
cPrint_CompCol_Matrix(char *what, SuperMatrix *A)
201
printf("\nCompCol matrix %s:\n", what);
202
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
204
Astore = (NCformat *) A->Store;
205
dp = (float *) Astore->nzval;
206
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
208
for (i = 0; i < 2*Astore->colptr[n]; ++i) printf("%f ", dp[i]);
209
printf("\nrowind: ");
210
for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]);
211
printf("\ncolptr: ");
212
for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]);
218
cPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
221
register int i, j, k, c, d, n, nsup;
223
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
225
printf("\nSuperNode matrix %s:\n", what);
226
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
228
Astore = (SCformat *) A->Store;
229
dp = (float *) Astore->nzval;
230
col_to_sup = Astore->col_to_sup;
231
sup_to_col = Astore->sup_to_col;
232
rowind_colptr = Astore->rowind_colptr;
233
rowind = Astore->rowind;
234
printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
235
A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
237
for (k = 0; k <= Astore->nsuper; ++k) {
239
nsup = sup_to_col[k+1] - c;
240
for (j = c; j < c + nsup; ++j) {
241
d = Astore->nzval_colptr[j];
242
for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
243
printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]);
248
for (i = 0; i < 2*Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]);
250
printf("\nnzval_colptr: ");
251
for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]);
252
printf("\nrowind: ");
253
for (i = 0; i < Astore->rowind_colptr[n]; ++i)
254
printf("%d ", Astore->rowind[i]);
255
printf("\nrowind_colptr: ");
256
for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]);
257
printf("\ncol_to_sup: ");
258
for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]);
259
printf("\nsup_to_col: ");
260
for (i = 0; i <= Astore->nsuper+1; ++i)
261
printf("%d ", sup_to_col[i]);
267
cPrint_Dense_Matrix(char *what, SuperMatrix *A)
273
printf("\nDense matrix %s:\n", what);
274
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
275
Astore = (DNformat *) A->Store;
276
dp = (float *) Astore->nzval;
277
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
279
for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i]);
285
* Diagnostic print of column "jcol" in the U/L factor.
288
cprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
303
xlusup = Glu->xlusup;
309
printf("col %d: pivrow %d, supno %d, xprune %d\n",
310
jcol, pivrow, supno[jcol], xprune[jcol]);
312
printf("\tU-col:\n");
313
for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
314
printf("\t%d%10.4f, %10.4f\n", usub[i], ucol[i].r, ucol[i].i);
315
printf("\tL-col in rectangular snode:\n");
316
fsupc = xsup[supno[jcol]]; /* first col of the snode */
319
while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
320
printf("\t%d\t%10.4f, %10.4f\n", lsub[i], lusup[k].r, lusup[k].i);
328
* Check whether tempv[] == 0. This should be true before and after
329
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
331
void ccheck_tempv(int n, complex *tempv)
335
for (i = 0; i < n; i++) {
336
if ((tempv[i].r != 0.0) || (tempv[i].i != 0.0))
338
fprintf(stderr,"tempv[%d] = {%f, %f}\n", i, tempv[i].r, tempv[i].i);
339
ABORT("ccheck_tempv");
346
cGenXtrue(int n, int nrhs, complex *x, int ldx)
349
for (j = 0; j < nrhs; ++j)
350
for (i = 0; i < n; ++i) {
351
x[i + j*ldx].r = 1.0;
352
x[i + j*ldx].i = 0.0;
357
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
360
cFillRHS(trans_t trans, int nrhs, complex *x, int ldx,
361
SuperMatrix *A, SuperMatrix *B)
367
complex one = {1.0, 0.0};
368
complex zero = {0.0, 0.0};
373
Aval = (complex *) Astore->nzval;
378
if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
379
else *(unsigned char *)transc = 'T';
381
sp_cgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
382
x, ldx, zero, rhs, ldc);
387
* Fills a complex precision array with a given value.
390
cfill(complex *a, int alen, complex dval)
393
for (i = 0; i < alen; i++) a[i] = dval;
399
* Check the inf-norm of the error vector
401
void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue)
405
complex *Xmat, *soln_work;
410
Xmat = Xstore->nzval;
412
for (j = 0; j < nrhs; j++) {
413
soln_work = &Xmat[j*Xstore->lda];
415
for (i = 0; i < X->nrow; i++) {
416
c_sub(&temp, &soln_work[i], &xtrue[i]);
417
err = SUPERLU_MAX(err, c_abs(&temp));
418
xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i]));
421
printf("||X - Xtrue||/||X|| = %e\n", err);
427
/* Print performance of the code. */
429
cPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
430
float rpg, float rcond, float *ferr,
431
float *berr, char *equed, SuperLUStat_t *stat)
441
if ( utime[FACT] != 0. )
442
printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
443
ops[FACT]*1e-6/utime[FACT]);
444
printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]);
445
if ( utime[SOLVE] != 0. )
446
printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
447
ops[SOLVE]*1e-6/utime[SOLVE]);
449
Lstore = (SCformat *) L->Store;
450
Ustore = (NCformat *) U->Store;
451
printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
452
printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
453
printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
455
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
456
mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
457
mem_usage->expansions);
459
printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
460
printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
461
utime[FACT], ops[FACT]*1e-6/utime[FACT],
462
utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
463
utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
465
printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
466
printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
467
rpg, rcond, ferr[0], berr[0], equed);
474
print_complex_vec(char *what, int n, complex *vec)
477
printf("%s: n %d\n", what, n);
478
for (i = 0; i < n; ++i) printf("%d\t%f%f\n", i, vec[i].r, vec[i].i);