3
* -- SuperLU routine (version 3.0) --
4
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
5
* and Lawrence Berkeley National Lab.
10
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
12
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
13
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
15
Permission is hereby granted to use or copy this program for any
16
purpose, provided the above notices are retained on all copies.
17
Permission to modify the code and to distribute modified code is
18
granted, provided the above notices are retained, and a notice that
19
the code was modified is included with the above copyright notice.
26
dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
27
double *nzval, int *rowind, int *colptr,
28
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
37
A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
38
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
41
Astore->nzval = nzval;
42
Astore->rowind = rowind;
43
Astore->colptr = colptr;
47
dCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
48
double *nzval, int *colind, int *rowptr,
49
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
58
A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
59
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
62
Astore->nzval = nzval;
63
Astore->colind = colind;
64
Astore->rowptr = rowptr;
67
/* Copy matrix A into matrix B. */
69
dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
71
NCformat *Astore, *Bstore;
78
B->ncol = ncol = A->ncol;
79
Astore = (NCformat *) A->Store;
80
Bstore = (NCformat *) B->Store;
81
Bstore->nnz = nnz = Astore->nnz;
82
for (i = 0; i < nnz; ++i)
83
((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
84
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
85
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
90
dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
91
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
100
X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
101
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
102
Xstore = (DNformat *) X->Store;
104
Xstore->nzval = (double *) x;
108
dCopy_Dense_Matrix(int M, int N, double *X, int ldx,
116
* Copies a two-dimensional matrix X to another matrix Y.
120
for (j = 0; j < N; ++j)
121
for (i = 0; i < M; ++i)
122
Y[i + j*ldy] = X[i + j*ldx];
126
dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
127
double *nzval, int *nzval_colptr, int *rowind,
128
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
129
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
138
L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
139
if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
142
Lstore->nsuper = col_to_sup[n];
143
Lstore->nzval = nzval;
144
Lstore->nzval_colptr = nzval_colptr;
145
Lstore->rowind = rowind;
146
Lstore->rowind_colptr = rowind_colptr;
147
Lstore->col_to_sup = col_to_sup;
148
Lstore->sup_to_col = sup_to_col;
154
* Convert a row compressed storage into a column compressed storage.
157
dCompRow_to_CompCol(int m, int n, int nnz,
158
double *a, int *colind, int *rowptr,
159
double **at, int **rowind, int **colptr)
161
register int i, j, col, relpos;
164
/* Allocate storage for another copy of the matrix. */
165
*at = (double *) doubleMalloc(nnz);
166
*rowind = (int *) intMalloc(nnz);
167
*colptr = (int *) intMalloc(n+1);
168
marker = (int *) intCalloc(n);
170
/* Get counts of each column of A, and set up column pointers */
171
for (i = 0; i < m; ++i)
172
for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
174
for (j = 0; j < n; ++j) {
175
(*colptr)[j+1] = (*colptr)[j] + marker[j];
176
marker[j] = (*colptr)[j];
179
/* Transfer the matrix into the compressed column storage. */
180
for (i = 0; i < m; ++i) {
181
for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
183
relpos = marker[col];
184
(*rowind)[relpos] = i;
185
(*at)[relpos] = a[j];
190
SUPERLU_FREE(marker);
195
dPrint_CompCol_Matrix(char *what, SuperMatrix *A)
201
printf("\nCompCol matrix %s:\n", what);
202
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
204
Astore = (NCformat *) A->Store;
205
dp = (double *) Astore->nzval;
206
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
208
for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
209
printf("\nrowind: ");
210
for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]);
211
printf("\ncolptr: ");
212
for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]);
218
dPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
221
register int i, j, k, c, d, n, nsup;
223
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
225
printf("\nSuperNode matrix %s:\n", what);
226
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
228
Astore = (SCformat *) A->Store;
229
dp = (double *) Astore->nzval;
230
col_to_sup = Astore->col_to_sup;
231
sup_to_col = Astore->sup_to_col;
232
rowind_colptr = Astore->rowind_colptr;
233
rowind = Astore->rowind;
234
printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
235
A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
237
for (k = 0; k <= Astore->nsuper; ++k) {
239
nsup = sup_to_col[k+1] - c;
240
for (j = c; j < c + nsup; ++j) {
241
d = Astore->nzval_colptr[j];
242
for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
243
printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
248
for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]);
250
printf("\nnzval_colptr: ");
251
for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]);
252
printf("\nrowind: ");
253
for (i = 0; i < Astore->rowind_colptr[n]; ++i)
254
printf("%d ", Astore->rowind[i]);
255
printf("\nrowind_colptr: ");
256
for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]);
257
printf("\ncol_to_sup: ");
258
for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]);
259
printf("\nsup_to_col: ");
260
for (i = 0; i <= Astore->nsuper+1; ++i)
261
printf("%d ", sup_to_col[i]);
267
dPrint_Dense_Matrix(char *what, SuperMatrix *A)
273
printf("\nDense matrix %s:\n", what);
274
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
275
Astore = (DNformat *) A->Store;
276
dp = (double *) Astore->nzval;
277
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
279
for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
285
* Diagnostic print of column "jcol" in the U/L factor.
288
dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
303
xlusup = Glu->xlusup;
309
printf("col %d: pivrow %d, supno %d, xprune %d\n",
310
jcol, pivrow, supno[jcol], xprune[jcol]);
312
printf("\tU-col:\n");
313
for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
314
printf("\t%d%10.4f\n", usub[i], ucol[i]);
315
printf("\tL-col in rectangular snode:\n");
316
fsupc = xsup[supno[jcol]]; /* first col of the snode */
319
while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
320
printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
328
* Check whether tempv[] == 0. This should be true before and after
329
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
331
void dcheck_tempv(int n, double *tempv)
335
for (i = 0; i < n; i++) {
338
fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
339
ABORT("dcheck_tempv");
346
dGenXtrue(int n, int nrhs, double *x, int ldx)
349
for (j = 0; j < nrhs; ++j)
350
for (i = 0; i < n; ++i) {
351
x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
356
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
359
dFillRHS(trans_t trans, int nrhs, double *x, int ldx,
360
SuperMatrix *A, SuperMatrix *B)
372
Aval = (double *) Astore->nzval;
377
if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
378
else *(unsigned char *)transc = 'T';
380
sp_dgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
381
x, ldx, zero, rhs, ldc);
386
* Fills a double precision array with a given value.
389
dfill(double *a, int alen, double dval)
392
for (i = 0; i < alen; i++) a[i] = dval;
398
* Check the inf-norm of the error vector
400
void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
404
double *Xmat, *soln_work;
408
Xmat = Xstore->nzval;
410
for (j = 0; j < nrhs; j++) {
411
soln_work = &Xmat[j*Xstore->lda];
413
for (i = 0; i < X->nrow; i++) {
414
err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
415
xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
418
printf("||X - Xtrue||/||X|| = %e\n", err);
424
/* Print performance of the code. */
426
dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
427
double rpg, double rcond, double *ferr,
428
double *berr, char *equed, SuperLUStat_t *stat)
438
if ( utime[FACT] != 0. )
439
printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
440
ops[FACT]*1e-6/utime[FACT]);
441
printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]);
442
if ( utime[SOLVE] != 0. )
443
printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
444
ops[SOLVE]*1e-6/utime[SOLVE]);
446
Lstore = (SCformat *) L->Store;
447
Ustore = (NCformat *) U->Store;
448
printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
449
printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
450
printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
452
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
453
mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
454
mem_usage->expansions);
456
printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
457
printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
458
utime[FACT], ops[FACT]*1e-6/utime[FACT],
459
utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
460
utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
462
printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
463
printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
464
rpg, rcond, ferr[0], berr[0], equed);
471
print_double_vec(char *what, int n, double *vec)
474
printf("%s: n %d\n", what, n);
475
for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);