4
* -- SuperLU routine (version 2.0) --
5
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
6
* and Lawrence Berkeley National Lab.
11
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
13
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
16
Permission is hereby granted to use or copy this program for any
17
purpose, provided the above notices are retained on all copies.
18
Permission to modify the code and to distribute modified code is
19
granted, provided the above notices are retained, and a notice that
20
the code was modified is included with the above copyright notice.
28
dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
29
double *nzval, int *rowind, int *colptr,
30
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
39
A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
40
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
43
Astore->nzval = nzval;
44
Astore->rowind = rowind;
45
Astore->colptr = colptr;
49
dCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
50
double *nzval, int *colind, int *rowptr,
51
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
60
A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
61
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
64
Astore->nzval = nzval;
65
Astore->colind = colind;
66
Astore->rowptr = rowptr;
69
/* Copy matrix A into matrix B. */
71
dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
73
NCformat *Astore, *Bstore;
80
B->ncol = ncol = A->ncol;
81
Astore = (NCformat *) A->Store;
82
Bstore = (NCformat *) B->Store;
83
Bstore->nnz = nnz = Astore->nnz;
84
for (i = 0; i < nnz; ++i)
85
((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
86
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
87
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
92
dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
93
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
102
X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
103
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
104
Xstore = (DNformat *) X->Store;
106
Xstore->nzval = (double *) x;
110
dCopy_Dense_Matrix(int M, int N, double *X, int ldx,
118
* Copies a two-dimensional matrix X to another matrix Y.
122
for (j = 0; j < N; ++j)
123
for (i = 0; i < M; ++i)
124
Y[i + j*ldy] = X[i + j*ldx];
128
dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
129
double *nzval, int *nzval_colptr, int *rowind,
130
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
131
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
140
L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
141
if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
144
Lstore->nsuper = col_to_sup[n];
145
Lstore->nzval = nzval;
146
Lstore->nzval_colptr = nzval_colptr;
147
Lstore->rowind = rowind;
148
Lstore->rowind_colptr = rowind_colptr;
149
Lstore->col_to_sup = col_to_sup;
150
Lstore->sup_to_col = sup_to_col;
156
* Convert a row compressed storage into a column compressed storage.
159
dCompRow_to_CompCol(int m, int n, int nnz,
160
double *a, int *colind, int *rowptr,
161
double **at, int **rowind, int **colptr)
163
register int i, j, col, relpos;
166
/* Allocate storage for another copy of the matrix. */
167
*at = (double *) doubleMalloc(nnz);
168
*rowind = (int *) intMalloc(nnz);
169
*colptr = (int *) intMalloc(n+1);
170
marker = (int *) intCalloc(n);
172
/* Get counts of each column of A, and set up column pointers */
173
for (i = 0; i < m; ++i)
174
for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
176
for (j = 0; j < n; ++j) {
177
(*colptr)[j+1] = (*colptr)[j] + marker[j];
178
marker[j] = (*colptr)[j];
181
/* Transfer the matrix into the compressed column storage. */
182
for (i = 0; i < m; ++i) {
183
for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
185
relpos = marker[col];
186
(*rowind)[relpos] = i;
187
(*at)[relpos] = a[j];
192
SUPERLU_FREE(marker);
197
dPrint_CompCol_Matrix(char *what, SuperMatrix *A)
203
printf("\nCompCol matrix %s:\n", what);
204
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
206
Astore = (NCformat *) A->Store;
207
dp = (double *) Astore->nzval;
208
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
210
for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
211
printf("\nrowind: ");
212
for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]);
213
printf("\ncolptr: ");
214
for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]);
220
dPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
223
register int i, j, k, c, d, n, nsup;
225
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
227
printf("\nSuperNode matrix %s:\n", what);
228
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
230
Astore = (SCformat *) A->Store;
231
dp = (double *) Astore->nzval;
232
col_to_sup = Astore->col_to_sup;
233
sup_to_col = Astore->sup_to_col;
234
rowind_colptr = Astore->rowind_colptr;
235
rowind = Astore->rowind;
236
printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
237
A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
239
for (k = 0; k <= Astore->nsuper+1; ++k) {
241
nsup = sup_to_col[k+1] - c;
242
for (j = c; j < c + nsup; ++j) {
243
d = Astore->nzval_colptr[j];
244
for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
245
printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
250
for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]);
252
printf("\nnzval_colptr: ");
253
for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]);
254
printf("\nrowind: ");
255
for (i = 0; i < Astore->rowind_colptr[n]; ++i)
256
printf("%d ", Astore->rowind[i]);
257
printf("\nrowind_colptr: ");
258
for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]);
259
printf("\ncol_to_sup: ");
260
for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]);
261
printf("\nsup_to_col: ");
262
for (i = 0; i <= Astore->nsuper+1; ++i)
263
printf("%d ", sup_to_col[i]);
269
dPrint_Dense_Matrix(char *what, SuperMatrix *A)
275
printf("\nDense matrix %s:\n", what);
276
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
277
Astore = (DNformat *) A->Store;
278
dp = (double *) Astore->nzval;
279
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
281
for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
287
* Diagnostic print of column "jcol" in the U/L factor.
290
dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
305
xlusup = Glu->xlusup;
311
printf("col %d: pivrow %d, supno %d, xprune %d\n",
312
jcol, pivrow, supno[jcol], xprune[jcol]);
314
printf("\tU-col:\n");
315
for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
316
printf("\t%d%10.4f\n", usub[i], ucol[i]);
317
printf("\tL-col in rectangular snode:\n");
318
fsupc = xsup[supno[jcol]]; /* first col of the snode */
321
while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
322
printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
330
* Check whether tempv[] == 0. This should be true before and after
331
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
333
void dcheck_tempv(int n, double *tempv)
337
for (i = 0; i < n; i++) {
340
fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
341
ABORT("dcheck_tempv");
348
dGenXtrue(int n, int nrhs, double *x, int ldx)
351
for (j = 0; j < nrhs; ++j)
352
for (i = 0; i < n; ++i) {
353
x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
358
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
361
dFillRHS(char *trans, int nrhs, double *x, int ldx,
362
SuperMatrix *A, SuperMatrix *B)
373
Aval = (double *) Astore->nzval;
378
sp_dgemm(trans, "N", A->nrow, nrhs, A->ncol, one, A,
379
x, ldx, zero, rhs, ldc);
384
* Fills a double precision array with a given value.
387
dfill(double *a, int alen, double dval)
390
for (i = 0; i < alen; i++) a[i] = dval;
396
* Check the inf-norm of the error vector
398
void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
402
double *Xmat, *soln_work;
406
Xmat = Xstore->nzval;
408
for (j = 0; j < nrhs; j++) {
409
soln_work = &Xmat[j*Xstore->lda];
411
for (i = 0; i < X->nrow; i++) {
412
err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
413
xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
416
printf("||X - Xtrue||/||X|| = %e\n", err);
422
/* Print performance of the code. */
424
dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
425
double rpg, double rcond, double *ferr,
426
double *berr, char *equed)
430
extern SuperLUStat_t SuperLUStat;
434
utime = SuperLUStat.utime;
435
ops = SuperLUStat.ops;
437
if ( utime[FACT] != 0. )
438
printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
439
ops[FACT]*1e-6/utime[FACT]);
440
printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]);
441
if ( utime[SOLVE] != 0. )
442
printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
443
ops[SOLVE]*1e-6/utime[SOLVE]);
445
Lstore = (SCformat *) L->Store;
446
Ustore = (NCformat *) U->Store;
447
printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
448
printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
449
printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
451
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
452
mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
453
mem_usage->expansions);
455
printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
456
printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
457
utime[FACT], ops[FACT]*1e-6/utime[FACT],
458
utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
459
utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
461
printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
462
printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
463
rpg, rcond, ferr[0], berr[0], equed);
470
print_double_vec(char *what, int n, double *vec)
473
printf("%s: n %d\n", what, n);
474
for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);