3
* -- SuperLU routine (version 3.0) --
4
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
5
* and Lawrence Berkeley National Lab.
12
#define NO_MEMTYPE 4 /* 0: lusup;
16
#define GluIntArray(n) (5 * (n) + 5)
18
/* Internal prototypes */
19
void *zexpand (int *, MemType,int, int, GlobalLU_t *);
20
int zLUWorkInit (int, int, int, int **, doublecomplex **, LU_space_t);
21
void copy_mem_doublecomplex (int, void *, void *);
22
void zStackCompress (GlobalLU_t *);
23
void zSetupSpace (void *, int, LU_space_t *);
24
void *zuser_malloc (int, int);
25
void zuser_free (int, int);
27
/* External prototypes (in memory.c - prec-indep) */
28
extern void copy_mem_int (int, void *, void *);
29
extern void user_bcopy (char *, char *, int);
31
/* Headers for 4 types of dynamatically managed memory */
32
typedef struct e_node {
33
int size; /* length of the memory that has been used */
34
void *mem; /* pointer to the new malloc'd store */
40
int top1; /* grow upward, relative to &array[0] */
41
int top2; /* grow downward */
45
/* Variables local to this file */
46
static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
47
static LU_stack_t stack;
50
/* Macros to manipulate stack */
51
#define StackFull(x) ( x + stack.used >= stack.size )
52
#define NotDoubleAlign(addr) ( (long int)addr & 7 )
53
#define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L )
54
#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
55
(w + 1) * m * sizeof(doublecomplex) )
56
#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
62
* Setup the memory model to be used for factorization.
63
* lwork = 0: use system malloc;
64
* lwork > 0: use user-supplied work[] space.
66
void zSetupSpace(void *work, int lwork, LU_space_t *MemModel)
69
*MemModel = SYSTEM; /* malloc/free */
70
} else if ( lwork > 0 ) {
71
*MemModel = USER; /* user provided space */
74
stack.top2 = (lwork/4)*4; /* must be word addressable */
75
stack.size = stack.top2;
76
stack.array = (void *) work;
82
void *zuser_malloc(int bytes, int which_end)
86
if ( StackFull(bytes) ) return (NULL);
88
if ( which_end == HEAD ) {
89
buf = (char*) stack.array + stack.top1;
93
buf = (char*) stack.array + stack.top2;
101
void zuser_free(int bytes, int which_end)
103
if ( which_end == HEAD ) {
114
* mem_usage consists of the following fields:
116
* The amount of space used in bytes for the L\U data structures.
117
* - total_needed (float)
118
* The amount of space needed in bytes to perform factorization.
120
* Number of memory expansions during the LU factorization.
122
int zQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
126
register int n, iword, dword, panel_size = sp_ienv(1);
132
dword = sizeof(doublecomplex);
135
mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
136
dword + Lstore->rowind_colptr[n] * iword );
137
mem_usage->for_lu += (float)( (n + 1) * iword +
138
Ustore->colptr[n] * (dword + iword) );
140
/* Working storage to support factorization */
141
mem_usage->total_needed = mem_usage->for_lu +
142
(float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
143
(panel_size + 1) * n * dword );
145
mem_usage->expansions = --no_expand;
151
* Allocate storage for the data structures common to all factor routines.
152
* For those unpredictable size, make a guess as FILL * nnz(A).
154
* If lwork = -1, return the estimated amount of space required, plus n;
155
* otherwise, return the amount of space actually allocated when
156
* memory allocation failure occurred.
159
zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
160
int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
161
int **iwork, doublecomplex **dwork)
163
int info, iword, dword;
168
doublecomplex *lusup;
172
int nzlmax, nzumax, nzlumax;
173
int FILL = sp_ienv(6);
178
dword = sizeof(doublecomplex);
181
expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
182
if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
184
if ( fact != SamePattern_SameRowPerm ) {
185
/* Guess for L\U factors */
186
nzumax = nzlumax = FILL * annz;
187
nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
190
return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
191
+ (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
193
zSetupSpace(work, lwork, &Glu->MemModel);
197
printf("zLUMemInit() called: annz %d, MemModel %d\n",
198
annz, Glu->MemModel);
201
/* Integer pointers for L\U factors */
202
if ( Glu->MemModel == SYSTEM ) {
203
xsup = intMalloc(n+1);
204
supno = intMalloc(n+1);
205
xlsub = intMalloc(n+1);
206
xlusup = intMalloc(n+1);
207
xusub = intMalloc(n+1);
209
xsup = (int *)zuser_malloc((n+1) * iword, HEAD);
210
supno = (int *)zuser_malloc((n+1) * iword, HEAD);
211
xlsub = (int *)zuser_malloc((n+1) * iword, HEAD);
212
xlusup = (int *)zuser_malloc((n+1) * iword, HEAD);
213
xusub = (int *)zuser_malloc((n+1) * iword, HEAD);
216
lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
217
ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
218
lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu );
219
usub = (int *) zexpand( &nzumax, USUB, 0, 1, Glu );
221
while ( !lusup || !ucol || !lsub || !usub ) {
222
if ( Glu->MemModel == SYSTEM ) {
228
zuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
233
if ( nzlumax < annz ) {
234
printf("Not enough memory to perform factorization.\n");
235
return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
237
lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
238
ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
239
lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu );
240
usub = (int *) zexpand( &nzumax, USUB, 0, 1, Glu );
244
/* fact == SamePattern_SameRowPerm */
247
xsup = Lstore->sup_to_col;
248
supno = Lstore->col_to_sup;
249
xlsub = Lstore->rowind_colptr;
250
xlusup = Lstore->nzval_colptr;
251
xusub = Ustore->colptr;
252
nzlmax = Glu->nzlmax; /* max from previous factorization */
253
nzumax = Glu->nzumax;
254
nzlumax = Glu->nzlumax;
257
return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
258
+ (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
259
} else if ( lwork == 0 ) {
260
Glu->MemModel = SYSTEM;
262
Glu->MemModel = USER;
263
stack.top2 = (lwork/4)*4; /* must be word-addressable */
264
stack.size = stack.top2;
267
lsub = expanders[LSUB].mem = Lstore->rowind;
268
lusup = expanders[LUSUP].mem = Lstore->nzval;
269
usub = expanders[USUB].mem = Ustore->rowind;
270
ucol = expanders[UCOL].mem = Ustore->nzval;;
271
expanders[LSUB].size = nzlmax;
272
expanders[LUSUP].size = nzlumax;
273
expanders[USUB].size = nzumax;
274
expanders[UCOL].size = nzumax;
282
Glu->xlusup = xlusup;
286
Glu->nzlmax = nzlmax;
287
Glu->nzumax = nzumax;
288
Glu->nzlumax = nzlumax;
290
info = zLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
292
return ( info + zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
299
/* Allocate known working storage. Returns 0 if success, otherwise
300
returns the number of bytes allocated so far when failure occurred. */
302
zLUWorkInit(int m, int n, int panel_size, int **iworkptr,
303
doublecomplex **dworkptr, LU_space_t MemModel)
305
int isize, dsize, extra;
306
doublecomplex *old_ptr;
307
int maxsuper = sp_ienv(3),
310
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
311
dsize = (m * panel_size +
312
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(doublecomplex);
314
if ( MemModel == SYSTEM )
315
*iworkptr = (int *) intCalloc(isize/sizeof(int));
317
*iworkptr = (int *) zuser_malloc(isize, TAIL);
319
fprintf(stderr, "zLUWorkInit: malloc fails for local iworkptr[]\n");
323
if ( MemModel == SYSTEM )
324
*dworkptr = (doublecomplex *) SUPERLU_MALLOC(dsize);
326
*dworkptr = (doublecomplex *) zuser_malloc(dsize, TAIL);
327
if ( NotDoubleAlign(*dworkptr) ) {
329
*dworkptr = (doublecomplex*) DoubleAlign(*dworkptr);
330
*dworkptr = (doublecomplex*) ((double*)*dworkptr - 1);
331
extra = (char*)old_ptr - (char*)*dworkptr;
333
printf("zLUWorkInit: not aligned, extra %d\n", extra);
340
fprintf(stderr, "malloc fails for local dworkptr[].");
341
return (isize + dsize + n);
349
* Set up pointers for real working arrays.
352
zSetRWork(int m, int panel_size, doublecomplex *dworkptr,
353
doublecomplex **dense, doublecomplex **tempv)
355
doublecomplex zero = {0.0, 0.0};
357
int maxsuper = sp_ienv(3),
360
*tempv = *dense + panel_size*m;
361
zfill (*dense, m * panel_size, zero);
362
zfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
366
* Free the working storage used by factor routines.
368
void zLUWorkFree(int *iwork, doublecomplex *dwork, GlobalLU_t *Glu)
370
if ( Glu->MemModel == SYSTEM ) {
371
SUPERLU_FREE (iwork);
372
SUPERLU_FREE (dwork);
374
stack.used -= (stack.size - stack.top2);
375
stack.top2 = stack.size;
376
/* zStackCompress(Glu); */
379
SUPERLU_FREE (expanders);
383
/* Expand the data structures for L and U during the factorization.
384
* Return value: 0 - successful return
385
* > 0 - number of bytes allocated when run out of space
388
zLUMemXpand(int jcol,
389
int next, /* number of elements currently in the factors */
390
MemType mem_type, /* which type of memory to expand */
391
int *maxlen, /* modified - maximum length of a data structure */
392
GlobalLU_t *Glu /* modified - global LU data structures */
398
printf("zLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
399
jcol, next, *maxlen, mem_type);
402
if (mem_type == USUB)
403
new_mem = zexpand(maxlen, mem_type, next, 1, Glu);
405
new_mem = zexpand(maxlen, mem_type, next, 0, Glu);
408
int nzlmax = Glu->nzlmax;
409
int nzumax = Glu->nzumax;
410
int nzlumax = Glu->nzlumax;
411
fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
412
return (zmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
415
switch ( mem_type ) {
417
Glu->lusup = (doublecomplex *) new_mem;
418
Glu->nzlumax = *maxlen;
421
Glu->ucol = (doublecomplex *) new_mem;
422
Glu->nzumax = *maxlen;
425
Glu->lsub = (int *) new_mem;
426
Glu->nzlmax = *maxlen;
429
Glu->usub = (int *) new_mem;
430
Glu->nzumax = *maxlen;
441
copy_mem_doublecomplex(int howmany, void *old, void *new)
444
doublecomplex *dold = old;
445
doublecomplex *dnew = new;
446
for (i = 0; i < howmany; i++) dnew[i] = dold[i];
450
* Expand the existing storage to accommodate more fill-ins.
454
int *prev_len, /* length used from previous call */
455
MemType type, /* which part of the memory to expand */
456
int len_to_copy, /* size of the memory to be copied to new store */
457
int keep_prev, /* = 1: use prev_len;
458
= 0: compute new_len to expand */
459
GlobalLU_t *Glu /* modified - global LU data structures */
464
void *new_mem, *old_mem;
465
int new_len, tries, lword, extra, bytes_to_copy;
469
if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
472
new_len = alpha * *prev_len;
475
if ( type == LSUB || type == USUB ) lword = sizeof(int);
476
else lword = sizeof(doublecomplex);
478
if ( Glu->MemModel == SYSTEM ) {
479
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
480
/* new_mem = (void *) calloc(new_len, lword); */
481
if ( no_expand != 0 ) {
484
if ( !new_mem ) return (NULL);
487
if ( ++tries > 10 ) return (NULL);
488
alpha = Reduce(alpha);
489
new_len = alpha * *prev_len;
490
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
491
/* new_mem = (void *) calloc(new_len, lword); */
494
if ( type == LSUB || type == USUB ) {
495
copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
497
copy_mem_doublecomplex(len_to_copy, expanders[type].mem, new_mem);
499
SUPERLU_FREE (expanders[type].mem);
501
expanders[type].mem = (void *) new_mem;
503
} else { /* MemModel == USER */
504
if ( no_expand == 0 ) {
505
new_mem = zuser_malloc(new_len * lword, HEAD);
506
if ( NotDoubleAlign(new_mem) &&
507
(type == LUSUP || type == UCOL) ) {
509
new_mem = (void *)DoubleAlign(new_mem);
510
extra = (char*)new_mem - (char*)old_mem;
512
printf("expand(): not aligned, extra %d\n", extra);
517
expanders[type].mem = (void *) new_mem;
521
extra = (new_len - *prev_len) * lword;
523
if ( StackFull(extra) ) return (NULL);
525
while ( StackFull(extra) ) {
526
if ( ++tries > 10 ) return (NULL);
527
alpha = Reduce(alpha);
528
new_len = alpha * *prev_len;
529
extra = (new_len - *prev_len) * lword;
533
if ( type != USUB ) {
534
new_mem = (void*)((char*)expanders[type + 1].mem + extra);
535
bytes_to_copy = (char*)stack.array + stack.top1
536
- (char*)expanders[type + 1].mem;
537
user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
540
Glu->usub = expanders[USUB].mem =
541
(void*)((char*)expanders[USUB].mem + extra);
544
Glu->lsub = expanders[LSUB].mem =
545
(void*)((char*)expanders[LSUB].mem + extra);
548
Glu->ucol = expanders[UCOL].mem =
549
(void*)((char*)expanders[UCOL].mem + extra);
553
if ( type == UCOL ) {
554
stack.top1 += extra; /* Add same amount for USUB */
563
expanders[type].size = new_len;
565
if ( no_expand ) ++no_expand;
567
return (void *) expanders[type].mem;
573
* Compress the work[] array to remove fragmentation.
576
zStackCompress(GlobalLU_t *Glu)
578
register int iword, dword, ndim;
579
char *last, *fragment;
581
doublecomplex *dfrom, *dto;
582
int *xlsub, *lsub, *xusub, *usub, *xlusup;
583
doublecomplex *ucol, *lusup;
586
dword = sizeof(doublecomplex);
593
xlusup = Glu->xlusup;
598
dto = (doublecomplex *)((char*)lusup + xlusup[ndim] * dword);
599
copy_mem_doublecomplex(xusub[ndim], dfrom, dto);
603
ito = (int *) ((char*)ucol + xusub[ndim] * iword);
604
copy_mem_int(xlsub[ndim], ifrom, ito);
608
ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
609
copy_mem_int(xusub[ndim], ifrom, ito);
612
last = (char*)usub + xusub[ndim] * iword;
613
fragment = (char*) (((char*)stack.array + stack.top1) - last);
614
stack.used -= (long int) fragment;
615
stack.top1 -= (long int) fragment;
622
printf("zStackCompress: fragment %d\n", fragment);
623
/* for (last = 0; last < ndim; ++last)
624
print_lu_col("After compress:", last, 0);*/
630
* Allocate storage for original matrix A
633
zallocateA(int n, int nnz, doublecomplex **a, int **asub, int **xa)
635
*a = (doublecomplex *) doublecomplexMalloc(nnz);
636
*asub = (int *) intMalloc(nnz);
637
*xa = (int *) intMalloc(n+1);
641
doublecomplex *doublecomplexMalloc(int n)
644
buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex));
646
ABORT("SUPERLU_MALLOC failed for buf in doublecomplexMalloc()\n");
651
doublecomplex *doublecomplexCalloc(int n)
655
doublecomplex zero = {0.0, 0.0};
656
buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex));
658
ABORT("SUPERLU_MALLOC failed for buf in doublecomplexCalloc()\n");
660
for (i = 0; i < n; ++i) buf[i] = zero;
665
int zmemory_usage(const int nzlmax, const int nzumax,
666
const int nzlumax, const int n)
668
register int iword, dword;
671
dword = sizeof(doublecomplex);
673
return (10 * n * iword +
674
nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);