19
#define BLOCK_SIZE 500
20
/*#define USE_SCALAPACK_DISTR*/
24
static int nprocs, me;
26
static void init_array(double *a, int n)
29
double max=0.0; /* for pivoting */
35
a[i*n+j] = (double)rand()/RAND_MAX; /* (i*n+j+1); */
36
if(max<a[i*n+j]) max = a[i*n+j];
40
/* Forcing the diagonal is always max to avoid pivoting*/
43
a[i*n+i] = max + (double)i;
47
void copy_array(double *a, double *b, int n)
60
void print_array(double *a, int n)
64
printf("Print Matrix of size %d x %d:\n", n, n);
69
if(a[i*n+j]<0) printf("%2.4e ", a[i*n+j]);
70
else printf(" %2.4e ", a[i*n+j]);
77
/* print U, i.e. upper triangular */
78
void print_U(double *a, int n)
82
printf("Print Upper triangular matrix of size %d x %d:\n", n, n);
87
printf(" %2.4e ", 0.0);
92
if(a[i*n+j]<0) printf("%2.4e ", a[i*n+j]);
93
else printf(" %2.4e ", a[i*n+j]);
101
/* print L, i.e. unit lower triangular */
102
void print_L(double *a, int n)
106
printf("Print Unit Lower triangular matrix of size %d x %d:\n", n, n);
111
if(a[i*n+j]<0) printf("%2.4e ", a[i*n+j]);
112
else printf(" %2.4e ", a[i*n+j]);
116
if(i==j) printf(" %2.4e ", 1.0);
117
else printf(" %2.4e ", 0.0);
125
void lu_basic(double *a, int n) {
129
for (i=0; i<n-1; i++)
131
/* scale lower matrix column */
132
for (j=i+1; j<n; j++)
134
a[j*n+i] = a[j*n+i]/a[i*n+i]; /* a[j][i] = a[j][i]/a[i][i] */
137
/* rank-one update of matrix */
138
for (j=i+1; j<n; j++)
140
for (k=i+1; k<n; k++)
142
/*a[k][j] = a[k][j] - a[i][j]/a[k][i] */
143
a[k*n+j] = a[k*n+j] - a[i*n+j]/a[k*n+i];
149
int lu_lapack(double *a, int n)
153
BlasInt *ipiv=NULL, info;
154
BlasInt ld=(BlasInt)n;
155
BlasInt N=(BlasInt)n;
157
aa = (double*)malloc(n*n*sizeof(double));
158
ipiv = (BlasInt*)malloc(n*sizeof(BlasInt));
160
/* row-major to column-major (NOTE: dgetrf_ is a fortran function) */
165
aa[j*n+i] = a[i*n+j];
169
LAPACK_DGETRF(&N, &N, aa, &ld, ipiv, &info); /* LAPACK's LU */
171
/* column-major to row-major */
176
a[i*n+j] = aa[j*n+i];
181
printf("ipiv[] = [ ");
182
for(i=0; i<n; i++) printf("%ld ", ipiv[i]);
190
void lu(double *A, int matrix_size)
192
double *a=NULL, *a_verify=NULL;
195
a = (double*)malloc(matrix_size*matrix_size*sizeof(double));
196
a_verify = (double*)malloc(matrix_size*matrix_size*sizeof(double));
197
if(a == NULL || a_verify == NULL)
199
GA_Error("lu(): malloc failed", matrix_size*matrix_size*sizeof(double));
202
copy_array(A, a, matrix_size);
203
copy_array(A, a_verify, matrix_size);
206
printf("\nDoing LU Factorization\n\n");
207
lu_basic(a, matrix_size);
209
print_array(a, matrix_size);
211
printf("\nDoing LAPACK's LU Factorization\n\n");
213
info = lu_lapack(a_verify, matrix_size);
217
print_array(a_verify, matrix_size);
222
printf("\nError: dgetrf() of lapack is NOT successful (INFO=%d)\n\n",
224
printf("NOTE:\n INFO=0: successful exit\n INFO<0: if INFO = -i, the i-th argument had an illegal value\n INFO>0: if INFO = i, U(i,i) is exactly zero. The factorization\n has been completed, but the factor U is exactly singular, and\n division by zero will occur if it is used to solve a system of\n equations.\n");
235
void dtrsm_lapack(double *a, double *b, int n,
236
char side, char uplo, char transa, char diag)
241
BlasInt ld = (BlasInt)n;
242
BlasInt N = (BlasInt)n;
244
aa = (double*)malloc(n*n*sizeof(double));
245
bb = (double*)malloc(n*n*sizeof(double));
247
/* row-major to column-major (NOTE: dgetrf_ is a fortran function) */
252
aa[j*n+i] = a[i*n+j];
253
bb[j*n+i] = b[i*n+j];
260
LAPACK_DTRSM(&side, &uplo, &transa, &diag, &N, &N, &alpha, aa, &ld, bb, &ld);
263
/* column-major to row-major */
268
a[i*n+j] = aa[j*n+i];
269
b[i*n+j] = bb[j*n+i];
277
void transpose(double *a, int n)
282
aa = (double*)malloc(n*n*sizeof(double));
283
copy_array(a, aa, n);
285
/* row-major to column-major (NOTE: dgetrf_ is a fortran function) */
290
a[j*n+i] = aa[i*n+j];
296
/* input is matrix size */
297
void ga_lu(double *A, int matrix_size)
299
int g_a, g_b, dims[2], type=C_DBL;
300
int lo[2], hi[2], ld;
302
#ifdef USE_SCALAPACK_DISTR
307
/* create a 2-d GA (global matrix) */
308
dims[0] = matrix_size;
309
dims[1] = matrix_size;
310
block_size[0] = BLOCK_SIZE;
311
block_size[1] = BLOCK_SIZE;
312
#ifdef USE_SCALAPACK_DISTR
315
proc_grid[1] = nprocs/2;
316
if(nprocs%2) GA_Error("For ScaLAPACK stle distribution, nprocs must be "
317
" divisible by 2", 0);
322
g_a = NGA_Create(type, 2, dims, "A", NULL);
323
g_b = GA_Duplicate(g_a, "transposed array B");
325
g_a = GA_Create_handle();
326
GA_Set_data(g_a, 2, dims, type);
327
GA_Set_array_name(g_a,"A");
328
# ifdef USE_SCALAPACK_DISTR
329
GA_Set_block_cyclic_proc_grid(g_a, block_size, proc_grid);
331
GA_Set_block_cyclic(g_a, block_size);
335
g_b = GA_Create_handle();
336
GA_Set_data(g_b, 2, dims, type);
337
GA_Set_array_name(g_b,"B");
338
# ifdef USE_SCALAPACK_DISTR
339
GA_Set_block_cyclic_proc_grid(g_b, block_size, proc_grid);
341
GA_Set_block_cyclic(g_b, block_size);
347
/* copy the local matrix into GA */
351
hi[0] = matrix_size - 1;
353
hi[1] = matrix_size - 1;
356
NGA_Put(g_a, lo, hi, A, &ld);
360
GA_Transpose(g_a, g_b);
362
/* The following function does not exist. Not sure what to replace it
363
* with. GA_Lu_solve(char trans, int g_a, int g_b) requiresa an
365
/* GA_Lu('n', g_b); */
366
time = MP_TIMER() - time;
368
/* 2/3 N^3 - 1/2 N^2 flops for LU and 2*N^2 for solver */
369
gflops = ( (((double)matrix_size) * matrix_size)/(time*1.0e+9) *
370
(2.0/3.0 * (double)matrix_size - 0.5) );
371
if(me==0) printf("\nGA_Lu: N=%d flops=%2.5e Gflops, time=%2.5e secs\n\n",
372
matrix_size, gflops, time);
378
/* if(me==0) lu(A, matrix_size); */
385
int main(int argc, char **argv) {
387
int heap=20000000, stack=200000000;
396
matrix_size = atoi(argv[1]);
400
printf("Usage Error\n\t Usage: <program> [matrix_size]\n");
406
printf("Error: matrix size (%d) should be > 0\n",
408
GA_Error("matrix size should be >0", 1);
411
/* *****************************************************************
412
* Initialize MPI/TCGMSG-MPI, GA and MA
413
* *****************************************************************/
416
GA_INIT(argc,argv); /* initialize GA */
419
nprocs = GA_Nnodes();
424
if(! MA_init(MT_F_DBL, stack, heap)) /* initialize MA */
426
GA_Error("MA_init failed",stack+heap);
429
/* create/initialize the matrix */
430
if((A = (double*)malloc(matrix_size*matrix_size*sizeof(double))) == NULL)
432
GA_Error("malloc failed", matrix_size*matrix_size*sizeof(double));
435
for(i=0; i<2; i++) /* 5 runs */
437
init_array(A, matrix_size);
439
if(me==0) print_array(A, matrix_size);
441
/* *****************************************************************
442
* Perform LU Factorization
443
* *****************************************************************/
444
ga_lu(A, matrix_size);
449
/* *****************************************************************
450
* Terminate MPI/TCGMSG-MPI, GA and MA
451
* *****************************************************************/
452
if(me==0)printf("Success\n");
462
* - LU for non-square matrix