/****************************************************************************
 * date:    Tue Oct 3 12:31:59 PDT 1995
 * author:  Jarek Nieplocha
 * purpose: This program demonstrates the interface between GA and MPI.
 *          For a given square matrix, it creates a vector that contains the
 *          maximum element of each matrix row. MPI group communication is
 *          used.
 * notes:   The program can run in two modes:
 *          1. Using TCGMSG calls available through the TCGMSG-MPI library
 *             and MPI. In this mode initialization must be done with
 *             the TCGMSG PBEGIN call.
 *          2. Using MPI calls only -- the preprocessor symbol MPI must be
 *             defined.
 ****************************************************************************/
36
#define N 100 /* dimension of matrices */
41
int ZERO=0; /* useful constants */
43
int n=N, ndim=2,type=MT_F_DBL,dims[2]={N,N},coord[2];
44
int me=GA_Nodeid(), nproc=GA_Nnodes();
48
/* Note: on all current platforms DoublePrecision = double */
49
DoublePrecision buf[N], *max_row=NULL;
52
int ilo,ihi, jlo,jhi, ld, prow, pcol;
53
int root=0, grp_me=-1;
55
if(me==0)printf("Creating matrix A\n");
57
g_a = NGA_Create(type, ndim, dims, "A", NULL);
58
if(!g_a) GA_Error("create failed: A",n);
59
if(me==0)printf("OK\n");
61
if(me==0)printf("Creating matrix B\n");
63
g_b = NGA_Create(type, 1, dims, "B", NULL);
64
if(!g_b) GA_Error("create failed: B",n);
65
if(me==0)printf("OK\n");
67
GA_Zero(g_a); /* zero the matrix */
69
if(me==0)printf("Initializing matrix A\n");
70
/* fill in matrix A with values: A(i,j) = (i+j) */
71
for(row=me; row<n; row+= nproc){
       /* simple load balancing:
        * each process works on a different row in MIMD style */
76
for(i=0; i<n; i++) buf[i]=(DoublePrecision)(i+row+1);
78
lo[1]=ZERO; hi[1]=n-1;
79
NGA_Put(g_a, lo, hi, buf, &n);
83
NGA_Distribution(g_a, me, lo, hi);
89
max_row=(DoublePrecision*)malloc(sizeof(DoublePrecision)*(ihi-ilo+1));
90
if (!max_row) GA_Error("malloc 3 failed",(ihi-ilo+1));
91
for (i=0; i<(ihi-ilo+1); i++) {
95
NGA_Proc_topology(g_a, me, coord); /* block coordinates */
99
if(me==0)printf("Splitting comm according to distribution of A\n");
101
/* GA on SP1 requires synchronization before & after message-passing !!*/
104
if(me==0)printf("Computing max row elements\n");
105
/* create communicator for processes that 'own' A[:,jlo:jhi] */
106
MPI_Barrier(MPI_COMM_WORLD);
107
if(pcol < 0 || prow <0)
108
MPI_Comm_split(MPI_COMM_WORLD,MPI_UNDEFINED,MPI_UNDEFINED, &ROW_COMM);
110
MPI_Comm_split(MPI_COMM_WORLD, (int)pcol, (int)prow, &ROW_COMM);
112
if(ROW_COMM != MPI_COMM_NULL){
114
MPI_Comm_rank(ROW_COMM, &grp_me);
116
/* each process computes max elements in the block it 'owns' */
117
lo[0]=ilo; hi[0]=ihi;
118
lo[1]=jlo; hi[1]=jhi;
119
NGA_Access(g_a, lo, hi, &ptr, &ld);
120
for(i=0; i<ihi-ilo+1; i++){
121
for(j=0; j<jhi-jlo+1; j++)
122
if(max_row[i] < ptr[i*ld + j]){
123
max_row[i] = ptr[i*ld + j];
126
MPI_Reduce(max_row, buf, ihi-ilo+1, MPI_DOUBLE, MPI_MAX,
129
}else fprintf(stderr,"process %d not participating\n",me);
132
/* processes with rank=root in ROW_COMM put results into g_b */
135
lo[0]=ilo; hi[0]=ihi;
136
NGA_Put(g_b, lo, hi, buf, &ld);
141
if(me==0)printf("Checking the result\n");
143
lo[0]=ZERO; hi[0]=n-1;
144
NGA_Get(g_b, lo, hi, buf, &n);
145
for(i=0; i< n; i++)if(buf[i] != (double)n+i){
146
fprintf(stderr,"error:%d max=%f should be:%d\n",i,buf[i],n+i);
147
GA_Error("terminating...",1);
151
if(me==0)printf("OK\n");
163
int heap=20000, stack=20000;
167
GA_INIT(argc,argv); /* initialize GA */
170
if(me==0) printf("Using %ld processes\n",(long)nproc);
174
if(! MA_init((int)MT_F_DBL, stack, heap))
175
GA_Error("MA_init failed",stack+heap); /* initialize memory allocator*/
178
if(me==0)printf("Terminating ..\n");