/****************************************************************************
 * date:    Tue Oct 3 12:31:59 PDT 1995
 * author:  Jarek Nieplocha
 * purpose: This program demonstrates the interface between GA and MPI.
 *          For a given square matrix, it creates a vector that contains the
 *          maximum element of each matrix row. MPI group communication is
 *          used.
 * notes:   The program can run in two modes:
 *          1. Using TCGMSG calls available through the TCGMSG-MPI library
 *             and MPI. In this mode initialization must be done with
 *             the TCGMSG PBEGIN call.
 *          2. Using MPI calls only -- preprocessor symbol MPI must be
 *             defined.
 ****************************************************************************/
27
#define N 100 /* dimension of matrices */
32
int ONE=1, ZERO=0; /* useful constants */
34
int n=N, ndim=2,type=MT_F_DBL,dims[2]={N,N},coord[2];
35
int me=GA_Nodeid(), nproc=GA_Nnodes();
39
/* Note: on all current platforms DoublePrecision = double */
40
DoublePrecision buf[N], *max_row;
43
int ilo,ihi, jlo,jhi, ld, prow, pcol;
44
int root=0, grp_me=-1;
46
if(me==0)printf("Creating matrix A\n");
48
g_a = NGA_Create(type, ndim, dims, "A", NULL);
49
if(!g_a) GA_Error("create failed: A",n);
50
if(me==0)printf("OK\n");
52
if(me==0)printf("Creating matrix B\n");
54
g_b = NGA_Create(type, 1, dims, "B", NULL);
55
if(!g_b) GA_Error("create failed: B",n);
56
if(me==0)printf("OK\n");
58
GA_Zero(g_a); /* zero the matrix */
60
if(me==0)printf("Initializing matrix A\n");
61
/* fill in matrix A with values: A(i,j) = (i+j) */
62
for(row=me; row<n; row+= nproc){
64
* simple load balancing:
65
* each process works on a different row in MIMD style
67
for(i=0; i<n; i++) buf[i]=(DoublePrecision)(i+row+1);
69
lo[1]=ZERO; hi[1]=n-1;
70
NGA_Put(g_a, lo, hi, buf, &n);
74
NGA_Distribution(g_a, me, lo, hi);
80
max_row=(DoublePrecision*)malloc(sizeof(DoublePrecision)*(ihi-ilo+1));
81
if (!max_row) GA_Error("malloc 3 failed",(ihi-ilo+1));
82
for (i=0; i<(ihi-ilo+1); i++) {
86
NGA_Proc_topology(g_a, me, coord); /* block coordinates */
90
if(me==0)printf("Splitting comm according to distribution of A\n");
92
/* GA on SP1 requires synchronization before & after message-passing !!*/
95
if(me==0)printf("Computing max row elements\n");
96
/* create communicator for processes that 'own' A[:,jlo:jhi] */
97
MPI_Barrier(MPI_COMM_WORLD);
98
if(pcol < 0 || prow <0)
99
MPI_Comm_split(MPI_COMM_WORLD,MPI_UNDEFINED,MPI_UNDEFINED, &ROW_COMM);
101
MPI_Comm_split(MPI_COMM_WORLD, (int)pcol, (int)prow, &ROW_COMM);
103
if(ROW_COMM != MPI_COMM_NULL){
105
MPI_Comm_rank(ROW_COMM, &grp_me);
107
/* each process computes max elements in the block it 'owns' */
108
lo[0]=ilo; hi[0]=ihi;
109
lo[1]=jlo; hi[1]=jhi;
110
NGA_Access(g_a, lo, hi, &ptr, &ld);
111
for(i=0; i<ihi-ilo+1; i++){
112
for(j=0; j<jhi-jlo+1; j++)
113
if(max_row[i] < ptr[i*ld + j]){
114
max_row[i] = ptr[i*ld + j];
117
MPI_Reduce(max_row, buf, ihi-ilo+1, MPI_DOUBLE, MPI_MAX,
120
}else fprintf(stderr,"process %d not participating\n",me);
123
/* processes with rank=root in ROW_COMM put results into g_b */
126
lo[0]=ilo; hi[0]=ihi;
127
NGA_Put(g_b, lo, hi, buf, &ld);
132
if(me==0)printf("Checking the result\n");
134
lo[0]=ZERO; hi[0]=n-1;
135
NGA_Get(g_b, lo, hi, buf, &n);
136
for(i=0; i< n; i++)if(buf[i] != (double)n+i){
137
fprintf(stderr,"error:%d max=%lf should be:%ld\n",i,buf[i],n+i);
138
GA_Error("terminating...",0);
142
if(me==0)printf("OK\n");
154
int heap=20000, stack=20000;
159
int desired = MPI_THREAD_MULTIPLE;
161
MPI_Init_thread(&argc, &argv, desired, &provided);
162
if ( provided != MPI_THREAD_MULTIPLE ) printf("provided != MPI_THREAD_MULTIPLE\n");
164
MPI_Init (&argc, &argv); /* initialize MPI */
167
PBEGIN_(argc, argv); /* initialize TCGMSG */
170
GA_Initialize(); /* initialize GA */
173
if(me==0) printf("Using %ld processes\n",(long)nproc);
177
if(! MA_init((int)MT_F_DBL, stack, heap))
178
ga_error("MA_init failed",stack+heap); /* initialize memory allocator*/
181
if(me==0)printf("Terminating ..\n");