32
34
/* Verbosity flag, initialised to 0. main() tests it together with wrank == 0
 * before printing its per-size progress lines. */
static int verbose = 0;
34
/* Forward declarations of the six benchmark kernels: accumulate and put, each
 * combined with fence, lock and PSCW (post/start/complete) synchronisation.
 * NOTE(review): every prototype appears twice below, once with padded
 * parentheses and once without, separated by bare integers. This looks like
 * two formattings of the same declarations captured together - confirm
 * against the real file. */
void RunAccFence( MPI_Win win, int destRank, int cnt, int sz );
35
void RunAccLock( MPI_Win win, int destRank, int cnt, int sz );
36
void RunPutFence( MPI_Win win, int destRank, int cnt, int sz );
37
void RunPutLock( MPI_Win win, int destRank, int cnt, int sz );
38
void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz,
39
MPI_Group exposureGroup, MPI_Group accessGroup );
40
void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz,
41
MPI_Group exposureGroup, MPI_Group accessGroup );
36
/* Same six prototypes, compact formatting. */
void RunAccFence(MPI_Win win, int destRank, int cnt, int sz);
37
void RunAccLock(MPI_Win win, int destRank, int cnt, int sz);
38
void RunPutFence(MPI_Win win, int destRank, int cnt, int sz);
39
void RunPutLock(MPI_Win win, int destRank, int cnt, int sz);
40
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
41
MPI_Group exposureGroup, MPI_Group accessGroup);
42
void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
43
MPI_Group exposureGroup, MPI_Group accessGroup);
43
/* Program entry point. The visible statements parse command-line flags, build
 * one-member access/exposure groups for the next and previous rank, allocate
 * an int buffer and expose it through an MPI window, run each selected
 * RMA-operation / synchronisation combination over doubling sizes, then free
 * the window and groups and print " No Errors".
 * NOTE(review): this span carries two formattings of the same statements
 * interleaved with bare integers, and braces plus several statements are
 * absent (the declarations of win, rmaChoice, syncChoice, start and end are
 * not shown) - confirm every detail against the real file. */
int main( int argc, char *argv[] )
45
int main(int argc, char *argv[])
45
int arraysize, i, cnt, sz, maxCount=MAX_COUNT, *arraybuffer;
47
int arraysize, i, cnt, sz, maxCount = MAX_COUNT, *arraybuffer;
46
48
int wrank, wsize, destRank, srcRank;
48
50
MPI_Group wgroup, accessGroup, exposureGroup;
49
int maxSz = MAX_RMA_SIZE;
51
MPI_Init( &argc, &argv );
53
/* Argument parsing (spaced formatting). If a choice still equals RMA_ALL or
 * SYNC_ALL when one of its flags is seen, it is first reset to RMA_NONE or
 * SYNC_NONE; the sync flags then OR in their own bit.
 * NOTE(review): no statement that ORs a bit into rmaChoice for -put/-acc is
 * visible, and no increment of i is visible before atoi(argv[i]) for
 * -maxsz/-maxcount - presumably both sit on omitted lines; confirm.
 * The atoi results are not range-checked in the visible text. */
for (i=1; i<argc; i++) {
54
if (strcmp( argv[i], "-put" ) == 0) {
55
if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
58
else if (strcmp( argv[i], "-acc" ) == 0) {
59
if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
62
else if (strcmp( argv[i], "-fence" ) == 0) {
63
if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
64
syncChoice |= SYNC_FENCE;
66
else if (strcmp( argv[i], "-lock" ) == 0) {
67
if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
68
syncChoice |= SYNC_LOCK;
70
else if (strcmp( argv[i], "-pscw" ) == 0) {
71
if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
72
syncChoice |= SYNC_PSCW;
74
else if (strcmp( argv[i], "-maxsz" ) == 0) {
76
maxSz = atoi( argv[i] );
78
else if (strcmp( argv[i], "-maxcount" ) == 0) {
80
maxCount = atoi( argv[i] );
83
fprintf( stderr, "Unrecognized argument %s\n", argv[i] );
84
/* NOTE(review): the usage text below does not list -maxcount although the
 * flag is parsed above. */
fprintf( stderr, "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n", argv[0] );
85
MPI_Abort( MPI_COMM_WORLD, 1 );
51
int maxSz = MAX_RMA_SIZE;
54
MPI_Init(&argc, &argv);
56
/* Argument parsing again, compact formatting; same caveats as above. */
for (i = 1; i < argc; i++) {
57
if (strcmp(argv[i], "-put") == 0) {
58
if (rmaChoice == RMA_ALL)
62
else if (strcmp(argv[i], "-acc") == 0) {
63
if (rmaChoice == RMA_ALL)
67
else if (strcmp(argv[i], "-fence") == 0) {
68
if (syncChoice == SYNC_ALL)
69
syncChoice = SYNC_NONE;
70
syncChoice |= SYNC_FENCE;
72
else if (strcmp(argv[i], "-lock") == 0) {
73
if (syncChoice == SYNC_ALL)
74
syncChoice = SYNC_NONE;
75
syncChoice |= SYNC_LOCK;
77
else if (strcmp(argv[i], "-pscw") == 0) {
78
if (syncChoice == SYNC_ALL)
79
syncChoice = SYNC_NONE;
80
syncChoice |= SYNC_PSCW;
82
else if (strcmp(argv[i], "-maxsz") == 0) {
84
maxSz = atoi(argv[i]);
86
else if (strcmp(argv[i], "-maxcount") == 0) {
88
maxCount = atoi(argv[i]);
91
fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
93
"%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n",
95
MPI_Abort(MPI_COMM_WORLD, 1);
89
MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
90
MPI_Comm_size( MPI_COMM_WORLD, &wsize );
99
MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
100
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
91
101
/* Ring neighbours: destRank is wrank + 1 and srcRank is wrank - 1, each
 * wrapped back into the range of valid ranks. */
destRank = wrank + 1;
92
while (destRank >= wsize) destRank = destRank - wsize;
102
while (destRank >= wsize)
103
destRank = destRank - wsize;
93
104
srcRank = wrank - 1;
94
if (srcRank < 0) srcRank += wsize;
96
108
/* Create groups for PSCW */
/* accessGroup holds only destRank and exposureGroup only srcRank; the world
 * group is released as soon as both have been derived from it. */
97
MPI_Comm_group( MPI_COMM_WORLD, &wgroup );
98
MPI_Group_incl( wgroup, 1, &destRank, &accessGroup );
99
MPI_Group_incl( wgroup, 1, &srcRank, &exposureGroup );
100
MPI_Group_free( &wgroup );
109
MPI_Comm_group(MPI_COMM_WORLD, &wgroup);
110
MPI_Group_incl(wgroup, 1, &destRank, &accessGroup);
111
MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup);
112
MPI_Group_free(&wgroup);
102
114
/* Window memory: maxSz * MAX_COUNT ints, exposed with a displacement unit of
 * sizeof(int).
 * NOTE(review): arraysize is an int product of a user-supplied maxSz, so a
 * large -maxsz value could overflow it - confirm the intended bounds. */
arraysize = maxSz * MAX_COUNT;
103
arraybuffer = (int*)malloc( arraysize * sizeof(int) );
115
arraybuffer = (int *) malloc(arraysize * sizeof(int));
104
116
if (!arraybuffer) {
105
fprintf( stderr, "Unable to allocate %d words\n", arraysize );
106
MPI_Abort( MPI_COMM_WORLD, 1 );
117
fprintf(stderr, "Unable to allocate %d words\n", arraysize);
118
MPI_Abort(MPI_COMM_WORLD, 1);
109
MPI_Win_create( arraybuffer, arraysize*sizeof(int), (int)sizeof(int),
110
MPI_INFO_NULL, MPI_COMM_WORLD, &win );
121
MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int),
122
MPI_INFO_NULL, MPI_COMM_WORLD, &win);
112
124
/* In the order shown, maxCount is validated only after the window has been
 * created. */
if (maxCount > MAX_COUNT) {
113
fprintf( stderr, "MaxCount must not exceed %d\n", MAX_COUNT );
114
MPI_Abort( MPI_COMM_WORLD, 1 );
125
fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT);
126
MPI_Abort(MPI_COMM_WORLD, 1);
117
129
/* Benchmark sections. Each is gated on one sync bit and one RMA bit, doubles
 * sz from 1 up to maxSz, and calls the matching Run* kernel for cnt values up
 * to maxCount. One formatting drives cnt with a while loop (its
 * initialisation and update are not visible); the other uses
 * for (cnt = 1; ...; cnt *= 2) plus an end - start > MAX_ITER_TIME test whose
 * body is not visible. */
if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
118
for (sz=1; sz<=maxSz; sz = sz + sz) {
119
if (wrank == 0 && verbose)
120
printf( "Accumulate with fence, %d elements\n", sz );
122
while (cnt <= maxCount) {
123
RunAccFence( win, destRank, cnt, sz );
130
for (sz = 1; sz <= maxSz; sz = sz + sz) {
131
if (wrank == 0 && verbose)
132
printf("Accumulate with fence, %d elements\n", sz);
133
for (cnt = 1; cnt <= maxCount; cnt *= 2) {
135
RunAccFence(win, destRank, cnt, sz);
137
if (end - start > MAX_ITER_TIME)
129
143
/* Accumulate with lock/unlock synchronisation. */
if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
130
for (sz=1; sz<=maxSz; sz = sz + sz) {
131
if (wrank == 0 && verbose)
132
printf( "Accumulate with lock, %d elements\n", sz );
134
while (cnt <= maxCount) {
135
RunAccLock( win, destRank, cnt, sz );
144
for (sz = 1; sz <= maxSz; sz = sz + sz) {
145
if (wrank == 0 && verbose)
146
printf("Accumulate with lock, %d elements\n", sz);
147
for (cnt = 1; cnt <= maxCount; cnt *= 2) {
149
RunAccLock(win, destRank, cnt, sz);
151
if (end - start > MAX_ITER_TIME)
141
157
/* Put with fence synchronisation. */
if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
142
for (sz=1; sz<=maxSz; sz = sz + sz) {
143
if (wrank == 0 && verbose)
144
printf( "Put with fence, %d elements\n", sz );
146
while (cnt <= maxCount) {
147
RunPutFence( win, destRank, cnt, sz );
158
for (sz = 1; sz <= maxSz; sz = sz + sz) {
159
if (wrank == 0 && verbose)
160
printf("Put with fence, %d elements\n", sz);
161
for (cnt = 1; cnt <= maxCount; cnt *= 2) {
163
RunPutFence(win, destRank, cnt, sz);
165
if (end - start > MAX_ITER_TIME)
153
171
/* Put with lock/unlock synchronisation. */
if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
154
for (sz=1; sz<=maxSz; sz = sz + sz) {
155
if (wrank == 0 && verbose)
156
printf( "Put with lock, %d elements\n", sz );
158
while (cnt <= maxCount) {
159
RunPutLock( win, destRank, cnt, sz );
172
for (sz = 1; sz <= maxSz; sz = sz + sz) {
173
if (wrank == 0 && verbose)
174
printf("Put with lock, %d elements\n", sz);
175
for (cnt = 1; cnt <= maxCount; cnt *= 2) {
177
RunPutLock(win, destRank, cnt, sz);
179
if (end - start > MAX_ITER_TIME)
165
185
/* Put with PSCW synchronisation; the two groups built earlier are passed in. */
if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
166
for (sz=1; sz<=maxSz; sz = sz + sz) {
167
if (wrank == 0 && verbose)
168
printf( "Put with pscw, %d elements\n", sz );
170
while (cnt <= maxCount) {
171
RunPutPSCW( win, destRank, cnt, sz,
172
exposureGroup, accessGroup );
186
for (sz = 1; sz <= maxSz; sz = sz + sz) {
187
if (wrank == 0 && verbose)
188
printf("Put with pscw, %d elements\n", sz);
189
for (cnt = 1; cnt <= maxCount; cnt *= 2) {
191
RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
193
if (end - start > MAX_ITER_TIME)
178
199
/* Accumulate with PSCW synchronisation. */
if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
179
for (sz=1; sz<=maxSz; sz = sz + sz) {
180
if (wrank == 0 && verbose)
181
printf( "Accumulate with pscw, %d elements\n", sz );
183
while (cnt <= maxCount) {
184
RunAccPSCW( win, destRank, cnt, sz,
185
exposureGroup, accessGroup );
200
for (sz = 1; sz <= maxSz; sz = sz + sz) {
201
if (wrank == 0 && verbose)
202
printf("Accumulate with pscw, %d elements\n", sz);
203
for (cnt = 1; cnt <= maxCount; cnt *= 2) {
205
RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
207
if (end - start > MAX_ITER_TIME)
191
/* Teardown: release the window and both PSCW groups.
 * NOTE(review): no free(arraybuffer) or MPI_Finalize call is visible in this
 * text - they may sit on omitted lines; confirm. */
MPI_Win_free( &win );
193
MPI_Group_free( &accessGroup );
194
MPI_Group_free( &exposureGroup );
215
MPI_Group_free(&accessGroup);
216
MPI_Group_free(&exposureGroup);
196
218
/* If we get here without timing out or failing, we succeeded */
197
if (wrank == 0) printf( " No Errors\n" );
220
printf(" No Errors\n");
204
/* Accumulate kernel with fence synchronisation (spaced formatting).
 * Visible statements: a loop over MAX_RUNS repetitions containing a barrier on
 * MPI_COMM_WORLD and an MPI_Win_fence, then a loop issuing cnt MPI_Accumulate
 * calls (MPI_SUM, sz MPI_INTs) to destRank at target displacement j, followed
 * by a second MPI_Win_fence.
 * NOTE(review): braces and any statements that set or advance j are not
 * present in this text (the embedded numbering skips lines) - confirm.
 * NOTE(review): the origin buffer is the single int `one` while the origin
 * count is sz; for sz > 1 that would read past `one` - verify intent. */
void RunAccFence( MPI_Win win, int destRank, int cnt, int sz )
206
int k, i, j, one = 1;
208
for (k=0; k<MAX_RUNS; k++) {
209
MPI_Barrier( MPI_COMM_WORLD );
210
MPI_Win_fence( 0, win );
212
for (i=0; i<cnt; i++) {
213
MPI_Accumulate( &one, sz, MPI_INT, destRank,
214
j, sz, MPI_INT, MPI_SUM, win );
217
MPI_Win_fence( 0, win );
221
/* Accumulate kernel with lock/unlock synchronisation (spaced formatting).
 * Visible statements: a loop over MAX_RUNS repetitions containing a barrier on
 * MPI_COMM_WORLD and an MPI_Win_lock(MPI_LOCK_SHARED) on destRank, then a loop
 * issuing cnt MPI_Accumulate calls (MPI_SUM, sz MPI_INTs) at target
 * displacement j, followed by MPI_Win_unlock on destRank.
 * NOTE(review): braces and any statements that set or advance j are not
 * present in this text - confirm against the real file.
 * NOTE(review): origin buffer is the single int `one` but the origin count is
 * sz; for sz > 1 that would read past `one` - verify intent. */
void RunAccLock( MPI_Win win, int destRank, int cnt, int sz )
223
int k, i, j, one = 1;
225
for (k=0; k<MAX_RUNS; k++) {
226
MPI_Barrier( MPI_COMM_WORLD );
227
MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
229
for (i=0; i<cnt; i++) {
230
MPI_Accumulate( &one, sz, MPI_INT, destRank,
231
j, sz, MPI_INT, MPI_SUM, win );
234
MPI_Win_unlock( destRank, win );
238
/* Put kernel with fence synchronisation (spaced formatting).
 * Visible statements: a loop over MAX_RUNS repetitions containing a barrier on
 * MPI_COMM_WORLD and an MPI_Win_fence, then a loop issuing cnt MPI_Put calls
 * of sz MPI_INTs to destRank at target displacement j, followed by a second
 * MPI_Win_fence.
 * NOTE(review): braces and any statements that set or advance j are not
 * present in this text - confirm against the real file.
 * NOTE(review): origin buffer is the single int `one` but the origin count is
 * sz; for sz > 1 that would read past `one` - verify intent. */
void RunPutFence( MPI_Win win, int destRank, int cnt, int sz )
240
int k, i, j, one = 1;
242
for (k=0; k<MAX_RUNS; k++) {
243
MPI_Barrier( MPI_COMM_WORLD );
244
MPI_Win_fence( 0, win );
246
for (i=0; i<cnt; i++) {
247
MPI_Put( &one, sz, MPI_INT, destRank,
248
j, sz, MPI_INT, win );
251
MPI_Win_fence( 0, win );
255
/* Put kernel with lock/unlock synchronisation (spaced formatting).
 * Visible statements: a loop over MAX_RUNS repetitions containing a barrier on
 * MPI_COMM_WORLD and an MPI_Win_lock(MPI_LOCK_SHARED) on destRank, then a loop
 * issuing cnt MPI_Put calls of sz MPI_INTs at target displacement j, followed
 * by MPI_Win_unlock on destRank.
 * NOTE(review): braces and any statements that set or advance j are not
 * present in this text - confirm against the real file.
 * NOTE(review): origin buffer is the single int `one` but the origin count is
 * sz; for sz > 1 that would read past `one` - verify intent. */
void RunPutLock( MPI_Win win, int destRank, int cnt, int sz )
257
int k, i, j, one = 1;
259
for (k=0; k<MAX_RUNS; k++) {
260
MPI_Barrier( MPI_COMM_WORLD );
261
MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
263
for (i=0; i<cnt; i++) {
264
MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
267
MPI_Win_unlock( destRank, win );
271
/* Put kernel with post/start/complete synchronisation (spaced formatting).
 * Visible statements: a loop over MAX_RUNS repetitions containing a barrier on
 * MPI_COMM_WORLD, MPI_Win_post on exposureGroup and MPI_Win_start on
 * accessGroup, then a loop issuing cnt MPI_Put calls of sz MPI_INTs to
 * destRank at target displacement j, followed by MPI_Win_complete.
 * NOTE(review): no MPI_Win_wait matching the post is visible, nor braces or
 * any statements that set or advance j - presumably on omitted lines; confirm.
 * NOTE(review): origin buffer is the single int `one` but the origin count is
 * sz; for sz > 1 that would read past `one` - verify intent. */
void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz,
272
MPI_Group exposureGroup, MPI_Group accessGroup )
274
int k, i, j, one = 1;
276
for (k=0; k<MAX_RUNS; k++) {
277
MPI_Barrier( MPI_COMM_WORLD );
278
MPI_Win_post( exposureGroup, 0, win );
279
MPI_Win_start( accessGroup, 0, win );
281
for (i=0; i<cnt; i++) {
282
MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
285
MPI_Win_complete( win );
290
/* Accumulate kernel with post/start/complete synchronisation (spaced
 * formatting).
 * Visible statements: a loop over MAX_RUNS repetitions containing a barrier on
 * MPI_COMM_WORLD, MPI_Win_post on exposureGroup and MPI_Win_start on
 * accessGroup, then a loop issuing cnt MPI_Accumulate calls (MPI_SUM, sz
 * MPI_INTs) to destRank at target displacement j, followed by
 * MPI_Win_complete.
 * NOTE(review): no MPI_Win_wait matching the post is visible, nor braces or
 * any statements that set or advance j - presumably on omitted lines; confirm.
 * NOTE(review): origin buffer is the single int `one` but the origin count is
 * sz; for sz > 1 that would read past `one` - verify intent. */
void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz,
291
MPI_Group exposureGroup, MPI_Group accessGroup )
293
int k, i, j, one = 1;
295
for (k=0; k<MAX_RUNS; k++) {
296
MPI_Barrier( MPI_COMM_WORLD );
297
MPI_Win_post( exposureGroup, 0, win );
298
MPI_Win_start( accessGroup, 0, win );
300
for (i=0; i<cnt; i++) {
301
MPI_Accumulate( &one, sz, MPI_INT, destRank,
302
j, sz, MPI_INT, MPI_SUM, win );
305
MPI_Win_complete( win );
227
/* Accumulate kernel with fence synchronisation (compact formatting).
 * Shown here: MAX_RUNS repetitions; each has a barrier on MPI_COMM_WORLD, an
 * MPI_Win_fence, cnt MPI_Accumulate(MPI_SUM) calls moving sz MPI_INTs to
 * destRank at target displacement j, and a second MPI_Win_fence.
 * NOTE(review): braces and whatever initialises or advances j are missing
 * from this text (embedded numbering skips lines) - confirm.
 * NOTE(review): `one` is a single int used as origin buffer with count sz;
 * sz > 1 would read beyond it - verify intent. */
void RunAccFence(MPI_Win win, int destRank, int cnt, int sz)
229
int k, i, j, one = 1;
231
for (k = 0; k < MAX_RUNS; k++) {
232
MPI_Barrier(MPI_COMM_WORLD);
233
MPI_Win_fence(0, win);
235
for (i = 0; i < cnt; i++) {
236
MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
239
MPI_Win_fence(0, win);
243
/* Accumulate kernel with lock/unlock synchronisation (compact formatting).
 * Shown here: MAX_RUNS repetitions; each has a barrier on MPI_COMM_WORLD, an
 * MPI_Win_lock(MPI_LOCK_SHARED) on destRank, cnt MPI_Accumulate(MPI_SUM) calls
 * moving sz MPI_INTs at target displacement j, and MPI_Win_unlock on destRank.
 * NOTE(review): braces and whatever initialises or advances j are missing
 * from this text - confirm against the real file.
 * NOTE(review): `one` is a single int used as origin buffer with count sz;
 * sz > 1 would read beyond it - verify intent. */
void RunAccLock(MPI_Win win, int destRank, int cnt, int sz)
245
int k, i, j, one = 1;
247
for (k = 0; k < MAX_RUNS; k++) {
248
MPI_Barrier(MPI_COMM_WORLD);
249
MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
251
for (i = 0; i < cnt; i++) {
252
MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
255
MPI_Win_unlock(destRank, win);
259
/* Put kernel with fence synchronisation (compact formatting).
 * Shown here: MAX_RUNS repetitions; each has a barrier on MPI_COMM_WORLD, an
 * MPI_Win_fence, cnt MPI_Put calls moving sz MPI_INTs to destRank at target
 * displacement j, and a second MPI_Win_fence.
 * NOTE(review): braces and whatever initialises or advances j are missing
 * from this text - confirm against the real file.
 * NOTE(review): `one` is a single int used as origin buffer with count sz;
 * sz > 1 would read beyond it - verify intent. */
void RunPutFence(MPI_Win win, int destRank, int cnt, int sz)
261
int k, i, j, one = 1;
263
for (k = 0; k < MAX_RUNS; k++) {
264
MPI_Barrier(MPI_COMM_WORLD);
265
MPI_Win_fence(0, win);
267
for (i = 0; i < cnt; i++) {
268
MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
271
MPI_Win_fence(0, win);
275
/* Put kernel with lock/unlock synchronisation (compact formatting).
 * Shown here: MAX_RUNS repetitions; each has a barrier on MPI_COMM_WORLD, an
 * MPI_Win_lock(MPI_LOCK_SHARED) on destRank, cnt MPI_Put calls moving sz
 * MPI_INTs at target displacement j, and MPI_Win_unlock on destRank.
 * NOTE(review): braces and whatever initialises or advances j are missing
 * from this text - confirm against the real file.
 * NOTE(review): `one` is a single int used as origin buffer with count sz;
 * sz > 1 would read beyond it - verify intent. */
void RunPutLock(MPI_Win win, int destRank, int cnt, int sz)
277
int k, i, j, one = 1;
279
for (k = 0; k < MAX_RUNS; k++) {
280
MPI_Barrier(MPI_COMM_WORLD);
281
MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
283
for (i = 0; i < cnt; i++) {
284
MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
287
MPI_Win_unlock(destRank, win);
291
/* Put kernel with post/start/complete synchronisation (compact formatting).
 * Shown here: MAX_RUNS repetitions; each has a barrier on MPI_COMM_WORLD,
 * MPI_Win_post on exposureGroup, MPI_Win_start on accessGroup, cnt MPI_Put
 * calls moving sz MPI_INTs to destRank at target displacement j, and
 * MPI_Win_complete.
 * NOTE(review): no MPI_Win_wait matching the post is visible, and braces plus
 * whatever initialises or advances j are missing - presumably on omitted
 * lines; confirm.
 * NOTE(review): `one` is a single int used as origin buffer with count sz;
 * sz > 1 would read beyond it - verify intent. */
void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
292
MPI_Group exposureGroup, MPI_Group accessGroup)
294
int k, i, j, one = 1;
296
for (k = 0; k < MAX_RUNS; k++) {
297
MPI_Barrier(MPI_COMM_WORLD);
298
MPI_Win_post(exposureGroup, 0, win);
299
MPI_Win_start(accessGroup, 0, win);
301
for (i = 0; i < cnt; i++) {
302
MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
305
MPI_Win_complete(win);
310
/* Accumulate kernel with post/start/complete synchronisation (compact
 * formatting).
 * Shown here: MAX_RUNS repetitions; each has a barrier on MPI_COMM_WORLD,
 * MPI_Win_post on exposureGroup, MPI_Win_start on accessGroup, cnt
 * MPI_Accumulate(MPI_SUM) calls moving sz MPI_INTs to destRank at target
 * displacement j, and MPI_Win_complete.
 * NOTE(review): the text stops at MPI_Win_complete; no MPI_Win_wait, braces,
 * or statements that initialise or advance j are visible - the function
 * probably continues beyond what is shown; confirm.
 * NOTE(review): `one` is a single int used as origin buffer with count sz;
 * sz > 1 would read beyond it - verify intent. */
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
311
MPI_Group exposureGroup, MPI_Group accessGroup)
313
int k, i, j, one = 1;
315
for (k = 0; k < MAX_RUNS; k++) {
316
MPI_Barrier(MPI_COMM_WORLD);
317
MPI_Win_post(exposureGroup, 0, win);
318
MPI_Win_start(accessGroup, 0, win);
320
for (i = 0; i < cnt; i++) {
321
MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
324
MPI_Win_complete(win);