5
/* $Id: perf_nb.c,v 1.3 2004-03-29 19:14:51 vinod Exp $ */
22
# define sleep(x) Sleep(1000*(x))
31
/* Solaris has shared memory shortages in the default system configuration */
35
#elif defined(__alpha__)
49
/* Extended dimensions: each EDIMn is the corresponding DIMn plus OFF.
 * DIMn and OFF are defined outside this excerpt.
 * NOTE(review): presumably padded extents for array allocation -- confirm. */
#define EDIM1 (DIM1+OFF)
50
#define EDIM2 (DIM2+OFF)
51
#define EDIM3 (DIM3+OFF)
52
#define EDIM4 (DIM4+OFF)
53
#define EDIM5 (DIM5+OFF)
54
#define EDIM6 (DIM6+OFF)
55
#define EDIM7 (DIM7+OFF)
59
#define MAX_DIM_VAL 50
72
/* Largest 1-D section, counted in doubles, used by test_perf_nb(): it
 * initializes elems[1] and is the upper bound of the size-doubling loop. */
#define MAXELEMS 131072 /* 262144 */
73
/* Same value as MAXELEMS. */
#define MAX_REQUESTS MAXELEMS
78
/* Run-time switches; both are plain globals with the defaults shown. */
int VERIFY = 1; /* verifies results */
79
/* When non-zero, verify_results() prints a " verifying ... O.K." line on
 * the process with me == 0 after each successful check. */
int DEBUG = 0; /* if debug=1, dump extra messages */
81
/***************************** macros ************************/
82
/* Copy nbytes bytes from `from` to `to`; thin wrapper around memcpy with
 * the source given first. The regions must not overlap (memcpy rules). */
#define COPY(from, to, nbytes) memcpy((to), (from), (nbytes))
83
/* Larger of x and y (x when they compare equal).
 * Arguments are evaluated more than once -- pass side-effect-free
 * expressions only. */
#define ARMCI_MAX(x,y) (((y) <= (x)) ? (x) : (y))
84
/* Smaller of x and y (x when they compare equal).
 * Arguments are evaluated more than once -- pass side-effect-free
 * expressions only. */
#define ARMCI_MIN(x,y) (((y) >= (x)) ? (x) : (y))
85
/* Absolute value of v for any arithmetic type.
 * The argument is evaluated twice -- pass side-effect-free expressions
 * only. */
#define ARMCI_ABS(v) ((0 > (v)) ? -(v) : (v))
87
/***************************** global data *******************/
89
void *work[MAXPROC]; /* work array for propagating addresses */
90
/* Destination buffers, one pointer slot per process (indexed by process
 * id). Filled in by create_array(ddst, 2, elems) in test_perf_nb() and
 * used as the target of the put/get/accumulate calls and by
 * verify_results(). */
double *ddst[MAXPROC];
93
void pvm_init(int argc, char *argv[])
95
int mytid, mygid, ctid[MAXPROC];
99
if ((argc != 2) && (argc != 1)) {
106
if ((np = atoi(argv[1])) < 1) {
113
mygid = pvm_joingroup(MPGROUP);
117
i = pvm_spawn(argv[0], argv + 1, 0, "", np - 1, ctid);
120
while (pvm_gsize(MPGROUP) < np) {
125
pvm_barrier(MPGROUP, np);
127
printf("PVM initialization done!\n");
132
fprintf(stderr, "usage: %s <nproc>\n", argv[0]);
138
/*void create_array(void *a[], int elem_size, int ndim, int dims[])*/
139
void create_array(double *a[], int ndim, int dims[])
141
int bytes = sizeof(double), i, rc;
143
assert(ndim <= MAXDIMS);
144
for (i = 0; i < ndim; i++) {
148
rc = ARMCI_Malloc((void **)a, bytes);
155
/*void destroy_array(void *ptr[])*/
156
void destroy_array(double *ptr[])
160
assert(!ARMCI_Free(ptr[me]));
163
void verify_results(int op, int *elems)
171
for (j = 0; j < elems[1]; j++) {
172
if (ARMCI_ABS(ddst[me][j] - j * 1.001) > 0.1) {
173
ARMCI_Error("put failed...Invalid Value Obtained..1", 0);
177
if (DEBUG) if (me == 0) {
178
printf(" verifying put ..O.K.\n");
184
for (i = 1; i < nproc; i++) {
185
for (j = 0; j < elems[1]; j++) {
186
if (ARMCI_ABS(ddst[me][i*elems[1] + j] - j * 1.001 *(i + 1)) > 0.1) {
187
ARMCI_Error("get failed...Invalid Value Obtained..1", 0);
193
if (DEBUG) if (me == 0) {
194
printf(" verifying get ..O.K.\n\n");
200
for (j = 0; j < elems[1]; j++) {
201
/*printf("ddst[%d][%d] = %lf\n", me, j, ddst[me][j]);
203
if (ARMCI_ABS(ddst[me][j] - (double)nproc) > 0.1) {
204
ARMCI_Error("accumulate failed...Invalid Value Obtained..1", 0);
208
if (DEBUG)if (me == 0) {
209
printf(" verifying accumulate ..O.K.\n");
214
ARMCI_Error("Invalid Operation", 0);
220
void test_perf_nb(int dry_run)
223
int i, j, loop, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
224
int stride, k = 0, ntimes;
225
double stime, t1, t2, t3, t4, t5, t6, t7, t8, t9;
226
double *dsrc[MAXPROC], scale = 1.0;
227
armci_hdl_t hdl_get, hdl_put, hdl_acc;
229
create_array(ddst, 2, elems);
230
create_array(dsrc, 1, &elems[1]);
232
if (!dry_run)if (me == 0) {
233
printf("\n\t\t\tRemote 1-D Array Section\n");
234
printf("section get nbget wait put nbput ");
235
printf(" wait acc nbacc wait\n");
236
printf("------- -------- -------- -------- -------- --------");
237
printf(" -------- -------- -------- --------\n");
241
for (loop = 1; loop <= MAXELEMS; loop *= 2, k++) {
244
ntimes = (int)sqrt((double)(MAXELEMS / elems[1]));
249
/* -------------------------- SETUP --------------------------- */
250
/* initialize non-blocking handles, timers, src & dst buffers */
251
ARMCI_INIT_HANDLE(&hdl_put);
252
ARMCI_INIT_HANDLE(&hdl_get);
253
ARMCI_INIT_HANDLE(&hdl_acc);
254
t1 = t2 = t3 = t4 = t5 = t6 = t7 = t8 = t9 = 0.0;
255
for (i = 0; i < elems[1]; i++) {
256
dsrc[me][i] = i * 1.001 * (me + 1);
258
for (i = 0; i < elems[0]*elems[1]; i++) {
263
/* bytes transferred */
264
bytes = sizeof(double) * elems[1];
267
/* -------------------------- PUT/GET -------------------------- */
269
for (i = 1; i < nproc; i++) {
270
stime = armci_timer();
271
for (j = 0; j < ntimes; j++)
272
if ((rc = ARMCI_Put(&dsrc[me][0], &ddst[i][me*elems[1]], bytes, i))) {
273
ARMCI_Error("armci_nbput failed\n", rc);
275
t1 += armci_timer() - stime;
282
verify_results(PUT, elems);
284
for (i = 0; i < elems[0]*elems[1]; i++) {
290
for (i = 1; i < nproc; i++) {
291
stime = armci_timer();
292
for (j = 0; j < ntimes; j++)
293
if ((rc = ARMCI_Get(&dsrc[i][0], &ddst[me][i*elems[1]], bytes, i))) {
294
printf("%d: armci_get. rc=%d\n", me, rc);
296
ARMCI_Error("armci_nbget failed\n", rc);
298
t4 += armci_timer() - stime;
305
verify_results(GET, elems);
307
for (i = 0; i < elems[0]*elems[1]; i++) {
312
/* ------------------------ nb PUT/GET ------------------------- */
314
for (i = 1; i < nproc; i++) {
315
for (j = 0; j < ntimes; j++) {
316
stime = armci_timer();
317
if ((rc = ARMCI_NbPut(&dsrc[me][0], &ddst[i][me*elems[1]], bytes,
319
ARMCI_Error("armci_nbput failed\n", rc);
321
t2 += armci_timer() - stime;
322
stime = armci_timer();
323
ARMCI_Wait(&hdl_put);
324
t3 += armci_timer() - stime;
332
verify_results(PUT, elems);
334
for (i = 0; i < elems[0]*elems[1]; i++) {
340
for (i = 1; i < nproc; i++) {
341
for (j = 0; j < ntimes; j++) {
342
stime = armci_timer();
343
if ((rc = ARMCI_NbGet(&dsrc[i][0], &ddst[me][i*elems[1]], bytes,
345
ARMCI_Error("armci_nbget failed\n", rc);
347
t5 += armci_timer() - stime;
348
stime = armci_timer();
349
ARMCI_Wait(&hdl_get);
350
t6 += armci_timer() - stime;
358
verify_results(GET, elems);
360
for (i = 0; i < elems[0]*elems[1]; i++) {
366
/* ------------------------ Accumulate ------------------------- */
367
for (i = 0; i < elems[1]; i++) {
371
stride = elems[1] * sizeof(double);
373
for (j = 0; j < ntimes; j++) {
374
stime = armci_timer();
375
if ((rc = ARMCI_AccS(ARMCI_ACC_DBL, &scale, &dsrc[me][0], &stride,
376
&ddst[0][0], &stride, &bytes, 0, 0))) {
377
ARMCI_Error("armci_acc failed\n", rc);
379
t7 += armci_timer() - stime;
385
verify_results(ACC, elems);
387
for (i = 0; i < elems[0]*elems[1]; i++) {
394
/* See the note below for why this part is disabled */
395
/* ---------------------- nb-Accumulate ------------------------ */
396
for (i = 0; i < elems[1]; i++) {
400
stride = elems[1] * sizeof(double);
402
for (j = 0; j < ntimes; j++) {
403
stime = armci_timer();
404
if ((rc = ARMCI_NbAccS(ARMCI_ACC_DBL, &scale, &dsrc[me][0], &stride,
405
&ddst[0][0], &stride, &bytes, 0, 0, &hdl_acc))) {
406
ARMCI_Error("armci_nbacc failed\n", rc);
408
t8 += armci_timer() - stime;
409
stime = armci_timer();
410
ARMCI_Wait(&hdl_acc);
411
t9 += armci_timer() - stime;
417
verify_results(ACC, elems);
419
for (i = 0; i < elems[0]*elems[1]; i++) {
427
if (!dry_run) if (me == 0) printf("%d\t %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e\n",
428
bytes, t4 / ntimes, t5 / ntimes, t6 / ntimes, t1 / ntimes,
429
t2 / ntimes, t3 / ntimes, t7 / ntimes, t8 / ntimes, t9 / ntimes);
435
if (!dry_run)if (me == 0) {
444
int main(int argc, char *argv[])
446
ARMCI_Init_args(&argc, &argv);
447
nproc = armci_msg_nproc();
450
if (nproc < 2 || nproc > MAXPROC) {
453
"USAGE: 2 <= processes < %d - got %d\n", MAXPROC, nproc);
455
armci_msg_finalize();
460
printf("ARMCI test program (%d processes)\n", nproc);
466
printf("\n put/get/acc requests (Time in secs)\n\n");
476
printf("\nSuccess!!\n");
483
armci_msg_finalize();
488
NOTE: ARMCI_NbAcc fails in opus for buffer sizes greater than 400Kb