5
/* $Id: test_mt.c,v 1.1.2.1 2007-07-02 05:34:13 d3p687 Exp $
8
* Developed by Andriy Kot <andriy.kot@pnl.gov>
9
* Copyright (c) 2007 Pacific Northwest National Laboratory
12
* 2007-02-17 - created
33
#define DEBUG /* note: a TBUFSIZE (per thread) buffer is used to print arrays */
34
#define TBUFSIZE 65535
35
/* prints debug information to files named test_mt.th<#th_rank>*/
37
#define NOTHREADS_ /* debug: does not spawn threads if set */
39
typedef double atype_t; /* type of the element of the array */
40
#define MAX_TPP 8 /* max threads per processor */
41
#define TPP 1//2 /* threads per processor */
42
#define ASIZE 4//5000 /* size of array */
43
#define ITERS 1//10 /* iterations per test */
44
enum {PUT = 101, GET, ACC};
52
#define THREAD_OFF 1000.0
54
#define ELEM_INC 0.0003
56
/* each ARMCI mem block that is allocated consists of
57
* th_size blocks, one per each thread (systemwide), which consist of
58
* iters blocks, one per each iteration, which consist of
59
* asize doubles (or atype_t)
61
#define ASIZE_BYTES (asize*sizeof(atype_t))
62
#define ASIZExITERS (asize*iters)
63
#define ASIZExITERS_BYTES (ASIZE_BYTES*iters)
64
#define ASIZExITERSxTH (asize*iters*th_size)
65
#define ASIZExITERSxTH_BYTES (ASIZE_BYTES*iters*th_size)
67
/* p_ - pointer (points to area owned by particular thread)
68
* th_ - thread section
70
* i_ - element offset (0 for beginning of array)
72
/* lvalue of element i_ of iteration it_ inside the section of thread th_ in block p_;
 * every argument is parenthesized so that expressions such as `t+1` index correctly */
#define AELEM(p_,th_,it_,i_) (((atype_t *)(p_))[(th_)*ASIZExITERS+(it_)*asize+(i_)])
73
/* value expected at element i_ of iteration it_ in the data of thread th_;
 * arguments are parenthesized so compound expressions are scaled as a whole */
#define AELEM_VAL(th_,it_,i_) (THREAD_OFF*(th_)+ITER_OFF*(it_)+ELEM_INC*((i_)+1))
75
int rank, size, th_size;
78
int *pairs, *rnd_tgts, rnd_one;
79
void **ptrs1, **ptrs2;
82
char fname[] = "test_mt.th000";
86
/* printf that fires only when the global `rank` is 0; expands to a bare `if`,
 * so do not follow a use with `else` */
#define PRINTF0 if(!rank)printf
87
/* printf that fires only when TH_ME is 0; like TH_ME it needs a local th_idx
 * in scope, and it expands to a bare `if` (do not follow with `else`) */
#define PRINTF0T if(!TH_ME)printf
88
/* pseudo-random int in the half-open range [l_, h_); dividing by RAND_MAX+1.0
 * keeps the result strictly below h_ even when rand() returns RAND_MAX, so the
 * value is always a valid index when called as RND(0, count) */
#define RND(l_,h_) ((l_)+(int)(((double)((h_)-(l_)))*rand()/(RAND_MAX+1.0)))
89
/* rank of the current thread as stored in th_rank[]; requires a local th_idx in scope */
#define TH_ME (th_rank[th_idx])
91
void prndbg(int th_idx, char *fmt, ...)
97
# define DSCR dbg[th_idx]
100
printf("%3d: ", TH_ME);
102
vfprintf(DSCR, fmt, ap);
111
printf("Usage: test_mt, or \n");
112
printf(" test_mt -tTHREADS_PER_PROC -sARRAY_SIZE -iITERATIONS_COUNT\n");
115
armci_msg_finalize();
119
void *thread_main(void *arg);
120
void zero_array(int th_idx, void *ptr);
121
void init_array(int th_idx, void *ptr);
122
void print_array(int th_idx, char *msg, atype_t *array);
123
void test_pairs(int th_idx); // deprecated?
124
void test_PutGetAcc(int th_idx, int tgt, int *rmt, int rmt_cnt);
125
void check_PutGetAcc(int th_idx, int rmt, int op, atype_t *array);
127
int main(int argc, char *argv[])
132
thread_t threads[MAX_TPP];
135
ARMCI_Init_args(&argc, &argv);
136
size = armci_msg_nproc();
137
rank = armci_msg_me();
139
while ((ch = getopt(argc, argv, "t:s:i:d:h")) != -1) {
141
case 't': /* # of threads */
143
if (tpp < 1 || tpp > MAX_TPP) {
144
PRINTF0("\"%s\" is improper value for -t, should be a "
145
"number between 1 and %d(MAX_TPP)\n",
150
case 'i': /* # of iterations */
151
iters = atoi(optarg);
153
PRINTF0("\"%s\" is improper value for -t, should be a "
154
"number equal or larger than 1\n", optarg);
158
case 's': /* # of elements in the array */
159
asize = atoi(optarg);
161
PRINTF0("\"%s\" is improper value for -s, should be a "
162
"number equal or larger than 1\n", optarg);
167
delay = atoi(optarg);
168
break; /* delay before start */
171
break; /* print usage info */
176
PRINTF0("Warning: NOTHREADS debug symbol is set -- running w/o threads\n");
178
th_size = size * tpp;
179
PRINTF0("\nTest of multi-threaded capabilities:\n"
180
"%d threads per process (%d threads total),\n"
181
"%d array elements of size %d,\n"
182
"%d iteration(s)\n\n", tpp, th_size, asize, sizeof(atype_t), iters);
184
printf("%d: %d\n", rank, getpid());
190
for (i = 0; i < tpp; i++) {
191
th_rank[i] = rank * tpp + i;
194
#if defined(DEBUG) && defined(LOG2FILE)
195
for (i = 0; i < tpp; i++) {
196
fname[10] = '0' + th_rank[i] / 100;
197
fname[11] = '0' + th_rank[i] % 100 / 10;
198
fname[12] = '0' + th_rank[i] % 10;
199
dbg[i] = fopen(fname, "w");
202
for (i = 0; i < tpp; i++) {
203
prndbg(i, "proc %d, thread %d(%d):\n", rank, i, th_rank[i]);
206
/* set global seed (to ensure same random sequence across procs) */
207
time_seed = (unsigned)time(NULL);
208
armci_msg_brdcst(&time_seed, sizeof(time_seed), 0);
211
prndbg(0, "seed = %u\n", time_seed);
213
pairs = calloc(th_size, sizeof(int));
214
for (i = 0; i < th_size; i++) {
217
for (i = 0; i < th_size; i++) {
218
if (pairs[i] != -1) {
222
while (i == r || pairs[r] != -1) {
228
for (i = 0, cbufl = 0; i < th_size; i++)
229
cbufl += sprintf(cbuf + cbufl, " %d->%d|%d->%d",
230
i, pairs[i], pairs[i], pairs[pairs[i]]);
231
prndbg(0, "random pairs:%s\n", cbuf);
233
rnd_tgts = calloc(th_size, sizeof(int));
234
for (i = 0, cbufl = 0; i < th_size; i++) {
235
rnd_tgts[i] = RND(0, th_size);
236
if (rnd_tgts[i] == i) {
240
cbufl += sprintf(cbuf + cbufl, " %d", rnd_tgts[i]);
242
prndbg(0, "random targets:%s\n", cbuf);
244
rnd_one = RND(0, th_size);
245
prndbg(0, "random one = %d\n", rnd_one);
247
assert(ptrs1 = calloc(th_size, sizeof(void *)));
248
assert(ptrs2 = calloc(th_size, sizeof(void *)));
250
thread_main((void *)(long)0);
252
for (i = 0; i < tpp; i++) {
253
THREAD_CREATE(threads + i, thread_main, (void *)(long)i);
255
for (i = 0; i < tpp; i++) {
256
THREAD_JOIN(threads[i], NULL);
261
PRINTF0("Tests Completed\n");
264
#if defined(DEBUG) && defined(LOG2FILE)
265
for (i = 0; i < tpp; i++) {
271
armci_msg_finalize();
277
void *thread_main(void *arg)
280
int tgt, *rmt, rmt_cnt;
282
th_idx = (int)(long)arg;
283
prndbg(th_idx, "thread %d(%d|%d) STARTED\n", TH_ME, rank, th_idx);
285
assert(!ARMCI_MALLOC_MT(ptrs1, ASIZExITERSxTH_BYTES));
286
assert(!ARMCI_MALLOC_MT(ptrs2, ASIZExITERSxTH_BYTES));
288
for (i = 0, cbufl = 0; i < th_size; i++) {
289
cbufl += sprintf(cbuf + cbufl, " %p", ptrs1[i]);
291
prndbg(th_idx, "ptrs1: %s\n", cbuf);
292
for (i = 0, cbufl = 0; i < th_size; i++) {
293
cbufl += sprintf(cbuf + cbufl, " %p", ptrs2[i]);
295
prndbg(th_idx, "ptrs2: %s\n", cbuf);
298
init_array(th_idx, ptrs1[TH_ME]);
299
init_array(th_idx, ptrs2[TH_ME]);
302
assert(rmt = calloc(th_size, sizeof(int)));
304
PRINTF0T(" TESTING GET/PUT/ACC\n\n");
307
PRINTF0T("> Testing pair-wise communication pattern ...\n");
308
tgt = rmt[0] = pairs[TH_ME];
310
test_PutGetAcc(th_idx, tgt, rmt, rmt_cnt);
311
PRINTF0T(" pair-wise is OK\n\n");
312
//return NULL; // REMOVE WHEN DONE
314
/* test random target */
315
PRINTF0T("> Testing random target communication pattern ...\n");
316
tgt = rnd_tgts[TH_ME];
317
for (i = 0, rmt_cnt = 0; i < th_size; i++) if (rnd_tgts[i] == TH_ME) {
320
test_PutGetAcc(th_idx, tgt, rmt, rmt_cnt);
321
PRINTF0T(" random target is OK\n\n");
323
/* test all to one */
324
PRINTF0T("> Testing hotspot(all to one) communication pattern ...\n");
325
if (TH_ME == rnd_one) {
327
for (i = 0, rmt_cnt = 0; i < th_size; i++) if (i != TH_ME) {
335
test_PutGetAcc(th_idx, tgt, rmt, rmt_cnt);
336
PRINTF0T(" hotspot is OK\n\n");
341
void zero_array(int th_idx, void *ptr)
344
for (i = 0; i < th_size; i++)for (j = 0; j < iters; j++)for (k = 0; k < asize; k++) {
345
AELEM(ptr, i, j, k) = 0.0;
349
void init_array(int th_idx, void *ptr)
352
for (i = 0; i < th_size; i++)for (j = 0; j < iters; j++)for (k = 0; k < asize; k++) {
353
AELEM(ptr, i, j, k) = AELEM_VAL(TH_ME, j, k);
355
/*AELEM(ptr, i, j) = THREAD_OFF*TH_ME + ITER_OFF*i + ELEM_INC*(j+1);*/
357
print_array(th_idx, "initialized", ptr);
360
for (i = 0, cbufl = 0; i < th_size; i++) {
361
for (j = 0; j < iters; j++) {
362
cbufl += sprintf(cbuf + cbufl, "(%d,%d)%p:", i, j, &(((atype_t *)ptr)[i*ASIZExITERS+j*asize]));
363
for (k = 0; k < asize; k++) {
364
cbufl += sprintf(cbuf + cbufl, " %.4f", ((atype_t *)ptr)[i*ASIZExITERS+j*asize+k]);
366
cbufl += sprintf(cbuf + cbufl, "\n");
368
cbufl += sprintf(cbuf + cbufl, "\n");
371
for (i = 0, cbufl = 0; i < (th_size * iters * asize); i++) {
372
cbufl += sprintf(cbuf + cbufl, " %.4f", ((atype_t *)ptr)[i]);
375
prndbg(th_idx, "initialized:\n%s\n", cbuf);
379
void print_array(int th_idx, char *msg, atype_t *array)
385
if (ASIZExITERSxTH_BYTES > TBUFSIZE / 2) {
386
prndbg(th_idx, "%s:\n%s\n", msg, "array is too big to print");
389
for (i = 0, tbufl = 0; i < th_size; i++) {
390
for (j = 0; j < iters; j++) {
391
tbufl += sprintf(tbuf + tbufl, "(%d,%d)%p:", i, j, &AELEM(array, i, j, 0));
392
for (k = 0; k < asize; k++) {
393
tbufl += sprintf(tbuf + tbufl, " %.4f", AELEM(array, i, j, k));
395
tbufl += sprintf(tbuf + tbufl, "\n");
397
tbufl += sprintf(tbuf + tbufl, "\n");
399
prndbg(th_idx, "%s:\n%s\n", msg, tbuf);
405
void print_array(int th_idx, char *msg, atype_t *array, int count) {
408
for (i = 0, cbufl = 0; i < count; i++)
409
cbufl+=sprintf(cbuf+cbufl, " %.4f", array[i]);
410
prndbg(th_idx, "%s:%s\n", msg, cbuf);
415
int check_result(atype_t *array, int th)
417
int i, j, k, mismatch;
418
for (i = 0, k = 0, mismatch; i < iters; i++) for (j = 0; j < asize; j++, k++) {
419
if (array[k] != AELEM_VAL(th, i, j)) {
420
printf("mismatch detected: th=%d, i=%d, j=%d, k=%d, elem=%d, array=%d\n",
421
th, i, j, k, AELEM_VAL(th, i, j), array[k]);
428
void test_pairs(int th_idx)
430
int rem_th, rem_proc;
434
rem_th = pairs[TH_ME];
435
rem_proc = TH2PROC(rem_th);
437
prndbg(th_idx, "test_pair: %d<->%d(%d)\n", TH_ME, rem_th, rem_proc);
441
print_array(th_idx, "before", &AELEM(ptrs2[TH_ME], rem_th, 0, 0), ASIZExITERS);
443
for (i = 0; i < iters; i++) {
444
/* src - addr of my thread block on remote proc/thread */
445
src = &AELEM(ptrs1[rem_th], TH_ME, i, 0);
446
/* dst - addr of remote thread block on my proc/thread */
447
dst = &AELEM(ptrs2[TH_ME], rem_th, i, 0);
448
/* get from my pair */
449
assert(!ARMCI_Get(src, dst, ASIZE_BYTES, rem_proc));
454
print_array(th_idx, "rcvd", &AELEM(ptrs2[TH_ME], rem_th, 0, 0), ASIZExITERS);
457
check_result(&AELEM(ptrs2[TH_ME], rem_th, 0, 0), rem_th);
461
/* test Put/Get/Acc sequence regardless of communication pattern
462
* tgt -- remote target for put/get/acc (none if -1)
463
* rmt -- list of remote threads that put/acc to here (correctness is checked here)
464
* rmt_cnt -- # of threads in rmt
466
void test_PutGetAcc(int th_idx, int tgt, int *rmt, int rmt_cnt)
468
/* a - local thread, b - remote thread */
469
int a, b, b_proc, stride[2], count[2];
473
for (i = 0, cbufl = 0; i < rmt_cnt; i++) {
474
cbufl += sprintf(cbuf + cbufl, " %d", rmt[i]);
476
prndbg(th_idx, "test_PutGetAcc: put/acc to %d, get from %d, check put/acc from %s\n",
477
tgt, tgt, rmt_cnt ? cbuf : "none");
480
stride[0] = ASIZE_BYTES;
481
count[0] = ASIZE_BYTES;
485
init_array(th_idx, ptrs1[TH_ME]);
486
init_array(th_idx, ptrs2[TH_ME]);
489
/* put - put a.ptrs1[b] into b.ptrs2[a] */
493
for (i = 0; i < iters; i++) {
494
src = &AELEM(ptrs1[a], b, i, 0); /* a.ptrs1[b] */
495
dst = &AELEM(ptrs2[b], a, i, 0); /* b.ptrs2[a] */
496
// assert(!ARMCI_Put(src, dst, ASIZE_BYTES, b_proc));
497
assert(!ARMCI_PutS(src, stride, dst, stride, count, 1, b_proc));
502
print_array(th_idx, "PUT:ptrs1[TH_ME]", ptrs1[TH_ME]);
503
print_array(th_idx, "PUT:ptrs2[TH_ME]", ptrs2[TH_ME]);
506
/* chk put(s) from b(s): a.ptrs2[b] */
507
for (j = 0; j < rmt_cnt; j++) {
510
check_PutGetAcc(th_idx, b, PUT, &AELEM(ptrs2[a], b, 0, 0));
512
//return; // REMOVE WHEN DONE
515
init_array(th_idx, ptrs1[TH_ME]);
516
init_array(th_idx, ptrs2[TH_ME]);
519
/* get - get b.ptrs1[a] into a.ptrs2[b] */
523
for (i = 0; i < iters; i++) {
524
src = &AELEM(ptrs1[b], a, i, 0); /* b.ptrs1[a] */
525
dst = &AELEM(ptrs2[a], b, i, 0); /* a.ptrs2[b] */
526
assert(!ARMCI_GetS(src, stride, dst, stride, count, 1, b_proc));
529
print_array(th_idx, "GET:ptrs1[TH_ME]", ptrs1[TH_ME]);
530
print_array(th_idx, "GET:ptrs2[TH_ME]", ptrs2[TH_ME]);
533
/* chk get from b: a.ptrs2[b] */
535
check_PutGetAcc(th_idx, b, GET, &AELEM(ptrs2[a], b, 0, 0));
540
init_array(th_idx, ptrs1[TH_ME]);
541
init_array(th_idx, ptrs2[TH_ME]);
544
/* acc - acc a.ptrs1[b] * scale + b.ptrs2[a] into b.ptrs2[a] */
548
for (i = 0; i < iters; i++) {
549
src = &AELEM(ptrs1[a], b, i, 0); /* a.ptrs1[b] */
550
dst = &AELEM(ptrs2[b], a, i, 0); /* b.ptrs2[a] */
551
assert(!ARMCI_AccS(ARMCI_ACC_DBL, &scale, src, stride, dst, stride, count, 1, b_proc));
556
print_array(th_idx, "ACC:ptrs1[TH_ME]", ptrs1[TH_ME]);
557
print_array(th_idx, "ACC:ptrs2[TH_ME]", ptrs2[TH_ME]);
560
/* chk acc(s) from b(s): a.ptrs2[b] */
561
for (j = 0; j < rmt_cnt; j++) {
564
check_PutGetAcc(th_idx, b, ACC, &AELEM(ptrs2[a], b, 0, 0));
571
void check_PutGetAcc(int th_idx, int rmt, int op, atype_t *array)
576
for (i = 0, k = 0; i < iters; i++) for (j = 0; j < asize; j++, k++) {
577
expected = op == ACC ? AELEM_VAL(TH_ME, i, j) + scale * AELEM_VAL(rmt, i, j) :
578
AELEM_VAL(rmt, i, j);
579
if (array[k] != expected) {
580
printf("mismatch detected: TM_ME=%d, rmt=%d, op=%d, i=%d, j=%d, "
581
"k=%d, expected=%f, array=%f\n",
582
TH_ME, rmt, op, i, j, k, expected, array[k]);
588
if (array[k] != AELEM_VAL(th, i, j)) {
589
printf("mismatch detected: th=%d, i=%d, j=%d, k=%d, elem=%d, array=%d\n",
590
th, i, j, k, AELEM_VAL(th, i, j), array[k]);