1
/*$Id: comdegree.c,v 1.1.2.1 2007-06-20 17:41:49 vinod Exp $*/
4
* Pacific Northwest National Laboratory,
5
* Battelle Memorial Institute.
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
10
* are met: - Redistributions of source code must retain the above
11
* copyright notice, this list of conditions and the following disclaimer.
13
* - Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
* - Neither the name of the Battelle nor the names of its contributors
17
* may be used to endorse or promote products derived from this software
18
* without specific prior written permission.
20
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
/*************************
36
* This test checks the network's ability to overlap data transfers.
37
* It does it both for an optimistic case (with no other communication) and
38
* a more realistic case.
40
* --Pacific Northwest National Laboratory
42
*************************/
53
# define MPGROUP (char *)NULL
54
# define MP_INIT(arc,argv)
56
# define MPGROUP "mp_working_group"
57
# define MP_INIT(arc,argv) pvm_init(arc, argv)
59
# define MP_FINALIZE() pvm_exit()
60
# define MP_BARRIER() pvm_barrier(MPGROUP,-1)
61
# define MP_MYID(pid) *(pid) = pvm_getinst(MPGROUP,pvm_mytid())
62
# define MP_PROCS(pproc) *(pproc) = (int)pvm_gsize(MPGROUP)
63
void pvm_init(int argc, char *argv[]);
67
# define MP_BARRIER() SYNCH_(&tcg_tag)
68
/* Initialize the message-passing layer; uses the macro's own first parameter
   instead of silently capturing a variable named argc from the caller. */
# define MP_INIT(arc,argv) PBEGIN_((arc),(argv))
69
# define MP_FINALIZE() PEND_()
70
# define MP_MYID(pid) *(pid) = (int)NODEID_()
71
# define MP_PROCS(pproc) *(pproc) = (int)NNODES_()
74
# define MP_BARRIER() MPI_Barrier(MPI_COMM_WORLD)
75
# define MP_FINALIZE() MPI_Finalize()
76
/* Initialize MPI; takes the address of the macro's own first parameter
   instead of silently capturing a variable named argc from the caller. */
# define MP_INIT(arc,argv) MPI_Init(&(arc),&(argv))
77
# define MP_MYID(pid) MPI_Comm_rank(MPI_COMM_WORLD, (pid))
78
/* Store the communicator size in *pproc. Expands to a plain expression (no
   trailing ';') so it is safe in if/else bodies and in expression context. */
# define MP_PROCS(pproc) MPI_Comm_size(MPI_COMM_WORLD, (pproc))
79
# define MP_TIMER() MPI_Wtime()
85
/***************************** macros ************************/
86
/* Copy `bytes` bytes from src to dst via memcpy; note the (src, dst) argument
   order is the reverse of memcpy's (dst, src). */
#define COPY(src, dst, bytes) memcpy((dst),(src),(bytes))
87
/* Larger of a and b (yields a when they compare equal). Arguments can be
   evaluated more than once, so do not pass expressions with side effects. */
#define MAX(a,b) (((a) >= (b)) ? (a) : (b))
88
/* Smaller of a and b (yields a when they compare equal). Arguments can be
   evaluated more than once, so do not pass expressions with side effects. */
#define MIN(a,b) (((a) <= (b)) ? (a) : (b))
89
/* Absolute value of a: negates a when it is below zero. The argument is
   evaluated twice, so do not pass an expression with side effects. */
#define ABS(a) (((a) <0) ? -(a) : (a))
91
/***************************** global data *******************/
95
void create_array(void *a[], int size)
97
armci_size_t bytes=size;
100
rc = ARMCI_Malloc(a, bytes);
104
printf("%d after malloc ndim=%d b=%d ptr=%p\n",me,ndim,(int) bytes,a[me]);
112
void destroy_array(void *ptr[])
116
assert(!ARMCI_Free(ptr[me]));
124
void test_get_multidma()
127
int i,j, proc=1,Idx=1,idx=0;
128
void *b[MAXPROC], *a[MAXPROC];
129
int left = (me+nproc-1) % nproc;
130
int right = (me+1) % nproc;
131
int sendersright=0,sendersleft=0;
132
int loopcnt=10, itercount=5,less=2, strl; /*less>1 takes a partial plane */
133
double tt, t0[LCC],t1[LCC],t2[LCC],t3[LCC],t4=0,t5=0,t6=0;
134
armci_hdl_t hdl1,hdl2;
143
/* create shared and local arrays */
144
create_array(b, 1024*1024*10);
145
create_array(a, 1024*1024*10);
147
ARMCI_INIT_HANDLE(&hdl1);
148
ARMCI_INIT_HANDLE(&hdl2);
149
ARMCI_NbGet((double*)b[left],(double*)a[me],1024,left,&hdl1);
150
ARMCI_NbGet((double*)b[right]+1024,(double*)a[me]+1024,1024,
158
for(j=0;j<itercount;j++){
159
for(i=0;i<loopcnt;i++){
160
int lc, rc,wc,lc1,rc1,wc1,bytes;
162
sendersright = (j+1)%nproc;
163
sendersleft = (j+nproc-1)%nproc;
165
bytes = 1024*pow(2,i);
167
ARMCI_INIT_HANDLE(&hdl1);
170
/*first time a regular call*/
172
ARMCI_NbGet((double*)b[left],(double*)a[me],bytes, left,&hdl1);
174
t1[i] += (MP_TIMER()-tt);
178
/*now time 1 left + 1 right but realize there is one extra issue*/
179
ARMCI_INIT_HANDLE(&hdl1);
180
ARMCI_INIT_HANDLE(&hdl2);
182
ARMCI_NbGet((double*)b[left],(double*)a[me],bytes/2,left,&hdl1);
183
ARMCI_NbGet((double*)b[right]+bytes/16,(double*)a[me]+bytes/16,bytes/2,
187
t2[i] += (MP_TIMER()-tt);
191
/*now time both to the left*/
192
ARMCI_INIT_HANDLE(&hdl1);
193
ARMCI_INIT_HANDLE(&hdl2);
195
ARMCI_NbGet((double*)b[left],(double*)a[me],bytes/2,left,&hdl1);
196
ARMCI_NbGet((double*)b[left]+bytes/16,(double*)a[me]+bytes/16,bytes/2,
200
t3[i] += ( MP_TIMER()-tt);
207
for(i=0;i<loopcnt;i++){
208
fprintf(stderr,"\n%.0f\t%.2e\t%.2e\t%.2e",
209
1024.0*pow(2,i),t1[i]/loopcnt,t3[i]/loopcnt,
217
for(i=0;i<loopcnt;i++){
218
fprintf(stderr,"\n%.0f\t%.2e\t%.2e\t%.2e",
219
1024.0*pow(2,i),t1[i]/loopcnt,t3[i]/loopcnt,
226
for(j=0;j<nproc;j++) {
228
for(i=0;i<loopcnt;i++){
229
printf("\n%d:size=%f onesnd=%.2e twosnd=%.2e twosnddiffdir=%.2e\n",
230
me,1024.0*pow(2,i),t1[i]/loopcnt,t3[i]/loopcnt,
248
void test_put_multidma()
251
int i,j, proc=1,Idx=1,idx=0;
252
void *b[MAXPROC], *a[MAXPROC];
253
int left = (me+nproc-1) % nproc;
254
int right = (me+1) % nproc;
255
int sendersright=0,sendersleft=0;
256
int loopcnt=LCC, itercount=1000,less=2, strl; /*less>1 takes a partial plane */
257
double tt, t0[LCC],t1[LCC],t2[LCC],t3[LCC],t4=0,t5=0,t6=0;
258
armci_hdl_t hdl1,hdl2;
261
/* create shared and local arrays */
262
create_array(b, 1024*1024*10);
263
create_array(a, 1024*1024*10);
272
for(j=0;j<itercount;j++){
273
for(i=0;i<loopcnt;i++){
274
int lc, rc,wc,lc1,rc1,wc1,bytes;
276
sendersright = (j+1)%nproc;
277
sendersleft = (j+nproc-1)%nproc;
279
bytes = 1024*pow(2,i)/8;
281
ARMCI_INIT_HANDLE(&hdl1);
282
ARMCI_NbPut((double*)a[me]+1024,(double*)b[left]+1024,bytes,left,&hdl1);
284
ARMCI_INIT_HANDLE(&hdl1);
285
ARMCI_NbPut((double*)a[me]+1024,(double*)b[right]+1024,bytes,right,&hdl1);
288
ARMCI_INIT_HANDLE(&hdl1);
292
ARMCI_NbPut((double*)a[me],(double*)b[left],bytes, left,&hdl1);
294
t1[i] += (MP_TIMER()-tt);
295
//lc=armci_notify(left);
297
//rc = armci_notify_wait(right,&wc);
298
//t1[i] += (MP_TIMER()-tt);
300
ARMCI_INIT_HANDLE(&hdl1);
301
ARMCI_INIT_HANDLE(&hdl2);
305
ARMCI_NbPut((double*)a[me],(double*)b[left],bytes,left,&hdl1);
306
ARMCI_NbPut((double*)a[me],(double*)b[right],bytes,
309
t2[i] += (MP_TIMER()-tt);
310
//lc=armci_notify(left);
311
//lc1=armci_notify(right);
313
//rc1 = armci_notify_wait(left,&wc1);
314
//rc = armci_notify_wait(right,&wc);
315
//t2[i] += (MP_TIMER()-tt);
319
ARMCI_INIT_HANDLE(&hdl1);
320
ARMCI_INIT_HANDLE(&hdl2);
324
ARMCI_NbPut((double*)a[me],(double*)b[left],bytes/2,left,&hdl1);
325
ARMCI_NbPut((double*)a[me]+bytes/16,(double*)b[left]+bytes/16,bytes/2,
329
t3[i] += ( MP_TIMER()-tt);
330
lc=armci_notify(left);
332
rc = armci_notify_wait(right,&wc);
333
t3[i] += ( MP_TIMER()-tt);
342
for(i=0;i<loopcnt;i++){
343
fprintf(stderr,"\n%.0f\t%.2e\t%.2e\t%.2e",
344
128.0*pow(2,i),t1[i]/itercount,t3[i]/itercount,
353
for(i=0;i<loopcnt;i++){
354
fprintf(stderr,"\n%.0f\t%.2e\t%.2e\t%.2e",
355
128.0*pow(2,i),t1[i]/itercount,t3[i]/itercount,
362
for(j=0;j<nproc;j++) {
364
for(i=0;i<loopcnt;i++){
365
printf("\n%d:size=%f onesnd=%.2e twosnd=%.2e twosnddiffdir=%.2e\n",
366
me,1024.0*pow(2,i),t1[i]/loopcnt,t3[i]/loopcnt,
384
int main(int argc, char* argv[])
397
printf("\nTesting transfer overlap with ARMCI put calls\n");
398
printf("\nsize\tone-send\ttwo-sends\ttwo-sends-diff-dir\n");
406
printf("\nTesting transfer overlap with ARMCI get calls\n");
407
printf("\nsize\tone-send\ttwo-sends\ttwo-sends-diff-dir\n");
413
if(me==0)printf("\n");