4
c $Id: mir_perf2.F,v 1.1 2003-02-26 15:33:38 d3g293 Exp $
5
c------------------------------------------------------------------------
6
c Program perf.x is used to test performance of GA put, get, accumulate |
7
c It has to be executed on four processors. |
8
c remote operations access data on processes 1,2,3 in the round-robin way|
9
c------------------------------------------------------------------------
14
#include "mafdecls.fh"
18
c*** Intitialize a message passing library
22
c*** Intitialize the GA package
24
if(ga_nnodes().ne.4 .and. ga_nodeid().eq.0)
25
$ call ga_error('Program requires 4 GA processes',ga_nnodes())
27
c*** Initialize the MA package
29
if (.not. ma_init(MT_DBL, heap,heap))
30
$ call ga_error('ma init failed',2*heap)
35
if(ga_nodeid().eq.0) print *,'All tests successful '
45
#include "mafdecls.fh"
49
integer n, nn, num_chunks
50
parameter (n = 1024*1024, nn = n/4, num_chunks=16)
51
double precision buf(nn)
54
integer ilo, ihi, jlo, jhi
55
integer nproc, me, loop
56
integer chunk(num_chunks)
57
data chunk /1,9,16,81,256,576,900,2304,4096,8281,
58
$ 16384,29241,65536,124609,193600,262144/
63
c*** Create global array
64
if (.not. ga_create(MT_DBL, n, 1, 'a', 0, 0, g_a))
65
$ call ga_error(' ga_create failed ',1)
77
55 format(' Performance of GA get, put & acc',
78
$ ' for 1-dimensional sections of array[',i7,']')
86
call ga_distribution(g_a, me, ilo, ihi, jlo, jhi)
88
& (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo, jhi, .true.)
93
& (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo, jhi,.false.)
99
subroutine TestPutGetAcc1
100
& (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo,jhi, local)
103
#include "testutil.fh"
105
integer num_chunks, chunk(num_chunks)
106
integer n, ilo, ihi, jlo,jhi,g_a
107
double precision buf(*), tg, tp, ta
108
double precision time_acc1, time_get1, time_put1
112
integer loop, jump, count, bytes
118
write(6,'(26X, 25HLocal 1-D Array Section )')
120
write(6,'(26X, 25HRemote 1-D Array Section )')
123
write(6,*)' section get put',
125
write(6,*)' bytes dim sec MB/s sec MB/s',
131
do loop = 1, num_chunks
132
bytes = util_mdtob(1)*chunk(loop) ! how much data is accessed
133
jump = n/(6000*loop) ! jump distance between consecutive patches
134
if(loop.eq.num_chunks)jump=0
136
c everybody touches own data
137
call ga_fill_patch(g_a, 1, n, 1, 1 , 1d0*me*loop)
139
tg=time_get1(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
145
c everybody touches own data
146
call ga_fill_patch(g_a, 1, n, 1, 1 , 1d0*me*loop)
148
tp=time_put1(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
154
c everybody touches own data
155
call ga_fill_patch(g_a, 1, n, 1, 1 , 1d0*me*loop)
157
ta=time_acc1(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
164
write(6,77)bytes, chunk(loop), tg,
165
& 1d-6*bytes/tg,tp, 1d-6*bytes/tp, ta, 1d-6*bytes/ta
170
77 format(i7, i7,1x, 3(1x,d8.3,1x,d8.3))
175
double precision function
176
& time_acc1(g_a, is, ie, js, je, buf, chunk, jump, count, local)
180
#include "testutil.fh"
182
integer g_a, chunk, jump, count, is, js, ie, je
184
integer rows, indx, shifti
186
integer ilo, ihi, jlo, jhi
187
double precision seconds, buf
195
seconds = util_timer()
197
c distance between consecutive patches increased by jump
198
c to destroy locality of reference
199
do ilo = is, ie -chunk-jump +1, chunk+jump
203
call ga_acc(g_a, ilo, ihi, jlo, jhi, buf, chunk, 1d0)
205
call ga_acc(g_a, ilo+shifti, ihi+shifti,
206
$ jlo, jhi, buf, chunk, 1d0)
209
seconds = util_timer() - seconds
211
time_acc1 = seconds/count
215
double precision function
216
& time_get1(g_a, is, ie, js, je, buf, chunk, jump, count, local)
220
#include "testutil.fh"
222
integer g_a, chunk, jump, count, is, js, ie, je
223
integer rows, indx, shifti
226
integer ilo, ihi, jlo, jhi
227
double precision seconds, buf
235
seconds = util_timer()
237
c distance between consecutive patches increased by jump
238
c to destroy locality of reference
239
do ilo = is, ie -chunk-jump +1, chunk+jump
243
call ga_get(g_a, ilo, ihi, jlo, jhi, buf, chunk)
245
call ga_get(g_a, ilo+shifti, ihi+shifti,
246
$ jlo, jhi, buf, chunk)
249
seconds = util_timer() - seconds
251
time_get1 = seconds/count
256
double precision function
257
& time_put1(g_a, is, ie, js, je, buf, chunk, jump, count, local)
261
#include "testutil.fh"
263
integer g_a, chunk, jump, count, is, js, ie, je
264
integer rows, indx, shifti
267
integer ilo, ihi, jlo, jhi
268
double precision seconds, buf
276
seconds = util_timer()
278
c distance between consecutive patches increased by jump
279
c to destroy locality of reference
280
do ilo = is, ie -chunk-jump +1, chunk+jump
284
call ga_put(g_a, ilo, ihi, jlo, jhi, buf, chunk)
286
call ga_put(g_a, ilo+shifti, ihi+shifti,
287
$ jlo, jhi, buf, chunk)
290
seconds = util_timer() - seconds
292
time_put1 = seconds/count
298
c test for square patches
302
#include "mafdecls.fh"
305
integer n, nn, num_chunks
306
parameter (n = 1024, nn = n*n/4, num_chunks=16)
307
double precision buf(nn)
310
integer ilo, ihi, jlo, jhi
311
integer nproc, me, loop
312
integer chunk(num_chunks)
313
data chunk /1,3,4,9,16,24,30,48,64,91,128,171,256,353,440,512/
318
c*** Create global array
319
if (.not. ga_create(MT_DBL, n, n, 'a', 0, 0, g_a))
320
$ call ga_error(' ga_create failed ',1)
330
55 format(' Performance of GA get, put & acc',
331
$ ' for square sections of array[',i4,',',i4,']')
339
call ga_distribution(g_a, me, ilo, ihi, jlo, jhi)
341
& (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo, jhi, .true.)
346
& (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo, jhi,.false.)
352
subroutine TestPutGetAcc
353
& (g_a, n, chunk, num_chunks, buf, ilo, ihi, jlo,jhi, local)
356
#include "testutil.fh"
358
integer num_chunks, chunk(num_chunks)
359
integer n, ilo, ihi, jlo,jhi,g_a
360
double precision buf(*), tg, tp, ta
361
double precision time_acc, time_get, time_put
365
integer loop, jump, count, bytes
371
write(6,'(26X, 25HLocal 2-D Array Section )')
373
write(6,'(26X, 25HRemote 2-D Array Section )')
376
write(6,*)' section get put',
378
write(6,*)' bytes dim sec MB/s sec MB/s',
384
do loop = 1, num_chunks
385
bytes = util_mdtob(1)*chunk(loop)*chunk(loop) !how much data is accessed
386
jump = n/(60*loop) ! jump distance between consecutive patches
387
if(loop.eq.num_chunks)jump=0
389
c everybody touches own data
390
call ga_fill_patch(g_a, 1, n, 1, n , 1d0*me*loop)
392
tg=time_get(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
398
c everybody touches own data
399
call ga_fill_patch(g_a, 1, n, 1, n , 1d0*me*loop)
401
tp=time_put(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
407
c everybody touches own data
408
call ga_fill_patch(g_a, 1, n, 1, n , 1d0*me*loop)
410
ta=time_acc(g_a,ilo,ihi,jlo,jhi,buf,chunk(loop),jump,count,
417
write(6,77)bytes, chunk(loop), tg,
418
& 1d-6*bytes/tg,tp, 1d-6*bytes/tp, ta, 1d-6*bytes/ta
423
77 format(i7, i7,1x, 3(1x,d8.3,1x,d8.3))
428
double precision function
429
& time_acc(g_a, is, ie, js, je, buf, chunk, jump, count, local)
433
#include "testutil.fh"
435
integer g_a, chunk, jump, count, is, js, ie, je
437
integer rows, cols, indx, shifti(3), shiftj(3)
439
integer ilo, ihi, jlo, jhi
440
double precision seconds, buf
452
seconds = util_timer()
454
c distance between consecutive patches increased by jump
455
c to destroy locality of reference
456
do ilo = is, ie -chunk-jump +1, chunk+jump
458
do jlo = js, je -chunk-jump +1, chunk+jump
462
call ga_acc(g_a, ilo, ihi, jlo, jhi, buf, chunk, 1d0)
464
indx = Mod(count,3) + 1
465
call ga_acc(g_a, ilo+shifti(indx), ihi+shifti(indx),
466
$ jlo+shiftj(indx), jhi+shiftj(indx),
471
seconds = util_timer() - seconds
473
time_acc = seconds/count
477
double precision function
478
& time_get(g_a, is, ie, js, je, buf, chunk, jump, count, local)
482
#include "testutil.fh"
484
integer g_a, chunk, jump, count, is, js, ie, je
485
integer rows, cols, indx, shifti(3), shiftj(3)
488
integer ilo, ihi, jlo, jhi
489
double precision seconds, buf
501
seconds = util_timer()
503
c distance between consecutive patches increased by jump
504
c to destroy locality of reference
505
do ilo = is, ie -chunk-jump +1, chunk+jump
507
do jlo = js, je -chunk-jump +1, chunk+jump
511
call ga_get(g_a, ilo, ihi, jlo, jhi, buf, chunk)
513
indx = Mod(count,3) + 1
514
call ga_get(g_a, ilo+shifti(indx), ihi+shifti(indx),
515
$ jlo+shiftj(indx), jhi+shiftj(indx),
520
seconds = util_timer() - seconds
522
time_get = seconds/count
527
double precision function
528
& time_put(g_a, is, ie, js, je, buf, chunk, jump, count, local)
532
#include "testutil.fh"
534
integer g_a, chunk, jump, count, is, js, ie, je
535
integer rows, cols, indx, shifti(3), shiftj(3)
538
integer ilo, ihi, jlo, jhi
539
double precision seconds, buf
551
seconds = util_timer()
553
c distance between consecutive patches increased by jump
554
c to destroy locality of reference
555
do ilo = is, ie -chunk-jump +1, chunk+jump
557
do jlo = js, je -chunk-jump +1, chunk+jump
561
call ga_put(g_a, ilo, ihi, jlo, jhi, buf, chunk)
563
indx = Mod(count,3) + 1
564
call ga_put(g_a, ilo+shifti(indx), ihi+shifti(indx),
565
$ jlo+shiftj(indx), jhi+shiftj(indx),
570
seconds = util_timer() - seconds
572
time_put = seconds/count
576
subroutine my_sleep(seconds)
579
#include "testutil.fh"
581
double precision ts, te, work
582
integer seconds, loop
583
common /sleep_block/ work
592
if(te - ts .lt. real(seconds)) goto 100
593
* print *, work, ts, te