4
c vector boxes lack arithmetic precision
14
#define MISMATCH(x,y) abs(x-y)/max(1d0,abs(x)).gt.THRESH
15
#define MISMATCHF(x,y) abs(x-y)/max(1.0,abs(x)).gt.THRESHF
20
#include "mafdecls.fh"
22
#include "testutil.fh"
23
integer heap, stack, fudge, ma_heap, me
24
integer nmax, DIM, nwidth, MAXPROC, nloop
25
parameter (nmax = 4, DIM = 2, nwidth = 2, MAXPROC = 2000)
27
integer ndim, nproc, pdims(7), type, dcnt, g_a, maxval
28
integer i, j, k, dims(7), width(7), map(2*nmax)
29
integer lo(7), hi(7), ld(7)
30
integer lo2(7), hi2(7), ld2(7)
31
integer dims3(7), ld3(7), index3, chunk(7)
32
integer a(nmax, nmax), b(nmax+2*nwidth,nmax+2*nwidth)
33
double precision start,finish,start1,finish1,t1,t2,t3,t4,t5,tmp
34
double precision t6,t7
35
logical status, safe_put, safe_get, has_data(0:MAXPROC-1)
36
parameter (heap=60*60*4, fudge=100, stack=100*100)
38
c*** Intitialize a message passing library
44
c There are 2 choices: ga_initialize or ga_initialize_ltd.
45
c In the first case, there is no explicit limit on memory usage.
46
c In the second, user can set limit (per processor) in bytes.
52
c we can also use GA_set_memory_limit BEFORE first ga_create call
54
ma_heap = heap + fudge
55
call GA_set_memory_limit(util_mdtob(ma_heap))
57
if(ga_nodeid().eq.0)then
58
print *,' GA initialized '
62
c*** Initialize the MA package
63
c MA must be initialized before any global array is allocated
65
status = ma_init(MT_DCPL, stack, ma_heap)
66
if (.not. status) call ga_error('ma_init failed',-1)
69
print *, 'using ', nproc, ' process(es)'
73
c Test ghost distributions
77
c Create irregular distribution on all nodes
79
call factor(nproc,ndim,pdims)
80
dims(1) = pdims(1) * nmax
81
dims(2) = pdims(2) * nmax
84
maxval = dims(i)*maxval
90
map(dcnt) = (i-1)*nmax + 1
94
map(dcnt) = (i-1)*nmax + 1
101
if (pdims(i).gt.dims(i)) pdims(i) = dims(i)
103
write(6,*) 'Value of pdims(',i,') is ',pdims(i)
110
write(6,'("map(",i2,") = ",i5)') i,map(i)
116
status = nga_create_ghosts_irreg (type, ndim, dims, width,
117
+ "test_array", map, pdims, g_a)
118
if (status.and.me.eq.0) then
120
write(6,*) '* Global array creation was successful'
122
elseif (.not.status) then
123
write(6,*) 'Global array creation failure on ',me
126
c Find processors that have data
130
call nga_distribution(g_a, i, lo, hi)
133
if (lo(j).eq.0.and.hi(j).eq.-1) has_data(i) = .false.
137
write(6,*) '* Distribution on processor ',i
139
write(6,110) lo(1), hi(1)
140
write(6,110) lo(2), hi(2)
150
call nga_distribution(g_a, me, lo, hi)
151
do i = 1, hi(1) - lo(1) + 1
152
do j = 1, hi(2) - lo(2) + 1
153
a(i,j) = (i + lo(1) - 2)*dims(1) + (j + lo(2) - 2) + 1
158
if (hi(i).lt.lo(i)) safe_put = .false.
160
if (has_data(me).and.safe_put) call nga_put(g_a, lo, hi, a, ld)
162
c print out values of a
166
if (k.eq.me.and.has_data(me).and.maxval.lt.10000) then
168
write(6,*) 'Initial data on processor ',k
170
do i = 1, min(hi(1)-lo(1)+1,10)
171
write (6,101) (a(i,j),j=1,min(hi(2)-lo(2)+1,10))
183
status = nga_update_ghost_dir(g_a,1,1,.true.)
184
status = nga_update_ghost_dir(g_a,1,-1,.true.)
185
status = nga_update_ghost_dir(g_a,2,1,.true.)
186
status = nga_update_ghost_dir(g_a,2,-1,.true.)
187
finish = util_timer()
188
t1 = t1 + finish - start
194
write(6,*) '* Completed update successfully'
199
c get patch with ghost cells
202
lo2(i) = lo(i) - width(i)
203
hi2(i) = hi(i) + width(i)
204
ld2(i) = ld(i) + 2*width(i)
211
write(6,*) 'ghost patch dimensions on processor ',i
214
write(6,*) 'lo(',j,') = ',lo2(j)
215
write(6,*) 'hi(',j,') = ',hi2(j)
216
write(6,*) 'ld(',j,') = ',ld2(j)
230
start1 = util_timer()
231
if (has_data(me).and.safe_get)
232
+ call nga_periodic_get(g_a, lo2, hi2, b, ld2)
233
finish1 = util_timer()
235
finish = util_timer()
236
t2 = t2 + finish1 - start1
237
t3 = t3 + finish - start
242
if (me.eq.0.and.maxval.lt.10000) then
244
write(6,*) '* Write out contents of local patch using'
245
write(6,*) '* nga_periodic_get'
251
if (me.eq.k.and.has_data(me).and.maxval.lt.10000) then
253
write(6,*) '* Data on processor ',k
255
do i = 1, min(hi2(1)-lo2(1)+1,12)
256
write (6,102) (b(i,j),j=1,min(hi2(2)-lo2(2)+1,12))
264
write(6,*) '* Performing nga_access_ghosts'
268
if (has_data(me)) call nga_access_ghosts(g_a, dims3,
272
+ call aprint(int_mb(index3),dims3(1),dims3(2),ld3,has_data)
273
call atest(int_mb(index3),dims3(1),dims3(2),ld3,b,
274
+ nmax+2*nwidth,has_data)
277
call ga_dgop(1,tmp,1,'max')
279
write(6,*) 'Maximum time for nga_update_ghosts ',tmp
282
call ga_dgop(2,tmp,1,'min')
284
write(6,*) 'Minimum time for nga_update_ghosts ',tmp
287
call ga_dgop(3,tmp,1,'+')
289
write(6,*) 'Average time for nga_update_ghosts ',tmp/dble(nproc)
292
call ga_dgop(4,tmp,1,'max')
294
write(6,*) 'Maximum time for nga_periodic_get ',tmp
297
call ga_dgop(5,tmp,1,'min')
299
write(6,*) 'Minimum time for nga_periodic_get ',tmp
302
call ga_dgop(6,tmp,1,'+')
304
write(6,*) 'Average time for nga_periodic_get ',tmp/dble(nproc)
307
call ga_dgop(4,tmp,1,'max')
309
write(6,*) 'Maximum time for (sync)nga_periodic_get ',tmp
312
call ga_dgop(5,tmp,1,'min')
314
write(6,*) 'Minimum time for (sync)nga_periodic_get ',tmp
317
call ga_dgop(6,tmp,1,'+')
319
write(6,*) 'Average time for (sync)nga_periodic_get ',
328
write(6,*) 'All tests successful'
332
c*** Tidy up the GA package
336
c*** Tidy up after message-passing library
344
subroutine aprint(a,nrow,ncol,ld,has_data)
348
integer i, j, k, nproc
349
logical has_data(0:1999)
354
if (k-1.eq.ga_nodeid().and.has_data(k-1)) then
356
write(6,*) '* Data on processor ',k-1
358
do i = 1, min(nrow,12)
359
write (6,102) (a(i,j), j = 1, min(ncol,12))
369
subroutine atest(a,nrow,ncol,ld,b,ld2,has_data)
372
integer a(ld,*), b(ld2,*)
373
integer i, j, k, nproc
374
logical has_data(0:1999), check_data
380
if (k-1.eq.ga_nodeid().and.has_data(k-1)) then
383
if (a(i,j).ne.b(i,j)) check_data = .false.
388
write(6,*) '* Data from nga_access_ghosts and'
389
write(6,*) '* nga_periodic_get is the same on'
390
write(6,*) '* processor ',k-1
394
write(6,*) '* Data from nga_access_ghosts and'
395
write(6,*) '* nga_periodic_get is NOT the same on'
396
write(6,*) '* processor ',k-1
406
subroutine factor(p,ndim,dims)
408
integer i,j,p,ndim,dims(7),imin,mdim
409
integer ip,ifac,pmax,prime(1000)
418
c factor p completely
419
c first, find all prime numbers less than or equal to p
424
if (mod(i,prime(j)).eq.0) go to 100
431
c find all prime factors of p
435
200 if (mod(ip,prime(i)).eq.0) then
443
c determine dimensions of processor grid
447
c find dimension with minimum value
452
if (dims(j).lt.imin) then
457
dims(mdim) = dims(mdim)*fac(i)