4
C $iD: PATch.F,v 1.21 2002/10/15 23:35:52 vinod Exp $
5
#if (defined(CRAY) && !defined(__crayx1)) || defined(KSR)
9
# define xgemm TEST_DGEMM
10
# define ygemm TEST_ZGEMM
12
#if defined(FUJITSU) || defined(CRAY_YMP)
13
# define THRESH 1.0d-10
15
# define THRESH 1.0d-20
17
#define MISMATCH(x,y) abs(x-y)/max(1,abs(x)).gt.THRESH
20
c#define USE_SCALAPACK_DISTR
21
#ifdef USE_SCALAPACK_DISTR
27
#include "mafdecls.fh"
31
c*** Initialize a message passing library
36
if(ga_nodeid().eq.0)then
37
print *,' GA initialized'
41
status = ma_init(MT_DBL, 500000, 900000/ga_nnodes())
42
if (.not. status)call ga_error( 'ma_init failed', -1)
43
if(ga_nodeid().eq.0)then
45
print *,'CHECKING PATCH OPERATIONS FOR DOUBLES '
50
if(ga_nodeid().eq.0)then
52
print *,'CHECKING PATCH OPERATIONS FOR DOUBLE COMPLEX'
57
if(ga_nodeid().eq.0)then
59
print *,'CHECKING PATCH OPERATIONS FOR SINGLE PRECISION'
64
if(ga_nodeid().eq.0) print *,'All tests successful '
73
subroutine dpatch_test()
75
#include "mafdecls.fh"
77
#include "testutil.fh"
82
double precision a(n,n), b(n,n), c(n,n), buf(m), val
83
double precision alpha, beta
85
integer i, j, ailo, ajlo, bilo, bjlo, base, iran
86
integer aihi, ajhi, bihi, bjhi
88
integer rows, cols, loop
89
logical status, dist_same
92
integer block_size(2), proc_grid(2)
94
iran(i) = int(drand(1)*real(i)) + 1
102
#ifdef USE_SCALAPACK_DISTR
103
if (mod(nproc,2).ne.0)
104
+ call ga_error("Available procs must be divisible by 2",0)
106
proc_grid(2) = nproc/2
113
a(i,j) = i-1 + (j-1)*n
118
c*** Create a global array
121
status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a)
123
g_a = ga_create_handle()
127
call ga_set_data(g_a,ndim,dims,MT_DBL)
128
call ga_set_array_name(g_a,'a')
129
#ifdef USE_SCALAPACK_DISTR
130
call ga_set_block_cyclic_proc_grid(g_a,block_size,proc_grid)
132
call ga_set_block_cyclic(g_a,block_size)
134
status = ga_allocate(g_a)
136
if (.not. status) then
137
write(6,*) ' ga_create failed'
139
call ga_error('... exiting ',0)
143
status = ga_duplicate(g_a, g_b, 'a_duplicated')
144
if(.not.ga_compare_distr(g_a, g_b))
145
$ call ga_error("g_b distribution different",0)
146
status = ga_duplicate(g_a, g_c, 'a_duplicated_again')
147
if(.not.ga_compare_distr(g_a, g_c))
148
$ call ga_error("g_c distribution different",0)
151
status = ga_create(MT_DBL, n, n, 'b', 0, n, g_b)
153
g_b = ga_create_handle()
157
call ga_set_data(g_b,ndim,dims,MT_DBL)
158
call ga_set_array_name(g_b,'b')
159
#ifdef USE_SCALAPACK_DISTR
160
call ga_set_block_cyclic_proc_grid(g_b,block_size,proc_grid)
162
call ga_set_block_cyclic(g_b,block_size)
164
status = ga_allocate(g_b)
166
if (.not. status) call ga_error('ga_create failed:b',0)
168
status = ga_create(MT_DBL, n, n, 'c', n, 0, g_c)
170
g_c = ga_create_handle()
174
call ga_set_data(g_c,ndim,dims,MT_DBL)
175
call ga_set_array_name(g_c,'c')
176
#ifdef USE_SCALAPACK_DISTR
177
call ga_set_block_cyclic_proc_grid(g_c,block_size,proc_grid)
179
call ga_set_block_cyclic(g_c,block_size)
181
status = ga_allocate(g_c)
183
if (.not. status) call ga_error('ga_create failed:c',0)
189
write(6,*)'> Checking ga_fill_patch ... '
194
call ga_fill_patch(g_a, 2,n/2, 2,n, val)
195
* call ga_print(g_a, 1)
196
do j = 2+me, n, nproc
197
call ga_get(g_a, 1,n/2, j,j, buf,n/2)
199
if(buf(i) .ne.val ) then
200
print *,me, ' error ',i,j, buf(i),val
201
call ga_error('exiting ...',0)
212
do j = 1+me, n, nproc
213
call ga_put(g_a,1,n,j,j,a(1,j),n)
218
write(6,*)'> Checking ga_copy_patch ... '
225
aihi = min(n, -1+ailo+n/2)
226
ajhi = min(n, -1+ajlo+n/4)
232
bihi = bilo + rows -1
233
bjhi = bjlo + cols -1
235
write(6,'(2x,1h[,4i4,1h],5h --> ,1h[,4i4,1h])')
236
$ ailo,aihi,ajlo,ajhi,
237
$ bilo,bihi, bjlo, bjhi
243
call ga_copy_patch('n', g_a, ailo, aihi, ajlo, ajhi,
244
& g_b, bilo, bihi, bjlo, bjhi)
245
c call ga_print(g_a,1)
246
c call ga_print(g_b,1)
247
call ga_get(g_b,bilo,bihi, bjlo, bjhi, buf, rows)
250
if(Mod(j,nproc).eq.me) then
253
if(buf(base) .ne. a(i,j)) then
254
print *,me, ' error ',i,j, buf(base), a(i,j)
255
call ga_error('exiting ...',0)
271
write(6,*)' without transpose: OK '
276
write(6,'(2x,1h[,4i4,1h],5h --> ,1h[,4i4,1h])')
277
$ ailo,aihi,ajlo,ajhi,
278
$ bilo,bihi, bjlo, bjhi
281
call ga_copy_patch('n', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
282
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
283
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,1)
284
* call ga_print(g_b, 1)
285
call ga_get(g_b,bilo,bilo+n/3,bjlo, bjlo+n/2,buf,n/3+1)
287
do j = ajlo, ajlo+n/3
288
if(Mod(j,nproc).eq.me) then
289
do i = ailo, ailo+n/2
291
if(buf(base) .ne. a(i,j)) then
292
print *,me, ' error ',i,j, buf(base), a(i,j)
293
call ga_error('exiting ...',0)
303
write(6,*)' without transpose: OK '
308
write(6,'(2x,1h[,4i4,2h]~,5h --> ,1h[,4i4,1h])')
309
$ ailo,aihi,ajlo,ajhi,
310
$ bilo,bihi, bjlo, bjhi
312
call ga_copy_patch('t', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
313
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
314
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,1)
315
* call ga_print(g_b, 1)
316
call ga_get(g_b,bilo,bilo+n/3,bjlo, bjlo+n/2,buf,n/3+1)
318
do i = ailo, ailo+n/2
319
if(Mod(j,nproc).eq.me) then
320
do j = ajlo, ajlo+n/3
322
if(buf(base) .ne. a(i,j)) then
323
print *,me, ' error ',i,j, buf(base), a(i,j)
324
call ga_error('exiting ...',0)
334
write(6,*)' transposed: OK '
342
write(6,*)'> Checking ga_scale_patch ... '
345
call ga_copy_patch('n', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
346
$ g_b, bilo,bilo+n/2, bjlo, bjlo+n/3)
348
call ga_scale_patch(g_b, bilo,bilo+n/2,bjlo, bjlo+n/3,val)
349
call ga_get(g_b,bilo,bilo+n/2,bjlo, bjlo+n/3,buf,n/2+1)
351
do j = ajlo, ajlo+n/3
352
if(Mod(j,nproc).eq.me) then
353
do i = ailo, ailo+n/2
355
if(buf(base) .ne. a(i,j)*val) then
356
print *,me, ' error ',i,j, buf(base), a(i,j)*val
357
call ga_error('exiting ...',0)
374
write(6,*)'> Checking ga_add_patch ... '
380
call ga_add_patch(alpha, g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
381
$ beta, g_b, bilo,bilo+n/2, bjlo, bjlo+n/3,
382
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/3)
383
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/3,buf,n/2+1)
385
val = val*beta + alpha
386
do j = ajlo, ajlo+n/3
387
if(Mod(j,nproc).eq.me) then
388
do i = ailo, ailo+n/2
390
if(ABS(buf(base)- a(i,j)*val).gt.1d-5) then
391
print *,me, ' error ',i,j, buf(base), a(i,j)*val
392
call ga_error('exiting ...',0)
409
write(6,*)'> Checking ga_ddot_patch ... '
412
alpha= ga_ddot_patch(g_a,'n', ailo,ailo+n/2, ajlo, ajlo+n/3,
413
$ g_c,'n', bilo,bilo+n/2, bjlo, bjlo+n/3)
415
do j = ajlo, ajlo+n/3
416
do i = ailo, ailo+n/2
417
beta = beta + a(i,j)**2
421
if(ABS(beta*val- alpha).gt.1d-6*alpha) then
422
print *,me, ' error ', beta*val, alpha
423
call ga_error('exiting ...',0)
432
c......................................................
435
write(6,*)'> Checking ga_matmul_patch ... '
438
do j = 1+me, n, nproc
439
call ga_put(g_b,1,n,j,j,b(1,j),n)
442
call ga_matmul_patch('n','n', 1d0, 0d0,
443
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
444
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
445
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
446
call xgemm('n','n',n/2+1,n/2+1,n/3+1,1d0,a(ailo,ajlo), n,
447
$ b(bilo,bjlo),n, 0d0, c, n)
448
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3)
449
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
450
* call ga_print(g_c, 1)
451
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
454
if(Mod(j,nproc).eq.me) then
457
if(ABS(buf(base)- c(i,j)).gt.1d-8) then
458
print *,me, ' error ',i,j, buf(base), c(i,j)
459
call ga_error('exiting ...',0)
469
write(6,*)' a*b: OK '
474
call ga_matmul_patch('t','n', 1d0, 0d0,
475
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
476
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
477
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
478
call xgemm('t','n',n/2+1,n/2+1,n/3+1,1d0,a(ajlo,ailo), n,
479
$ b(bilo,bjlo),n, 0d0, c, n)
480
* call ga_print(g_a,1)
481
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
482
* call ga_print(g_c, 1)
483
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
486
if(Mod(j,nproc).eq.me) then
489
if(ABS(buf(base)- c(i,j)).gt.1d-8) then
490
print *,me, ' error ',i,j, buf(base), c(i,j)
491
call ga_error('exiting ...',0)
501
write(6,*)' trans(a)*b: OK '
506
call ga_matmul_patch('n','t', 1d0, 0d0,
507
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
508
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
509
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
510
call xgemm('n','t',n/2+1,n/2+1,n/3+1,1d0,a(ailo,ajlo), n,
511
$ b(bjlo,bilo),n, 0d0, c, n)
512
* call ga_print(g_a,1)
513
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
514
* call ga_print(g_c, 1)
515
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
518
if(Mod(j,nproc).eq.me) then
521
if(ABS(buf(base)- c(i,j)).gt.1d-8) then
522
print *,me, ' error ',i,j, buf(base), c(i,j)
523
call ga_error('exiting ...',0)
533
write(6,*)' a*trans(b): OK '
538
call ga_matmul_patch('t','t', 1d0, 0d0,
539
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
540
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
541
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
542
call xgemm('t','t',n/2+1,n/2+1,n/3+1,1d0,a(ajlo,ailo), n,
543
$ b(bjlo,bilo),n, 0d0, c, n)
544
* call ga_print(g_a,1)
545
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
546
* call ga_print(g_c, 1)
547
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
550
if(Mod(j,nproc).eq.me) then
553
if(ABS(buf(base)- c(i,j)).gt.1d-8) then
554
print *,me, ' error ',i,j, buf(base), c(i,j)
555
call ga_error('exiting ...',0)
565
write(6,*)' trans(a)*trans(b): OK '
568
status = ga_destroy(g_a)
569
status = status .and. ga_destroy(g_b)
570
status = status .and. ga_destroy(g_c)
571
if(.not. status) print *, 'ga_destroy failed'
579
subroutine zpatch_test()
581
#include "mafdecls.fh"
583
#include "testutil.fh"
588
double complex a(n,n), b(n,n), c(n,n), buf(m), val
589
double complex alpha, beta
591
integer i, j, ailo, ajlo, bilo, bjlo, base, iran
592
integer aihi, ajhi, bihi, bjhi
593
integer g_a, g_b, g_c
594
integer rows, cols, loop
595
logical status, dist_same
597
integer ndim, dims(2)
598
integer block_size(2), proc_grid(2)
600
iran(i) = int(drand(1)*real(i)) + 1
608
#ifdef USE_SCALAPACK_DISTR
609
if (mod(nproc,2).ne.0)
610
+ call ga_error("Available procs must be divisible by 2",0)
612
proc_grid(2) = nproc/2
618
a(i,j) = cmplx(dble(i-1), dble((j-1)*n))
619
b(i,j) = cmplx(dble(i+j),1d0)
623
c*** Create a global array
626
status = ga_create(MT_DCPL, n, n, 'a', 0, 0, g_a)
628
g_a = ga_create_handle()
632
call ga_set_data(g_a,ndim,dims,MT_DCPL)
633
call ga_set_array_name(g_a,'a')
634
#ifdef USE_SCALAPACK_DISTR
635
call ga_set_block_cyclic_proc_grid(g_a,block_size,proc_grid)
637
call ga_set_block_cyclic(g_a,block_size)
639
status = ga_allocate(g_a)
641
if (.not. status) then
642
write(6,*) ' ga_create failed'
644
call ga_error('... exiting ',0)
648
status = ga_duplicate(g_a, g_b, 'a_duplicated')
649
if(.not.ga_compare_distr(g_a, g_b))
650
$ call ga_error("g_b distribution different",0)
651
status = ga_duplicate(g_a, g_c, 'a_duplicated_again')
652
if(.not.ga_compare_distr(g_a, g_c))
653
$ call ga_error("g_c distribution different",0)
656
status = ga_create(MT_DCPL, n, n, 'b', 0, n, g_b)
658
g_b = ga_create_handle()
662
call ga_set_data(g_b,ndim,dims,MT_DCPL)
663
call ga_set_array_name(g_b,'b')
664
#ifdef USE_SCALAPACK_DISTR
665
call ga_set_block_cyclic_proc_grid(g_b,block_size,proc_grid)
667
call ga_set_block_cyclic(g_b,block_size)
669
status = ga_allocate(g_b)
671
if (.not. status) call ga_error('ga_create failed:b',0)
673
status = ga_create(MT_DCPL, n, n, 'c', n, 0, g_c)
675
g_c = ga_create_handle()
679
call ga_set_data(g_c,ndim,dims,MT_DCPL)
680
call ga_set_array_name(g_c,'c')
681
#ifdef USE_SCALAPACK_DISTR
682
call ga_set_block_cyclic_proc_grid(g_c,block_size,proc_grid)
684
call ga_set_block_cyclic(g_c,block_size)
686
status = ga_allocate(g_c)
688
if (.not. status) call ga_error('ga_create failed:c',0)
694
write(6,*)'> Checking ga_fill_patch ... '
699
call ga_fill_patch(g_a, 2,n/2, 2,n, val)
700
* call ga_print(g_a, 1)
701
do j = 2+me, n, nproc
702
call ga_get(g_a, 1,n/2, j,j, buf,n/2)
704
if(buf(i) .ne.val ) then
705
print *,me, ' error ',i,j, buf(i),val
706
call ga_error('exiting ...',0)
717
do j = 1+me, n, nproc
718
call ga_put(g_a,1,n,j,j,a(1,j),n)
723
write(6,*)'> Checking ga_copy_patch ... '
730
aihi = min(n, -1+ailo+n/2)
731
ajhi = min(n, -1+ajlo+n/4)
737
bihi = bilo + rows -1
738
bjhi = bjlo + cols -1
740
write(6,'(2x,1h[,4i4,1h],5h --> ,1h[,4i4,1h])')
741
$ ailo,aihi,ajlo,ajhi,
742
$ bilo,bihi, bjlo, bjhi
748
call ga_copy_patch('n', g_a, ailo, aihi, ajlo, ajhi,
749
& g_b, bilo, bihi, bjlo, bjhi)
750
c call ga_print(g_a,1)
751
c call ga_print(g_b,1)
752
call ga_get(g_b,bilo,bihi, bjlo, bjhi, buf, rows)
755
if(Mod(j,nproc).eq.me) then
758
if(buf(base) .ne. a(i,j)) then
759
print *,me, ' error ',i,j, buf(base), a(i,j)
760
call ga_error('exiting ...',0)
776
write(6,*)' without transpose: OK '
781
write(6,'(2x,1h[,4i4,1h],5h --> ,1h[,4i4,1h])')
782
$ ailo,aihi,ajlo,ajhi,
783
$ bilo,bihi, bjlo, bjhi
786
call ga_copy_patch('n', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
787
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
788
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,1)
789
* call ga_print(g_b, 1)
790
call ga_get(g_b,bilo,bilo+n/3,bjlo, bjlo+n/2,buf,n/3+1)
792
do j = ajlo, ajlo+n/3
793
if(Mod(j,nproc).eq.me) then
794
do i = ailo, ailo+n/2
796
if(buf(base) .ne. a(i,j)) then
797
print *,me, ' error ',i,j, buf(base), a(i,j)
798
call ga_error('exiting ...',0)
808
write(6,*)' without transpose: OK '
813
write(6,'(2x,1h[,4i4,2h]~,5h --> ,1h[,4i4,1h])')
814
$ ailo,aihi,ajlo,ajhi,
815
$ bilo,bihi, bjlo, bjhi
817
call ga_copy_patch('t', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
818
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
819
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,1)
820
* call ga_print(g_b, 1)
821
call ga_get(g_b,bilo,bilo+n/3,bjlo, bjlo+n/2,buf,n/3+1)
823
do i = ailo, ailo+n/2
824
if(Mod(j,nproc).eq.me) then
825
do j = ajlo, ajlo+n/3
827
if(buf(base) .ne. a(i,j)) then
828
print *,me, ' error ',i,j, buf(base), a(i,j)
829
call ga_error('exiting ...',0)
839
write(6,*)' transposed: OK '
847
write(6,*)'> Checking ga_scale_patch ... '
850
call ga_copy_patch('n', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
851
$ g_b, bilo,bilo+n/2, bjlo, bjlo+n/3)
853
call ga_scale_patch(g_b, bilo,bilo+n/2,bjlo, bjlo+n/3,val)
854
call ga_get(g_b,bilo,bilo+n/2,bjlo, bjlo+n/3,buf,n/2+1)
856
do j = ajlo, ajlo+n/3
857
if(Mod(j,nproc).eq.me) then
858
do i = ailo, ailo+n/2
860
if(buf(base) .ne. a(i,j)*val) then
861
print *,me, ' error ',i,j, buf(base), a(i,j)*val
862
call ga_error('exiting ...',0)
879
write(6,*)'> Checking ga_add_patch ... '
885
call ga_add_patch(alpha, g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
886
$ beta, g_b, bilo,bilo+n/2, bjlo, bjlo+n/3,
887
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/3)
888
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/3,buf,n/2+1)
890
val = val*beta + alpha
891
do j = ajlo, ajlo+n/3
892
if(Mod(j,nproc).eq.me) then
893
do i = ailo, ailo+n/2
895
if(ABS(buf(base)- a(i,j)*val).gt.1d-5) then
896
print *,me, ' error ',i,j, buf(base), a(i,j)*val
897
call ga_error('exiting ...',0)
914
write(6,*)'> Checking ga_zdot_patch ... '
917
alpha= ga_zdot_patch(g_a,'n', ailo,ailo+n/2, ajlo, ajlo+n/3,
918
$ g_c,'n', bilo,bilo+n/2, bjlo, bjlo+n/3)
920
do j = ajlo, ajlo+n/3
921
do i = ailo, ailo+n/2
922
beta = beta + a(i,j)*a(i,j)
925
if(ABS(beta*val- alpha)/abs(alpha).gt.1d-6) then
926
print *,me, ' error ', beta*val, alpha
927
call ga_error('exiting ...',0)
936
c......................................................
939
write(6,*)'> Checking ga_matmul_patch ... '
942
do j = 1+me, n, nproc
943
call ga_put(g_b,1,n,j,j,b(1,j),n)
946
call ga_matmul_patch('n','n', (1d0,0d0), (0d0,0d0),
947
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
948
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
949
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
950
call ygemm('n','n',n/2+1,n/2+1,n/3+1,(1d0,0d0),
952
$ b(bilo,bjlo),n, (0d0,0d0), c, n)
953
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3)
954
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
955
* call ga_print(g_c, 1)
956
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
959
if(Mod(j,nproc).eq.me) then
962
if(ABS(buf(base)- c(i,j))/abs(c(i,j)).gt.1d-8) then
963
print *,me, ' error ',i,j, buf(base), c(i,j)
964
call ga_error('exiting ...',0)
974
write(6,*)' a*b: OK '
979
call ga_matmul_patch('t','n', (1d0,0d0), (0d0,0d0),
980
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
981
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
982
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
983
call ygemm('t','n',n/2+1,n/2+1,n/3+1,(1d0,0d0),
985
$ b(bilo,bjlo),n, (0d0,0d0), c, n)
986
* call ga_print(g_a,1)
987
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
988
* call ga_print(g_c, 1)
989
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
992
if(Mod(j,nproc).eq.me) then
995
if(ABS(buf(base)- c(i,j))/abs(c(i,j)).gt.1d-8) then
996
print *,me, ' error ',i,j, buf(base), c(i,j)
997
call ga_error('exiting ...',0)
1007
write(6,*)' trans(a)*b: OK '
1012
call ga_matmul_patch('n','t', (1d0,0d0), (0d0,0d0),
1013
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1014
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
1015
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
1016
call ygemm('n','t',n/2+1,n/2+1,n/3+1,(1d0,0d0),
1018
$ b(bjlo,bilo),n, (0d0,0d0), c, n)
1019
* call ga_print(g_a,1)
1020
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1021
* call ga_print(g_c, 1)
1022
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
1025
if(Mod(j,nproc).eq.me) then
1028
if(ABS(buf(base)- c(i,j))/abs(c(i,j)).gt.1d-8) then
1029
print *,me, ' error ',i,j, buf(base), c(i,j)
1030
call ga_error('exiting ...',0)
1040
write(6,*)' a*trans(b): OK '
1045
call ga_matmul_patch('t','t', (1d0,0d0), (0d0,0d0),
1046
$ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1047
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
1048
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
1049
call ygemm('t','t',n/2+1,n/2+1,n/3+1,(1d0,0d0),
1051
$ b(bilo,bjlo),n, (0d0,0d0), c, n)
1052
* call ga_print(g_a,1)
1053
* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1054
* call ga_print(g_c, 1)
1055
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
1058
if(Mod(j,nproc).eq.me) then
1061
if(ABS(buf(base)- c(i,j))/abs(c(i,j)).gt.1d-8) then
1062
print *,me, ' error ',i,j, buf(base), c(i,j)
1063
call ga_error('exiting ...',0)
1073
write(6,*)' trans(a)*trans(b): OK '
1076
status = ga_destroy(g_a)
1077
status = status .and. ga_destroy(g_b)
1078
status = status .and. ga_destroy(g_c)
1079
if(.not. status) print *, 'ga_destroy failed'
1085
subroutine spatch_test()
1087
#include "mafdecls.fh"
1088
#include "global.fh"
1089
#include "testutil.fh"
1094
real a(n,n), b(n,n), c(n,n), buf(m), val
1097
integer i, j, ailo, ajlo, bilo, bjlo, base, iran
1098
integer aihi, ajhi, bihi, bjhi
1099
integer g_a, g_b, g_c
1100
integer rows, cols, loop
1101
logical status, dist_same
1103
integer ndim, dims(2)
1104
integer block_size(2), proc_grid(2)
1106
iran(i) = int(drand(1)*real(i)) + 1
1114
#ifdef USE_SCALAPACK_DISTR
1115
if (mod(nproc,2).ne.0)
1116
+ call ga_error("Available procs must be divisible by 2",0)
1118
proc_grid(2) = nproc/2
1124
a(i,j) = i-1 + (j-1)*n
1129
c*** Create a global array
1131
#ifndef BLOCK_CYCLIC
1132
status = ga_create(MT_REAL, n, n, 'a', 0, 0, g_a)
1134
g_a = ga_create_handle()
1138
call ga_set_data(g_a,ndim,dims,MT_REAL)
1139
call ga_set_array_name(g_a,'a')
1140
#ifdef USE_SCALAPACK_DISTR
1141
call ga_set_block_cyclic_proc_grid(g_a,block_size,proc_grid)
1143
call ga_set_block_cyclic(g_a,block_size)
1145
status = ga_allocate(g_a)
1147
if (.not. status) then
1148
write(6,*) ' ga_create failed'
1150
call ga_error('... exiting ',0)
1154
status = ga_duplicate(g_a, g_b, 'a_duplicated')
1155
if(.not.ga_compare_distr(g_a, g_b))
1156
$ call ga_error("g_b distribution different",0)
1157
status = ga_duplicate(g_a, g_c, 'a_duplicated_again')
1158
if(.not.ga_compare_distr(g_a, g_c))
1159
$ call ga_error("g_c distribution different",0)
1161
#ifndef BLOCK_CYCLIC
1162
status = ga_create(MT_REAL, n, n, 'b', 0, n, g_b)
1164
g_b = ga_create_handle()
1168
call ga_set_data(g_b,ndim,dims,MT_REAL)
1169
call ga_set_array_name(g_b,'b')
1170
#ifdef USE_SCALAPACK_DISTR
1171
call ga_set_block_cyclic_proc_grid(g_b,block_size,proc_grid)
1173
call ga_set_block_cyclic(g_b,block_size)
1175
status = ga_allocate(g_b)
1177
if (.not. status) call ga_error('ga_create failed:b',0)
1178
#ifndef BLOCK_CYCLIC
1179
status = ga_create(MT_REAL, n, n, 'c', n, 0, g_c)
1181
g_c = ga_create_handle()
1185
call ga_set_data(g_c,ndim,dims,MT_REAL)
1186
call ga_set_array_name(g_c,'c')
1187
#ifdef USE_SCALAPACK_DISTR
1188
call ga_set_block_cyclic_proc_grid(g_c,block_size,proc_grid)
1190
call ga_set_block_cyclic(g_c,block_size)
1192
status = ga_allocate(g_c)
1194
if (.not. status) call ga_error('ga_create failed:c',0)
1200
write(6,*)'> Checking ga_fill_patch ... '
1205
call ga_fill_patch(g_a, 2,n/2, 2,n, val)
1206
* call ga_print(g_a, 1)
1207
do j = 2+me, n, nproc
1208
call ga_get(g_a, 1,n/2, j,j, buf,n/2)
1210
if(buf(i) .ne.val ) then
1211
print *,me, ' error ',i,j, buf(i),val
1212
call ga_error('exiting ...',0)
1223
do j = 1+me, n, nproc
1224
call ga_put(g_a,1,n,j,j,a(1,j),n)
1229
write(6,*)'> Checking ga_copy_patch ... '
1236
aihi = min(n, -1+ailo+n/2)
1237
ajhi = min(n, -1+ajlo+n/4)
1239
cols = ajhi -ajlo +1
1243
bihi = bilo + rows -1
1244
bjhi = bjlo + cols -1
1246
write(6,'(2x,1h[,4i4,1h],5h --> ,1h[,4i4,1h])')
1247
$ ailo,aihi,ajlo,ajhi,
1248
$ bilo,bihi, bjlo, bjhi
1254
call ga_copy_patch('n', g_a, ailo, aihi, ajlo, ajhi,
1255
& g_b, bilo, bihi, bjlo, bjhi)
1256
c call ga_print(g_a,1)
1257
c call ga_print(g_b,1)
1258
call ga_get(g_b,bilo,bihi, bjlo, bjhi, buf, rows)
1261
if(Mod(j,nproc).eq.me) then
1264
if(buf(base) .ne. a(i,j)) then
1265
print *,me, ' error ',i,j, buf(base), a(i,j)
1266
call ga_error('exiting ...',0)
1282
write(6,*)' without transpose: OK '
1287
write(6,'(2x,1h[,4i4,1h],5h --> ,1h[,4i4,1h])')
1288
$ ailo,aihi,ajlo,ajhi,
1289
$ bilo,bihi, bjlo, bjhi
1292
call ga_copy_patch('n', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1293
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1294
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,1)
1295
* call ga_print(g_b, 1)
1296
call ga_get(g_b,bilo,bilo+n/3,bjlo, bjlo+n/2,buf,n/3+1)
1298
do j = ajlo, ajlo+n/3
1299
if(Mod(j,nproc).eq.me) then
1300
do i = ailo, ailo+n/2
1302
if(buf(base) .ne. a(i,j)) then
1303
print *,me, ' error ',i,j, buf(base), a(i,j)
1304
call ga_error('exiting ...',0)
1314
write(6,*)' without transpose: OK '
1319
write(6,'(2x,1h[,4i4,2h]~,5h --> ,1h[,4i4,1h])')
1320
$ ailo,aihi,ajlo,ajhi,
1321
$ bilo,bihi, bjlo, bjhi
1323
call ga_copy_patch('t', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1324
$ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1325
* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,1)
1326
* call ga_print(g_b, 1)
1327
call ga_get(g_b,bilo,bilo+n/3,bjlo, bjlo+n/2,buf,n/3+1)
1329
do i = ailo, ailo+n/2
1330
if(Mod(j,nproc).eq.me) then
1331
do j = ajlo, ajlo+n/3
1333
if(buf(base) .ne. a(i,j)) then
1334
print *,me, ' error ',i,j, buf(base), a(i,j)
1335
call ga_error('exiting ...',0)
1345
write(6,*)' transposed: OK '
1353
write(6,*)'> Checking ga_scale_patch ... '
1357
call ga_copy_patch('n', g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1358
$ g_b, bilo,bilo+n/2, bjlo, bjlo+n/3)
1360
call ga_scale_patch(g_b, bilo,bilo+n/2,bjlo, bjlo+n/3,val)
1361
call ga_get(g_b,bilo,bilo+n/2,bjlo, bjlo+n/3,buf,n/2+1)
1363
do j = ajlo, ajlo+n/3
1364
if(Mod(j,nproc).eq.me) then
1365
do i = ailo, ailo+n/2
1367
if(buf(base) .ne. a(i,j)*val) then
1368
print *,me, ' error ',i,j, buf(base), a(i,j)*val
1369
call ga_error('exiting ...',0)
1386
write(6,*)'> Checking ga_add_patch ... '
1393
call ga_add_patch(alpha, g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1394
$ beta, g_b, bilo,bilo+n/2, bjlo, bjlo+n/3,
1395
$ g_c, bilo,bilo+n/2, bjlo, bjlo+n/3)
1396
call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/3,buf,n/2+1)
1398
val = val*beta + alpha
1399
do j = ajlo, ajlo+n/3
1400
if(Mod(j,nproc).eq.me) then
1401
do i = ailo, ailo+n/2
1403
if(ABS(buf(base)- a(i,j)*val).gt.1e-3) then
1404
print *,me, ' error ',i,j, buf(base), a(i,j)*val
1405
call ga_error('exiting ...',0)
1422
write(6,*)'> Checking ga_sdot_patch ... '
1425
alpha= ga_sdot_patch(g_a,'n', ailo,ailo+n/2, ajlo, ajlo+n/3,
1426
$ g_c,'n', bilo,bilo+n/2, bjlo, bjlo+n/3)
1429
do j = ajlo, ajlo+n/3
1430
do i = ailo, ailo+n/2
1431
beta = beta + a(i,j)**2
1434
if(ABS(beta*val- alpha).gt.(1.0e-2*alpha)) then
1435
print *,me, ' error ', beta*val, alpha
1436
call ga_error('exiting ...',0)
1444
c......................................................
1445
ccc if (me .eq. 0) then
1447
ccc write(6,*)'> Checking ga_matmul_patch ... '
1450
ccc do j = 1+me, n, nproc
1451
ccc call ga_put(g_b,1,n,j,j,b(1,j),n)
1454
ccc call ga_matmul_patch('n','n', 1.0, 0.0,
1455
ccc $ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1456
ccc $ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
1457
ccc $ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
1458
ccc call xgemm('n','n',n/2+1,n/2+1,n/3+1,1d0,a(ailo,ajlo), n,
1459
ccc $ b(bilo,bjlo),n, 0d0, c, n)
1460
ccc* call ga_print_patch(g_a, ailo,ailo+n/2, ajlo, ajlo+n/3)
1461
ccc* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1462
ccc* call ga_print(g_c, 1)
1463
ccc call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
1466
ccc if(Mod(j,nproc).eq.me) then
1469
ccc if(ABS(buf(base)- c(i,j)).gt.1e-8) then
1470
ccc print *,me, ' error ',i,j, buf(base), c(i,j)
1471
ccc call ga_error('exiting ...',0)
1475
ccc base = base +n/2+1
1480
ccc if (me .eq. 0) then
1481
ccc write(6,*)' a*b: OK '
1486
ccc call ga_matmul_patch('t','n', 1d0, 0d0,
1487
ccc $ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1488
ccc $ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
1489
ccc $ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
1490
ccc call xgemm('t','n',n/2+1,n/2+1,n/3+1,1.0,a(ajlo,ailo), n,
1491
ccc $ b(bilo,bjlo),n, 0.0, c, n)
1492
ccc* call ga_print(g_a,1)
1493
ccc* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1494
ccc* call ga_print(g_c, 1)
1495
ccc call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
1498
ccc if(Mod(j,nproc).eq.me) then
1501
ccc if(ABS(buf(base)- c(i,j)).gt.1e-8) then
1502
ccc print *,me, ' error ',i,j, buf(base), c(i,j)
1503
ccc call ga_error('exiting ...',0)
1507
ccc base = base +n/2+1
1512
ccc if (me .eq. 0) then
1513
ccc write(6,*)' trans(a)*b: OK '
1518
ccc call ga_matmul_patch('n','t', 1.0, 0.0,
1519
ccc $ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1520
ccc $ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
1521
ccc $ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
1522
ccc call xgemm('n','t',n/2+1,n/2+1,n/3+1,1.0,a(ailo,ajlo), n,
1523
ccc $ b(bjlo,bilo),n, 0.0, c, n)
1524
ccc* call ga_print(g_a,1)
1525
ccc* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1526
ccc* call ga_print(g_c, 1)
1527
ccc call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
1530
ccc if(Mod(j,nproc).eq.me) then
1533
ccc if(ABS(buf(base)- c(i,j)).gt.1e-8) then
1534
ccc print *,me, ' error ',i,j, buf(base), c(i,j)
1535
ccc call ga_error('exiting ...',0)
1539
ccc base = base +n/2+1
1545
ccc if (me .eq. 0) then
1546
ccc write(6,*)' a*trans(b): OK '
1551
ccc call ga_matmul_patch('t','t', 1d0, 0d0,
1552
ccc $ g_a, ailo,ailo+n/2, ajlo, ajlo+n/3,
1553
ccc $ g_b, bilo,bilo+n/3, bjlo, bjlo+n/2,
1554
ccc $ g_c, bilo,bilo+n/2, bjlo, bjlo+n/2)
1555
ccc call xgemm('t','t',n/2+1,n/2+1,n/3+1,1.0,a(ajlo,ailo), n,
1556
ccc $ b(bjlo,bilo),n, 0.0, c, n)
1557
ccc* call ga_print(g_a,1)
1558
ccc* call ga_print_patch(g_b, bilo,bilo+n/3, bjlo, bjlo+n/2)
1559
ccc* call ga_print(g_c, 1)
1560
ccc call ga_get(g_c,bilo,bilo+n/2,bjlo, bjlo+n/2,buf,n/2+1)
1563
ccc if(Mod(j,nproc).eq.me) then
1566
ccc if(ABS(buf(base)- c(i,j)).gt.1e-8) then
1567
ccc print *,me, ' error ',i,j, buf(base), c(i,j)
1568
ccc call ga_error('exiting ...',0)
1572
ccc base = base +n/2+1
1577
ccc if (me .eq. 0) then
1578
ccc write(6,*)' trans(a)*trans(b): OK '
1581
ccc status = ga_destroy(g_a)
1582
ccc status = status .and. ga_destroy(g_b)
1583
ccc status = status .and. ga_destroy(g_c)
1584
ccc if(.not. status) print *, 'ga_destroy failed'