7
Purpose: test the interfaces:
9
GA_Abs_value_patch(g_a)
10
GA_Add_constant_patch(g_a, alpha)
11
GA_Recip_patch_patch(g_a)
12
GA_Elem_multiply_patch(g_a, alo, ahi, g_b, blo, bhi, g_c, clo, chi)
13
GA_Elem_divide_patch(g_a, alo, ahi, g_b, blo, bhi, g_c, clo, chi)
14
GA_Elem_maximum_patch(g_a, alo, ahi, g_b, blo, bhi, g_c, clo, ch
15
GA_Elem_minimum_patch(g_a, alo, ahi, g_b, blo, bhi, g_c, clo, chi)
17
that are for TAO/Global Array Project
22
Mathematics Department
23
Columbia Basin College
29
Pacific Northwest National Laboratory
32
Revised on February 26, 2002.
49
#include "../src/globalp.h"
52
#define BLOCK_CYCLIC 0
55
#define USE_SCALAPACK 1
58
#ifndef GA_HALF_MAX_INT
59
#define GA_HALF_MAX_INT ((((int)1) << ((int)(8*sizeof(int))-2)) - 1)
63
#define GA_INFINITY_I (GA_HALF_MAX_INT + GA_HALF_MAX_INT + 1)
66
Seemed too small arbitrarily.
67
#define GA_INFINITY_I 100000
71
#ifndef GA_NEGATIVE_INFINITY_I
72
#define GA_NEGATIVE_INFINITY_I (- GA_INFINITY_I)
77
Seemed too small arbitrarily.
78
#define GA_NEGATIVE_INFINITY_I -100000
82
#ifndef GA_HALF_MAX_LONG
83
#define GA_HALF_MAX_LONG ((((long)1) << ((int)(8*sizeof(long))-2)) - 1)
87
#define GA_INFINITY_L (GA_HALF_MAX_LONG + GA_HALF_MAX_LONG + 1)
89
#define GA_INFINITY_L 100000
93
#ifndef GA_NEGATIVE_INFINITY_L
94
#define GA_NEGATIVE_INFINITY_L (- GA_INFINITY_L)
98
#define GA_NEGATIVE_INFINITY_L -100000
102
Modified by Doug Baxter 01/24/04 to distinguish between
103
Double inifinity and float infinity.
105
#define GA_INFINITY 1.0e20
108
#ifndef GA_NEGATIVE_INFINITY
109
#define GA_NEGATIVE_INFINITY -1.0e20
112
#ifndef GA_INFINITY_F
113
#define GA_INFINITY_F 1.0e37
116
Original value below.
117
#define GA_INFINITY_F 1.0e20
119
#ifndef GA_NEGATIVE_INFINITY_F
120
#define GA_NEGATIVE_INFINITY_F -1.0e37
123
Original value below.
124
#define GA_NEGATIVE_INFINITY_F -1.0e20
126
#ifndef GA_INFINITY_D
127
#define GA_INFINITY_D 1.0e307
130
Original value below.
131
#define GA_INFINITY_D 1.0e20
133
#ifndef GA_NEGATIVE_INFINITY_D
134
#define GA_NEGATIVE_INFINITY_D -1.0e307
139
#define MISMATCHED(x,y) GA_ABS((x)-(y))>=THRESH
142
#define BLOCK_SIZE 20
143
#define OP_ELEM_MULT 0
144
#define OP_ELEM_DIV 1
145
#define OP_ELEM_MAX 2
146
#define OP_ELEM_MIN 3
148
#define OP_ADD_CONST 5
150
#define OP_STEP_MAX 7
151
#define OP_STEP_BOUND_INFO 8
154
Integer _ga_lo[MAXDIM], _ga_hi[MAXDIM], _ga_work[MAXDIM];
155
# define COPYINDEX_C2F(carr, farr, n){\
156
int i; for(i=0; i< (n); i++)(farr)[n-i-1]=(Integer)(carr)[i]+1;}
158
void nga_vfill_patch(Integer *g_a, Integer *lo, Integer *hi);
159
void nga_pnfill_patch(Integer *g_a, Integer *lo, Integer *hi);
161
void NGA_Vfill_patch(int g_a, int lo[], int hi[])
163
Integer a=(Integer)g_a;
164
Integer ndim = pnga_ndim(a);
165
COPYINDEX_C2F(lo,_ga_lo, ndim);
166
COPYINDEX_C2F(hi,_ga_hi, ndim);
168
nga_vfill_patch(&a, _ga_lo, _ga_hi);
172
void NGA_Pnfill_patch(int g_a, int lo[], int hi[])
174
Integer a=(Integer)g_a;
175
Integer ndim = pnga_ndim(a);
176
COPYINDEX_C2F(lo,_ga_lo, ndim);
177
COPYINDEX_C2F(hi,_ga_hi, ndim);
179
nga_pnfill_patch(&a, _ga_lo, _ga_hi);
201
fill_func (int nelem, int type, void *buf)
209
for (i = 0; i < nelem; i++)
210
((float *) buf)[i] = (float) ifun (i);
213
for (i = 0; i < nelem; i++)
214
((long *) buf)[i] = (long) ifun (i);
217
for (i = 0; i < nelem; i++)
218
((double *) buf)[i] = (double) ifun (i);
221
for (i = 0; i < 2 * nelem; i++)
222
((double *) buf)[i] = (double) ifun (i);
225
for (i = 0; i < 2 * nelem; i++)
226
((float *) buf)[i] = (float) ifun (i);
229
for (i = 0; i < nelem; i++)
230
((int *) buf)[i] = ifun (i);
233
GA_Error (" wrong data type ", type);
239
fill_func2 (int nelem, int type, void *buf)
241
/* int i,size=MA_sizeof(MT_CHAR,type,1);*/
248
for (i = 0; i < nelem; i++)
249
((float *) buf)[i] = (float) ifun2 (i);
252
for (i = 0; i < nelem; i++)
253
((long *) buf)[i] = (long) ifun2 (i);
256
for (i = 0; i < nelem; i++)
257
((double *) buf)[i] = (double) ifun2 (i);
260
for (i = 0; i < 2 * nelem; i++)
261
((double *) buf)[i] = (double) ifun2 (i);
264
for (i = 0; i < 2 * nelem; i++)
265
((float *) buf)[i] = (float) ifun2 (i);
268
for (i = 0; i < nelem; i++)
269
((int *) buf)[i] = ifun2 (i);
272
GA_Error (" wrong data type ", type);
278
fill_func3 (int nelem, int type, void *buf)
279
/*taking the absolute of the ifun() */
281
/*int i,size=MA_sizeof(MT_CHAR,type,1);*/
288
for (i = 0; i < nelem; i++)
289
((float *) buf)[i] = (float) GA_ABS (ifun (i));
292
for (i = 0; i < nelem; i++)
293
((long *) buf)[i] = (long) GA_ABS (ifun (i));
296
for (i = 0; i < nelem; i++)
297
((double *) buf)[i] = (double) GA_ABS (ifun (i));
300
for (i = 0; i < 2 * nelem - 1; i = i + 2)
302
((double *) buf)[i] =
304
(ifun (i) * ifun (i) + ifun (i + 1) * ifun (i + 1)));
305
((double *) buf)[i + 1] = 0.0;
309
for (i = 0; i < 2 * nelem - 1; i = i + 2)
313
(ifun (i) * ifun (i) + ifun (i + 1) * ifun (i + 1)));
314
((float *) buf)[i + 1] = 0.0;
318
for (i = 0; i < nelem; i++)
319
((int *) buf)[i] = GA_ABS (ifun (i));
322
GA_Error (" wrong data type ", type);
333
test_fun (int type, int dim, int OP)
335
void *boundminx=NULL,*boundmaxx=NULL,*wolfeminx=NULL;
336
double boundmind=0,boundmaxd=0,wolfemind=0,aboundmind=0,aboundmaxd=0,awolfemind=0;
337
long boundminl=0,boundmaxl=0,wolfeminl=0,aboundminl=0,aboundmaxl=0,awolfeminl=0;
338
float boundminf=0,boundmaxf=0,wolfeminf=0,aboundminf=0,aboundmaxf=0,awolfeminf=0;
339
int boundmini=0,boundmaxi=0,wolfemini=0,aboundmini=0,aboundmaxi=0,awolfemini=0;
341
int g_a, g_b, g_c, g_d, g_e;
342
int g_f, g_g, g_h, g_i, g_j;
343
int g_k, g_l, g_m, g_n;
345
int me = GA_Nodeid ();
348
int lo[MAXDIM], hi[MAXDIM];
352
int block_size[MAXDIM], proc_grid[MAXDIM], proc_cnt;
353
int needs_scaled_result;
373
DoubleComplex dcval2;
374
DoubleComplex dcval3;
375
DoubleComplex dcval4;
376
DoubleComplex dcval5;
377
DoubleComplex dcval6;
378
DoubleComplex dcval7;
379
SingleComplex fcval2;
380
SingleComplex fcval3;
381
SingleComplex fcval4;
382
SingleComplex fcval5;
383
SingleComplex fcval6;
384
SingleComplex fcval7;
390
DoubleComplex dcvresult;
391
SingleComplex fcvresult;
393
DoubleComplex dcbvresult;
394
SingleComplex fcbvresult;
396
int result=0,result2=0,result3=0;
405
DoubleComplex dcmax2;
406
SingleComplex fcmax2;
408
DoubleComplex dcmax3;
409
SingleComplex fcmax3;
411
void *alpha=NULL, *beta=NULL;
413
long al = 1, bl = -1;
414
float af = 1.0, bf = -1.0;
415
double ad = 1.0, bd = -1.0;
416
DoubleComplex adc, bdc;
417
SingleComplex afc, bfc;
418
double x1, x2, x3, x4;
419
float fx1, fx2, fx3, fx4;
421
long resultl=0,aresultl=0;
422
double resultd=0,aresultd=0;
423
float resultf=0,aresultf=0;
424
int resulti=0,aresulti=0;
436
needs_scaled_result = 0;
438
dcval.real = -sin (3.0);
439
dcval.imag = -cos (3.0);
440
dcval2.real = 2 * sin (3.0);
441
dcval2.imag = 2 * cos (3.0);
442
dcval3.real = dcval.real*1.0e200;
443
dcval3.imag = dcval.imag*1.0e200;
444
dcval4.real = dcval2.real*1.0e200;
445
dcval4.imag = dcval2.imag*1.0e200;
448
dcval6.real = dcval3.imag;
449
dcval6.imag = dcval3.real;
450
dcval7.real = dcval4.imag;
451
dcval7.imag = dcval4.real;
453
fcval.real = -sin (3.0);
454
fcval.imag = -cos (3.0);
455
fcval2.real = 2 * sin (3.0);
456
fcval2.imag = 2 * cos (3.0);
457
fcval3.real = fcval.real*1.0e200;
458
fcval3.imag = fcval.imag*1.0e200;
459
fcval4.real = fcval2.real*1.0e200;
460
fcval4.imag = fcval2.imag*1.0e200;
463
fcval6.real = fcval3.imag;
464
fcval6.imag = fcval3.real;
465
fcval7.real = fcval4.imag;
466
fcval7.imag = fcval4.real;
470
for (i = 0; i < dim; i++) {
472
block_size[i] = BLOCK_SIZE;
473
if (i<dim-1 && proc_cnt < GA_Nnodes()) {
476
} else if (proc_cnt >= GA_Nnodes()) {
479
proc_grid[i] = GA_Nnodes()/proc_cnt;
483
for (i = 0; i < dim; i++)
489
g_a = GA_Create_handle();
490
GA_Set_data(g_a,dim,dims,type);
491
GA_Set_array_name(g_a,"A");
493
GA_Set_block_cyclic_proc_grid(g_a,block_size,proc_grid);
495
GA_Set_block_cyclic(g_a,block_size);
499
g_a = NGA_Create (type, dim, dims, "A", NULL);
501
GA_Error ("create failed: A", n);
504
g_b = GA_Duplicate (g_a, "B");
506
GA_Error ("duplicate failed: B", n);
508
g_c = GA_Duplicate (g_a, "C");
510
GA_Error ("duplicate failed: C", n);
512
g_d = GA_Duplicate (g_a, "D");
514
GA_Error ("duplicate failed: D", n);
516
g_e = GA_Duplicate (g_a, "E");
518
GA_Error ("duplicate failed: E", n);
520
g_f = GA_Duplicate (g_a, "F");
522
GA_Error ("duplicate failed: F", n);
524
g_g = GA_Duplicate (g_a, "G");
526
GA_Error ("duplicate failed: G", n);
528
g_h = GA_Duplicate (g_a, "H");
530
GA_Error ("duplicate failed: H", n);
532
g_i = GA_Duplicate (g_a, "I");
534
GA_Error ("duplicate failed: I", n);
536
g_j = GA_Duplicate (g_a, "J");
538
GA_Error ("duplicate failed: J", n);
540
g_k = GA_Duplicate (g_a, "K");
542
GA_Error ("duplicate failed: K", n);
544
g_l = GA_Duplicate (g_a, "L");
546
GA_Error ("duplicate failed: L", n);
548
g_m = GA_Duplicate (g_a, "M");
550
GA_Error ("duplicate failed: M", n);
552
g_n = GA_Duplicate (g_a, "N");
554
GA_Error ("duplicate failed: N", n);
555
/*initialize with zero */
579
boundminx = &boundmini;
580
boundmaxx = &boundmaxi;
581
wolfeminx = &wolfemini;
589
vresult = &dcvresult;
590
bvresult = &dcbvresult;
598
vresult = &fcvresult;
599
bvresult = &fcbvresult;
608
boundminx = &boundmind;
609
boundmaxx = &boundmaxd;
610
wolfeminx = &wolfemind;
618
boundminx = &boundminf;
619
boundmaxx = &boundmaxf;
620
wolfeminx = &wolfeminf;
628
boundminx = &boundminl;
629
boundmaxx = &boundmaxl;
630
wolfeminx = &wolfeminl;
633
GA_Error ("wrong data type.", type);
637
NGA_Fill_patch (g_a, lo, hi, val);
638
NGA_Fill_patch (g_b, lo, hi, val2);
639
NGA_Pnfill_patch (g_j, lo, hi);
646
printf ("Testing GA_Abs_value...");
647
GA_Abs_value_patch (g_a, lo, hi);
648
ivresult = GA_ABS (ival);
649
dvresult = GA_ABS (dval);
650
fvresult = GA_ABS (fval);
651
lvresult = GA_ABS (lval);
653
if (GA_ABS(dcval.real) >= GA_ABS(dcval.imag)) {
654
if (dcval.real == (double)0.0) {
655
dcvresult.real = (double)0.0;
657
x1 = dcval.imag/dcval.real;
658
dcvresult.real = GA_ABS(dcval.real)*sqrt(((double)1.0)+(x1*x1));
661
x1 = dcval.real/dcval.imag;
662
dcvresult.real = GA_ABS(dcval.imag)*sqrt(((double)1.0)+(x1*x1));
664
dcvresult.imag = 0.0;
665
NGA_Fill_patch (g_d, lo, hi, vresult);
667
if (type == C_DCPL) {
668
needs_scaled_result = 1;
669
NGA_Fill_patch(g_f,lo,hi,val3);
670
GA_Abs_value_patch (g_f, lo, hi);
671
if (GA_ABS(dcval3.real) >= GA_ABS(dcval3.imag)) {
672
if (dcval3.real == (double)0.0) {
673
dcbvresult.real = (double)0.0;
675
x1 = dcval3.imag/dcval3.real;
676
dcbvresult.real = GA_ABS(dcval3.real)*sqrt(((double)1.0)+(x1*x1));
679
x1 = dcval3.real/dcval3.imag;
680
dcbvresult.real = GA_ABS(dcval3.imag)*sqrt(((double)1.0)+(x1*x1));
682
dcbvresult.imag = (double)0.0;
683
NGA_Fill_patch (g_i, lo, hi, bvresult);
684
NGA_Fill_patch(g_k,lo,hi,&dcval6);
685
GA_Abs_value_patch (g_k, lo, hi);
686
if (GA_ABS(dcval6.real) >= GA_ABS(dcval6.imag)) {
687
if (dcval6.real == (double)0.0) {
688
dcbvresult.real = (double)0.0;
690
x1 = dcval6.imag/dcval6.real;
691
dcbvresult.real = GA_ABS(dcval6.real)*sqrt(((double)1.0)+(x1*x1));
694
x1 = dcval6.real/dcval6.imag;
695
dcbvresult.real = GA_ABS(dcval6.imag)*sqrt(((double)1.0)+(x1*x1));
697
NGA_Fill_patch (g_n, lo, hi, bvresult);
699
if (type == C_SCPL) {
700
needs_scaled_result = 1;
701
NGA_Fill_patch(g_f,lo,hi,val3);
702
GA_Abs_value_patch (g_f, lo, hi);
703
if (GA_ABS(fcval3.real) >= GA_ABS(fcval3.imag)) {
704
if (fcval3.real == (float )0.0) {
705
fcbvresult.real = (float )0.0;
707
fx1 = fcval3.imag/fcval3.real;
708
fcbvresult.real = GA_ABS(fcval3.real)*sqrt(((float)1.0)+(fx1*fx1));
711
fx1 = fcval3.real/fcval3.imag;
712
fcbvresult.real = GA_ABS(fcval3.imag)*sqrt(((float)1.0)+(fx1*fx1));
714
fcbvresult.imag = (float)0.0;
715
NGA_Fill_patch (g_i, lo, hi, bvresult);
716
NGA_Fill_patch(g_k,lo,hi,&dcval6);
717
GA_Abs_value_patch (g_k, lo, hi);
718
if (GA_ABS(fcval6.real) >= GA_ABS(fcval6.imag)) {
719
if (fcval6.real == (float)0.0) {
720
fcbvresult.real = (float)0.0;
722
fx1 = fcval6.imag/fcval6.real;
723
fcbvresult.real = GA_ABS(fcval6.real)*sqrt(((float)1.0)+(fx1*fx1));
726
fx1 = fcval6.real/fcval6.imag;
727
fcbvresult.real = GA_ABS(fcval6.imag)*sqrt(((float)1.0)+(fx1*fx1));
729
NGA_Fill_patch (g_n, lo, hi, bvresult);
734
printf ("Testing GA_Add_const...");
735
GA_Add_constant_patch (g_a, lo, hi, val2);
736
ivresult = ival + ival2;
737
dvresult = dval + dval2;
738
fvresult = fval + fval2;
739
lvresult = lval + lval2;
740
dcvresult.real = dcval.real + dcval2.real;
741
dcvresult.imag = dcval.imag + dcval2.imag;
742
fcvresult.real = fcval.real + fcval2.real;
743
fcvresult.imag = fcval.imag + fcval2.imag;
744
NGA_Fill_patch (g_d, lo, hi, vresult);
748
printf ("Testing GA_Recip...");
749
GA_Recip_patch (g_a, lo, hi);
750
ivresult = ((int)1) / ival;
751
dvresult = ((double)1.0) / dval;
752
fvresult = ((float)1.0) / fval;
753
lvresult = ((long)1) / lval;
755
if (GA_ABS(dcval.real) >= GA_ABS(dcval.imag)) {
756
if (dcval.real != (double)0.0) {
757
tmp = dcval.imag/dcval.real;
758
tmp2 = ((double)1.0)/((((double)1.0)+(tmp*tmp))*dcval.real);
759
dcvresult.real = tmp2;
760
dcvresult.imag = -tmp * tmp2;
762
printf("Error in testing GA_Recip dcval = 0.0\n");
765
tmp = dcval.real/dcval.imag;
766
tmp2 = ((double)1.0)/((((double)1.0)+(tmp*tmp))*dcval.imag);
767
dcvresult.real = tmp * tmp2;
768
dcvresult.imag = -tmp2;
770
NGA_Fill_patch (g_d, lo, hi, vresult);
772
if (type == C_DCPL) {
773
needs_scaled_result = 1;
774
NGA_Fill_patch (g_f, lo, hi, val3);
775
GA_Recip_patch (g_f, lo, hi);
776
if (GA_ABS(dcval3.real) >= GA_ABS(dcval3.imag)) {
777
if (dcval3.real == (double)0.0) {
778
printf("Error testing GA_Recip, dcval3.real = 0.0\n");
780
tmp = dcval3.imag/dcval3.real;
781
tmp2 = ((double)1.0)/((((double)1.0)+(tmp*tmp))*dcval3.real);
782
dcbvresult.real = tmp2;
783
dcbvresult.imag = -tmp * tmp2;
786
tmp = dcval3.real/dcval3.imag;
787
tmp2 = ((double)1.0)/((((double)1.0)+(tmp*tmp))*dcval3.imag);
788
dcbvresult.real = tmp * tmp2;
789
dcbvresult.imag = -tmp2;
791
NGA_Fill_patch (g_i, lo, hi, bvresult);
792
NGA_Fill_patch(g_k,lo,hi,&dcval6);
793
GA_Recip_patch (g_k, lo, hi);
794
if (GA_ABS(dcval6.real) >= GA_ABS(dcval6.imag)) {
795
if (dcval6.real == (double)0.0) {
796
printf("Error testing GA_Recip, dcval6.real = 0.0\n");
798
tmp = dcval6.imag/dcval6.real;
799
tmp2 = ((double)1.0)/((((double)1.0)+(tmp*tmp))*dcval6.real);
800
dcbvresult.real = tmp2;
801
dcbvresult.imag = -tmp * tmp2;
804
tmp = dcval6.real/dcval6.imag;
805
tmp2 = ((double)1.0)/((((double)1.0)+(tmp*tmp))*dcval6.imag);
806
dcbvresult.real = tmp * tmp2;
807
dcbvresult.imag = -tmp2;
809
NGA_Fill_patch (g_n, lo, hi, bvresult);
811
if (type == C_SCPL) {
812
needs_scaled_result = 1;
813
NGA_Fill_patch (g_f, lo, hi, val3);
814
GA_Recip_patch (g_f, lo, hi);
815
if (GA_ABS(fcval3.real) >= GA_ABS(fcval3.imag)) {
816
if (fcval3.real == (float )0.0) {
817
printf("Error testing GA_Recip, fcval3.real = 0.0\n");
819
tmpf = fcval3.imag/fcval3.real;
820
tmp2f = ((float)1.0)/((((float)1.0)+(tmpf*tmpf))*fcval3.real);
821
fcbvresult.real = tmp2f;
822
fcbvresult.imag = -tmpf * tmp2f;
825
tmpf = fcval3.real/fcval3.imag;
826
tmp2f = ((float)1.0)/((((float)1.0)+(tmpf*tmpf))*fcval3.imag);
827
fcbvresult.real = tmpf * tmp2f;
828
fcbvresult.imag = -tmp2f;
830
NGA_Fill_patch (g_i, lo, hi, bvresult);
831
NGA_Fill_patch(g_k,lo,hi,&dcval6);
832
GA_Recip_patch (g_k, lo, hi);
833
if (GA_ABS(fcval6.real) >= GA_ABS(fcval6.imag)) {
834
if (fcval6.real == (float)0.0) {
835
printf("Error testing GA_Recip, fcval6.real = 0.0\n");
837
tmpf = fcval6.imag/fcval6.real;
838
tmp2f = ((float)1.0)/((((float)1.0)+(tmpf*tmpf))*fcval6.real);
839
fcbvresult.real = tmp2f;
840
fcbvresult.imag = -tmpf * tmp2f;
843
tmpf = fcval6.real/fcval6.imag;
844
tmp2f = ((float)1.0)/((((float)1.0)+(tmpf*tmpf))*fcval6.imag);
845
fcbvresult.real = tmpf * tmp2f;
846
fcbvresult.imag = -tmp2f;
848
NGA_Fill_patch (g_n, lo, hi, bvresult);
853
printf ("Testing GA_Elem_multiply...");
854
NGA_Fill_patch (g_b, lo, hi, val2);
855
/* g_c is different from g_a or g_b*/
856
GA_Elem_multiply_patch (g_a, lo, hi, g_b, lo, hi, g_c, lo, hi);
858
ivresult = ival * ival2;
859
dvresult = dval * dval2;
860
fvresult = fval * fval2;
861
lvresult = lval * lval2;
862
dcvresult.real = dcval.real * dcval2.real - dcval.imag * dcval2.imag;
863
dcvresult.imag = dcval.real * dcval2.imag + dcval2.real * dcval.imag;
864
NGA_Fill_patch (g_d, lo, hi, vresult);
868
printf ("Testing GA_Elem_divide...");
869
NGA_Fill_patch (g_b, lo, hi, val2);
870
GA_Elem_divide_patch (g_a, lo, hi, g_b, lo, hi, g_c, lo, hi);
871
ivresult = ival / ival2;
872
dvresult = dval / dval2;
873
fvresult = fval / fval2;
874
lvresult = lval / lval2;
875
dcvresult.real = 0.0;
876
dcvresult.imag = 0.0;
878
if (GA_ABS(dcval2.real) >= GA_ABS(dcval2.imag)) {
879
if (dcval2.real != (double)0.0) {
880
tmp = dcval2.imag/dcval2.real;
881
tmp2 = ((double)1.0)/(dcval2.real*(((double)1.0)+(tmp*tmp)));
882
dcvresult.real = (dcval.real + dcval.imag*tmp)*tmp2;
883
dcvresult.imag = (dcval.imag - dcval.real*tmp)*tmp2;
885
printf("Error in testing GA_Elem_divide dcval = 0.0\n");
888
tmp = dcval2.real/dcval2.imag;
889
tmp2 = 1.0/(dcval2.imag*(1.0+(tmp*tmp)));
890
dcvresult.real = (dcval.real*tmp + dcval.imag)*tmp2;
891
dcvresult.imag = (dcval.imag*tmp - dcval.real)*tmp2;
893
NGA_Fill_patch (g_d, lo, hi, vresult);
895
if (type == C_DCPL) {
896
needs_scaled_result = 1;
897
NGA_Fill_patch (g_f, lo, hi, val3);
898
NGA_Fill_patch (g_g, lo, hi, val4);
899
GA_Elem_divide_patch (g_f, lo, hi, g_g, lo, hi, g_h, lo, hi);
900
dcbvresult.real = (double)0.0;
901
dcbvresult.imag = (double)0.0;
902
if (GA_ABS(dcval4.real) >= GA_ABS(dcval4.imag)) {
903
if (dcval4.real != (double)0.0) {
904
tmp = dcval4.imag/dcval4.real;
905
tmp2 = ((double)1.0)/(dcval4.real*(((double)1.0)+(tmp*tmp)));
906
dcbvresult.real = (dcval3.real + dcval3.imag*tmp)*tmp2;
907
dcbvresult.imag = (dcval3.imag - dcval3.real*tmp)*tmp2;
909
printf("Error in testing GA_Elem_divide dcval4 = 0.0\n");
912
tmp = dcval4.real/dcval4.imag;
913
tmp2 = ((double)1.0)/(dcval4.imag*(((double)1.0)+(tmp*tmp)));
914
dcbvresult.real = (dcval3.real*tmp + dcval3.imag)*tmp2;
915
dcbvresult.imag = (dcval3.imag*tmp - dcval3.real)*tmp2;
917
NGA_Fill_patch (g_i, lo, hi, bvresult);
918
NGA_Fill_patch (g_k, lo, hi, &dcval6);
919
NGA_Fill_patch (g_l, lo, hi, &dcval7);
920
GA_Elem_divide_patch (g_k, lo, hi, g_l, lo, hi, g_m, lo, hi);
921
dcbvresult.real = (double)0.0;
922
dcbvresult.imag = (double)0.0;
923
if (GA_ABS(dcval7.real) >= GA_ABS(dcval7.imag)) {
924
if (dcval7.real != (double)0.0) {
925
tmp = dcval7.imag/dcval7.real;
926
tmp2 = ((double)1.0)/(dcval7.real*(((double)1.0)+(tmp*tmp)));
927
dcbvresult.real = (dcval6.real + dcval6.imag*tmp)*tmp2;
928
dcbvresult.imag = (dcval6.imag - dcval6.real*tmp)*tmp2;
930
printf("Error in testing GA_Elem_divide dcval7 = 0.0\n");
933
tmp = dcval7.real/dcval7.imag;
934
tmp2 = ((double)1.0)/(dcval7.imag*(((double)1.0)+(tmp*tmp)));
935
dcbvresult.real = (dcval6.real*tmp + dcval6.imag)*tmp2;
936
dcbvresult.imag = (dcval6.imag*tmp - dcval6.real)*tmp2;
938
NGA_Fill_patch (g_n, lo, hi, bvresult);
940
if (type == C_SCPL) {
941
needs_scaled_result = 1;
942
NGA_Fill_patch (g_f, lo, hi, val3);
943
NGA_Fill_patch (g_g, lo, hi, val4);
944
GA_Elem_divide_patch (g_f, lo, hi, g_g, lo, hi, g_h, lo, hi);
945
fcbvresult.real = (float)0.0;
946
fcbvresult.imag = (float)0.0;
947
if (GA_ABS(fcval4.real) >= GA_ABS(fcval4.imag)) {
948
if (fcval4.real != (float)0.0) {
949
tmpf = fcval4.imag/fcval4.real;
950
tmp2f = ((float)1.0)/(fcval4.real*(((float)1.0)+(tmpf*tmpf)));
951
fcbvresult.real = (fcval3.real + fcval3.imag*tmpf)*tmp2f;
952
fcbvresult.imag = (fcval3.imag - fcval3.real*tmpf)*tmp2f;
954
printf("Error in testing GA_Elem_divide fcval4 = 0.0\n");
957
tmpf = fcval4.real/fcval4.imag;
958
tmp2f = ((float)1.0)/(fcval4.imag*(((float)1.0)+(tmpf*tmpf)));
959
fcbvresult.real = (fcval3.real*tmpf + fcval3.imag)*tmp2f;
960
fcbvresult.imag = (fcval3.imag*tmpf - fcval3.real)*tmp2f;
962
NGA_Fill_patch (g_i, lo, hi, bvresult);
963
NGA_Fill_patch (g_k, lo, hi, &dcval6);
964
NGA_Fill_patch (g_l, lo, hi, &dcval7);
965
GA_Elem_divide_patch (g_k, lo, hi, g_l, lo, hi, g_m, lo, hi);
966
fcbvresult.real = (float)0.0;
967
fcbvresult.imag = (float)0.0;
968
if (GA_ABS(fcval7.real) >= GA_ABS(fcval7.imag)) {
969
if (fcval7.real != (float)0.0) {
970
tmpf = fcval7.imag/fcval7.real;
971
tmp2f = ((float)1.0)/(fcval7.real*(((float)1.0)+(tmpf*tmpf)));
972
fcbvresult.real = (fcval6.real + fcval6.imag*tmpf)*tmp2f;
973
fcbvresult.imag = (fcval6.imag - fcval6.real*tmpf)*tmp2f;
975
printf("Error in testing GA_Elem_divide fcval7 = 0.0\n");
978
tmpf = fcval7.real/fcval7.imag;
979
tmp2f = ((float)1.0)/(fcval7.imag*(((float)1.0)+(tmpf*tmpf)));
980
fcbvresult.real = (fcval6.real*tmpf + fcval6.imag)*tmp2f;
981
fcbvresult.imag = (fcval6.imag*tmpf - fcval6.real)*tmp2f;
983
NGA_Fill_patch (g_n, lo, hi, bvresult);
989
printf ("Testing GA_Elem_maximum...");
990
/*NGA_Fill_patch (g_b, lo, hi, val2);*/
991
GA_Elem_maximum_patch (g_a, lo, hi, g_b, lo, hi, g_c, lo, hi);
992
ivresult = GA_MAX (ival, ival2);
993
dvresult = GA_MAX (dval, dval2);
994
fvresult = GA_MAX (fval, fval2);
995
lvresult = GA_MAX (lval, lval2);
996
tmp = GA_MAX(GA_ABS(dcval.real),GA_ABS(dcval.imag));
997
tmp2 = GA_MAX(GA_ABS(dcval2.real),GA_ABS(dcval2.imag));
998
tmp = GA_MAX(tmp,tmp2);
999
dcvresult.real = dcval.real;
1000
dcvresult.imag = dcval.imag;
1002
tmp = ((double)1.0)/tmp;
1003
x1 = dcval.real*tmp;
1004
x2 = dcval.imag*tmp;
1005
x3 = dcval2.real*tmp;
1006
x4 = dcval2.imag*tmp;
1007
tmp = x1*x1 + x2*x2;
1008
tmp2 = x3*x3 + x4*x4;
1010
dcvresult.real = dcval2.real;
1011
dcvresult.imag = dcval2.imag;
1014
NGA_Fill_patch (g_d, lo, hi, vresult);
1015
if (type == C_DCPL) {
1016
needs_scaled_result = 1;
1017
NGA_Fill_patch (g_f, lo, hi, val3);
1018
NGA_Fill_patch (g_g, lo, hi, val4);
1019
GA_Elem_maximum_patch (g_f, lo, hi, g_g, lo, hi, g_h, lo, hi);
1020
tmp = GA_MAX(GA_ABS(dcval3.real),GA_ABS(dcval3.imag));
1021
tmp2 = GA_MAX(GA_ABS(dcval4.real),GA_ABS(dcval4.imag));
1022
tmp = GA_MAX(tmp,tmp2);
1023
dcvresult.real = dcval3.real;
1024
dcvresult.imag = dcval3.imag;
1026
tmp = ((double)1.0)/tmp;
1027
x1 = dcval3.real*tmp;
1028
x2 = dcval3.imag*tmp;
1029
x3 = dcval4.real*tmp;
1030
x4 = dcval4.imag*tmp;
1031
tmp = x1*x1 + x2*x2;
1032
tmp2 = x3*x3 + x4*x4;
1034
dcvresult.real = dcval4.real;
1035
dcvresult.imag = dcval4.imag;
1038
NGA_Fill_patch (g_i, lo, hi, vresult);
1039
NGA_Fill_patch (g_k, lo, hi, &dcval6);
1040
NGA_Fill_patch (g_l, lo, hi, &dcval7);
1041
GA_Elem_maximum_patch (g_k, lo, hi, g_l, lo, hi, g_m, lo, hi);
1042
tmp = GA_MAX(GA_ABS(dcval6.real),GA_ABS(dcval6.imag));
1043
tmp2 = GA_MAX(GA_ABS(dcval7.real),GA_ABS(dcval7.imag));
1044
tmp = GA_MAX(tmp,tmp2);
1045
dcvresult.real = dcval6.real;
1046
dcvresult.imag = dcval6.imag;
1048
tmp = ((double)1.0)/tmp;
1049
x1 = dcval6.real*tmp;
1050
x2 = dcval6.imag*tmp;
1051
x3 = dcval7.real*tmp;
1052
x4 = dcval7.imag*tmp;
1053
tmp = x1*x1 + x2*x2;
1054
tmp2 = x3*x3 + x4*x4;
1056
dcvresult.real = dcval7.real;
1057
dcvresult.imag = dcval7.imag;
1060
NGA_Fill_patch (g_n, lo, hi, vresult);
1065
printf ("Testing GA_Elem_minimum...");
1066
NGA_Fill_patch (g_b, lo, hi, val2);
1067
GA_Elem_minimum_patch (g_a, lo, hi, g_b, lo, hi, g_c, lo, hi);
1068
ivresult = GA_MIN (ival, ival2);
1069
dvresult = GA_MIN (dval, dval2);
1070
fvresult = GA_MIN (fval, fval2);
1071
lvresult = GA_MIN (lval, lval2);
1072
tmp = GA_MAX(GA_ABS(dcval.real),GA_ABS(dcval.imag));
1073
tmp2 = GA_MAX(GA_ABS(dcval2.real),GA_ABS(dcval2.imag));
1074
tmp = GA_MAX(tmp,tmp2);
1075
dcvresult.real = dcval.real;
1076
dcvresult.imag = dcval.imag;
1079
x1 = dcval.real*tmp;
1080
x2 = dcval.imag*tmp;
1081
x3 = dcval2.real*tmp;
1082
x4 = dcval2.imag*tmp;
1083
tmp = x1*x1 + x2*x2;
1084
tmp2 = x3*x3 + x4*x4;
1086
dcvresult.real = dcval2.real;
1087
dcvresult.imag = dcval2.imag;
1090
NGA_Fill_patch (g_d, lo, hi, vresult);
1091
if (type == C_SCPL) {
1092
needs_scaled_result = 1;
1093
NGA_Fill_patch (g_f, lo, hi, val3);
1094
NGA_Fill_patch (g_g, lo, hi, val4);
1095
GA_Elem_minimum_patch (g_f, lo, hi, g_g, lo, hi, g_h, lo, hi);
1096
tmpf = GA_MAX(GA_ABS(fcval3.real),GA_ABS(fcval3.imag));
1097
tmp2f = GA_MAX(GA_ABS(fcval4.real),GA_ABS(fcval4.imag));
1098
tmpf = GA_MAX(tmpf,tmp2f);
1099
fcvresult.real = fcval3.real;
1100
fcvresult.imag = fcval3.imag;
1102
tmpf = ((float)1.0)/tmpf;
1103
fx1 = fcval3.real*tmpf;
1104
fx2 = fcval3.imag*tmpf;
1105
fx3 = fcval4.real*tmpf;
1106
fx4 = fcval4.imag*tmpf;
1107
tmpf = fx1*fx1 + fx2*fx2;
1108
tmp2f = fx3*fx3 + fx4*fx4;
1110
fcvresult.real = fcval4.real;
1111
fcvresult.imag = fcval4.imag;
1114
NGA_Fill_patch (g_i, lo, hi, vresult);
1115
NGA_Fill_patch (g_k, lo, hi, &dcval6);
1116
NGA_Fill_patch (g_l, lo, hi, &dcval7);
1117
GA_Elem_minimum_patch (g_k, lo, hi, g_l, lo, hi, g_m, lo, hi);
1118
tmpf = GA_MAX(GA_ABS(fcval6.real),GA_ABS(fcval6.imag));
1119
tmp2f = GA_MAX(GA_ABS(fcval7.real),GA_ABS(fcval7.imag));
1120
tmpf = GA_MAX(tmpf,tmp2f);
1121
fcvresult.real = fcval6.real;
1122
fcvresult.imag = fcval6.imag;
1124
tmpf = ((float)1.0)/tmpf;
1125
fx1 = fcval6.real*tmpf;
1126
fx2 = fcval6.imag*tmpf;
1127
fx3 = fcval7.real*tmpf;
1128
fx4 = fcval7.imag*tmpf;
1129
tmpf = fx1*fx1 + fx2*fx2;
1130
tmp2f = fx3*fx3 + fx4*fx4;
1132
fcvresult.real = fcval7.real;
1133
fcvresult.imag = fcval7.imag;
1136
NGA_Fill_patch (g_n, lo, hi, vresult);
1141
printf ("Testing GA_Step_max...");
1142
if (type != C_DCPL || type != C_SCPL) {
1143
/*NGA_Fill_patch (g_b, lo, hi, val2);*/
1144
GA_Abs_value_patch (g_b, lo, hi);
1145
GA_Step_max_patch (g_b, lo, hi, g_j, lo, hi, resultx);
1147
printf(" GA_Stepmax_patch type = %d, resultx = %le\n",type,resultx);
1151
It would be more robust to use GA_Elem_min_patch
1152
here to determine the minimum g_j value, but for
1153
now we set it to -2.
1155
aresulti = ((int)(GA_ABS(ival2)/GA_ABS(ival))) - resulti;
1156
aresultd = GA_ABS(dval2/dval) - resultd;
1157
aresultf = ((float)GA_ABS(fval2/fval)) - resultf;
1158
aresultl = ((long)(GA_ABS(lval2)/GA_ABS(lval))) - resultl;
1162
case OP_STEP_BOUND_INFO:
1164
printf ("Testing GA_Step_bound_info...");
1165
if (type != C_DCPL || type != C_SCPL) {
1166
/*NGA_Fill_patch (g_b, lo, hi, val2);*/
1167
GA_Abs_value_patch (g_b, lo, hi);
1168
GA_Abs_value_patch (g_a, lo, hi);
1169
/*GA_Abs_value_patch (g_j, lo, hi);*/
1170
NGA_Fill_patch(g_c, lo, hi, val5);
1171
GA_Step_bound_info_patch (g_b,lo,hi, g_j,lo,hi, g_a,lo,hi, g_c,lo,hi, boundminx,wolfeminx,boundmaxx);
1173
printf(" GA_Stepmax2_patch type = %d, resultx = %le\n",type,resultx);
1177
This is currently hardwired. would need to change if
1178
val, val2 or val5 change.
1183
awolfemini = ((int)(((int)1)/((int)2))) - wolfemini;
1184
aboundmini = ((int)(((int)1)/((int)2))) - boundmini;
1185
aboundmaxi = (int)GA_INFINITY_I - boundmaxi;
1188
awolfemind = (((double)1.0)/((double)2.0)) - wolfemind;
1189
aboundmind = (((double)1.0)/((double)2.0)) - boundmind;
1190
aboundmaxd = (double)GA_INFINITY_D - boundmaxd;
1193
awolfeminf = (((float)1.0)/((float)2.0)) - wolfeminf;
1194
aboundminf = (((float)1.0)/((float)2.0)) - boundminf;
1195
aboundmaxf = (float)GA_INFINITY_F - boundmaxf;
1198
awolfeminl = ((long)(((long)1)/((long)2))) - wolfeminl;
1199
aboundminl = ((long)(((long)1)/((long)2))) - boundminl;
1200
aboundmaxl = (long)GA_INFINITY_L - boundmaxl;
1203
GA_Error ("GA_step_bound_info wrong data type.", type);
1208
GA_Error ("test_function: wrong operation.", OP);
1239
GA_Error ("wrong data type.", type);
1246
NGA_Add_patch (alpha, g_c, lo, hi, beta, g_d, lo, hi, g_e, lo, hi);
1247
if (needs_scaled_result == 1) {
1248
NGA_Add_patch (alpha, g_h, lo, hi, beta, g_i, lo, hi, g_j, lo, hi);
1249
NGA_Add_patch (alpha, g_m, lo, hi, beta, g_n, lo, hi, g_n, lo, hi);
1257
NGA_Add_patch (alpha, g_a, lo, hi, beta, g_d, lo, hi, g_e, lo, hi);
1258
if (needs_scaled_result == 1) {
1259
NGA_Add_patch (alpha, g_f, lo, hi, beta, g_i, lo, hi, g_j, lo, hi);
1260
NGA_Add_patch (alpha, g_k, lo, hi, beta, g_n, lo, hi, g_n, lo, hi);
1264
Else it was a reduction operation (one of the step_max functions).
1293
GA_Error ("wrong data type.", type);
1297
for unary and binary operators extract the maximum difference between
1298
computed and correct solutions.
1301
NGA_Select_elem (g_e, "max", max, index);
1302
if (needs_scaled_result == 1) {
1303
NGA_Select_elem (g_j, "max", max2, index2);
1304
NGA_Select_elem (g_n, "max", max3, index3);
1307
/* NGA_Select_elem (g_e, "min", min, index);*/
1311
Binary or Unary operators.
1318
/* result = (int)(imax - imin);*/
1320
if (result != (int)0) result = 1;
1324
r = dcmax.real - dcmin.real;
1325
im = dcmax.imag - dcmin.imag;
1329
if ((GA_ABS(r) + GA_ABS(im)) == (double)0.0) {
1334
if (needs_scaled_result == 1) {
1338
if ((GA_ABS(r) + GA_ABS(im)) == (double)0.0) {
1345
if ((GA_ABS(r) + GA_ABS(im)) == (double)0.0) {
1350
result = result | result2 | result3;
1355
rf = fcmax.real - fcmin.real;
1356
imf = fcmax.imag - fcmin.imag;
1360
if ((GA_ABS(rf) + GA_ABS(imf)) == (float)0.0) {
1365
if (needs_scaled_result == 1) {
1369
if ((GA_ABS(rf) + GA_ABS(imf)) == (float)0.0) {
1376
if ((GA_ABS(rf) + GA_ABS(imf)) == (float)0.0) {
1381
result = result | result2 | result3;
1385
if (dmax == (double)0.0) {
1392
if (fmax == (float)0.0) {
1399
if (lmax == (long)0) {
1406
GA_Error ("wrong data type.", type);
1410
A reduction operation, Step_max or Step_bound_info.
1412
if (type == C_DCPL || type == C_SCPL) {
1415
if (OP == OP_STEP_MAX) {
1420
if (aresulti == 0) {
1427
if (aresultd == (double)0.0) {
1434
if (aresultf == (float)0.0) {
1441
if (aresultl == (long)0) {
1448
GA_Error ("Stepmax op, wrong data type.", type);
1451
/* OP = 8 so Step_bound_info */
1455
if (awolfemini == 0) {
1460
if (aboundmini == 0) {
1465
if (aboundmaxi == 0) {
1472
if (awolfemind == ((double)0.0)) {
1477
if (aboundmind == ((double)0.0)) {
1482
if (aboundmaxd == ((double)0.0)) {
1489
if (awolfeminf == ((float)0.0)) {
1494
if (aboundminf == ((float)0.0)) {
1499
if (aboundmaxf == ((float)0.0)) {
1506
if (awolfeminl == ((long)0)) {
1511
if (aboundminl == ((long)0)) {
1516
if (aboundmaxl == ((long)0)) {
1523
GA_Error ("Stepmax op, wrong data type.", type);
1525
result = result | result2 | result3;
1531
if (MISMATCHED (result, 0)) {
1532
printf ("is not ok\n");
1533
GA_Error("aborting", 1);
1535
printf ("is ok.\n");
1540
NGA_Print_patch(g_a, lo, hi, 1);
1541
NGA_Print_patch(g_d, lo, hi, 1);
1542
NGA_Print_patch(g_e, lo, hi, 1);
1568
int heap = 20000, stack = 20000;
1574
GA_INIT(argc,argv); /* initialize GA */
1576
nproc = GA_Nnodes ();
1579
if (GA_Uses_fapi ())
1580
GA_Error ("Program runs with C array API only", 1);
1581
printf ("Using %ld processes\n", (long) nproc);
1587
if (!MA_init (C_DBL, stack, heap))
1588
GA_Error ("MA_init failed", stack + heap); /* initialize memory allocator */
1592
for (op = 0; op < 9; op++)
1594
/*for (d = 1; d < 2; d++)*/
1595
for (d = 1; d < 4; d++)
1598
printf ("\n\ndim =%d\n\n", d);
1600
printf ("\ndata type: INT\t\t");
1601
ok = test_fun (C_INT, d, op);
1603
printf ("\ndata type: double\t");
1604
ok = test_fun (C_DBL, d, op);
1606
printf ("\ndata type: float\t");
1607
ok = test_fun (C_FLOAT, d, op);
1609
printf ("\ndata type: long\t\t");
1610
ok = test_fun (C_LONG, d, op);
1613
printf ("\ndata type: double complex\t");
1614
ok = test_fun (C_DCPL, d, op);
1619
printf("\nAll tests successful\n");
1628
/*\ FILL IN ARRAY WITH Varying VALUEs. (from 0 to product of dims-1).
1629
For complex arrays make the real and imaginary parts equal.
1631
void nga_vfill_patch(Integer *g_a, Integer *lo, Integer *hi)
1634
Integer ndim, dims[MAXDIM], type;
1635
Integer loA[MAXDIM], hiA[MAXDIM], ld[MAXDIM];
1638
Integer bvalue[MAXDIM], bunit[MAXDIM], baseld[MAXDIM];
1639
Integer me= pnga_nodeid();
1640
int local_sync_begin,local_sync_end;
1642
local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end;
1643
_ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
1644
if(local_sync_begin)GA_Sync();
1646
GA_PUSH_NAME("nga_vfill_patch");
1648
pnga_inquire(*g_a, &type, &ndim, dims);
1650
/* get limits of VISIBLE patch */
1651
pnga_distribution(*g_a, me, loA, hiA);
1653
/* determine subset of my local patch to access */
1654
/* Output is in loA and hiA */
1655
if(pnga_patch_intersect(lo, hi, loA, hiA, ndim)){
1657
/* get data_ptr to corner of patch */
1658
/* ld are leading dimensions INCLUDING ghost cells */
1659
pnga_access_ptr(*g_a, loA, hiA, &data_ptr, ld);
1661
/* number of n-element of the first dimension */
1662
n1dim = 1; for(i=1; i<ndim; i++) n1dim *= (hiA[i] - loA[i] + 1);
1664
/* calculate the destination indices */
1665
bvalue[0] = 0; bvalue[1] = 0; bunit[0] = 1; bunit[1] = 1;
1666
/* baseld[0] = ld[0]
1667
* baseld[1] = ld[0] * ld[1]
1668
* baseld[2] = ld[0] * ld[1] * ld[2] .....
1670
baseld[0] = ld[0]; baseld[1] = baseld[0] *ld[1];
1671
for(i=2; i<ndim; i++) {
1673
bunit[i] = bunit[i-1] * (hiA[i-1] - loA[i-1] + 1);
1674
baseld[i] = baseld[i-1] * ld[i];
1679
for(i=0; i<n1dim; i++) {
1681
for(j=1; j<ndim; j++) {
1682
idx += bvalue[j] * baseld[j-1];
1683
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1684
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1686
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1687
((int *)data_ptr)[idx+j] = (int)(idx+j);
1691
for(i=0; i<n1dim; i++) {
1693
for(j=1; j<ndim; j++) {
1694
idx += bvalue[j] * baseld[j-1];
1695
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1696
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1699
for(j=0; j<(hiA[0]-loA[0]+1); j++) {
1700
((DoubleComplex *)data_ptr)[idx+j].real = (double)(idx+j);
1701
((DoubleComplex *)data_ptr)[idx+j].imag = (double)(idx+j);
1707
for(i=0; i<n1dim; i++) {
1709
for(j=1; j<ndim; j++) {
1710
idx += bvalue[j] * baseld[j-1];
1711
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1712
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1715
for(j=0; j<(hiA[0]-loA[0]+1); j++) {
1716
((SingleComplex *)data_ptr)[idx+j].real = (float)(idx+j);
1717
((SingleComplex *)data_ptr)[idx+j].imag = (float)(idx+j);
1723
for(i=0; i<n1dim; i++) {
1725
for(j=1; j<ndim; j++) {
1726
idx += bvalue[j] * baseld[j-1];
1727
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1728
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1731
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1732
((double*)data_ptr)[idx+j] = (double)(idx+j);
1736
for(i=0; i<n1dim; i++) {
1738
for(j=1; j<ndim; j++) {
1739
idx += bvalue[j] * baseld[j-1];
1740
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1741
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1743
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1744
((float *)data_ptr)[idx+j] = (float)(idx+j);
1748
for(i=0; i<n1dim; i++) {
1750
for(j=1; j<ndim; j++) {
1751
idx += bvalue[j] * baseld[j-1];
1752
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1753
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1755
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1756
((long *)data_ptr)[idx+j] = (long)(idx+j);
1759
default: GA_Error(" wrong data type ",type);
1762
/* release access to the data */
1763
pnga_release_update(*g_a, loA, hiA);
1766
if(local_sync_end)GA_Sync();
1768
/*\ Utility function to actually set positive/negative values
1770
void ngai_do_pnfill_patch(Integer type, Integer ndim, Integer *loA, Integer *hiA,
1771
Integer *ld, void *data_ptr)
1775
Integer bvalue[MAXDIM], bunit[MAXDIM], baseld[MAXDIM];
1776
/* number of n-element of the first dimension */
1777
n1dim = 1; for(i=1; i<ndim; i++) n1dim *= (hiA[i] - loA[i] + 1);
1779
/* calculate the destination indices */
1780
bvalue[0] = 0; bvalue[1] = 0; bunit[0] = 1; bunit[1] = 1;
1781
/* baseld[0] = ld[0]
1782
* baseld[1] = ld[0] * ld[1]
1783
* baseld[2] = ld[0] * ld[1] * ld[2] .....
1785
baseld[0] = ld[0]; baseld[1] = baseld[0] *ld[1];
1786
for(i=2; i<ndim; i++) {
1788
bunit[i] = bunit[i-1] * (hiA[i-1] - loA[i-1] + 1);
1789
baseld[i] = baseld[i-1] * ld[i];
1794
for(i=0; i<n1dim; i++) {
1796
for(j=1; j<ndim; j++) {
1797
idx += bvalue[j] * baseld[j-1];
1798
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1799
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1802
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1803
((int *)data_ptr)[idx+j] = (int)(((idx+j)&3)-2);
1807
for(i=0; i<n1dim; i++) {
1809
for(j=1; j<ndim; j++) {
1810
idx += bvalue[j] * baseld[j-1];
1811
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1812
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1815
for(j=0; j<(hiA[0]-loA[0]+1); j++) {
1816
((DoubleComplex *)data_ptr)[idx+j].real = (double)(((idx+j)&3)-2);
1817
((DoubleComplex *)data_ptr)[idx+j].imag = (double)(((idx+j)&3)-2);
1822
for(i=0; i<n1dim; i++) {
1824
for(j=1; j<ndim; j++) {
1825
idx += bvalue[j] * baseld[j-1];
1826
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1827
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1830
for(j=0; j<(hiA[0]-loA[0]+1); j++) {
1831
((SingleComplex *)data_ptr)[idx+j].real = (float)(((idx+j)&3)-2);
1832
((SingleComplex *)data_ptr)[idx+j].imag = (float)(((idx+j)&3)-2);
1837
for(i=0; i<n1dim; i++) {
1839
for(j=1; j<ndim; j++) {
1840
idx += bvalue[j] * baseld[j-1];
1841
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1842
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1845
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1846
((double*)data_ptr)[idx+j] = (double)(((idx+j)&3)-2);
1850
for(i=0; i<n1dim; i++) {
1852
for(j=1; j<ndim; j++) {
1853
idx += bvalue[j] * baseld[j-1];
1854
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1855
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1857
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1858
((float *)data_ptr)[idx+j] = (float)(((idx+j)&3)-2);
1862
for(i=0; i<n1dim; i++) {
1864
for(j=1; j<ndim; j++) {
1865
idx += bvalue[j] * baseld[j-1];
1866
if(((i+1) % bunit[j]) == 0) bvalue[j]++;
1867
if(bvalue[j] > (hiA[j]-loA[j])) bvalue[j] = 0;
1869
for(j=0; j<(hiA[0]-loA[0]+1); j++)
1870
((long *)data_ptr)[idx+j] = (long)(((idx+j)&3)-2);
1873
default: GA_Error(" wrong data type ",type);
1878
/*\ FILL IN ARRAY WITH Varying positive and negative VALUEs.
1880
For complex arrays make the real and imaginary parts equal.
1882
void nga_pnfill_patch(Integer *g_a, Integer *lo, Integer *hi)
1885
Integer ndim, dims[MAXDIM], type;
1886
Integer loA[MAXDIM], hiA[MAXDIM], ld[MAXDIM];
1888
Integer me= pnga_nodeid();
1890
int local_sync_begin,local_sync_end;
1892
local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end;
1893
_ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
1894
if(local_sync_begin)GA_Sync();
1896
GA_PUSH_NAME("nga_pnfill_patch");
1898
pnga_inquire(*g_a, &type, &ndim, dims);
1900
num_blocks = pnga_total_blocks(*g_a);
1902
if (num_blocks < 0) {
1903
/* get limits of VISIBLE patch */
1904
pnga_distribution(*g_a, me, loA, hiA);
1906
/* determine subset of my local patch to access */
1907
/* Output is in loA and hiA */
1908
if(pnga_patch_intersect(lo, hi, loA, hiA, ndim)){
1910
/* get data_ptr to corner of patch */
1911
/* ld are leading dimensions INCLUDING ghost cells */
1912
pnga_access_ptr(*g_a, loA, hiA, &data_ptr, ld);
1914
ngai_do_pnfill_patch(type, ndim, loA, hiA, ld, data_ptr);
1916
/* release access to the data */
1917
pnga_release_update(*g_a, loA, hiA);
1920
Integer offset, j, jtmp, chk;
1921
Integer loS[MAXDIM];
1922
Integer nproc = pnga_nnodes();
1923
/* using simple block-cyclic data distribution */
1924
if (!pnga_uses_proc_grid(*g_a)){
1925
for (i=me; i<num_blocks; i += nproc) {
1926
/* get limits of patch */
1927
pnga_distribution(*g_a, i, loA, hiA);
1929
/* loA is changed by pnga_patch_intersect, so
1931
for (j=0; j<ndim; j++) {
1935
/* determine subset of my local patch to access */
1936
/* Output is in loA and hiA */
1937
if(pnga_patch_intersect(lo, hi, loA, hiA, ndim)){
1939
/* get data_ptr to corner of patch */
1940
/* ld are leading dimensions for block */
1941
pnga_access_block_ptr(*g_a, i, &data_ptr, ld);
1943
/* Check for partial overlap */
1945
for (j=0; j<ndim; j++) {
1946
if (loS[j] < loA[j]) {
1952
/* Evaluate additional offset for pointer */
1955
for (j=0; j<ndim-1; j++) {
1956
offset += (loA[j]-loS[j])*jtmp;
1959
offset += (loA[ndim-1]-loS[ndim-1])*jtmp;
1962
data_ptr = (void*)((int*)data_ptr + offset);
1965
data_ptr = (void*)((double*)data_ptr + 2*offset);
1968
data_ptr = (void*)((float*)data_ptr + 2*offset);
1971
data_ptr = (void*)((double*)data_ptr + offset);
1974
data_ptr = (void*)((float*)data_ptr + offset);
1977
data_ptr = (void*)((long*)data_ptr + offset);
1979
default: GA_Error(" wrong data type ",type);
1983
ngai_do_pnfill_patch(type, ndim, loA, hiA, ld, data_ptr);
1985
/* release access to the data */
1986
pnga_release_update_block(*g_a, i);
1990
/* using scalapack block-cyclic data distribution */
1991
Integer proc_index[MAXDIM], index[MAXDIM];
1992
Integer topology[MAXDIM];
1993
Integer blocks[MAXDIM], block_dims[MAXDIM];
1994
pnga_get_proc_index(*g_a, me, proc_index);
1995
pnga_get_proc_index(*g_a, me, index);
1996
pnga_get_block_info(*g_a, blocks, block_dims);
1997
pnga_get_proc_grid(*g_a, topology);
1998
while (index[ndim-1] < blocks[ndim-1]) {
1999
/* find bounding coordinates of block */
2001
for (i = 0; i < ndim; i++) {
2002
loA[i] = index[i]*block_dims[i]+1;
2003
hiA[i] = (index[i] + 1)*block_dims[i];
2004
if (hiA[i] > dims[i]) hiA[i] = dims[i];
2005
if (hiA[i] < loA[i]) chk = 0;
2008
/* loA is changed by pnga_patch_intersect, so
2010
for (j=0; j<ndim; j++) {
2014
/* determine subset of my local patch to access */
2015
/* Output is in loA and hiA */
2016
if(pnga_patch_intersect(lo, hi, loA, hiA, ndim)){
2018
/* get data_ptr to corner of patch */
2019
/* ld are leading dimensions for block */
2020
pnga_access_block_grid_ptr(*g_a, index, &data_ptr, ld);
2022
/* Check for partial overlap */
2024
for (j=0; j<ndim; j++) {
2025
if (loS[j] < loA[j]) {
2031
/* Evaluate additional offset for pointer */
2034
for (j=0; j<ndim-1; j++) {
2035
offset += (loA[j]-loS[j])*jtmp;
2038
offset += (loA[ndim-1]-loS[ndim-1])*jtmp;
2041
data_ptr = (void*)((int*)data_ptr + offset);
2044
data_ptr = (void*)((double*)data_ptr + 2*offset);
2047
data_ptr = (void*)((float*)data_ptr + 2*offset);
2050
data_ptr = (void*)((double*)data_ptr + offset);
2053
data_ptr = (void*)((float*)data_ptr + offset);
2056
data_ptr = (void*)((long*)data_ptr + offset);
2058
default: GA_Error(" wrong data type ",type);
2062
ngai_do_pnfill_patch(type, ndim, loA, hiA, ld, data_ptr);
2064
/* release access to the data */
2065
pnga_release_update_block_grid(*g_a, index);
2067
/* increment index to get next block on processor */
2068
index[0] += topology[0];
2069
for (i = 0; i < ndim; i++) {
2070
if (index[i] >= blocks[i] && i<ndim-1) {
2071
index[i] = proc_index[i];
2072
index[i+1] += topology[i+1];
2079
if(local_sync_end)GA_Sync();