15
#if defined(FUJITSU) || defined(CRAY_YMP)
16
# define THRESH 1.0e-10
18
# define THRESH 1.0e-20
20
/* Absolute value of v. The argument is expanded more than once, so pass
 * only side-effect-free expressions. */
#define ABS(v) ((v) >= 0.0 ? (v) : -(v))
21
/* Larger of a and b (a is chosen on ties). Each argument is expanded more
 * than once, so pass only side-effect-free expressions. */
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
22
/* Nonzero when x and y differ by more than THRESH, with the difference
 * scaled by max(1, |x|). The entire expansion is parenthesized so the
 * comparison stays intact when the macro is combined with other operators
 * (e.g. !MISMATCH(a,b) or MISMATCH(a,b) == 0). Arguments are expanded more
 * than once, so pass only side-effect-free expressions. */
#define MISMATCH(x,y) ((ABS((x)-(y)) / MAX(1.0,ABS((x)))) > THRESH)
24
/* Square of v. The argument is expanded twice, so pass only
 * side-effect-free expressions. */
#define POW2(v) ((v)*(v))
25
/* Fourth power of v, multiplied left to right. The argument is expanded
 * four times, so pass only side-effect-free expressions. */
#define POW4(v) ((v)*(v)*(v)*(v))
26
#define SPECIFIC_CASE 0
29
static void dpatch_test(
34
int cmpos, int cnpos);
35
static void dpatch_test2();
37
int main(int argc, char **argv)
42
int sizes[] = {10, 50, 100};
43
int s, same_dist, ampos, akpos, bkpos, bnpos, cmpos, cnpos;
46
GA_Initialize_args(&argc,&argv);
48
if (0 == GA_Nodeid()) {
49
printf(" GA initialized\n");
53
/* we need srandom seed to be the same on all procs */
56
GA_Lgop(&seed, 1, "max");
59
printf("seed=%ld\n", seed);
64
/* we want to force distribution of innermost loop in nga_mulmat_patch
65
by providing less buffer memory than needed */
68
gasize = (POW4(NMAX) * 3)/GA_Nnodes();
74
bufsize = (NMAX/2 + 1)*(NMAX/3 + 1)*2 + POW2(NMAX/2 + 1);
75
bufsize = bufsize*6/7;
77
if (!MA_init(MT_DBL, 10, gasize+bufsize+500000)) {
78
GA_Error("MA_init failed", -1);
80
if (0 == GA_Nodeid()) {
82
printf(" CHECKING MATRIX MULTIPLICATION FOR PATCHES \n");
84
printf("gasize and bufsize are %d %d\n", gasize, bufsize);
90
dpatch_test(10, 0, 0, 1, 1, 2, 2, 3);
93
for (same_dist=0; same_dist<2; ++same_dist) {
94
for (ampos=0; ampos<4; ++ampos) {
95
for (akpos=ampos+1; akpos<4; ++akpos) {
96
for (bkpos=0; bkpos<4; ++bkpos) {
97
for (bnpos=bkpos+1; bnpos<4; ++bnpos) {
98
for (cmpos=0; cmpos<4; ++cmpos) {
99
for (cnpos=cmpos+1; cnpos<4; ++cnpos) {
100
dpatch_test(sizes[s],
118
if(0 == GA_Nodeid()) {
119
printf(" All tests successful \n");
130
* We start with a 4-dimensional array and multiply various 2D patches,
131
* comparing results against a locally computed dgemm.
133
static void dpatch_test(
136
int ampos, int akpos,
137
int bkpos, int bnpos,
138
int cmpos, int cnpos)
141
double *a, *b, *c, *r, *v;
145
int dims[4], chunk[4], rld[3];
146
int alo[4], ahi[4], ald[3];
147
int blo[4], bhi[4], bld[3];
148
int clo[4], chi[4], cld[3];
156
assert(size <= NMAX);
163
m = random() % (size/2);
164
n = random() % (size/2);
165
k = random() % (size/2);
171
printf("size=%d, dist_same=%d ampos=%d akpos=%d bkpos=%d bnpos=%d cmpos=%d cnpos=%d m=%d n=%d k=%d\n", size, dist_same, ampos, akpos, bkpos, bnpos, cmpos, cnpos, m, n, k);
175
a = malloc(sizeof(double)*size*size);
176
b = malloc(sizeof(double)*size*size);
177
c = malloc(sizeof(double)*size*size);
178
r = malloc(sizeof(double)*size*size);
179
memset(a, 0, sizeof(double)*size*size);
180
memset(b, 0, sizeof(double)*size*size);
181
memset(c, 0, sizeof(double)*size*size);
182
memset(r, 0, sizeof(double)*size*size);
184
/* establish the shape and default chunking of the global arrays */
185
for (i=0; i<ndim; ++i) {
193
g_a = NGA_Create(C_DBL, ndim, dims, "a", chunk);
195
printf("NGA_Create failed\n");
197
GA_Error("... exiting", 1);
200
/* create g_b and g_c */
202
g_b = GA_Duplicate(g_a, "a_duplicated");
203
if(1 == GA_Compare_distr(g_a, g_b)) {
204
GA_Error("g_b distribution different",1);
206
g_c = GA_Duplicate(g_a, "a_duplicated_again");
207
if(1 == GA_Compare_distr(g_a, g_c)) {
208
GA_Error("g_c distribution different",1);
212
chunk[ndim-1] = size;
213
g_b = NGA_Create(C_DBL, ndim, dims, "b1", chunk);
215
GA_Error("NGA_Create failed:b1",1);
218
chunk[ndim-2] = size;
219
g_c = NGA_Create(C_DBL, ndim, dims, "c1", chunk);
221
GA_Error("NGA_Create failed:c1",1);
228
printf("> Checking NGA_Matmul_patch ... \n");
233
/* fill the g_a and g_b global arrays entirely with data */
234
for (i=0; i<ndim; ++i) {
245
/* generate some local data (an enumerated range) */
246
double *v = malloc(sizeof(double)*POW4(size));
247
memset(v, 0, sizeof(double)*POW4(size));
248
for (i=0; i<POW4(size); ++i) {
251
NGA_Put(g_a,alo,ahi,v,ald);
252
NGA_Put(g_b,blo,bhi,v,bld);
258
/* for g_a, g_b, g_c generate a random starting index for patches */
259
for (i=0; i<ndim; ++i) {
260
ahi[i] = alo[i] = random() % (size/2);
261
bhi[i] = blo[i] = random() % (size/2);
262
chi[i] = clo[i] = random() % (size/2);
271
if (ampos>0) ald[ampos-1] = m;
272
if (akpos>0) ald[akpos-1] = k;
275
if (bkpos>0) bld[bkpos-1] = k;
276
if (bnpos>0) bld[bnpos-1] = n;
279
if (cmpos>0) cld[cmpos-1] = m;
280
if (cnpos>0) cld[cnpos-1] = n;
283
printf("a[%d:%d,%d:%d,%d:%d,%d:%d] %dx%dx%dx%d (%dx%d)\n",
289
printf("ald={%d,%d,%d}\n", ald[0], ald[1], ald[2]);
290
printf("b[%d:%d,%d:%d,%d:%d,%d:%d] %dx%dx%dx%d (%dx%d)\n",
296
printf("bld={%d,%d,%d}\n", bld[0], bld[1], bld[2]);
297
printf("c[%d:%d,%d:%d,%d:%d,%d:%d] %dx%dx%dx%d (%dx%d)\n",
303
printf("cld={%d,%d,%d}\n", cld[0], cld[1], cld[2]);
306
/* reset our buffers, just in case */
307
memset(a, 0, sizeof(double)*size*size);
308
memset(b, 0, sizeof(double)*size*size);
309
memset(c, 0, sizeof(double)*size*size);
310
memset(r, 0, sizeof(double)*size*size);
312
/* get patches locally and compute locally */
313
NGA_Get(g_a, alo, ahi, a, ald);
314
NGA_Get(g_b, blo, bhi, b, bld);
320
xb_dgemm(&tb, &ta, &n, &m, &k, &alpha, b, &n, a, &k, &beta, c, &n);
322
/* perform global computation */
323
NGA_Matmul_patch(ta, tb, &alpha, &beta,
329
/* get global result into local buf and compare results */
330
NGA_Get(g_c, clo, chi, r, cld);
332
for (i=0; i<1; ++i) {
333
if (MISMATCH(c[i],r[i])) {
334
printf("at %d %f != %f\n", i, c[i], r[i]);
335
GA_Error("mismatch", 1);
349
static void dpatch_test2()