1
/*$Id: base.h,v 1.40.2.4 2007/12/18 18:41:27 d3g293 Exp $ */
6
/* Library-wide state. These are extern declarations only; the objects
 * themselves are defined in another translation unit. */
extern int _max_global_array; /* exclusive upper bound for GA_OFFSET+handle (see ga_check_handleM) */
7
extern Integer *_ga_map;
8
extern Integer GAme, GAnproc;
9
extern Integer *GA_proclist;
10
extern int GA_Default_Proc_Group;
11
extern int* GA_Proc_list;
12
extern int* GA_inv_Proc_list; /* when non-NULL, ga_ownsM_no_handle looks proc up in this table */
13
extern int** GA_Update_Flags;
14
extern int* GA_Update_Signal;
15
extern short int _ga_irreg_flag;
16
extern Integer GA_Debug_flag;
17
extern int *ProcListPerm; /* permuted list of processes */
19
#define FNAM 31 /* length of array names; the name buffer is declared as FNAM+1 chars */
20
#define CACHE_SIZE 512 /* size of the cache inside GA DS */
23
/* __CRAYX1_PRAGMA wraps the pragma annotations used inside the macros below.
 * NOTE(review): two alternative definitions are visible here; the
 * preprocessor conditional that presumably selects one of them is not
 * visible in this excerpt -- confirm against the full header. */
/* Variant 1: hand the argument to the _Pragma operator. */
#define __CRAYX1_PRAGMA _Pragma
25
/* Variant 2: expand to nothing, so the annotation disappears. */
#define __CRAYX1_PRAGMA(_pragf)
29
typedef int ARMCI_Datatype;
36
/* NOTE(review): this looks like a member of the process-group record (it is
 * read as PGRP_LIST[...].inv_map_proc_list in gam_Loc_ptr); the enclosing
 * struct is not visible in this excerpt -- confirm. */
int *inv_map_proc_list;
42
/* Integer types used by the array descriptor fields. */
typedef Integer C_Integer;
43
typedef armci_size_t C_Long;
46
/* Fields of the per-array descriptor; the macros in this header read them as
 * GA[handle].field.
 * NOTE(review): the struct's opening and closing lines are not visible in
 * this excerpt. */
short int ndim; /* number of dimensions */
47
short int irreg; /* 0-regular; 1-irregular distribution */
48
int type; /* data type in array */
49
int actv; /* activity status, GA is allocated */
50
int actv_handle; /* handle is created */
51
C_Long size; /* size of local data in bytes */
52
int elemsize; /* sizeof(datatype) */
53
int ghosts; /* flag indicating presence of ghosts */
55
long id; /* ID of shmem region / MA handle */
56
C_Integer dims[MAXDIM]; /* global array dimensions */
57
C_Integer chunk[MAXDIM]; /* chunking */
58
int nblock[MAXDIM]; /* number of blocks per dimension */
59
C_Integer width[MAXDIM]; /* boundary cells per dimension */
60
C_Integer first[MAXDIM]; /* (Mirrored only) first local element */
61
C_Integer last[MAXDIM]; /* (Mirrored only) last local element */
62
C_Long shm_length; /* (Mirrored only) local shmem length */
63
C_Integer lo[MAXDIM]; /* top/left corner in local patch */
64
double scale[MAXDIM]; /* nblock/dim (precomputed) */
65
char **ptr; /* arrays of pointers to remote data */
66
C_Integer *mapc; /* block distribution map: per-dimension lists of block start indices, stored back to back */
67
char name[FNAM+1]; /* array name */
68
int p_handle; /* processor group handle for array; used as an index into PGRP_LIST */
69
double *cache; /* store for frequently accessed ptrs */
70
int corner_flag; /* flag for updating corner ghost cells */
71
int block_flag; /* flag to indicate block-cyclic data; 0 selects the regular path in ga_ownsM */
72
int block_sl_flag; /* flag to indicate block-cyclic data */
73
/* using ScaLAPACK format */
74
C_Integer block_dims[MAXDIM];/* array of block dimensions */
75
C_Integer num_blocks[MAXDIM];/* number of blocks in each dimension */
76
C_Integer block_total; /* total number of blocks in array */
77
/* using restricted arrays */
78
C_Integer *rstrctd_list; /* list of processors with data */
79
C_Integer num_rstrctd; /* number of processors with data; 0 means the array is not restricted (see ga_ownsM) */
80
C_Integer has_data; /* flag that processor has data */
81
C_Integer rstrctd_id; /* rank of processor in restricted list */
82
C_Integer *rank_rstrctd; /* ranks of processors with data */
84
#ifdef ENABLE_CHECKPOINT
85
int record_id; /* record id for writing ga to disk */
89
/* Extern declarations; the objects are defined elsewhere.
 * NOTE(review): presumably the storage that GA and PGRP_LIST point at --
 * confirm against the defining source file. */
extern global_array_t *_ga_main_data_structure;
90
extern proc_list_t *_proc_list_main_data_structure;
92
*The following statement had to be moved here because of a problem in the c
93
*compiler on SV1. The problem is that when a c file is compiled with a
94
*-htaskprivate option on SV1, all global objects are given task-private status
95
*even static variables are supposed to be initialized and given a task-private
96
*memory/status. Somehow SV1 fails to do this for global variables that are
97
*initialized during declaration.
98
*So to handle that, we cannot initialize global variables to be able to run
101
/* Table of array descriptors; the macros below index it as GA[GA_OFFSET+g_a]
 * or GA[ga_handle]. */
extern global_array_t *GA;
102
/* Table of process-group records, indexed by an array's p_handle. */
extern proc_list_t *PGRP_LIST;
105
#define ERR_STR_LEN 256 /* length of string for error reporting */
107
/**************************** MACROS ************************************/
110
/* ga_check_handleM(g_a, string): validate the array handle g_a.
 *   - calls pnga_error with "<string>: INVALID ARRAY HANDLE" when
 *     GA_OFFSET+g_a lies outside [0, _max_global_array);
 *   - calls pnga_error with "<string>: ARRAY NOT ACTIVE" when the
 *     descriptor's actv field is zero.
 * string is a caller-supplied prefix, normally the calling routine's name.
 * NOTE(review): the message is built with sprintf into an ERR_STR_LEN-byte
 * buffer, so an overly long string argument would overflow it; a bounded
 * formatter such as snprintf would be safer.
 * NOTE(review): g_a is expanded several times, so pass an expression without
 * side effects.
 * NOTE(review): the macro's enclosing braces are not visible in this excerpt. */
#define ga_check_handleM(g_a, string) \
112
if(GA_OFFSET+ (g_a) < 0 || GA_OFFSET+(g_a) >=_max_global_array){ \
113
char err_string[ERR_STR_LEN]; \
114
sprintf(err_string, "%s: INVALID ARRAY HANDLE", string); \
115
pnga_error(err_string, (g_a)); \
117
if( ! (GA[GA_OFFSET+(g_a)].actv) ){ \
118
char err_string[ERR_STR_LEN]; \
119
sprintf(err_string, "%s: ARRAY NOT ACTIVE", string); \
120
pnga_error(err_string, (g_a)); \
124
/* This macro finds the coordinates of the chunk of array owned by processor
 * proc, working from explicit distribution data instead of an array handle.
 *   ndim, dims  - number of dimensions and the global extent of each
 *   nblock      - number of blocks along each dimension
 *   mapc        - block start indices, one list per dimension stored back
 *                 to back (dimension d starts at offset sum(nblock[0..d-1]))
 *   proc        - processor whose patch is wanted
 *   lo, hi      - outputs: lower and upper bounds of the patch per dimension
 * When proc is negative or not smaller than the product of nblock[], the
 * patch is reported as empty (lo = 0, hi = -1 in every dimension).
 * Otherwise the block number is decoded with dimension 0 varying fastest;
 * lo comes from mapc and hi is either the start of the next block minus one
 * or, for the last block of a dimension, dims[d].
 * When GA_inv_Proc_list is non-NULL the block number is taken from
 * GA_inv_Proc_list[proc].
 * NOTE(review): the default initialisation of _index and the else/brace
 * lines are not visible in this excerpt. */
125
#define ga_ownsM_no_handle(ndim, dims, nblock, mapc, proc, lo, hi) \
127
Integer _loc, _nb, _d, _index, _dim=ndim,_dimstart=0, _dimpos; \
128
for(_nb=1, _d=0; _d<_dim; _d++)_nb *= (Integer)nblock[_d]; \
129
if((Integer)proc > _nb - 1 || proc<0){ \
130
__CRAYX1_PRAGMA("_CRI novector"); \
131
for(_d=0; _d<_dim; _d++){ \
132
lo[_d] = (Integer)0; \
133
hi[_d] = (Integer)-1;} \
137
if(GA_inv_Proc_list) _index = GA_inv_Proc_list[proc]; \
138
__CRAYX1_PRAGMA("_CRI novector"); \
139
for(_d=0; _d<_dim; _d++){ \
140
_loc = _index% (Integer)nblock[_d]; \
141
_index /= (Integer)nblock[_d]; \
142
_dimpos = _loc + _dimstart; /* correction to find place in mapc */\
143
_dimstart += (Integer)nblock[_d]; \
144
lo[_d] = (Integer)mapc[_dimpos]; \
145
if (_loc==nblock[_d]-1) hi[_d]=dims[_d]; \
146
else hi[_d] = mapc[_dimpos+1]-1; \
151
/* This macro finds the block indices for a given block.
 *   ga_handle - index into GA of the array descriptor
 *   nblock    - linear block number to decode
 *   index     - output: one block index per dimension
 * The linear number is split using GA[ga_handle].num_blocks, with
 * dimension 0 varying fastest.
 * NOTE(review): the declarations of _itmp and _i (and the initial value of
 * _itmp) are not visible in this excerpt. */
152
#define gam_find_block_indices(ga_handle,nblock,index) { \
154
int _ndim = GA[ga_handle].ndim; \
156
index[0] = _itmp%GA[ga_handle].num_blocks[0]; \
157
for (_i=1; _i<_ndim; _i++) { \
158
_itmp = (_itmp-index[_i-1])/GA[ga_handle].num_blocks[_i-1]; \
159
index[_i] = _itmp%GA[ga_handle].num_blocks[_i]; \
163
/* This macro finds the ScaLAPACK indices for a given processor.
 *   ga_handle - index into GA of the array descriptor
 *   proc      - processor number to decode
 *   index     - output: one processor-grid index per dimension
 * Two variants are given, both splitting the number with
 * GA[ga_handle].nblock: the first (under COMPACT_SCALAPACK) lets dimension 0
 * vary fastest, the second lets the last dimension vary fastest.
 * NOTE(review): the directive separating the two variants and the
 * declarations/initialisation of _itmp and _i are not visible in this
 * excerpt. */
164
#ifdef COMPACT_SCALAPACK
165
#define gam_find_proc_indices(ga_handle,proc,index) { \
167
Integer _ndim = GA[ga_handle].ndim; \
169
index[0] = _itmp%GA[ga_handle].nblock[0]; \
170
for (_i=1; _i<_ndim; _i++) { \
171
_itmp = (_itmp-index[_i-1])/GA[ga_handle].nblock[_i-1]; \
172
index[_i] = _itmp%GA[ga_handle].nblock[_i]; \
176
#define gam_find_proc_indices(ga_handle,proc,index) { \
178
Integer _ndim = GA[ga_handle].ndim; \
180
index[_ndim-1] = _itmp%GA[ga_handle].nblock[_ndim-1]; \
181
for (_i=_ndim-2; _i>=0; _i--) { \
182
_itmp = (_itmp-index[_i+1])/GA[ga_handle].nblock[_i+1]; \
183
index[_i] = _itmp%GA[ga_handle].nblock[_i]; \
188
/* This macro finds the coordinates of the chunk of array owned by processor
 * proc, or of block number proc for block-cyclic arrays.
 *   ga_handle - index into GA of the array descriptor
 *   proc      - processor (or block) number
 *   lo, hi    - outputs: lower and upper bounds per dimension
 * Regular distribution (block_flag == 0): delegates to ga_ownsM_no_handle;
 * for restricted arrays (num_rstrctd != 0) only when proc < num_rstrctd.
 * Block-cyclic distribution: decodes proc with gam_find_block_indices, then
 * lo = index*block_dims + 1 and hi = (index+1)*block_dims, clipped to the
 * array dimension.
 * NOTE(review): the else branches, the trailing macro arguments and the
 * body of the first _i loop are not visible in this excerpt. */
189
#define ga_ownsM(ga_handle, proc, lo, hi) \
191
if (GA[ga_handle].block_flag == 0) { \
192
if (GA[ga_handle].num_rstrctd == 0) { \
193
ga_ownsM_no_handle(GA[ga_handle].ndim, GA[ga_handle].dims, \
194
GA[ga_handle].nblock, GA[ga_handle].mapc, \
197
if (proc < GA[ga_handle].num_rstrctd) { \
198
ga_ownsM_no_handle(GA[ga_handle].ndim, GA[ga_handle].dims, \
199
GA[ga_handle].nblock, GA[ga_handle].mapc, \
203
int _ndim = GA[ga_handle].ndim; \
204
for (_i=0; _i<_ndim; _i++) { \
211
int _index[MAXDIM]; \
213
int _ndim = GA[ga_handle].ndim; \
214
gam_find_block_indices(ga_handle,proc,_index); \
215
for (_i=0; _i<_ndim; _i++) { \
216
lo[_i] = _index[_i]*GA[ga_handle].block_dims[_i]+1; \
217
hi[_i] = (_index[_i]+1)*GA[ga_handle].block_dims[_i]; \
218
if (hi[_i] > GA[ga_handle].dims[_i]) hi[_i]=GA[ga_handle].dims[_i]; \
223
/* This macro finds the block index corresponding to a given set of indices.
 *   ga_handle - index into GA of the array descriptor
 *   nblock    - output: linear block number
 *   index     - per-dimension block indices
 * The linear number is accumulated from the last dimension down to
 * dimension 0 using GA[ga_handle].num_blocks, so dimension 0 varies fastest.
 * NOTE(review): the declaration of _i is not visible in this excerpt. */
224
#define gam_find_block_from_indices(ga_handle,nblock,index) { \
225
int _ndim = GA[ga_handle].ndim; \
227
nblock = index[_ndim-1]; \
228
for (_i=_ndim-2; _i >= 0; _i--) { \
229
nblock = nblock*GA[ga_handle].num_blocks[_i]+index[_i]; \
233
/* This macro finds the proc that owns a given set of block indices
234
using the ScaLAPACK data distribution.
   ga_handle - index into GA of the array descriptor
   proc      - output: owning processor number
   index     - per-dimension block indices
   Each block index is first reduced modulo GA[ga_handle].nblock for its
   dimension. Under COMPACT_SCALAPACK the reduced indices are combined from
   the last dimension down to dimension 0; in the other variant they are
   combined from dimension 1 upwards.
   NOTE(review): the directive separating the two variants, the declaration
   of _i and the starting value of proc in the second variant are not
   visible in this excerpt. */
235
#ifdef COMPACT_SCALAPACK
236
#define gam_find_proc_from_sl_indices(ga_handle,proc,index) { \
237
int _ndim = GA[ga_handle].ndim; \
239
Integer _index2[MAXDIM]; \
240
for (_i=0; _i<_ndim; _i++) { \
241
_index2[_i] = index[_i]%GA[ga_handle].nblock[_i]; \
243
proc = _index2[_ndim-1]; \
244
for (_i=_ndim-2; _i >= 0; _i--) { \
245
proc = proc*GA[ga_handle].nblock[_i]+_index2[_i]; \
249
#define gam_find_proc_from_sl_indices(ga_handle,proc,index) { \
250
int _ndim = GA[ga_handle].ndim; \
252
Integer _index2[MAXDIM]; \
253
for (_i=0; _i<_ndim; _i++) { \
254
_index2[_i] = index[_i]%GA[ga_handle].nblock[_i]; \
257
for (_i=1; _i < _ndim; _i++) { \
258
proc = proc*GA[ga_handle].nblock[_i]+_index2[_i]; \
262
/* this macro computes the strides on both the remote and local
263
processors that map out the data. ld and ldrem are the physical dimensions
264
of the memory on both the local and remote processors.
   size is the starting value stored in stride_rem[0] and stride_loc[0];
   each following entry is the previous one multiplied by the matching
   ldrem[] / ld[] value, i.e. a running product. All values are narrowed
   to int.
   NOTE(review): the declaration of _i is not visible in this excerpt. */
265
/* NEEDS C_INT64 CONVERSION */
266
#define gam_setstride(ndim, size, ld, ldrem, stride_rem, stride_loc){\
268
stride_rem[0]= stride_loc[0] = (int)size; \
269
__CRAYX1_PRAGMA("_CRI novector"); \
270
for(_i=0;_i<ndim-1;_i++){ \
271
stride_rem[_i] *= (int)ldrem[_i]; \
272
stride_loc[_i] *= (int)ld[_i]; \
273
stride_rem[_i+1] = stride_rem[_i]; \
274
stride_loc[_i+1] = stride_loc[_i]; \
278
/* Count total number of elements in array based on values of ndim,
280
#define gam_CountElems(ndim, lo, hi, pelems){ \
282
__CRAYX1_PRAGMA("_CRI novector"); \
283
for(_d=0,*pelems=1; _d< ndim;_d++) *pelems *= hi[_d]-lo[_d]+1; \
286
/* NEEDS C_INT64 CONVERSION */
287
/* gam_ComputeCount(ndim, lo, hi, count): for each of the ndim dimensions
 * store the extent hi[d]-lo[d]+1 of the region in count[d]. The difference
 * is narrowed to int before 1 is added.
 * NOTE(review): the declaration of _d is not visible in this excerpt. */
#define gam_ComputeCount(ndim, lo, hi, count){ \
289
__CRAYX1_PRAGMA("_CRI novector"); \
290
for(_d=0; _d< ndim;_d++) count[_d] = (int)(hi[_d]-lo[_d])+1; \
293
/* ga_RegionError(ndim, lo, hi, val): report a region that could not be
 * located. Builds "cannot locate region: [lo0:hi0 ,lo1:hi1 ...]" in a local
 * ERR_STR_LEN-byte buffer and passes it, together with val, to pnga_error.
 * NOTE(review): sprintf(err_string, str) uses a non-literal format string;
 * str contains no conversion specifiers so it is harmless today, but
 * sprintf(err_string, "%s", str) would be the safe form.
 * NOTE(review): none of the sprintf calls is bounded by ERR_STR_LEN.
 * NOTE(review): the declarations and initial values of _d and _l are not
 * visible in this excerpt. */
#define ga_RegionError(ndim, lo, hi, val){ \
295
char *str= "cannot locate region: "; \
296
char err_string[ERR_STR_LEN]; \
297
sprintf(err_string, str); \
300
sprintf(err_string+_l, "[%ld:%ld ",(long)lo[_d],(long)hi[_d]); \
301
_l=strlen(err_string); \
302
__CRAYX1_PRAGMA("_CRI novector"); \
303
for(_d=1; _d< ndim; _d++){ \
304
sprintf(err_string+_l, ",%ld:%ld ",(long)lo[_d],(long)hi[_d]); \
305
_l=strlen(err_string); \
307
sprintf(err_string+_l, "]"); \
308
_l=strlen(err_string); \
309
pnga_error(err_string, val); \
312
/*\ Just return pointer (ptr_loc) to location in memory of element with
313
* subscripts (subscript).
315
#define gam_Loc_ptr(proc, g_handle, subscript, ptr_loc) \
317
Integer _offset=0, _d, _w, _factor=1, _last=GA[g_handle].ndim-1; \
318
Integer _lo[MAXDIM], _hi[MAXDIM], _p_handle, _iproc; \
320
ga_ownsM(g_handle, proc, _lo, _hi); \
321
_p_handle = GA[g_handle].p_handle; \
323
gaCheckSubscriptM(subscript, _lo, _hi, GA[g_handle].ndim); \
324
__CRAYX1_PRAGMA("_CRI novector"); \
325
for(_d=0; _d < _last; _d++) { \
326
_w = (Integer)GA[g_handle].width[_d]; \
327
_offset += (subscript[_d]-_lo[_d]+_w) * _factor; \
328
_factor *= _hi[_d] - _lo[_d]+1+2*_w; \
330
_offset += (subscript[_last]-_lo[_last] \
331
+ (Integer)GA[g_handle].width[_last]) \
333
if (_p_handle == 0) { \
334
_iproc = PGRP_LIST[_p_handle].inv_map_proc_list[_iproc]; \
336
if (GA[g_handle].num_rstrctd > 0) \
337
_iproc = GA[g_handle].rstrctd_list[_iproc]; \
338
*(ptr_loc) = GA[g_handle].ptr[_iproc]+_offset*GA[g_handle].elemsize; \
341
#define ga_check_regionM(g_a, ilo, ihi, jlo, jhi, string){ \
342
if (*(ilo) <= 0 || *(ihi) > GA[GA_OFFSET + *(g_a)].dims[0] || \
343
*(jlo) <= 0 || *(jhi) > GA[GA_OFFSET + *(g_a)].dims[1] || \
344
*(ihi) < *(ilo) || *(jhi) < *(jlo)){ \
345
char err_string[ERR_STR_LEN]; \
346
sprintf(err_string,"%s:req(%ld:%ld,%ld:%ld) out of range (1:%ld,1:%ld)",\
347
string, (long)*(ilo), (long)*(ihi), (long)*(jlo), (long)*(jhi), \
348
(long)GA[GA_OFFSET + *(g_a)].dims[0], \
349
(long)GA[GA_OFFSET + *(g_a)].dims[1]); \
350
pnga_error(err_string, *(g_a)); \
354
#define gaCheckSubscriptM(subscr, lo, hi, ndim) \
357
__CRAYX1_PRAGMA("_CRI novector"); \
358
for(_d=0; _d< ndim; _d++) \
359
if( subscr[_d]< lo[_d] || subscr[_d]> hi[_d]){ \
360
char err_string[ERR_STR_LEN]; \
361
sprintf(err_string,"check subscript failed:%ld not in (%ld:%ld) dim=", \
362
(long)subscr[_d], (long)lo[_d], (long)hi[_d]); \
363
pnga_error(err_string, _d); \