9
* Creates an ndim-dimensional array using the regular distribution model
10
* and returns integer handle representing the array.
12
* The array can be distributed evenly or not. The control over the
13
* distribution is accomplished by specifying chunk (block) size for all or
14
* some of array dimensions.
16
* For example, for a 2-dimensional array, setting chunk[0]=dim[0] gives
17
* distribution by vertical strips (chunk[0]*dims[0]);
18
* setting chunk[1]=dim[1] gives distribution by horizontal strips
19
* (chunk[1]*dims[1]). Actual chunks will be modified so that they are at
20
* least the size of the minimum and each process has either zero or one
21
* chunk. Specifying chunk[i] as <1 will cause that dimension to be
24
* As a convenience, when chunk is specified as NULL, the entire array is
27
* This is a collective operation.
29
* @param[in] type data type(MT_F_DBL,MT_F_INT,MT_F_DCPL)
30
* @param[in] ndim number of array dimensions
31
* @param[in] dims[ndim] array of dimensions
32
* @param[in] arrayname a unique character string
33
* @param[in] chunk[ndim] array of chunks, each element specifies
34
* minimum size that given dimensions should be
37
* @return pointer to GlobalArray object created; NULL if it fails
39
GlobalArray* createGA(int type, int ndim, int dims[], char *arrayname,
43
* Creates an array by following the user-specified distribution and
44
* returns integer handle representing the array.
46
* The distribution is specified as a Cartesian product of distributions
47
* for each dimension. The array indices start at 0. For example, the
48
* following figure demonstrates distribution of a 2-dimensional array 8x10
49
* on 6 (or more) processors. nblock[2]={3,2}, the size of map array is s=5
50
* and array map contains the following elements map={0,2,6, 0, 5}. The
51
* distribution is nonuniform because, P1 and P4 get 20 elements each and
52
* processors P0,P2,P3, and P5 only 10 elements each.
55
* <TR> <TD>5</TD> <TD>5</TD> </TR>
56
* <TR> <TD>P0</TD> <TD>P3</TD> <TD>2</TD> </TR>
57
* <TR> <TD>P1</TD> <TD>P4</TD> <TD>4</TD> </TR>
58
* <TR> <TD>P2</TD> <TD>P5</TD> <TD>2</TD> </TR>
61
* This is a collective operation.
63
* @param[in] arrayname a unique character string
64
* @param[in] type MA data type (MT_F_DBL,MT_F_INT,MT_F_DCPL)
65
* @param[in] ndim number of array dimensions
66
* @param[in] dims array of dimension values
67
* @param[in] block [ndim] no. of blocks each dimension is divided into
68
 * @param[in] maps       [s] starting index for each block;
69
* the size s is a sum all elements of nblock array
71
* @return pointer to GlobalArray object created; NULL if it fails
73
GlobalArray * createGA(int type, int ndim, int dims[], char *arrayname,
74
int block[], int maps[]);
77
* Creates a new array by applying all the properties of another existing
80
* This is a collective operation.
82
* @param[in] arrayname a character string
83
* @param[in] g_b integer handle for reference array
85
* @return pointer to GlobalArray object created; NULL if it fails
87
GlobalArray * createGA(const GlobalArray *g_b, char *arrayname);
90
* Creates a new array by applying all the properties of another existing
93
* This is a collective operation.
95
* @param[in] g_b integer handle for reference array
97
* @return pointer to GlobalArray object created; NULL if it fails
99
GlobalArray * createGA(const GlobalArray &g_b);
102
* Creates a 10x10 global array of type "double"(default).
104
* This is a collective operation.
106
* @return pointer to GlobalArray object created; NULL if it fails
108
GlobalArray * createGA();
111
* Creates an ndim-dimensional array with a layer of ghost cells around
112
* the visible data on each processor using the regular distribution
113
* model and returns an integer handle representing the array.
114
* The array can be distributed evenly or not evenly. The control over
115
* the distribution is accomplished by specifying chunk (block) size for
116
* all or some of the array dimensions. For example, for a 2-dimensional
117
* array, setting chunk(1)=dim(1) gives distribution by vertical strips
118
* (chunk(1)*dims(1)); setting chunk(2)=dim(2) gives distribution by
119
* horizontal strips (chunk(2)*dims(2)). Actual chunks will be modified
120
* so that they are at least the size of the minimum and each process
121
* has either zero or one chunk. Specifying chunk(i) as <1 will cause
122
* that dimension (i-th) to be distributed evenly. The width of the
123
* ghost cell layer in each dimension is specified using the array
124
* width(). The local data of the global array residing on each
125
 * processor will have a layer width[n] ghost cells wide on either
126
* side of the visible data along the dimension n.
128
* This is a collective operation.
130
* @param[in] array_name a unique character string
131
* @param[in] type data type (MT_DBL,MT_INT,MT_DCPL)
132
* @param[in] ndim number of array dimensions
133
* @param[in] dims [ndim] array of dimensions
134
* @param[in] width [ndim] array of ghost cell widths
135
* @param[in] chunk [ndim] array of chunks, each element specifies
136
* minimum size that given dimensions should be
139
* @returns pointer to GlobalArray object created; NULL if it fails
141
GlobalArray * createGA_Ghosts(int type, int ndim, int dims[],
142
int width[], char *array_name, int chunk[]);
145
* Creates an array with ghost cells by following the user-specified
146
* distribution and returns integer handle representing the array.
147
* The distribution is specified as a Cartesian product of distributions
148
* for each dimension. For example, the following figure demonstrates
149
* distribution of a 2-dimensional array 8x10 on 6 (or more) processors.
150
* nblock(2)={3,2}, the size of map array is s=5 and array map contains
151
* the following elements map={1,3,7, 1, 6}. The distribution is
152
* nonuniform because, P1 and P4 get 20 elements each and processors
153
* P0,P2,P3, and P5 only 10 elements each.
156
* <TR> <TD>5</TD> <TD>5</TD> </TR>
157
* <TR> <TD>P0</TD> <TD>P3</TD> <TD>2</TD> </TR>
158
* <TR> <TD>P1</TD> <TD>P4</TD> <TD>4</TD> </TR>
159
* <TR> <TD>P2</TD> <TD>P5</TD> <TD>2</TD> </TR>
162
* The array width[] is used to control the width of the ghost cell
163
* boundary around the visible data on each processor. The local data
164
* of the global array residing on each processor will have a layer
165
 * width[n] ghost cells wide on either side of the visible data along
168
* This is a collective operation.
170
* @param[in] array_name a unique character string
171
* @param[in] type data type (MT_DBL,MT_INT,MT_DCPL)
172
* @param[in] ndim number of array dimensions
173
* @param[in] dims [ndim] array of dimensions
174
* @param[in] width [ndim] array of ghost cell widths
175
* @param[in] nblock [ndim] no. of blocks each dimension is divided into
176
 * @param[in] map        [s] starting index for each block;
177
* the size s is a sum of all elements of nblock array
179
* @return pointer to GlobalArray object created; NULL if it fails
181
GlobalArray * createGA_Ghosts(int type, int ndim, int dims[],
182
int width[], char *array_name, int map[],
186
* Broadcast from process root to all other processes a message of
187
 * length lenbuf. This operation is provided only for convenience
188
* purposes: it is available regardless of the message-passing library
189
* that GA is running with.
191
* This is a collective operation.
193
* @param[in] lenbuf length of buffer
194
* @param[in,out] buf [lenbuf] data
195
* @param[in] root root process
197
void brdcst(void *buf, int lenbuf, int root);
200
* Returns the current value of the internal debug flag.
202
* This is a local operation.
204
* @return 0 if the debug flag is false, 1 if it is true.
209
* This functions returns the total number of nodes that the program is
212
* On SMP architectures, this will be less than or equal to the total
213
* number of processors.
215
* This is a local operation.
217
* @return the number of nodes the program is running on
222
* This function returns the node ID of the process.
224
* On SMP architectures with more than one processor per node, several
225
* processes may return the same node id.
227
* This is a local operation.
229
* @return the node ID of the process
234
* This function returns the cluster node ID of the specified process.
236
* On SMP architectures with more than one processor per node, several
237
* processes may return the same node id.
239
* This is a local operation.
241
* @return the cluster node ID of the specified process
243
int clusterProcNodeid(int iproc);
246
* This function returns the number of processors available on node inode.
248
* This is a local operation.
252
* @return the number of processors available on the given node
254
int clusterNprocs(int inode);
257
* This function returns the processor id associated with node inode and
258
* the local processor id iproc.
260
* If node inode has N processors, then the value of iproc lies between
263
* This is a local operation.
268
* @return the processor ID associated with the given node and local processor
271
int clusterProcid(int inode, int iproc);
274
* Creates a set containing the number of mutexes.
276
* Mutex is a simple synchronization object used to protect Critical
277
* Sections. Only one set of mutexes can exist at a time. Array of mutexes
278
* can be created and destroyed as many times as needed.
279
* Mutexes are numbered: 0, ..., number -1.
281
* This is a collective operation.
283
* @param[in] number of mutexes in mutex array
285
 * @return 0 if the operation succeeded or 1 when failed.
287
int createMutexes(int number);
290
* Destroys the set of mutexes created with ga_create_mutexes.
292
* This is a collective operation.
294
* @return 0 if the operation succeeded or 1 when failed.
296
int destroyMutexes();
299
* Double Global OPeration.
301
* X(1:N) is a vector present on each process. DGOP 'sums' elements of
302
 * X across all nodes using the commutative operator OP. The result is
303
* broadcast to all nodes. Supported operations include '+', '*', 'max',
304
 * 'min', 'absmax', 'absmin'. The use of lowercase for operators is
305
 * necessary. This operation is provided only for convenience purposes:
306
* it is available regardless of the message-passing library that GA is
309
* This is a collective operation.
311
* @param[in] n number of elements
312
* @param[in,out] x [n] array of elements
313
* @param[in] op operator
315
void dgop(double x[], int n, char *op);
318
* Creates a new array by applying all the properties of another existing
321
* This is a collective operation.
323
* @param[in] array_name a character string
324
* @param[in] g_a integer handle for reference array
326
 * @return array handle; a non-zero array handle means the call was successful.
328
int duplicate(int g_a, char* array_name);
331
* To be called in case of an error.
333
* Print an error message and an integer value that represents error code.
334
* Releases some system resources.
335
* This is the required way of aborting the program execution.
337
* This operation is local.
339
* @param[in] message string to print
340
* @param[in] code code to print
342
void error(const char *message, int code);
345
* Blocks the calling process until all the data transfers corresponding to
346
* GA operations called after ga_init_fence complete.
348
* For example, since ga_put might return before the data reaches the final
349
* destination, ga_init_fence and ga_fence allow process to wait until the
350
 * data transfer is fully completed:
358
* ga_fence must be called after ga_init_fence. A barrier, ga_sync, assures
359
* completion of all data transfers and implicitly cancels all outstanding
360
* ga_init_fence calls. ga_init_fence and ga_fence must be used in pairs,
361
* multiple calls to ga_fence require the same number of corresponding
362
* ga_init_fence calls. ga_init_fence/ga_fence pairs can be nested.
364
* ga_fence works for multiple GA operations. For example:
369
* ga_scatter(g_a, ...);
374
* The calling process will be blocked until data movements initiated by
375
* two calls to ga_put and one ga_scatter complete.
380
* Integer Global OPeration.
382
* The integer version of ga_dgop described above, also include the bitwise OR
383
 * operation. This operation is provided only for convenience purposes: it
384
* is available regardless of the message-passing library that GA is running
387
* This is a collective operation.
389
* @param[in] n number of elements
390
* @param[in,out] x [n] array of elements
391
* @param[in] op operator
393
void gop(int x[], int n, char *op);
396
* Long Global OPeration.
398
* X(1:N) is a vector present on each process. LGOP 'sums' elements of
399
 * X across all nodes using the commutative operator OP. The result is
400
* broadcast to all nodes. Supported operations include '+', '*', 'max',
401
 * 'min', 'absmax', 'absmin'. The use of lowercase for operators is
402
 * necessary. This operation is provided only for convenience purposes:
403
* it is available regardless of the message-passing library that GA is
406
* This is a collective operation.
408
* @param[in] n number of elements
409
* @param[in,out] x [n] array of elements
410
* @param[in] op operator
412
void gop(long x[], int n, char *op);
415
* Float Global OPeration.
417
* X(1:N) is a vector present on each process. FGOP 'sums' elements of
418
 * X across all nodes using the commutative operator OP. The result is
419
* broadcast to all nodes. Supported operations include '+', '*', 'max',
420
 * 'min', 'absmax', 'absmin'. The use of lowercase for operators is
421
 * necessary. This operation is provided only for convenience purposes:
422
* it is available regardless of the message-passing library that GA is
425
* This is a collective operation.
427
* @param[in] n number of elements
428
* @param[in,out] x [n] array of elements
429
* @param[in] op operator
431
void gop(float x[], int n, char *op);
434
* Double Global OPeration.
436
* X(1:N) is a vector present on each process. DGOP 'sums' elements of
437
 * X across all nodes using the commutative operator OP. The result is
438
* broadcast to all nodes. Supported operations include '+', '*', 'max',
439
 * 'min', 'absmax', 'absmin'. The use of lowercase for operators is
440
 * necessary. This operation is provided only for convenience purposes:
441
* it is available regardless of the message-passing library that GA is
444
* This is a collective operation.
446
* @param[in] n number of elements
447
* @param[in,out] x [n] array of elements
448
* @param[in] op operator
450
void gop(double x[], int n, char *op);
453
* Integer Global OPeration.
455
* The integer (more precisely long) version of ga_dgop described above,
456
* also include the bitwise OR operation.
457
 * This operation is provided only for convenience purposes: it is
458
* available regardless of the message-passing library that GA is running
461
* This is a collective operation.
463
* @param[in] n number of elements
464
* @param[in,out] x [n] array of elements
465
* @param[in] op operator
467
void igop(int x[], int n, char *op);
470
* Initializes tracing of completion status of data movement operations.
472
* This operation is local.
477
* Returns amount of memory (in bytes) used in the allocated global
478
* arrays on the calling processor.
480
* This operation is local.
482
* @return amount of memory (in bytes) used in the allocated global arrays on
483
* the calling processor
485
size_t inquireMemory();
488
* Long Global OPeration.
490
* X(1:N) is a vector present on each process. LGOP 'sums' elements of
491
 * X across all nodes using the commutative operator OP. The result is
492
* broadcast to all nodes. Supported operations include '+', '*', 'max',
493
 * 'min', 'absmax', 'absmin'. The use of lowercase for operators is
494
 * necessary. This operation is provided only for convenience purposes:
495
* it is available regardless of the message-passing library that GA is
498
* This is a collective operation.
500
* @param[in] n number of elements
501
* @param[in,out] x [n] array of elements
502
* @param[in] op operator
504
void lgop(long x[], int n, char *op);
507
* Locks a mutex object identified by the mutex number. It is a fatal
508
* error for a process to attempt to lock a mutex which was already
509
* locked by this process.
511
* @param[in] mutex object id
513
void lock(int mutex);
516
* Mask the intrinsic sync operations during collective calls.
518
 * GA collective calls have sync calls at the beginning and end of
519
 * the call. Sometimes there may be some redundancy in sync calls, which
520
* can be avoided by masking the sync operations.
522
* Setting the parameters as zero will mask (disable) the call. Any non-zero
523
* value will enable the call. Initially these params are set to non-zero
526
* @param[in] first masks the sync at the begining of the collective call.
527
* @param[in] last masks the sync at the end of the collective call.
529
void maskSync(int first, int last);
532
* If GA_uses_ma returns true, then GA_Memory_avail returns the
533
* lesser of the amount available under the GA limit and the amount
534
* available from MA (according to ma_inquire_avail operation).
535
* If no GA limit has been set, it returns what MA says is available.
536
* If ( ! GA_Uses_ma() && ! GA_Memory_limited() ) returns < 0, indicating
537
* that the bound on currently available memory cannot be determined.
539
* This operation is local.
541
* @return amount of memory (in bytes) left for allocation of new
542
* global arrays on the calling processor.
545
int memoryAvailable() ;
548
* Indicates if limit is set on memory usage in Global Arrays on the
551
* This operation is local.
553
* @return 1 means "yes", "0" means "no".
558
* Force completion of a nonblocking operation locally.
560
* Waiting on a nonblocking put or an accumulate operation assures that data
561
 * was injected into the network and the user buffer can now be reused.
562
* Completing a get operation assures data has arrived into the user memory
563
* and is ready for use. Wait operation ensures only local completion. Unlike
564
* their blocking counterparts, the nonblocking operations are not ordered
565
* with respect to the destination. Performance being one reason, the other
566
* reason is that by ensuring ordering we incur additional and possibly
567
* unnecessary overhead on applications that do not require their operations
568
* to be ordered. For cases where ordering is necessary, it can be done by
569
* calling a fence operation. The fence operation is provided to the user to
570
* confirm remote completion if needed.
572
* This is a local operation.
574
* @param[in] nbhandle nonblocking handle
576
void nbWait(GANbhdl *nbhandle);
579
* Returns the GA process id (0, ..., ga_Nnodes()-1) of the requesting
582
* This operation is local.
584
* @return the GA process ID of the requesting process
589
* Returns the number of the GA compute (user) processes.
591
* This operation is local.
593
* @return the number of GA processes
598
* Print statistical information on GA use.
600
* This non-collective (MIMD) operation prints information about:
601
* - number of calls to
609
* - read_and_inc operations
610
* - total amount of data moved in the primitive operations
611
 * - amount of data moved in the primitive operations to logically remote
613
* - maximum memory consumption in global arrays
614
* - number of requests serviced in the interrupt-driven implementations
615
* by the calling process.
617
* This operation is local.
622
* This function sets an internal flag in the GA library to either true or
625
* The value of this flag can be recovered at any time using the
626
 * getDebug function. The flag is set to false when the GA library
627
* is initialized. This can be useful in a number of debugging situations,
628
* especially when examining the behavior of routines that are called in
629
* multiple locations in a code.
631
* This is a local operation.
633
* @param[in] dbg value to set internal flag
635
void setDebug(int dbg);
638
* Sets the amount of memory to be used (in bytes) per process.
640
* This is a local operation.
642
* @param[in] limit the amount of memory in bytes per process
644
void setMemoryLimit(size_t limit);
647
* Prints info about allocated arrays.
649
* @param[in] verbose if true print distribution info
651
void summarize(int verbose);
654
* Synchronize processes (a barrier) and ensure that all GA operations
657
* This is a collective operation.
662
* Unlocks a mutex object identified by the mutex number.
664
* It is a fatal error for a process to attempt to unlock a mutex which has
665
* not been locked by this process.
667
* @param[in] mutex object id
669
void unlock(int mutex);
672
* Returns whether memory comes from internal or external allocator.
674
* This operation is local.
676
* @return "1" if memory comes from MA;
677
* "0" if memory comes from another source e.g. System V shared memory
682
* Returns whether GA is using Fortran indexing.
684
* @return "1" if uses fortran API, else returns "0"
689
 * This function returns a wall (or elapsed) time on the calling processor.
691
* Returns time in seconds representing elapsed wall-clock time
692
* since an arbitrary time in the past. Example:
695
* double starttime, endtime;
696
* starttime = GA::wtime();
697
* // {{.... code snippet to be timed ....}}
698
* endtime = GA::wtime();
699
* printf("Time taken = %lf seconds\n", endtime-starttime);
702
* This is a local operation.
704
* @note This function is only available in release 4.1 or greater.
710
#endif /* _SERVICES_H */