9
* This is the GlobalArray class.
16
* Creates an ndim-dimensional array using the regular distribution
17
* model and returns integer handle representing the array.
19
* The array can be distributed evenly or not. The control over the
20
* distribution is accomplished by specifying chunk (block) size for all or
21
* some of array dimensions.
23
* For example, for a 2-dimensional array, setting chunk[0]=dim[0] gives
24
* distribution by vertical strips (chunk[0]*dims[0]);
25
* setting chunk[1]=dim[1] gives distribution by horizontal strips
26
* (chunk[1]*dims[1]). Actual chunks will be modified so that they are at
27
* least the size of the minimum and each process has either zero or one
28
* chunk. Specifying chunk[i] as <1 will cause that dimension to be
31
* As a convenience, when chunk is specified as NULL, the entire array is
34
* This is a collective operation.
36
* @param[in] type data type(MT_F_DBL,MT_F_INT,MT_F_DCPL)
37
* @param[in] ndim number of array dimensions
38
* @param[in] dims [ndim] array of dimensions
39
* @param[in] arrayname a unique character string
40
* @param[in] chunk [ndim] array of chunks, each element specifies
41
* minimum size that given dimensions should be chunked
44
GlobalArray(int type, int ndim, int dims[], char *arrayname, int chunk[]);
47
* @copydoc GlobalArray::GlobalArray(int,int,int[],char*,int[])
48
* @param[in] p_handle processor group handle
50
GlobalArray(int type, int ndim, int dims[], char *arrayname, int chunk[],
54
* @copydoc GlobalArray::GlobalArray(int,int,int[],char*,int[])
56
GlobalArray(int type, int ndim, int64_t dims[], char *arrayname,
60
* @copydoc GlobalArray::GlobalArray(int,int,int[],char*,int[])
61
* @param[in] p_handle processor group handle
63
GlobalArray(int type, int ndim, int64_t dims[], char *arrayname,
64
int64_t chunk[], PGroup* p_handle);
67
* Creates an array by following the user-specified distribution.
69
* The distribution is specified as a Cartesian product of distributions
70
* for each dimension. The array indices start at 0. For example, the
71
* following figure demonstrates distribution of a 2-dimensional array 8x10
72
* on 6 (or more) processors. nblock[2]={3,2}, the size of map array is s=5
73
* and array map contains the following elements map={0,2,6, 0, 5}. The
74
* distribution is nonuniform because, P1 and P4 get 20 elements each and
75
* processors P0,P2,P3, and P5 only 10 elements each.
78
* <TR> <TD>5</TD> <TD>5</TD> </TR>
79
* <TR> <TD>P0</TD> <TD>P3</TD> <TD>2</TD> </TR>
80
* <TR> <TD>P1</TD> <TD>P4</TD> <TD>4</TD> </TR>
81
* <TR> <TD>P2</TD> <TD>P5</TD> <TD>2</TD> </TR>
84
* This is a collective operation.
86
* @param[in] type MA data type (MT_F_DBL,MT_F_INT,MT_F_DCPL)
87
* @param[in] ndim number of array dimensions
88
* @param[in] dims array of dimension values
89
* @param[in] arrayname a unique character string
90
* @param[in] block [ndim] no. of blocks each dimension is divided into
91
* @param[in] maps [s] starting index for each block;
92
* the size s is a sum of all elements of nblock array
94
GlobalArray(int type, int ndim, int dims[], char *arrayname, int block[],
98
* @copydoc GlobalArray::GlobalArray(int,int,int[],char*,int[],int[])
99
* @param[in] p_handle processor group handle
101
GlobalArray(int type, int ndim, int dims[], char *arrayname, int block[],
102
int maps[], PGroup* p_handle);
105
* @copydoc GlobalArray::GlobalArray(int,int,int[],char*,int[],int[])
107
GlobalArray(int type, int ndim, int64_t dims[], char *arrayname,
108
int64_t block[], int64_t maps[]);
111
* @copydoc GlobalArray::GlobalArray(int,int,int[],char*,int[],int[])
112
* @param[in] p_handle processor group handle
114
GlobalArray(int type, int ndim, int64_t dims[], char *arrayname,
115
int64_t block[], int64_t maps[], PGroup* p_handle);
118
* Creates an ndim-dimensional array with a layer of ghost cells around
119
* the visible data on each processor using the regular distribution model.
121
* The array can be distributed evenly or not evenly. The control over
122
* the distribution is accomplished by specifying chunk (block) size for
123
* all or some of the array dimensions. For example, for a 2-dimensional
124
* array, setting chunk(1)=dim(1) gives distribution by vertical strips
125
* (chunk(1)*dims(1)); setting chunk(2)=dim(2) gives distribution by
126
* horizontal strips (chunk(2)*dims(2)). Actual chunks will be modified
127
* so that they are at least the size of the minimum and each process
128
* has either zero or one chunk. Specifying chunk(i) as <1 will cause
129
* that dimension (i-th) to be distributed evenly. The width of the
130
* ghost cell layer in each dimension is specified using the array
131
* width(). The local data of the global array residing on each
132
* processor will have a layer width[n] ghost cells wide on either
133
* side of the visible data along the dimension n.
135
* @param[in] type data type (MT_DBL,MT_INT,MT_DCPL)
136
* @param[in] ndim number of array dimensions
137
* @param[in] dims [ndim] array of dimensions
138
* @param[in] width [ndim] array of ghost cell widths
139
* @param[in] arrayname a unique character string
140
* @param[in] chunk [ndim] array of chunks, each element specifies
141
* minimum size that given dimensions should be
143
* @param[in] ghosts this is a dummy parameter: added to increase the
144
* number of arguments, in order to avoid the conflicts
145
* among constructors. (ghosts = 'g' or 'G')
147
GlobalArray(int type, int ndim, int dims[], int width[], char *arrayname,
148
int chunk[], char ghosts);
151
* @copydoc GlobalArray::GlobalArray(int,int,int[],int[],char*,int[],char)
152
* @param[in] p_handle processor group handle
154
GlobalArray(int type, int ndim, int dims[], int width[], char *arrayname,
155
int chunk[], PGroup* p_handle, char ghosts);
158
* @copydoc GlobalArray::GlobalArray(int,int,int[],int[],char*,int[],char)
160
GlobalArray(int type, int ndim, int64_t dims[], int64_t width[],
161
char *arrayname, int64_t chunk[], char ghosts);
164
* @copydoc GlobalArray::GlobalArray(int,int,int[],int[],char*,int[],char)
165
* @param[in] p_handle processor group handle
167
GlobalArray(int type, int ndim, int64_t dims[], int64_t width[],
168
char *arrayname, int64_t chunk[], PGroup* p_handle, char ghosts);
171
* Creates an array with ghost cells by following the user-specified
174
* The distribution is specified as a Cartesian product of distributions
175
* for each dimension. For example, the following figure demonstrates
176
* distribution of a 2-dimensional array 8x10 on 6 (or more) processors.
177
* nblock(2)={3,2}, the size of map array is s=5 and array map contains
178
* the following elements map={1,3,7, 1, 6}. The distribution is
179
* nonuniform because, P1 and P4 get 20 elements each and processors
180
* P0,P2,P3, and P5 only 10 elements each.
183
* <TR> <TD>5</TD> <TD>5</TD> </TR>
184
* <TR> <TD>P0</TD> <TD>P3</TD> <TD>2</TD> </TR>
185
* <TR> <TD>P1</TD> <TD>P4</TD> <TD>4</TD> </TR>
186
* <TR> <TD>P2</TD> <TD>P5</TD> <TD>2</TD> </TR>
189
* The array width[] is used to control the width of the ghost cell
190
* boundary around the visible data on each processor. The local data
191
* of the global array residing on each processor will have a layer
192
* width[n] ghost cells wide on either side of the visible data along
193
* the dimension n. This is a collective operation.
195
* @param[in] type data type (MT_DBL,MT_INT,MT_DCPL)
196
* @param[in] ndim number of array dimensions
197
* @param[in] dims [ndim] array of dimensions
198
* @param[in] width [ndim] array of ghost cell widths
199
* @param[in] arrayname a unique character string
200
* @param[in] block [ndim] no. of blocks each dimension is divided into
201
* @param[in] maps [s] starting index for each block;
202
* the size s is a sum of all elements of nblock array
203
* @param[in] ghosts this is a dummy parameter: added to increase the
204
* number of arguments, in order to avoid the conflicts
205
* among constructors. (ghosts = 'g' or 'G')
207
GlobalArray(int type, int ndim, int dims[], int width[], char *arrayname,
208
int block[], int maps[], char ghosts);
211
* @copydoc GlobalArray::GlobalArray(int,int,int[],int[],char*,int[],int[],char)
212
* @param[in] p_handle processor group handle
214
GlobalArray(int type, int ndim, int dims[], int width[], char *arrayname,
215
int block[], int maps[], PGroup* p_handle, char ghosts);
218
* @copydoc GlobalArray::GlobalArray(int,int,int[],int[],char*,int[],int[],char)
220
GlobalArray(int type, int ndim, int64_t dims[], int64_t width[],
221
char *arrayname, int64_t block[], int64_t maps[], char ghosts);
224
* @copydoc GlobalArray::GlobalArray(int,int,int[],int[],char*,int[],int[],char)
225
* @param[in] p_handle processor group handle
227
GlobalArray(int type, int ndim, int64_t dims[], int64_t width[],
228
char *arrayname, int64_t block[], int64_t maps[], PGroup* p_handle,
232
* Creates a new array by applying all the properties of another existing
235
* This is a collective operation.
237
* @param[in] arrayname a character string
238
* @param[in] g_a integer handle for reference array
240
GlobalArray(const GlobalArray &g_a, char *arrayname);
243
* Creates a new array by applying all the properties of another existing
246
* This is a collective operation.
248
* @param[in] g_a integer handle for reference array
250
GlobalArray(const GlobalArray &g_a);
253
* Creates a new array with no existing attributes.
255
* @note All attributes must subsequently be set using the "set" methods.
257
* This is a collective operation.
264
/* access the data */
266
/** @return the array handle */
267
// Read-only accessor: hands back the value held in the mHandle member.
int handle() const
{
    return mHandle;
}
269
/* Global Array operations */
272
* Combines data from local array buffer with data in the global array
275
* @note The local array is assumed to have the same number of dimensions
276
* as the global array.
278
* global array section (lo[],hi[]) += *alpha * buffer
280
* This is a one-sided and atomic operation.
282
* @param[in] lo [ndim] array of starting indices for array section
283
* @param[in] hi [ndim] array of ending indices for array section
284
* @param[in] buf pointer to the local buffer array
285
* @param[in] ld [ndim-1] array specifying leading
286
* dimensions/strides/extents for buffer array
287
* @param[in] alpha scale factor (double/DoubleComplex/long *)
289
void acc(int lo[], int hi[], void *buf, int ld[], void *alpha) const;
292
* @copydoc GlobalArray::acc(int[],int[],void*,int[],void*)const
294
void acc(int64_t lo[], int64_t hi[], void *buf, int64_t ld[], void *alpha) const;
297
* Provides access to the specified patch of a global array.
299
* Returns array of leading dimensions ld and a pointer to the first element
300
* in the patch. This routine allows direct, in-place access to
301
* elements in the local section of a global array. It is useful for
302
* writing new GA operations. A call to ga_access normally follows a
303
* previous call to ga_distribution that returns coordinates of the
304
* patch associated with a processor. You need to make sure that the
305
* coordinates of the patch are valid (test values returned from
308
* Each call to ga_access has to be followed by a call to either
309
* ga_release or ga_release_update. You can access in this fashion only
310
* local data. Since the data is shared with other processes, you need
311
* to consider issues of mutual exclusion. This operation is local.
313
* @param[in] lo [ndim] array of starting indices for array section
314
* @param[in] hi [ndim] array of ending indices for array section
315
* @param[out] ptr points to location of first element in patch
316
* @param[out] ld [ndim-1] leading dimensions for the patch elements
318
void access(int lo[], int hi[], void *ptr, int ld[]) const;
321
* @copydoc GlobalArray::access(int[],int[],void*,int[])const
323
void access(int64_t lo[], int64_t hi[], void *ptr, int64_t ld[]) const;
326
* Provides access to the specified block of a global array that is using
327
* simple block-cyclic data distribution. Returns array of leading
328
* dimensions ld and a pointer to the first element in the patch. This
329
* routine allows user to access directly, in-place elements in the
330
* local section of a global array. It is useful for writing new GA
331
* operations. A call to ga_access normally follows a previous call to
332
* ga_distribution that returns coordinates of the patch associated with
333
* a processor. You need to make sure that the coordinates of the patch
334
* are valid (test values returned from ga_distribution).
336
* Each call to ga_access_block has to be followed by a call to either
337
* ga_release_block or ga_release_block_update. You can access in this
338
* fashion only local data. Since the data is shared with other processes,
339
* you need to consider issues of mutual exclusion. This operation is
342
* @param[in] idx index of block
343
* @param[out] ptr points to location of first element in patch
344
* @param[out] ld [ndim-1] leading dimensions for the patch elements
346
void accessBlock(int idx, void *ptr, int ld[]) const;
349
* @copydoc GlobalArray::accessBlock(int,void*,int[])const
351
void accessBlock(int64_t idx, void *ptr, int64_t ld[]) const;
354
* Provides access to the specified block of a global array that is using
355
* SCALAPACK type block-cyclic data distribution. Returns array of leading
356
* dimensions ld and a pointer to the first element in the patch. This
357
* routine allows user to access directly, in-place elements in the
358
* local section of a global array. It is useful for writing new GA
359
* operations. A call to ga_access_block normally follows a previous call to
360
* ga_distribution that returns coordinates of the patch associated with
361
* a processor. You need to make sure that the coordinates of the patch
362
* are valid (test values returned from ga_distribution).
364
* Each call to ga_access_block_grid has to be followed by a call to either
365
* ga_release_block_grid or ga_release_block_grid_update. You can access in
366
* this fashion only local data. Since the data is shared with other
367
* processes, you need to consider issues of mutual exclusion. This
368
* operation is local.
370
* @param[in] index [ndim] indices of block in processor grid
371
* @param[out] ptr points to location of first element in patch
372
* @param[out] ld [ndim-1] leading dimensions for the patch elements
374
void accessBlockGrid(int index[], void *ptr, int ld[]) const;
377
* @copydoc GlobalArray::accessBlockGrid(int[],void*,int[])const
379
void accessBlockGrid(int64_t index[], void *ptr, int64_t ld[]) const;
382
* Provides access to the local data of a global array that is using
383
* either the simple or SCALAPACK type block-cyclic data distribution.
384
* Returns the length of the local data block and a pointer to the first
385
* element. This routine allows user to access directly, in-place
386
* elements in the local section of a global array. It is useful for writing
389
* Each call to ga_access_segment has to be followed by a call to either
390
* ga_release_segment or ga_release_segmentupdate. You can access in
391
* this fashion only local data. Since the data is shared with other
392
* processes, you need to consider issues of mutual exclusion. This
393
* operation is local.
395
* @param[in] index processor ID
396
* @param[out] ptr points to location of first element
397
* @param[out] len length of locally held data
399
void accessBlockSegment(int index, void *ptr, int *len) const;
402
* @copydoc GlobalArray::accessBlockSegment(int,void*,int*)const
404
void accessBlockSegment(int index, void *ptr, int64_t *len) const;
407
* Provides access to the local patch of the global array. Returns
408
* leading dimension ld and a pointer for the data. This routine
409
* will provide access to the ghost cell data residing on each processor.
410
* Calls to accessGhosts should normally follow a call to
411
* distribution that returns coordinates of the visible data patch
412
* associated with a processor. You need to make sure that the coordinates
413
* of the patch are valid (test values returned from distribution).
415
* You can only access local data.
416
* This is a local operation.
418
* @param[out] dims [ndim] array of dimensions of local patch,
419
* including ghost cells
420
* @param[out] ptr returns an index corresponding to the origin of the global
421
* array patch held locally on the processor
422
* @param[out] ld [ndim-1] physical dimensions of the local array patch,
423
* including ghost cells
425
void accessGhosts(int dims[], void *ptr, int ld[]) const;
428
* @copydoc GlobalArray::accessGhosts(int[],void*,int[])const
430
void accessGhosts(int64_t dims[], void *ptr, int64_t ld[]) const;
433
* This function can be used to return a pointer to any data element
434
* in the locally held portion of the global array and can be used to
435
* directly access ghost cell data. The array subscript refers to the
436
* local index of the element relative to the origin of the local
437
* patch (which is assumed to be indexed by (0,0,...)).
439
* This is a local operation.
441
* @param[out] ptr index pointing to location of element
442
* indexed by subscript[]
443
* @param[in] subscript [ndim] array of integers that index desired element
444
* @param[out] ld [ndim-1] array of strides for local data patch.
445
* These include ghost cell widths.
447
void accessGhostElement(void *ptr, int subscript[], int ld[]) const;
450
* @copydoc GlobalArray::accessGhostElement(void*,int[],int[])const
452
void accessGhostElement(void *ptr, int64_t subscript[], int64_t ld[]) const;
455
* The arrays are added together element-wise:
456
* [for example: g_c.add(...,g_a, .., g_b);]
457
* c = alpha * a + beta * b
458
* The result c may replace one of the input arrays(a/b).
459
* This is a collective operation.
461
* @param[in] alpha scale factor
462
* @param[in] g_a array
463
* @param[in] beta scale factor
464
* @param[in] g_b array
466
void add(void *alpha, const GlobalArray * g_a,
467
void *beta, const GlobalArray * g_b) const;
470
* Patches of arrays (which must have the same number of elements) are
471
* added together element-wise.
472
* c[ ][ ] = alpha * a[ ][ ] + beta * b[ ][ ].
474
* This is a collective operation.
476
* @param[in] alpha scale factor
477
* @param[in] g_a global array
478
* @param[in] alo patch of g_a
479
* @param[in] ahi patch of g_a
480
* @param[in] beta scale factor
481
* @param[in] g_b global array
482
* @param[in] blo patch of g_b
483
* @param[in] bhi patch of g_b
484
* @param[in] clo patch of this GlobalArray
485
* @param[in] chi patch of this GlobalArray
487
void addPatch(void *alpha, const GlobalArray * g_a, int alo[], int ahi[],
488
void *beta, const GlobalArray * g_b, int blo[], int bhi[],
489
int clo[], int chi[]) const;
492
* @copydoc GlobalArray::addPatch(void*,const GlobalArray*,int[],int[],void*,const GlobalArray*,int[],int[],int[],int[])const
495
void *alpha, const GlobalArray * g_a, int64_t alo[], int64_t ahi[],
496
void *beta, const GlobalArray * g_b, int64_t blo[], int64_t bhi[],
497
int64_t clo[], int64_t chi[]) const;
500
* Allocate internal memory etc. to create a global array
504
int allocate() const;
507
* This function can be used to preallocate internal buffers that are used by
508
* the gather, scatter and scatter accumulate calls. This avoids repeated
509
* memory allocations in these calls that can reduce performance. The value of
510
* nelems should be set to the maximum number of elements that will be moved
511
* in any single call.
513
* This is a local operation.
515
* @param[in] nelems The maximum number of elements that will be moved in
516
* any gather, scatter, scatter-accumulate call
518
void allocGatscatBuf(int nelems) const;
521
* Check that the global array handle g_a is valid ... if not call
522
* ga_error with the string provided and some more info.
524
* This operation is local.
526
* @param[in] string message
528
void checkHandle(char* string) const;
531
* Compares distributions of two global arrays.
533
* This is a collective operation.
535
* @param[in] g_a GlobalArray to compare
537
* @return 0 if distributions are identical and 1 when they are not.
539
int compareDistr(const GlobalArray *g_a) const;
542
* Copies elements in array represented by g_a into the array
543
* represented by g_b [say for example: g_b.copy(g_a);].
544
* The arrays must be the same type, shape, and identically aligned.
546
* This is a collective operation.
548
* @param[in] g_a GlobalArray to copy
550
void copy(const GlobalArray *g_a) const;
553
* Copies elements in a patch of one array (ga) into another one (say for
554
* example:gb.copyPatch(...,ga,....); ).
556
* The patches of arrays may be of different shapes but must have the same
557
* number of elements. Patches must be nonoverlapping (if gb=ga).
559
* trans = 'N' or 'n' means that the transpose operator should not be
560
* applied. trans = 'T' or 't' means that transpose operator should be
561
* applied. This is a collective operation.
563
* @param[in] trans see above
564
* @param[in] ga global array
565
* @param[in] alo ga patch coordinates
566
* @param[in] ahi ga patch coordinates
567
* @param[in] blo this GlobalArray's patch coordinates
568
* @param[in] bhi this GlobalArray's patch coordinates
570
void copyPatch(char trans, const GlobalArray* ga, int alo[], int ahi[],
571
int blo[], int bhi[]) const;
574
* @copydoc GlobalArray::copyPatch(char,const GlobalArray*,int[],int[],int[],int[])const
577
char trans, const GlobalArray* ga, int64_t alo[], int64_t ahi[],
578
int64_t blo[], int64_t bhi[]) const;
581
* Computes element-wise dot product of the two arrays which must be of
582
* the same types and same number of elements.
583
* return value = SUM_ij a(i,j)*b(i,j)
585
* This is a collective operation.
587
* @param[in] g_a GlobalArray operand
589
double ddot(const GlobalArray * g_a) const;
592
* Computes the element-wise dot product of the two (possibly transposed)
593
* patches which must be of the same type and have the same number of
596
* @param[in] ta transpose flags
597
* @param[in] alo g_a patch coordinates
598
* @param[in] ahi g_a patch coordinates
599
* @param[in] g_a global array
600
* @param[in] tb transpose flags
601
* @param[in] blo g_b patch coordinates
602
* @param[in] bhi g_b patch coordinates
604
double ddotPatch(char ta, int alo[], int ahi[], const GlobalArray * g_a,
605
char tb, int blo[], int bhi[]) const;
608
* @copydoc GlobalArray::ddotPatch(char,int[],int[],const GlobalArray*,char,int[],int[])const
611
char ta, int64_t alo[], int64_t ahi[], const GlobalArray * g_a,
612
char tb, int64_t blo[], int64_t bhi[]) const;
615
* Deallocates the array and frees any associated resources.
620
* Performs one of the matrix-matrix operations:
621
* [say: g_c.dgemm(..., g_a, g_b,..);]
623
* C := alpha*op( A )*op( B ) + beta*C, \n
624
* where op( X ) is one of \n
625
* op( X ) = X or op( X ) = X', \n
626
* alpha and beta are scalars, and A, B and C are matrices, with op( A )
627
* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
628
* On entry, transa specifies the form of op( A ) to be used in the
629
* matrix multiplication as follows:\n
630
* ta = 'N' or 'n', op( A ) = A. \n
631
* ta = 'T' or 't', op( A ) = A'. \n
633
* This is a collective operation.
635
* @param[in] ta transpose operators
636
* @param[in] tb transpose operators
637
* @param[in] m number of rows of op(A) and of matrix C
638
* @param[in] n number of columns of op(B) and of matrix C
639
* @param[in] k number of columns of op(A) and rows of matrix op(B)
640
* @param[in] alpha scale factors
641
* @param[in] g_a input arrays
642
* @param[in] g_b input arrays
643
* @param[in] beta scale factors
645
void dgemm(char ta, char tb, int m, int n, int k, double alpha,
646
const GlobalArray *g_a, const GlobalArray *g_b,double beta) const;
648
* @copydoc GlobalArray::dgemm(char,char,int,int,int,double,const GlobalArray*,const GlobalArray*,double)const
650
void dgemm(char ta, char tb, int64_t m, int64_t n, int64_t k, double alpha,
651
const GlobalArray *g_a, const GlobalArray *g_b,double beta) const;
654
* Solve the generalized eigen-value problem returning all eigen-vectors
655
* and values in ascending order. The input matrices are not overwritten
658
* This is a collective operation.
660
* @param[in] g_s Metric
661
* @param[out] g_v Global matrix to return evecs
662
* @param[out] eval Local array to return evals
665
void diag(const GlobalArray *g_s, GlobalArray *g_v, void *eval) const;
668
* Solve the generalized eigen-value problem returning all eigen-vectors
669
* and values in ascending order. Recommended for REPEATED calls if g_s
670
* is unchanged. Values of the control flag:
672
* value action/purpose
674
* 0 indicates first call to the eigensolver
676
* >0 consecutive calls (reuses factored g_s)
678
* <0 only erases factorized g_s; g_v and eval unchanged
679
* (should be called after previous use if another
680
* eigenproblem, i.e., different g_a and g_s, is to
683
* The input matrices are not destroyed.
685
* This is a collective operation.
687
* @param[in] control Control flag
688
* @param[in] g_s Metric
689
* @param[out] g_v Global matrix to return evecs
690
* @param[out] eval Local array to return evals
692
void diagReuse(int control, const GlobalArray *g_s, GlobalArray *g_v,
696
* Solve the standard (non-generalized) eigenvalue problem returning
697
* all eigenvectors and values in the ascending order. The input matrix
698
* is neither overwritten nor destroyed.
700
* This is a collective operation.
702
* @param[out] g_v Global matrix to return evecs
703
* @param[out] eval Local array to return evals
705
void diagStd(GlobalArray *g_v, void *eval) const;
710
void diagSeq(const GlobalArray * g_s, const GlobalArray * g_v,
716
void diagStdSeq(const GlobalArray * g_v, void *eval) const;
719
* If no array elements are owned by process 'me', the range is returned
720
* as lo[]=-1 and hi[]=-2 for all dimensions.
722
* The operation is local.
724
* @param[in] me process number
725
* @param[in] lo [ndim] array of starting indices for array section
726
* @param[in] hi [ndim] array of ending indices for array section
728
void distribution(int me, int* lo, int* hi) const;
731
* @copydoc GlobalArray::distribution(int,int*,int*)const
733
void distribution(int me, int64_t* lo, int64_t* hi) const;
738
float fdot(const GlobalArray * g_a) const;
744
char t_a, int alo[], int ahi[], const GlobalArray * g_b,
745
char t_b, int blo[], int bhi[]) const;
747
* @copydoc GlobalArray::fdotPatch(char,int[],int[],const GlobalArray*,char,int[],int[])const
750
char t_a, int64_t alo[], int64_t ahi[], const GlobalArray * g_b,
751
char t_b, int64_t blo[], int64_t bhi[]) const;
754
* Assign a single value to all elements in the array.
756
* This is a collective operation.
758
* @param[in] value pointer to the value of appropriate type
759
* (double/DoubleComplex/long) that matches array type.
761
void fill(void *value) const;
764
* Fill the patch with value of 'val'
766
* This is a collective operation.
768
* @param[in] lo patch of this GlobalArray
769
* @param[in] hi patch of this GlobalArray
770
* @param[in] val value to fill
773
void fillPatch (int lo[], int hi[], void *val) const;
776
* @copydoc GlobalArray::fillPatch(int[],int[],void*)const
778
void fillPatch (int64_t lo[], int64_t hi[], void *val) const;
781
* This function can be used to free preallocate internal buffers that were
782
* set using the allocGatscatBuf call.
784
* This is a local operation.
786
void freeGatscatBuf();
789
* Gathers array elements from a global array into a local array.
790
* The contents of the input arrays (v, subscrArray) are preserved,
791
* but their contents might be (consistently) shuffled on return.
794
* for(k=0; k< n; k++){
795
* v[k] = a[subsArray[k][0]][subsArray[k][1]][subsArray[k][2]]...;
799
* This is a one-sided operation.
801
* @param[in] n number of elements
802
* @param[in] v [n] array containing values
803
* @param[in] subsarray [n][ndim] array of subscripts for each element
805
void gather(void *v, int * subsarray[], int n) const;
808
* @copydoc GlobalArray::gather(void*,int*[],int)const
810
void gather(void *v, int64_t * subsarray[], int64_t n) const;
813
* Copies data from global array section to the local array buffer. The
814
* local array is assumed to have the same number of dimensions as the
815
* global array. Any detected inconsistencies/errors in the input arguments
818
* Example: For ga_get operation transferring data from the [10:14,0:4]
819
* section of 2-dimensional 15x10 global array into local buffer 5x10
820
* array we have: lo={10,0}, hi={14,4}, ld={10}
822
* This is a one-sided operation.
824
* @param[in] lo [ndim] array of starting indices for global array section
825
* @param[in] hi [ndim] array of ending indices for global array section
826
* @param[out] buf pointer to the local buffer array where the data goes
827
* @param[in] ld [ndim-1] array specifying leading
828
* dimensions/strides/extents for buffer array
830
void get(int lo[], int hi[], void *buf, int ld[]) const;
833
* @copydoc GlobalArray::get(int[],int[],void*,int[])const
835
void get(int64_t lo[], int64_t hi[], void *buf, int64_t ld[]) const;
838
* The function retrieves the number of blocks along each coordinate dimension
839
* and the dimensions of the individual blocks for a global array with a
840
* block-cyclic data distribution.
842
* This is a local operation.
844
* @param[out] num_blocks [ndim] array containing number of blocks along each
845
* coordinate direction
846
* @param[out] block_dims [ndim] array containing block dimensions
848
void getBlockInfo(int num_blocks[], int block_dims[]);
851
* This function returns 1 if the global array has some dimensions for
852
* which the ghost cell width is greater than zero, it returns 0 otherwise.
854
* This is a local operation.
856
* @return 1 if this GlobalArray has some dimensions for which the ghost
857
* cell width is greater than zero; 0 otherwise
859
int hasGhosts() const;
862
* Computes element-wise dot product of the two arrays which must be of
863
* the same types and same number of elements.
865
* This is a collective operation.
867
* @param[in] g_a GlobalArray
868
* @return value = SUM_ij a(i,j)*b(i,j)
870
int idot(const GlobalArray * g_a) const;
873
* Computes the element-wise dot product of the two (possibly transposed)
874
* patches which must be of the same type and have the same number of
877
* @param[in] ta transpose flags
878
* @param[in] alo g_a patch coordinates
879
* @param[in] ahi g_a patch coordinates
880
* @param[in] g_a global array
881
* @param[in] tb transpose flags
882
* @param[in] blo this GlobalArray's patch coordinates
883
* @param[in] bhi this GlobalArray's patch coordinates
886
char ta, int alo[], int ahi[], const GlobalArray * g_a,
887
char tb, int blo[], int bhi[]) const;
890
* @copydoc GlobalArray::idotPatch(char,int[],int[],const GlobalArray*,char,int[],int[])const
893
char ta, int64_t alo[], int64_t ahi[], const GlobalArray * g_a,
894
char tb, int64_t blo[], int64_t bhi[]) const;
898
* Returns data type and dimensions of the array.
900
* This operation is local.
902
* @param[out] type data type
903
* @param[out] ndim number of dimensions
904
* @param[out] dims array of dimensions
906
void inquire(int *type, int *ndim, int dims[]) const;
909
* @copydoc GlobalArray::inquire(int*,int*,int[])const
911
void inquire(int *type, int *ndim, int64_t dims[]) const;
914
* Returns the name of an array represented by the handle g_a.
916
* This operation is local.
918
* @return copy of the name of this GlobalArray
920
char* inquireName() const;
923
* Computes element-wise dot product of the two arrays which must be of
924
* the same types and same number of elements.
927
* This is a collective operation.
929
* @param[in] g_a array handle
931
* @return value = SUM_ij a(i,j)*b(i,j)
933
long ldot(const GlobalArray * g_a) const;
936
* Computes the element-wise dot product of the two (possibly transposed)
937
* patches which must be of the same type and have the same number of
940
* @param[in] ta transpose flags
941
* @param[in] alo g_a patch coordinates
942
* @param[in] ahi g_a patch coordinates
943
* @param[in] g_a global array
944
* @param[in] tb transpose flags
945
* @param[in] blo this GlobalArray's patch coordinates
946
* @param[in] bhi this GlobalArray's patch coordinates
949
char ta, int alo[], int ahi[], const GlobalArray * g_a,
950
char tb, int blo[], int bhi[]) const;
953
* @copydoc GlobalArray::ldotPatch(char,int[],int[],const GlobalArray*,char,int[],int[])const
956
char ta, int64_t alo[], int64_t ahi[], const GlobalArray * g_a,
957
char tb, int64_t blo[], int64_t bhi[]) const;
960
* Solves a system of linear equations
964
* using the Cholesky factorization of an NxN double precision symmetric
965
* positive definite matrix A (represented by handle g_a). On successful
966
* exit B will contain the solution X.
968
* This is a collective operation.
970
* @param[in] g_a coefficient matrix
972
* @return = 0 : successful exit\n
973
* > 0 : the leading minor of this order is not positive
974
* definite and the factorization could not be completed
976
int lltSolve(const GlobalArray * g_a) const;
979
* Return in owner the GA compute process id that 'owns' the data. If any
980
* element of subscript[] is out of bounds "-1" is returned.
982
* This operation is local.
984
* @param[in] subscript [ndim] element subscript
986
* @return ID of compute process which owns the data
988
int locate(int subscript[]) const;
991
* @copydoc GlobalArray::locate(int[])const
993
int locate(int64_t subscript[]) const;
996
* Return the list of the GA process ids that 'own' the data. Parts of the
997
* specified patch might be actually 'owned' by several processes. If lo/hi
998
* are out of bounds "0" is returned, otherwise return value is equal to the
999
* number of processes that hold the data. This operation is local.
1001
* map[i][0:ndim-1] - lo[i]
1003
* map[i][ndim:2*ndim-1] - hi[i]
1005
* procs[i] - processor id that owns data in patch
1008
* @param[in] lo [ndim] array of starting indices for array section
1009
* @param[in] hi [ndim] array of ending indices for array section
1010
* @param[out] map [][2*ndim] array with mapping information
1011
* @param[out] procs [nproc] list of processes that own a part of selection
1013
* @return 0 if lo/hi are out of bounds, otherwise the number of processes
1016
int locateRegion(int lo[], int hi[], int map[], int procs[]) const;
1019
* @copydoc GlobalArray::locateRegion(int[],int[],int[],int[])const
1021
int locateRegion(int64_t lo[], int64_t hi[], int64_t map[], int procs[]) const;
1024
* Solve the system of linear equations op(A)X = B based on the LU
1027
* op(A) = A or A' depending on the parameter trans:
1029
* trans = 'N' or 'n' means that the transpose operator should not
1032
* trans = 'T' or 't' means that the transpose operator should be applied.
1034
* Matrix A is a general real matrix. Matrix B contains possibly multiple
1035
* rhs vectors. The array associated with the handle g_b is overwritten
1036
* by the solution matrix X.
1037
* This is a collective operation.
1039
* @param[in] trans transpose or not transpose
1040
* @param[in] g_a coefficient matrix
1042
void luSolve(char trans, const GlobalArray * g_a) const;
1045
* ga_matmul_patch is a patch version of ga_dgemm:
1047
* C[cilo:cihi,cjlo:cjhi] := alpha* AA[ailo:aihi,ajlo:ajhi] *
1048
* BB[bilo:bihi,bjlo:bjhi] ) +
1049
* beta*C[cilo:cihi,cjlo:cjhi],
1051
* where AA = op(A), BB = op(B), and op( X ) is one of
1052
* op( X ) = X or op( X ) = X',
1054
* Valid values for transpose arguments: 'n', 'N', 't', 'T'. It works
1055
* for both double and DoubleComplex data types.
1056
* This is a collective operation.
1058
* @param[in] g_a global array
1059
* @param[in] g_b global array
1060
* @param[in] ailo patch of g_a
1061
* @param[in] aihi patch of g_a
1062
* @param[in] ajlo patch of g_a
1063
* @param[in] ajhi patch of g_a
1064
* @param[in] bilo patch of g_b
1065
* @param[in] bihi patch of g_b
1066
* @param[in] bjlo patch of g_b
1067
* @param[in] bjhi patch of g_b
1068
* @param[in] cilo patch of g_c
1069
* @param[in] cihi patch of g_c
1070
* @param[in] cjlo patch of g_c
1071
* @param[in] cjhi patch of g_c
1072
* @param[in] alpha scale factors
1073
* @param[in] beta scale factors
1074
* @param[in] transa transpose operators
1075
* @param[in] transb transpose operators
1077
void matmulPatch(char transa, char transb, void* alpha, void *beta,
1078
const GlobalArray *g_a,
1079
int ailo, int aihi, int ajlo, int ajhi,
1080
const GlobalArray *g_b,
1081
int bilo, int bihi, int bjlo, int bjhi,
1082
int cilo, int cihi, int cjlo, int cjhi) const;
1085
* @copydoc GlobalArray::matmulPatch(char,char,void*,void*,const GlobalArray*,int,int,int,int,const GlobalArray*,int,int,int,int,int,int,int,int)const
1087
void matmulPatch(char transa, char transb, void* alpha, void *beta,
1088
const GlobalArray *g_a,
1089
int64_t ailo, int64_t aihi, int64_t ajlo, int64_t ajhi,
1090
const GlobalArray *g_b,
1091
int64_t bilo, int64_t bihi, int64_t bjlo, int64_t bjhi,
1092
int64_t cilo, int64_t cihi, int64_t cjlo, int64_t cjhi) const;
1095
* nga_matmul_patch is a n-dimensional patch version of ga_dgemm:
1097
* C[clo[]:chi[]] := alpha* AA[alo[]:ahi[]] *
1098
* BB[blo[]:bhi[]]) +
1099
* beta*C[clo[]:chi[]],
1101
* where AA = op(A), BB = op(B), and op( X ) is one of
1102
* op( X ) = X or op( X ) = X',
1104
* Valid values for transpose arguments: 'n', 'N', 't', 'T'. It works
1105
* for both double and DoubleComplex data types.
1107
* This is a collective operation.
1109
* @param[in] g_a global array
1110
* @param[in] g_b global array
1111
* @param[in] alo array of patch of g_a
1112
* @param[in] ahi array of patch of g_a
1113
* @param[in] blo array of patch of g_b
1114
* @param[in] bhi array of patch of g_b
1115
* @param[in] clo array of patch of g_c
1116
* @param[in] chi array of patch of g_c
1117
* @param[in] alpha scale factors
1118
* @param[in] beta scale factors
1119
* @param[in] transa transpose operators
1120
* @param[in] transb transpose operators
1122
void matmulPatch(char transa, char transb, void* alpha, void *beta,
1123
const GlobalArray *g_a, int *alo, int *ahi,
1124
const GlobalArray *g_b, int *blo, int *bhi,
1125
int *clo, int *chi) const;
1127
* @copydoc GlobalArray::matmulPatch(char,char,void*,void*,const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,int*,int*)const
1129
void matmulPatch(char transa, char transb, void* alpha, void *beta,
1130
const GlobalArray *g_a, int64_t *alo, int64_t *ahi,
1131
const GlobalArray *g_b, int64_t *blo, int64_t *bhi,
1132
int64_t *clo, int64_t *chi) const;
1135
* This function merges all values in a patch of a mirrored array into
1136
* a patch in another global array g_b.
1138
* This is a collective operation.
1140
* @param[in] alo [ndim] patch indices of mirrored array
1141
* @param[in] ahi [ndim] patch indices of mirrored array
1142
* @param[in] blo [ndim] patch indices of result array
1143
* @param[in] bhi [ndim] patch indices of result array
1144
* @param[out] g_a global array containing result
1146
void mergeDistrPatch(int alo[], int ahi[], GlobalArray *g_a,
1147
int blo[], int bhi[]);
1150
* @copydoc GlobalArray::mergeDistrPatch(int[],int[],GlobalArray*,int[],int[])
1152
void mergeDistrPatch(int64_t alo[], int64_t ahi[], GlobalArray *g_a,
1153
int64_t blo[], int64_t bhi[]);
1156
* This function returns 0 if a global array is not mirrored and 1 if it is.
1161
* This function adds together all copies of a mirrored array so that all
1162
* copies are the same.
1164
* This is a collective operation.
1166
void mergeMirrored();
1169
* Non-blocking accumulate operation. This function performs an
1170
* accumulate operation and returns a nonblocking handle. Completion of the
1171
* operation can be forced by calling the nbwait method on the handle.
1173
* This is a onesided operation.
1175
* @param[in] lo [ndim] patch coordinates of block
1176
* @param[in] hi [ndim] patch coordinates of block
1177
* @param[in] buf local buffer containing data
1178
* @param[in] ld [ndim-1] array of strides for local data
1179
* @param[in] alpha multiplier for data before adding to existing results
1180
* @param[out] nbhandle nonblocking handle
1182
void nbAcc(int lo[], int hi[], void *buf, int ld[], void *alpha,
1186
* @copydoc GlobalArray::nbAcc(int[],int[],void*,int[],void*,GANbhdl*)
1188
void nbAcc(int64_t lo[], int64_t hi[], void *buf, int64_t ld[], void *alpha,
1192
* Non-blocking get operation. This function gets a data block from a
1193
* global array, copies it into a local buffer, and returns a nonblocking
1194
* handle. Completion of the operation can be forced by calling the nbwait
1195
* method on the handle.
1197
* This is a onesided operation.
1199
* @param[in] lo [ndim] patch coordinates of block
1200
* @param[in] hi [ndim] patch coordinates of block
1201
* @param[out] buf local buffer to receive data
1202
* @param[in] ld [ndim-1] array of strides for local data
1203
* @param[out] nbhandle nonblocking handle
1205
void nbGet(int lo[], int hi[], void *buf, int ld[], GANbhdl *nbhandle);
1208
* @copydoc GlobalArray::nbGet(int[],int[],void*,int[],GANbhdl*)
1210
void nbGet(int64_t lo[], int64_t hi[], void *buf, int64_t ld[], GANbhdl *nbhandle);
1213
* Non-blocking update operation for arrays with ghost cells. Ghost cells
1214
* along the coordinates specified in the mask array are updated with
1215
* non-blocking get calls. The mask array must contain either 0's or 1's.
1217
* This is a onesided operation.
1219
* @param[in] mask [ndim] array with flags for directions that are
1221
* @param[out] nbhandle nonblocking handle
1223
void nbGetGhostDir(int mask[], GANbhdl *nbhandle);
1226
* @copydoc GlobalArray::nbGetGhostDir(int[],GANbhdl*)
1228
void nbGetGhostDir(int64_t mask[], GANbhdl *nbhandle);
1231
* Given a distribution of an array represented by the handle g_a,
1232
* returns the number of partitions of each array dimension.
1234
* This operation is local.
1236
* @param[out] nblock [ndim] number of partitions for each dimension
1238
void nblock(int nblock[]) const;
1241
* Non-blocking put operation. This function puts a data block from a
1242
* local array, copies it into a global array, and returns a nonblocking
1243
* handle. Completion of the operation can be forced by calling the nbwait
1244
* method on the handle.
1246
* This is a onesided operation.
1248
* @param[in] lo [ndim] patch coordinates of block
1249
* @param[in] hi [ndim] patch coordinates of block
1250
* @param[in] buf local buffer that supplies data
1251
* @param[in] ld [ndim-1] array of strides for local data
1252
* @param[out] nbhandle nonblocking handle
1254
void nbPut(int lo[], int hi[], void *buf, int ld[], GANbhdl *nbhandle);
1257
* @copydoc GlobalArray::nbPut(int[],int[],void*,int[],GANbhdl*)
1259
void nbPut(int64_t lo[], int64_t hi[], void *buf, int64_t ld[], GANbhdl *nbhandle);
1262
* Returns the number of dimensions in this GlobalArray.
1264
* This operation is local.
1266
* @return number of dimensions aka rank
1271
* The pack subroutine is designed to compress the values in the source vector
1272
* g_src into a smaller destination array g_dest based on the values in an
1273
* integer mask array g_mask. The values lo and hi denote the range of
1274
* elements that should be compressed and icount is a variable that on output
1275
* lists the number of values placed in the compressed array. This operation
1276
* is the complement of the ga_unpack operation. An example is shown below
1278
* g_src->pack(g_dest, g_mask, 1, n, icount)
1279
* g_mask: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
1280
* g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1281
* g_dest: 1 7 9 12 15 16
1284
* The calling array is the source array.
1286
* This is a collective operation.
1288
* @param[out] g_dest destination array
1289
* @param[in] g_mask mask array
1290
* @param[in] lo coordinate interval to pack
1291
* @param[in] hi coordinate interval to pack
1292
* @param[out] icount number of packed elements
1294
void pack(const GlobalArray *g_dest, const GlobalArray *g_mask,
1295
int lo, int hi, int *icount) const;
1298
* @copydoc GlobalArray::pack(const GlobalArray*,const GlobalArray*,int,int,int*)const
1300
void pack(const GlobalArray *g_dest, const GlobalArray *g_mask,
1301
int64_t lo, int64_t hi, int64_t *icount) const;
1304
* This subroutine enumerates the values of an array between elements lo and
1305
* hi starting with the value istart and incrementing each subsequent value by
1306
* inc. This operation is only applicable to 1-dimensional arrays. An example
1307
* of its use is shown below:
1309
* g_a->patchEnum(1, n, &istart, &inc); // with istart = 7, inc = 2
1310
* g_a: 7 9 11 13 15 17 19 21 23 ...
1312
* This is a collective operation.
1314
* @param[in] lo coordinate interval to enumerate
1315
* @param[in] hi coordinate interval to enumerate
1316
* @param[in] istart starting value of enumeration
1317
* @param[in] inc increment value
1319
void patchEnum(int lo, int hi, void *istart, void *inc);
1322
* @copydoc GlobalArray::patchEnum(int,int,void*,void*)
1324
void patchEnum(int64_t lo, int64_t hi, void *start, void *inc);
1327
* Same as nga_acc except the indices can extend beyond the array
1328
* boundary/dimensions in which case the library wraps them around.
1330
* This is a one-sided and atomic operation.
1332
* @param[in] lo [ndim] array of starting indices for array section
1333
* @param[in] hi [ndim] array of ending indices for array section
1334
* @param[in] buf pointer to the local buffer array
1335
* @param[in] ld [ndim-1] array specifying leading
1336
* dimensions/strides/extents for buffer array
1337
* @param[in] alpha double/DoubleComplex/long scale factor
1339
void periodicAcc(int lo[], int hi[], void* buf, int ld[], void* alpha) const;
1342
* @copydoc GlobalArray::periodicAcc(int[],int[],void*,int[],void*)const
1344
void periodicAcc(int64_t lo[], int64_t hi[], void* buf, int64_t ld[], void* alpha) const;
1347
* Same as nga_get except the indices can extend beyond the array
1348
* boundary/dimensions in which case the library wraps them around.
1350
* This is a one-sided operation.
1352
* @param[in] lo [ndim] array of starting indices for global array section
1353
* @param[in] hi [ndim] array of ending indices for global array section
1354
* @param[out] buf pointer to the local buffer array where the data goes
1355
* @param[in] ld [ndim-1] array specifying leading
1356
* dimensions/strides/extents for buffer array
1358
void periodicGet(int lo[], int hi[], void* buf, int ld[]) const;
1361
* @copydoc GlobalArray::periodicGet(int[],int[],void*,int[])const
1363
void periodicGet(int64_t lo[], int64_t hi[], void* buf, int64_t ld[]) const;
1366
* Same as nga_put except the indices can extend beyond the array
1367
* boundary/dimensions in which case the library wraps them around.
1369
* This is a one-sided operation.
1371
* @param[in] lo [ndim] array of starting indices for global array section
1372
* @param[in] hi [ndim] array of ending indices for global array section
1373
* @param[in] buf pointer to the local buffer array that supplies the data
1374
* @param[in] ld [ndim-1] array specifying leading
1375
* dimensions/strides/extents for buffer array
1377
void periodicPut(int lo[], int hi[], void* buf, int ld[]) const;
1380
* @copydoc GlobalArray::periodicPut(int[],int[],void*,int[])const
1382
void periodicPut(int64_t lo[], int64_t hi[], void* buf, int64_t ld[]) const;
1385
* Prints an entire array to the standard output.
1387
* This is a collective operation.
1389
void print() const ;
1392
* Prints the array distribution.
1394
* This is a collective operation.
1396
void printDistribution() const ;
1399
* Prints the array distribution to a file.
1401
* This is a collective operation.
1403
void printFile(FILE *file) const;
1406
* Prints a patch of g_a array to the standard output. If pretty has the
1407
* value 0 then output is printed in a dense fashion. If pretty has the
1408
* value 1 then output is formatted and rows/columns labeled.
1410
* This is a collective operation.
1412
* @param[in] lo coordinates of the patch
1413
* @param[in] hi coordinates of the patch
1414
* @param[in] pretty formatting flag
1416
void printPatch(int* lo, int* hi, int pretty) const;
1419
* @copydoc GlobalArray::printPatch(int*,int*,int)const
1421
void printPatch(int64_t* lo, int64_t* hi, int pretty) const;
1424
* Based on the distribution of an array associated with handle g_a,
1425
* determines coordinates of the specified processor in the virtual
1426
* processor grid corresponding to the distribution of array g_a. The
1427
* numbering starts from 0. The values of -1 means that the processor
1428
* doesn't 'own' any section of array represented by g_a.
1430
* This operation is local.
1432
* @param[in] proc process id
1433
* @param[out] coord [ndim] coordinates in processor grid
1436
void procTopology(int proc, int coord[]) const;
1438
/*void procTopology(int proc, int *prow, int *pcol);*/
1441
* Copies data from local array buffer to the global array section. The
1442
* local array is assumed to have the same number of dimensions as the
1443
* global array. Any detected inconsistencies/errors in input arguments are
1444
* fatal. This is a one-sided operation.
1446
* @param[in] lo [ndim] array of starting indices for global array section
1447
* @param[in] hi [ndim] array of ending indices for global array section
1448
* @param[in] buf pointer to the local buffer array that holds the data to copy
1449
* @param[in] ld [ndim-1] array specifying leading
1450
* dimensions/strides/extents for buffer array
1451
* @param[in] buf buffer array
1453
void put(int lo[], int hi[], void *buf, int ld[]) const;
1456
* @copydoc GlobalArray::put(int[],int[],void*,int[])const
1458
void put(int64_t lo[], int64_t hi[], void *buf, int64_t ld[]) const;
1461
* Atomically read and increment an element in an integer array.
1463
* *BEGIN CRITICAL SECTION*
1465
* old_value = a(subscript)
1467
* a(subscript) += inc
1469
* *END CRITICAL SECTION*
1473
* This is a one-sided and atomic operation.
1475
* @param[in] subscript [ndim] subscript array for the referenced element
1476
* @param[in] inc how much to increment by
1477
* @return the value of the element before the increment (old_value)
1479
long readInc(int subscript[], long inc) const;
1482
* @copydoc GlobalArray::readInc(int[],long)const
1484
long readInc(int64_t subscript[], long inc) const;
1487
* Releases access to a global array when the data was read only.
1488
* Your code should look like:
1491
* g_a->distribution(myproc, lo,hi);
1492
* g_a->access(lo, hi, &ptr, ld);
1493
* // <operate on the data referenced by ptr>
1494
* g_a->release(lo, hi);
1497
* @note see restrictions specified for ga_access.
1499
* This operation is local.
1501
* @param[in] lo [ndim] array of starting indices for array section
1502
* @param[in] hi [ndim] array of ending indices for array section
1504
void release(int lo[], int hi[]) const;
1507
* @copydoc GlobalArray::release(int[],int[])const
1509
void release(int64_t lo[], int64_t hi[]) const;
1512
* Releases access to the block of data specified by the integer
1513
* index when data was accessed as read only. This is only applicable to
1514
* block-cyclic data distributions created using the simple block-cyclic
1517
* This is a local operation.
1519
* @param[in] index block index
1521
void releaseBlock(int index) const;
1524
* Releases access to the block of data specified by the subscript
1525
* array when data was accessed as read only. This is only applicable to
1526
* block-cyclic data distributions created using the SCALAPACK data
1529
* This is a local operation.
1531
* @param[in] index [ndim] indices of block in array
1533
void releaseBlockGrid(int index[]) const;
1536
* Releases access to the block of locally held data for a block-cyclic
1537
* array, when data was accessed as read-only. This is a local operation.
1539
* @param[in] proc process ID/rank
1541
void releaseBlockSegment(int proc) const;
1544
* Releases access to the data. It must be used if the data was accessed
1545
* for writing. NOTE: see restrictions specified for ga_access.
1547
* This operation is local.
1549
* @param[in] lo [ndim] array of starting indices for array section
1550
* @param[in] hi [ndim] array of ending indices for array section
1552
void releaseUpdate(int lo[], int hi[]) const;
1555
* @copydoc GlobalArray::releaseUpdate(int[],int[])const
1557
void releaseUpdate(int64_t lo[], int64_t hi[]) const;
1560
* Releases access to the block of data specified by the integer index when
1561
* data was accessed in read-write mode. This is only applicable to
1562
* block-cyclic data distributions created using the simple block-cyclic
1565
* This is a local operation.
1567
* @param[in] index block index
1569
void releaseUpdateBlock(int index) const;
1572
* Releases access to the block of data specified by the subscript
1573
* array when data was accessed in read-write mode. This is only applicable
1574
* to block-cyclic data distributions created using the SCALAPACK data
1577
* This is a local operation.
1579
* @param[in] index [ndim] indices of block in array
1581
void releaseUpdateBlockGrid(int index[]) const;
1584
* Releases access to the block of locally held data for a block-cyclic
1585
* array, when data was accessed in read-write mode.
1587
* This is a local operation.
1589
* @param[in] proc process ID/rank
1591
void releaseUpdateBlockSegment(int proc) const;
1594
* Releases access to a global array containing ghost cells when the data was
1596
* Your code should look like:
1599
* g_a->accessGhosts(dims, &ptr, ld)
1600
* // <operate on the data referenced by ptr>
1601
* g_a->releaseGhosts();
1604
* This operation is local.
1607
void releaseGhosts() const;
1610
* Releases access to a global array containing ghost cells when the data was
1611
* accessed in read-write mode.
1613
* This operation is local.
1616
void releaseUpdateGhosts() const;
1619
* Releases access to a global array containing ghost cells when the data was
1621
* Your code should look like:
1624
* g_a->accessGhostElement(&ptr, subscript, ld)
1625
* // <operate on the data referenced by ptr>
1626
* g_a->releaseGhostElement(subscript);
1629
* This operation is local.
1630
* @param[in] subscript indices of element
1633
void releaseGhostElement(int subscript[]) const;
1636
* @copydoc GlobalArray::releaseGhostElement(int subscript[]) const
1638
void releaseGhostElement(int64_t subscript[]) const;
1641
* Releases access to a global array containing ghost cells when the data was
1642
* accessed in read-write mode.
1644
* This operation is local.
1645
* @param[in] subscript indices of element
1648
void releaseUpdateGhostElement(int subscript[]) const;
1651
* @copydoc GlobalArray::releaseUpdateGhostElement(int subscript[]) const
1653
void releaseUpdateGhostElement(int64_t subscript[]) const;
1656
* Scales an array by the constant s. Note that the library is unable
1657
* to detect errors when the pointed value is of different type than
1660
* This is a collective operation.
1662
* @param[in] value pointer to the value of appropriate type
1664
void scale(void *value) const;
1667
* Scale an array by the factor 'val'.
1669
* This is a collective operation.
1671
* @param[in] lo patch of g_a
1672
* @param[in] hi patch of g_a
1673
* @param[in] val scale factor
1675
void scalePatch (int lo[], int hi[], void *val) const;
1678
* @copydoc GlobalArray::scalePatch(int[],int[],void*)const
1680
void scalePatch (int64_t lo[], int64_t hi[], void *val) const;
1683
* This operation will add successive elements in a source vector g_src
1684
* and put the results in a destination vector g_dest. The addition will
1685
* restart based on the values of the integer mask vector g_mask. The scan
1686
* is performed within the range specified by the integer values lo and
1687
* hi. Note that this operation can only be applied to 1-dimensional
1688
* arrays. The excl flag determines whether the sum starts with the value
1689
* in the source vector corresponding to the location of a 1 in the mask
1690
* vector (excl=0) or whether the first value is set equal to 0
1691
* (excl=1). Some examples of this operation are given below.
1693
* g_src->scanAdd(g_dest, g_mask, 1, n, 0);
1694
* g_mask: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
1695
* g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1696
* g_dest: 1 3 6 10 16 21 7 15 9 19 30 12 25 39 15 16 33
1698
* g_src->scanAdd(g_dest, g_mask, 1, n, 1);
1699
* g_mask: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
1700
* g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1701
* g_dest: 0 1 3 6 10 15 0 7 0 9 19 0 12 25 0 0 16
1703
* This is a collective operation.
1705
* @param[out] g_dest handle for destination array
1706
* @param[in] g_mask handle for integer array representing mask
1707
* @param[in] lo low and high values of range on which operation
1709
* @param[in] hi low and high values of range on which operation
1711
* @param[in] excl value to signify if masked values are included in the add
1713
void scanAdd(const GlobalArray *g_dest, const GlobalArray *g_mask,
1714
int lo, int hi, int excl) const;
1717
* @copydoc GlobalArray::scanAdd(const GlobalArray*,const GlobalArray*,int,int,int)const
1719
void scanAdd(const GlobalArray *g_dest, const GlobalArray *g_mask,
1720
int64_t lo, int64_t hi, int excl) const;
1723
* This subroutine does a segmented scan-copy of values in the
1724
* source array g_src into a destination array g_dest with segments
1725
* defined by values in the integer mask array g_mask. The scan-copy
1726
* operation is only applied to the range between the lo and hi
1727
* indices. This operation is restricted to 1-dimensional arrays. The
1728
* resulting destination array will consist of segments of consecutive
1729
* elements with the same value. An example is shown below
1731
* g_src->scanCopy(g_dest, g_mask, 1, n);
1732
* g_mask: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
1733
* g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1734
* g_dest: 1 1 1 1 1 1 7 7 9 9 9 12 12 12 15 16 16
1736
* This is a collective operation.
1738
* @param[out] g_dest handle for destination array
1739
* @param[in] g_mask handle for integer array representing mask
1740
* @param[in] lo low and high values of range on which operation
1742
* @param[in] hi low and high values of range on which operation
1745
void scanCopy(const GlobalArray *g_dest, const GlobalArray *g_mask,
1746
int lo, int hi) const;
1749
* @copydoc GlobalArray::scanCopy(const GlobalArray*,const GlobalArray*,int,int)const
1751
void scanCopy(const GlobalArray *g_dest, const GlobalArray *g_mask,
1752
int64_t lo, int64_t hi) const;
1755
* Scatters array elements into a global array. The contents of the input
1756
* arrays (v,subscrArray) are preserved, but their contents might be
1757
* (consistently) shuffled on return.
1760
* for(k=0; k< n; k++) {
1761
* a[subsArray[k][0]][subsArray[k][1]][subsArray[k][2]]... = v[k];
1765
* This is a one-sided operation.
1767
* @param[in] n number of elements
1768
* @param[in] v [n] array containing values
1769
* @param[in] subsarray [n][ndim] array of subscripts for each element
1771
void scatter(void *v, int *subsarray[], int n) const;
1774
* @copydoc GlobalArray::scatter(void*,int*[],int)const
1776
void scatter(void *v, int64_t *subsarray[], int64_t n) const;
1779
* Adds elements of a local array to array elements of a global array after
1780
* multiplying by alpha. The contents of the input arrays (v,subscrArray)
1781
* are preserved, but their contents might be (consistently) shuffled on
1785
* for(k=0; k< n; k++) {
1786
* a[subsArray[k][0]][subsArray[k][1]][subsArray[k][2]]... += alpha*v[k];
1790
* This is a one-sided operation.
1792
* @param[in] n number of elements
1793
* @param[in] v [n] array containing values
1794
* @param[in] subsarray [n][ndim] array of subscripts for each element
1795
* @param[in] alpha scale factor
1797
void scatterAcc(void *v, int *subsarray[], int n, void *alpha) const;
1800
* @copydoc GlobalArray::scatterAcc(void*,int*[],int,void*)const
1802
void scatterAcc(void *v, int64_t *subsarray[], int64_t n, void *alpha) const;
1805
* Returns the value and index for an element that is selected by the
1806
* specified operator in a global array corresponding to g_a handle.
1808
* This is a collective operation.
1810
* @param[in] op operator {"min","max"}
1811
* @param[out] val address where value should be stored
1812
* @param[out] index [ndim] array index for the selected element
1814
void selectElem(char *op, void* val, int index[]) const;
1817
* @copydoc GlobalArray::selectElem(char*,void*,int[])const
1819
void selectElem(char *op, void* val, int64_t index[]) const;
1822
* This function can be used to assign a unique character
1823
* string name to a global array handle that was obtained
1824
* using the createHandle function.
1826
* This is a collective operation.
1828
* @param[in] name array name
1830
void setArrayName(char *name) const;
1833
* This subroutine is used to create a global array with a simple
1834
* block-cyclic data distribution. The array is broken up into blocks of
1835
* size dims and each block is numbered sequentially using a column major
1836
* indexing scheme. The blocks are then assigned in a simple round-robin
1837
* fashion to processors. This is illustrated in the figure below for an
1838
* array containing 25 blocks distributed on 4 processors. Blocks at the
1839
* edge of the array may be smaller than the block size specified in
1840
* dims. In the example below, blocks 4,9,14,19,20,21,22,23, and 24 might
1841
* be smaller than the remaining blocks. Most global array operations
1842
* are insensitive to whether or not a block-cyclic data distribution is
1843
* used, although performance may be slower in some cases if the global
1844
* array is using a block-cyclic data distribution. Individual data
1845
* blocks can be accessed using the block-cyclic access functions.
1847
* This is a collective operation.
1849
* @param[in] dims array of block dimensions
1851
void setBlockCyclic(int dims[]) const;
1854
* This subroutine is used to create a global array with a
1855
* SCALAPACK-type block cyclic data distribution. The user specifies
1856
* the dimensions of the processor grid in the array proc_grid. The
1857
* product of the processor grid dimensions must equal the
1858
* total number of processors and the number of dimensions in the
1859
* processor grid must be the same as the number of dimensions in the
1860
* global array. The data blocks are mapped onto the processor grid
1861
* in a cyclic manner along each of the processor grid axes. This is
1862
* illustrated below for an array consisting of 25 data blocks
1863
* distributed on 6 processors. The 6 processors are configured in a 3
1864
* by 2 processor grid. Blocks at the edge of the array may be
1865
* smaller than the block size specified in dims. Most global array
1866
* operations are insensitive to whether or not a block-cyclic data
1867
* distribution is used, although performance may be slower in some
1868
* cases if the global array is using a block-cyclic data
1869
* distribution. Individual data blocks can be accessed using the
1870
* block-cyclic access functions.
1872
* This is a collective operation.
1874
* @param[in] dims array of block dimensions
1875
* @param[in] proc_grid processor grid dimensions
1877
void setBlockCyclicProcGrid(int dims[], int proc_grid[]) const;
1880
* This function is used to set the chunk array for a global array handle
1881
* that was obtained using the createHandle function. The chunk array
1882
* is used to determine the minimum number of array elements assigned to
1883
* each processor along each coordinate direction.
1885
* This is a collective operation.
1887
* @param[in] chunk array of chunk widths
1889
void setChunk(int chunk[]) const;
1892
* @copydoc GlobalArray::setChunk(int[])const
1894
void setChunk(int64_t chunk[]) const;
1897
* This function can be used to set the array dimension, the coordinate
1898
* dimensions, and the data type assigned to a global array handle obtained
1899
* using the GA_Create_handle function.
1901
* This is a collective operation.
1903
* @param[in] ndim dimension of global array
1904
* @param[in] dims dimensions of global array
1905
* @param[in] type data type of global array
1907
void setData(int ndim, int dims[], int type) const;
1910
* @copydoc GlobalArray::setData(int,int[],int)const
1912
void setData(int ndim, int64_t dims[], int type) const;
1915
* This function can be used to set the ghost cell widths for a global
1916
* array handle that was obtained using the createHandle function. The
1917
* ghosts cells widths indicate how many ghost cells are used to pad the
1918
* locally held array data along each dimension. The padding can be set
1919
* independently for each coordinate dimension.
1921
* This is a collective operation.
1923
* @param[in] width [ndim] array of ghost cell widths
1925
void setGhosts(int width[]) const;
1928
* @copydoc GlobalArray::setGhosts(int[])const
1930
void setGhosts(int64_t width[]) const;
1933
* This function can be used to partition the array data among the
1934
* individual processors for a global array handle obtained using the
1935
* GA_Create_handle function.
1937
* The distribution is specified as a Cartesian product of distributions
1938
* for each dimension. For example, the following figure demonstrates
1939
* distribution of a 2-dimensional array 8x10 on 6 (or more)
1940
* processors. nblock(2)={3, 2}, the size of mapc array is s=5 and array
1941
* mapc contains the following elements mapc={1, 3, 7, 1, 6}. The
1942
* distribution is nonuniform because, P1 and P4 get 20 elements each and
1943
* processors P0,P2,P3, and P5 only 10 elements each.
1945
* The array width() is used to control the width of the ghost cell
1946
* boundary around the visible data on each processor. The local data of
1947
* the global array residing on each processor will have a layer width(n)
1948
* ghosts cells wide on either side of the visible data along the dimension
1951
* This is a collective operation.
1953
* @param[in] mapc [s] starting index for each block; the size
1954
* s is the sum of all elements of the array nblock
1955
* @param[in] nblock [ndim] number of blocks that each dimension is
1958
void setIrregDistr(int mapc[], int nblock[]) const;
1961
* @copydoc GlobalArray::setIrregDistr(int[],int[])const
1963
void setIrregDistr(int64_t mapc[], int64_t nblock[]) const;
1966
* This function can be used to set the processor configuration assigned to
1967
* a global array handle that was obtained using the
1968
* createHandle function. It can be used to create mirrored arrays by
1969
* using the mirrored array processor configuration in this function
1970
* call. It can also be used to create an array on a processor group by
1971
* using a processor group handle in this call.
1973
* This is a collective operation.
1975
* @param[in] pHandle processor group handle
1977
void setPGroup(PGroup *pHandle) const;
1980
* This function is used to restrict the number of processors in a global
1981
* array that actually contain data. It can also be used to rearrange the
1982
* layout of data on a processor from the default distribution. Only the
1983
* processes listed in list[] will actually contain data, the remaining
1984
* processes will be able to see the data in the global array but they will
1985
* not contain any of the global array data locally.
1987
* @param[in] list list of processors that should contain data
1988
* @param[in] nprocs number of processors in list
1991
void setRestricted(int list[], int nprocs) const;
1994
* This function is used to restrict the number of processors in a global
1995
* array that actually contain data. Only the processors in the range
1996
* [lo_proc:hi_proc] (inclusive) will actually contain data, the remaining
1997
* processes will be able to see the data in the global array but they will
1998
* not contain any of the global array data locally.
2000
* @param[in] lo_proc low end of processor range
2001
* @param[in] hi_proc high end of processor range
2003
void setRestrictedRange(int lo_proc, int hi_proc) const;
2006
* Performs one of the matrix-matrix operations:
2008
* C := alpha*op( A )*op( B ) + beta*C,
2009
* where op( X ) is one of
2010
* op( X ) = X or op( X ) = X',
2011
* alpha and beta are scalars, and A, B and C are matrices, with op( A )
2012
* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
2013
* On entry, ta specifies the form of op( A ) to be used in the
2014
* matrix multiplication as follows:
2016
* ta = 'N' or 'n', op( A ) = A.
2018
* ta = 'T' or 't', op( A ) = A'.
2020
* This is a collective operation.
2022
* @param[in] g_a handles to input arrays
2023
* @param[in] g_b handles to input arrays
2024
* @param[in] ta transpose operators
2025
* @param[in] tb transpose operators
2026
* @param[in] m number of rows of op(A) and of matrix C
2027
* @param[in] n number of columns of op(B) and of matrix C
2028
* @param[in] k number of columns of op(A) and rows of matrix op(B)
2029
* @param[in] alpha scale factors
2030
* @param[in] beta scale factors
2033
void sgemm(char ta, char tb, int m, int n, int k, float alpha,
2034
const GlobalArray *g_a, const GlobalArray *g_b, float beta) const;
2037
* @copydoc GlobalArray::sgemm(char,char,int,int,int,float,const GlobalArray*,const GlobalArray*,float)const
2039
void sgemm(char ta, char tb, int64_t m, int64_t n, int64_t k, float alpha,
2040
const GlobalArray *g_a, const GlobalArray *g_b, float beta) const;
2043
* Solves a system of linear equations
2045
* It first will call the Cholesky factorization routine and, if
2046
* successful, will solve the system with the Cholesky solver. If
2047
* Cholesky is not able to factorize A, then it will call the
2048
* LU factorization routine and will solve the system with forward/backward
2049
* substitution. On exit B will contain the solution X.
2051
* This is a collective operation.
2053
* @param[in] g_a coefficient matrix
2055
* @return = 0 : Cholesky factorization was successful\n
2056
* > 0 : the leading minor of this order
2057
* is not positive definite, Cholesky factorization
2058
* could not be completed and LU factorization was used
2060
int solve(const GlobalArray * g_a) const;
2063
* It computes the inverse of a double precision matrix using the Cholesky
2064
* factorization of a NxN double precision symmetric positive definite
2065
* matrix A stored in the global array represented by g_a. On successful
2066
* exit, A will contain the inverse.
2068
* This is a collective operation.
2070
* @return = 0 : successful exit\n
2071
* > 0 : the leading minor of this order is not positive
2072
* definite and the factorization could not be completed\n
2073
* < 0 : it returns the index i of the (i,i)
2074
* element of the factor L/U that is zero and
2075
* the inverse could not be computed
2077
int spdInvert() const;
2080
* This operation is the same as "acc", except that the values
2081
* corresponding to dimension n in buf are accumulated to every skip[n]
2082
* values of the global array.
2084
* This is a one-sided operation.
2086
* @param[in] lo [ndim] array of starting indices for global array section
2087
* @param[in] hi [ndim] array of ending indices for global array section
2088
* @param[in] skip [ndim] array of strides for each dimension
2089
* @param[in] buf pointer to local buffer array where data comes from
2090
* @param[in] ld [ndim-1] array specifying leading
2091
* dimensions/strides/extents for buffer array
2092
* @param[in] alpha double/DoubleComplex/long scale factor
2094
void stridedAcc(int lo[], int hi[], int skip[], void*buf, int ld[], void *alpha) const;
2097
* @copydoc GlobalArray::stridedAcc(int[],int[],int[],void*,int[],void*)const
2099
void stridedAcc(int64_t lo[], int64_t hi[], int64_t skip[], void*buf, int64_t ld[], void *alpha) const;
2102
* This operation is the same as "get", except that the values
2103
* corresponding to dimension n in buf are taken from every skip[n]
2104
* values of the global array.
2106
* This is a one-sided operation.
2108
* @param[in] lo [ndim] array of starting indices for global array section
2109
* @param[in] hi [ndim] array of ending indices for global array section
2110
* @param[in] skip [ndim] array of strides for each dimension
2111
* @param[out] buf pointer to local buffer array where data goes
2112
* @param[in] ld [ndim-1] array specifying leading
2113
* dimensions/strides/extents for buffer array
2115
void stridedGet(int lo[], int hi[], int skip[], void*buf, int ld[]) const;
2118
* @copydoc GlobalArray::stridedGet(int[],int[],int[],void*,int[])const
2120
void stridedGet(int64_t lo[], int64_t hi[], int64_t skip[], void*buf, int64_t ld[]) const;
2123
* This operation is the same as "put", except that the values
2124
* corresponding to dimension n in buf are copied to every skip[n]
2125
* values of the global array.
2127
* This is a one-sided operation.
2129
* @param[in] lo [ndim] array of starting indices for global array section
2130
* @param[in] hi [ndim] array of ending indices for global array section
2131
* @param[in] skip [ndim] array of strides for each dimension
2132
* @param[in] buf pointer to local buffer array where data comes from
2133
* @param[in] ld [ndim-1] array specifying leading
2134
* dimensions/strides/extents for buffer array
2136
void stridedPut(int lo[], int hi[], int skip[], void*buf, int ld[]) const;
2139
* @copydoc GlobalArray::stridedPut(int[],int[],int[],void*,int[])const
2141
void stridedPut(int64_t lo[], int64_t hi[], int64_t skip[], void*buf, int64_t ld[]) const;
2144
* Prints info about allocated arrays.
2146
* @param[in] verbose If true print distribution info
2148
void summarize(int verbose) const;
2151
* Symmetrizes matrix A with handle A:=.5 * (A+A').
2153
* This is a collective operation
2155
void symmetrize() const;
2158
* This function returns the total number of blocks contained in a global
2159
* array with a block-cyclic data distribution.
2161
* This is a local operation.
2163
* @return number of blocks contained in this block-cyclic distribution
2165
int totalBlocks() const;
2168
* Transposes a matrix: B = A', where A and B are represented by
2169
* handles g_a and g_b [say, g_b.transpose(g_a);].
2171
* This is a collective operation.
2173
* @param[in] g_a GlobalArray to transpose and assign to this GlobalArray
2175
void transpose(const GlobalArray * g_a) const;
2178
* The unpack subroutine is designed to expand the values in the source
2179
* vector g_src into a larger destination array g_dest based on the values
2180
* in an integer mask array g_mask. The values lo and hi denote the range
2181
* of elements that should be compressed and icount is a variable that on
2182
* output lists the number of values placed in the uncompressed array. This
2183
* operation is the complement of the pack operation. An example is
2186
* g_src->unpack(g_dest, g_mask, 1, n, &icount);
2187
* g_src: 1 7 9 12 15 16
2188
* g_mask: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
2189
* g_dest: 1 0 0 0 0 0 7 0 9 0 0 12 0 0 15 16 0
2192
* This is a collective operation.
2194
* @param[out] g_dest handle for destination array
2195
* @param[in] g_mask handle for integer array representing mask
2196
* @param[in] lo low value of range on which operation is performed
2197
* @param[in] hi high value of range on which operation is performed
2198
* @param[out] icount number of values in uncompressed array
2200
void unpack(GlobalArray *g_dest, GlobalArray *g_mask, int lo, int hi,
2203
* @copydoc GlobalArray::unpack(GlobalArray*,GlobalArray*,int,int,int*)const
2205
void unpack(GlobalArray *g_dest, GlobalArray *g_mask,
2206
int64_t lo, int64_t hi, int64_t *icount) const;
2209
* This call updates the ghost cell regions on each processor with the
2210
* corresponding neighbor data from other processors. The operation assumes
2211
* that all data is wrapped around using periodic boundary data so that
2212
* ghost cell data that goes beyond an array boundary is wrapped around to
2213
* the other end of the array. The updateGhosts call contains two
2214
* sync calls before and after the actual update operation. For some
2215
* applications these calls may be unnecessary, if so they can be removed
2216
* using the maskSync subroutine.
2218
* This is a collective operation.
2220
void updateGhosts() const;
2223
* This operation is similar to the standard updateGhosts operation except
2224
* that it returns a non-blocking handle after initiating the call. Completion
2225
* of the operation can be guaranteed by calling the NbWait function on the
2226
* handle. Data in the local buffers is then ready for use.
2228
* This is a collective operation.
2230
void updateGhostsNb(GANbhdl *nbhandle) const;
2233
* This function can be used to update the ghost cells along individual
2234
* directions. It is designed for algorithms that can overlap updates
2235
* with computation. The variable dimension indicates which coordinate
2236
* direction is to be updated (e.g. dimension = 1 would correspond to the
2237
* y axis in a two or three dimensional system), the variable idir can take
2238
* the values +/-1 and indicates whether the side that is to be updated lies
2239
* in the positive or negative direction, and cflag indicates whether or not
2240
* the corners on the side being updated are to be included in the update.
2241
* The following calls would be equivalent to a call to updateGhosts
2242
* for a 2-dimensional system:
2244
* status = g_a->updateGhostDir(0,-1,1);\n
2245
* status = g_a->updateGhostDir(0,1,1);\n
2246
* status = g_a->updateGhostDir(1,-1,0);\n
2247
* status = g_a->updateGhostDir(1,1,0);\n
2249
* The variable cflag is set equal to 1 (or non-zero) in the first two
2250
* calls so that the corner ghost cells are updated, it is set equal to 0 in
2251
* the second two calls to avoid redundant updates of the corners. Note
2252
* that updating the ghosts cells using several independent calls to the
2253
* updateGhostDir function is generally not as efficient as using
2254
* updateGhosts unless the individual calls can be effectively overlapped
2257
* This is a collective operation.
2259
* @param[in] dimension array dimension that is to be updated
2260
* @param[in] idir direction of update (+/- 1)
2261
* @param[in] cflag flag (0/1) to include corners in update
2263
int updateGhostDir(int dimension, int idir, int cflag) const;
2266
* This operation is designed to extract ghost cell data from a global array
2267
* and copy it to a local array. If the request can be satisfied using
2268
* completely local data, then a local copy will be used. Otherwise, the
2269
* method calls periodicGet. The request can be satisfied locally if
2270
* lo is greater than or equal to the lower bound of data held on the
2271
* processor minus the ghost cell width and hi is less than or equal to the
2272
* upper bound of data held on the processor plus the ghost cell width. Cell
2273
* indices using the global address space should be used for lo and hi. These
2274
* may exceed the global array dimensions.
2276
* @param[in] lo [ndim] array of starting indices for global array section
2277
* @param[in] hi [ndim] array of ending indices for global array section
2278
* @param[out] buf pointer to the local buffer array where the data goes
2279
* @param[in] ld [ndim-1] array specifying leading
2280
* dimensions/strides/extents for buffer array
2282
void getGhostBlock(int lo[], int hi[], void *buf, int ld[]) const;
2285
* @copydoc GlobalArray::getGhostBlock(int[],int[],void*,int[])const
2287
void getGhostBlock(int64_t lo[], int64_t hi[], void *buf, int64_t ld[]) const;
2290
* Computes element-wise dot product of the two arrays which must be of
2291
* the same types and same number of elements.
2293
* This is a collective operation.
2295
* @param[in] g_a array handle
2297
* @return value = SUM_ij a(i,j)*b(i,j)
2299
DoubleComplex zdot(const GlobalArray * g_a) const;
2302
* Computes the element-wise dot product of the two (possibly transposed)
2303
* patches which must be of the same type and have the same number of
2306
* @param[in] ta transpose flags
2307
* @param[in] alo g_a patch coordinates
2308
* @param[in] ahi g_a patch coordinates
2309
* @param[in] g_a global array
2310
* @param[in] tb transpose flags
2311
* @param[in] blo g_b patch coordinates
2312
* @param[in] bhi g_b patch coordinates
2315
DoubleComplex zdotPatch(char ta, int alo[], int ahi[],
2316
const GlobalArray * g_a, char tb, int blo[],
2320
* @copydoc GlobalArray::zdotPatch(char,int[],int[],const GlobalArray*,char,int[],int[])const
2322
DoubleComplex zdotPatch(char ta, int64_t alo[], int64_t ahi[],
2323
const GlobalArray * g_a, char tb, int64_t blo[],
2324
int64_t bhi[]) const;
2327
* Sets value of all elements in the array to zero.
2329
* This is a collective operation.
2334
* Set all the elements in the patch to zero.
2335
* This is a collective operation.
2340
void zeroPatch (int lo[], int hi[]) const;
2343
* @copydoc GlobalArray::zeroPatch(int[],int[])const
2345
void zeroPatch (int64_t lo[], int64_t hi[]) const;
2348
* Performs one of the matrix-matrix operations:
2349
* C := alpha*op( A )*op( B ) + beta*C,
2350
* where op( X ) is one of
2351
* op( X ) = X or op( X ) = X',
2352
* alpha and beta are scalars, and A, B and C are matrices, with op( A )
2353
* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
2354
* On entry, ta specifies the form of op( A ) to be used in the
2355
* matrix multiplication as follows:
2357
* ta = 'N' or 'n', op( A ) = A.
2359
* ta = 'T' or 't', op( A ) = A'.
2361
* This is a collective operation.
2363
* @param[in] g_a handles to input arrays
2364
* @param[in] g_b handles to input arrays
2365
* @param[in] ta transpose operators
2366
* @param[in] tb transpose operators
2367
* @param[in] m number of rows of op(A) and of matrix C
2368
* @param[in] n number of columns of op(B) and of matrix C
2369
* @param[in] k number of columns of op(A) and rows of matrix op(B)
2370
* @param[in] alpha scale factors
2371
* @param[in] beta scale factors
2373
void zgemm(char ta, char tb, int m, int n, int k, DoubleComplex alpha,
2374
const GlobalArray *g_a, const GlobalArray *g_b,
2375
DoubleComplex beta) const;
2378
* @copydoc GlobalArray::zgemm(char,char,int,int,int,DoubleComplex,const GlobalArray*,const GlobalArray*,DoubleComplex)const
2380
void zgemm(char ta, char tb, int64_t m, int64_t n, int64_t k, DoubleComplex alpha,
2381
const GlobalArray *g_a, const GlobalArray *g_b,
2382
DoubleComplex beta) const;
2384
/* New additional functionalities from Limin. */
2387
* Take element-wise absolute value of the array.
2389
* This is a collective operation.
2391
void absValue() const;
2394
* Take element-wise absolute value of the patch.
2396
* This is a collective operation.
2398
* @param[in] lo patch coordinates
2399
* @param[in] hi patch coordinates
2401
void absValuePatch(int *lo, int *hi) const;
2404
* @copydoc GlobalArray::absValuePatch(int*,int*)const
2406
void absValuePatch(int64_t *lo, int64_t *hi) const;
2409
* Add the constant pointed to by alpha to each element of the array.
2411
* This is a collective operation.
2413
* @param[in] alpha double/complex/int/long/float
2415
void addConstant(void* alpha) const;
2418
* Add the constant pointed to by alpha to each element of the patch.
2420
* This is a collective operation.
2422
* @param[in] lo g_a patch coordinates
2423
* @param[in] hi g_a patch coordinates
2424
* @param[in] alpha double/complex/int/long/float
2426
void addConstantPatch(int *lo, int *hi, void *alpha) const;
2429
* @copydoc GlobalArray::addConstantPatch(int*,int*,void*)const
2431
void addConstantPatch(int64_t *lo, int64_t *hi, void *alpha) const;
2434
* Take element-wise reciprocal of the array.
2436
* This is a collective operation.
2441
* Take element-wise reciprocal of the patch.
2443
* This is a collective operation.
2445
* @param[in] lo patch coordinates
2446
* @param[in] hi patch coordinates
2448
void recipPatch(int *lo, int *hi) const;
2451
* @copydoc GlobalArray::recipPatch(int*,int*)const
2453
void recipPatch(int64_t *lo, int64_t *hi) const;
2456
* Computes the element-wise product of the two arrays
2457
* which must be of the same types and same number of
2458
* elements. For two-dimensional arrays,
2460
* c(i, j) = a(i,j)*b(i,j)
2462
* The result (c) may replace one of the input arrays (a/b).
2463
* This is a collective operation.
2465
* @param[in] g_a GlobalArray
2466
* @param[in] g_b GlobalArray
2468
void elemMultiply(const GlobalArray * g_a, const GlobalArray * g_b) const;
2471
* Computes the element-wise product of the two patches
2472
* which must be of the same types and same number of
2473
* elements. For two-dimensional arrays,
2475
* c(i, j) = a(i,j)*b(i,j)
2477
* The result (c) may replace one of the input arrays (a/b).
2479
* This is a collective operation.
2481
* @param[in] g_a global array
2482
* @param[in] g_b global array
2483
* @param[in] alo g_a patch coordinates
2484
* @param[in] ahi g_a patch coordinates
2485
* @param[in] blo g_b patch coordinates
2486
* @param[in] bhi g_b patch coordinates
2487
* @param[in] clo g_c patch coordinates
2488
* @param[in] chi g_c patch coordinates
2490
void elemMultiplyPatch(const GlobalArray * g_a,int *alo,int *ahi,
2491
const GlobalArray * g_b,int *blo,int *bhi,
2492
int *clo,int *chi) const;
2494
* @copydoc GlobalArray::elemMultiplyPatch(const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,int*,int*)const
2496
void elemMultiplyPatch(const GlobalArray * g_a,int64_t *alo,int64_t *ahi,
2497
const GlobalArray * g_b,int64_t *blo,int64_t *bhi,
2498
int64_t *clo,int64_t *chi) const;
2501
* Computes the element-wise quotient of the two arrays
2502
* which must be of the same types and same number of
2503
* elements. For two-dimensional arrays,
2505
* c(i, j) = a(i,j)/b(i,j)
2507
* The result (c) may replace one of the input arrays (a/b). If one of
2508
* the elements of array g_b is zero, the quotient for the element of g_c
2509
* will be set to GA_NEGATIVE_INFINITY.
2511
* This is a collective operation.
2513
* @param[in] g_a global array
2514
* @param[in] g_b global array
2516
void elemDivide(const GlobalArray * g_a, const GlobalArray * g_b) const;
2519
* Computes the element-wise quotient of the two patches
2520
* which must be of the same types and same number of
2521
* elements. For two-dimensional arrays,
2523
* c(i, j) = a(i,j)/b(i,j)
2525
* The result (c) may replace one of the input arrays (a/b).
2527
* This is a collective operation.
2529
* @param[in] g_a global array
2530
* @param[in] g_b global array
2531
* @param[in] alo g_a patch coordinates
2532
* @param[in] ahi g_a patch coordinates
2533
* @param[in] blo g_b patch coordinates
2534
* @param[in] bhi g_b patch coordinates
2535
* @param[in] clo g_c patch coordinates
2536
* @param[in] chi g_c patch coordinates
2538
void elemDividePatch(const GlobalArray * g_a,int *alo,int *ahi,
2539
const GlobalArray * g_b,int *blo,int *bhi,
2540
int *clo,int *chi) const;
2542
* @copydoc GlobalArray::elemDividePatch(const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,int*,int*)const
2544
void elemDividePatch(const GlobalArray * g_a,int64_t *alo,int64_t *ahi,
2545
const GlobalArray * g_b,int64_t *blo,int64_t *bhi,
2546
int64_t *clo,int64_t *chi) const;
2549
* Computes the element-wise maximum of the two arrays
2550
* which must be of the same types and same number of
2551
* elements. For two dimensional arrays,
2553
* c(i, j) = max{a(i,j), b(i,j)}
2555
* The result (c) may replace one of the input arrays (a/b).
2557
* This is a collective operation.
2559
* @param[in] g_a global array
2560
* @param[in] g_b global array
2562
void elemMaximum(const GlobalArray * g_a, const GlobalArray * g_b) const;
2565
* Computes the element-wise maximum of the two patches
2566
* which must be of the same types and same number of
2567
* elements. For two-dimensional noncomplex arrays,
2569
* c(i, j) = max{a(i,j), b(i,j)}
2571
* If the data type is complex, then
2572
* c(i, j).real = max{ |a(i,j)|, |b(i,j)|} while c(i,j).image = 0.
2574
* The result (c) may replace one of the input arrays (a/b).
2576
* This is a collective operation.
2578
* @param[in] g_a global array
2579
* @param[in] g_b global array
2580
* @param[in] alo g_a patch coordinates
2581
* @param[in] ahi g_a patch coordinates
2582
* @param[in] blo g_b patch coordinates
2583
* @param[in] bhi g_b patch coordinates
2584
* @param[in] clo g_c patch coordinates
2585
* @param[in] chi g_c patch coordinates
2587
void elemMaximumPatch(const GlobalArray * g_a,int *alo,int *ahi,
2588
const GlobalArray * g_b,int *blo,int *bhi,
2589
int *clo,int *chi) const;
2591
* @copydoc GlobalArray::elemMaximumPatch(const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,int*,int*)const
2593
void elemMaximumPatch(const GlobalArray * g_a,int64_t *alo,int64_t *ahi,
2594
const GlobalArray * g_b,int64_t *blo,int64_t *bhi,
2595
int64_t *clo,int64_t *chi) const;
2598
* Computes the element-wise minimum of the two arrays
2599
* which must be of the same types and same number of
2600
* elements. For two dimensional arrays,
2602
* c(i, j) = min{a(i,j), b(i,j)}
2604
* The result (c) may replace one of the input arrays (a/b).
2606
* This is a collective operation.
2608
* @param[in] g_a global array
2609
* @param[in] g_b global array
2611
void elemMinimum(const GlobalArray * g_a, const GlobalArray * g_b) const;
2614
* Computes the element-wise minimum of the two patches
2615
* which must be of the same types and same number of
2616
* elements. For two-dimensional noncomplex arrays,
2618
* c(i, j) = min{a(i,j), b(i,j)}
2620
* If the data type is complex, then
2621
* c(i, j).real = min{ |a(i,j)|, |b(i,j)|} while c(i,j).image = 0.
2623
* The result (c) may replace one of the input arrays (a/b).
2625
* This is a collective operation.
2627
* @param[in] g_a global array
2628
* @param[in] g_b global array
2629
* @param[in] alo g_a patch coordinates
2630
* @param[in] ahi g_a patch coordinates
2631
* @param[in] blo g_b patch coordinates
2632
* @param[in] bhi g_b patch coordinates
2633
* @param[in] clo g_c patch coordinates
2634
* @param[in] chi g_c patch coordinates
2636
void elemMinimumPatch(const GlobalArray * g_a,int *alo,int *ahi,
2637
const GlobalArray * g_b,int *blo,int *bhi,
2638
int *clo,int *chi) const;
2641
* @copydoc GlobalArray::elemMinimumPatch(const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,int*,int*)const
2643
void elemMinimumPatch(const GlobalArray * g_a, int64_t *alo, int64_t *ahi,
2644
const GlobalArray * g_b, int64_t *blo, int64_t *bhi,
2645
int64_t *clo, int64_t *chi) const;
2648
* Calculates the largest multiple of a vector g_b that can be added
2649
* to this vector g_a while keeping each element of this vector
2652
* This is a collective operation.
2654
* @param[in] g_b global array where g_b is the step direction.
2655
* @param[out] step the maximum step
2657
void stepMax(const GlobalArray * g_b, double *step) const;
2660
* @copydoc GlobalArray::stepMax(const GlobalArray*,double*)const
2661
* @param[in] alo g_a patch coordinates
2662
* @param[in] ahi g_a patch coordinates
2663
* @param[in] blo g_b patch coordinates
2664
* @param[in] bhi g_b patch coordinates
2666
void stepMaxPatch(int *alo, int *ahi,
2667
const GlobalArray * g_b, int *blo, int *bhi,
2668
double *step) const;
2670
* @copydoc GlobalArray::stepMaxPatch(int*,int*,const GlobalArray*,int*,int*,double*)const
2672
void stepMaxPatch(int64_t *alo, int64_t *ahi,
2673
const GlobalArray * g_b, int64_t *blo, int64_t *bhi,
2674
double *step) const;
2676
/** Matrix Operations */
2679
* Adds this constant to the diagonal elements of the matrix.
2681
* This is a collective operation.
2683
* @param[in] c double/complex/int/long/float constant to add
2685
void shiftDiagonal(void *c) const;
2688
* Sets the diagonal elements of this matrix g_a with the elements of the
2691
* This is a collective operation.
2693
* @param[in] g_v global array
2695
void setDiagonal(const GlobalArray * g_v) const;
2698
* Sets the diagonal elements of this matrix g_a with zeros.
2700
* This is a collective operation.
2702
void zeroDiagonal() const;
2705
* Adds the elements of the vector g_v to the diagonal of this matrix g_a.
2707
* This is a collective operation.
2709
* @param[in] g_v global array
2711
void addDiagonal(const GlobalArray * g_v) const;
2714
* Inserts the diagonal elements of this matrix g_a into the vector g_v.
2716
* This is a collective operation.
2718
* @param[in] g_a global array
2720
void getDiagonal(const GlobalArray * g_a) const;
2723
* Scales the rows of this matrix g_a using the vector g_v.
2725
* This is a collective operation.
2727
* @param[in] g_v global array
2729
void scaleRows(const GlobalArray * g_v) const;
2732
* Scales the columns of this matrix g_a using the vector g_v.
2734
* This is a collective operation.
2736
* @param[in] g_v global array
2738
void scaleCols(const GlobalArray * g_v) const;
2741
* Computes the 1-norm of the matrix or vector g_a.
2743
* This is a collective operation.
2745
* @param[out] nm matrix/vector 1-norm value
2747
void norm1(double *nm) const;
2750
* Computes the infinity-norm of the matrix or vector g_a.
2752
* This is a collective operation.
2754
* @param[out] nm matrix/vector infinity-norm value
2756
void normInfinity(double *nm) const;
2759
* Computes the componentwise Median of three arrays g_a, g_b, and g_c, and
2760
* stores the result in this array g_m. The result (m) may replace one of
2761
* the input arrays (a/b/c).
2763
* This is a collective operation.
2765
* @param[in] g_a global array
2766
* @param[in] g_b global array
2767
* @param[in] g_c global array
2769
void median(const GlobalArray * g_a, const GlobalArray * g_b,
2770
const GlobalArray * g_c) const;
2773
* Computes the componentwise Median of three patches g_a, g_b, and g_c, and
2774
* stores the result in this patch g_m. The result (m) may replace one of
2775
* the input patches (a/b/c).
2777
* This is a collective operation.
2779
* @param[in] g_a global array
2780
* @param[in] g_b global array
2781
* @param[in] g_c global array
2782
* @param[in] alo g_a patch coordinates
2783
* @param[in] ahi g_a patch coordinates
2784
* @param[in] blo g_b patch coordinates
2785
* @param[in] bhi g_b patch coordinates
2786
* @param[in] clo g_c patch coordinates
2787
* @param[in] chi g_c patch coordinates
2788
* @param[in] mlo g_m patch coordinates
2789
* @param[in] mhi g_m patch coordinates
2791
void medianPatch(const GlobalArray * g_a, int *alo, int *ahi,
2792
const GlobalArray * g_b, int *blo, int *bhi,
2793
const GlobalArray * g_c, int *clo, int *chi,
2794
int *mlo, int *mhi) const;
2796
* @copydoc GlobalArray::medianPatch(const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,const GlobalArray*,int*,int*,int*,int*)const
2798
void medianPatch(const GlobalArray * g_a, int64_t *alo, int64_t *ahi,
2799
const GlobalArray * g_b, int64_t *blo, int64_t *bhi,
2800
const GlobalArray * g_c, int64_t *clo, int64_t *chi,
2801
int64_t *mlo, int64_t *mhi) const;
2803
/**
 * Assignment operator.
 *
 * NOTE(review): presumably makes this object refer to (or copy) the array
 * represented by g_a -- confirm against the implementation.
 *
 * @param[in] g_a GlobalArray on the right-hand side of the assignment
 *
 * @return reference to a GlobalArray (presumably *this -- confirm)
 */
GlobalArray& operator=(const GlobalArray &g_a);
2804
/**
 * Equality comparison with another GlobalArray.
 *
 * The result is returned as an int rather than a bool.
 * NOTE(review): presumably non-zero means the two arrays compare equal --
 * confirm against the implementation.
 *
 * @param[in] g_a GlobalArray to compare against
 *
 * @return integer comparison result
 */
int operator==(const GlobalArray &g_a) const;
2805
/**
 * Inequality comparison with another GlobalArray.
 *
 * The result is returned as an int rather than a bool.
 * NOTE(review): presumably non-zero means the two arrays differ --
 * confirm against the implementation.
 *
 * @param[in] g_a GlobalArray to compare against
 *
 * @return integer comparison result
 */
int operator!=(const GlobalArray &g_a) const;
2808
int mHandle; /**< g_a handle (integer handle of the underlying global array) */
2813
#endif /* _GLOBALARRAY_H */