1
#cython: embedsignature=True
2
"""The Global Arrays (GA) Python interface.
4
This module exports the GA C API, with a few enhancements. The notable
5
exceptions include supporting Pythonic ranges. The ranges here are half-open
6
e.g. [0,n) instead of in the C API where they are closed e.g. [0,n]. This
7
follows the Python convention.
10
# keep the ga functions alphabetical since this is going to be a huge file!
12
from libc.stdlib cimport malloc,free
22
cdef bint _initialized = False
25
# GA element-type identifiers.  Each one is a fixed offset from TYPE_BASE;
# the offsets are consecutive, with C_LONGLONG appended last at offset 16.
C_CHAR = TYPE_BASE + 0
C_INT = TYPE_BASE + 1
C_LONG = TYPE_BASE + 2
C_FLOAT = TYPE_BASE + 3
C_DBL = TYPE_BASE + 4
C_LDBL = TYPE_BASE + 5
C_SCPL = TYPE_BASE + 6
C_DCPL = TYPE_BASE + 7
C_LDCPL = TYPE_BASE + 8
F_BYTE = TYPE_BASE + 9
F_INT = TYPE_BASE + 10
F_LOG = TYPE_BASE + 11
F_REAL = TYPE_BASE + 12
F_DBL = TYPE_BASE + 13
F_SCPL = TYPE_BASE + 14
F_DCPL = TYPE_BASE + 15
C_LONGLONG = TYPE_BASE + 16
46
C_CHAR: np.dtype(np.int8),
47
C_INT: np.dtype(np.int32),
48
C_LONG: np.dtype(np.int64),
49
C_LONGLONG: np.dtype(np.int64),
50
C_FLOAT: np.dtype(np.float32),
51
C_DBL: np.dtype(np.float64),
52
C_SCPL: np.dtype(np.complex64),
53
C_DCPL: np.dtype(np.complex128),
55
# numpy doesn't always have these types depending on the system
56
cdef bint float128_in_np = ('float128' in dir(np))
57
cdef bint complex256_in_np = ('complex256' in dir(np))
59
_to_dtype[C_LDBL] = np.dtype(np.float128)
61
_to_dtype[C_LDCPL] = np.dtype(np.complex256)
63
#############################################################################
65
#############################################################################
67
def dtype(int gatype):
    """Converts the given GA type to a numpy dtype.

    Parameters
    ----------
    gatype : int
        a GA type identifier; it is looked up in the module-level
        _to_dtype table

    :returns: the numpy dtype stored in _to_dtype for gatype.

    :raises ValueError: if gatype is not a key of _to_dtype.

    """
    if gatype in _to_dtype:
        return _to_dtype[gatype]
    # Call-style raise: the old "raise E, msg" statement form is rejected
    # when the module is compiled with Python 3 semantics.
    raise ValueError("%d was not a recognized GA type" % gatype)
73
def inquire_dtype(int g_a):
74
"""Returns the numpy dtype of the given GA."""
75
gatype = inquire_type(g_a)
78
cdef inline void* _gapy_malloc(size_t bytes, int align, char *name):
79
"""Wrapper around C stdlib malloc()."""
82
cdef inline void _gapy_free(void *ptr):
83
"""Wrapper around C stdlib free()."""
86
cdef inline np.ndarray[np.int32_t, ndim=1] _inta32(array_like):
87
"""Converts an integer array-like to an ndarray of 32bit integers.
89
Functions which take a dimension shape or subscript can use this to
90
convert what the user passes to a numpy.ndarray using numpy.asarray.
92
As a convenience, single values can be passed as well.
95
array_like : integer array-like
97
:returns: The converted array_like to an ndarray.
100
cdef np.ndarray[np.int32_t, ndim=1] array_like_nd
102
array_like_nd = np.asarray(array_like, dtype=np.int32)
103
except ValueError: # try again in case array_like is a single value
104
array_like_nd = np.asarray([array_like], dtype=np.int32)
107
cdef inline np.ndarray[np.int64_t, ndim=1] _inta64(array_like):
108
"""Converts an integer array-like to an ndarray of 64bit integers.
110
Functions which take a dimension shape or subscript can use this to
111
convert what the user passes to a numpy.ndarray using numpy.asarray.
113
As a convenience, single values can be passed as well.
116
array_like : integer array-like
118
:returns: The converted array_like to an ndarray.
121
cdef np.ndarray[np.int64_t, ndim=1] array_like_nd
123
array_like_nd = np.asarray(array_like, dtype=np.int64)
124
except ValueError: # try again in case array_like is a single value
125
array_like_nd = np.asarray([array_like], dtype=np.int64)
128
cdef inline _lohi(int g_a, lo, hi):
129
"""Converts and/or prepares a lo/hi combination.
131
Functions which take a patch specification can use this to convert the
132
given lo and/or hi into ndarrays using numpy.asarray.
134
* If neither lo nor hi is given, lo is replaced with an array of zeros and
135
hi is replaced with the last index in each dimension (i.e. the shape).
137
* If only lo is given, hi is replaced with lo. In other words, this is
140
* It is an error to specify hi without lo.
146
lower bounds of a slice
148
upper bounds of a slice
150
:returns: The converted lo and hi ndarrays.
153
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
154
cdef int ndim = GA_Ndim(g_a)
155
if lo is None and hi is None:
156
lo_nd = np.zeros((ndim), dtype=np.int64)
157
hi_nd = inquire_dims(g_a)
158
elif lo is not None and hi is None:
161
elif lo is None and hi is not None:
162
raise ValueError, 'lo cannot be None if hi is None'
163
else: # lo and hi are not None
166
if len(lo_nd) != ndim:
167
raise ValueError, 'len(lo_nd) != ndim; len(%s) != %s' % (lo_nd,ndim)
168
if len(hi_nd) != ndim:
169
raise ValueError, 'len(hi_nd) != ndim; len(%s) != %s' % (hi_nd,ndim)
170
# We must make a copy of hi_nd. If user passes in an ndarray, the
171
# following "prep" operation will change the user's 'hi'.
172
#hi_nd -= 1 # <----- don't do that!
173
hi_nd = hi_nd-1 # prep hi for GA's inclusive indexing
176
cdef void* _convert_multiplier(int gtype, value,
177
int *iv, long *lv, long long *llv,
178
float *fv, double *dv, long double *ldv,
179
SingleComplex *fcv, DoubleComplex *dcv):
180
"""Returns the address of an appropriately converted value.
182
Functions which take an alpha/beta/value need to have the value
183
appropriately converted from the (possible) Python type to a C type. Often
184
the GA function takes a void* in this case, so the address of the
185
converted value is returned.
188
cdef float complex pfcv=1.0
189
cdef double complex pdcv=1.0
191
raise ValueError, "cannot convert None"
195
elif gtype == C_LONG:
198
elif gtype == C_LONGLONG:
201
elif gtype == C_FLOAT:
207
elif gtype == C_LDBL and float128_in_np:
210
elif gtype == C_SCPL:
212
fcv[0].real = pfcv.real
213
fcv[0].imag = pfcv.imag
215
elif gtype == C_DCPL:
217
dcv[0].real = pdcv.real
218
dcv[0].imag = pdcv.imag
221
raise TypeError, "type of g_a not recognized"
224
"""Transforms a GA lo,hi combination into a slice list."""
225
return [slice(l,h) for l,h in __builtin__.zip(lo,hi)]
227
#############################################################################
229
#############################################################################
231
def abs_value(int g_a, lo=None, hi=None):
232
"""Take element-wise absolute value of the array or patch.
234
This is a collective operation.
240
lower bound patch coordinates, inclusive
242
higher bound patch coordinates, exclusive
245
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
246
if lo is None and hi is None:
249
lo_nd,hi_nd = _lohi(g_a,lo,hi)
250
GA_Abs_value_patch64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data)
252
def acc(int g_a, buffer, lo=None, hi=None, alpha=None):
    """Combines data from buffer with data in the global array patch.

    The operation performed is

        global array section (lo[],hi[]) += alpha * buffer

    The buffer array is assumed to have the same number of dimensions as
    the global array. If the buffer is not contiguous, a contiguous copy
    will be made.

    This is a one-sided and atomic operation.

    Parameters
    ----------
    g_a : int
        the array handle
    buffer : array-like
        must be contiguous and have same number of elements as patch
    lo : 1D array-like of integers
        lower bound patch coordinates, inclusive
    hi : 1D array-like of integers
        higher bound patch coordinates, exclusive
    alpha : object
        multiplier (converted to appropriate type)

    """
    # All argument handling lives in the shared helper; this public entry
    # point uses its defaults for every optional flag.
    _acc_common(g_a, buffer, lo, hi, alpha)
278
cdef _acc_common(int g_a, buffer, lo=None, hi=None, alpha=None,
279
bint nb=False, bint periodic=False, skip=None):
280
"""Combines data from buffer with data in the global array patch.
282
The local array is assumed to have the same shape as the requested region,
283
or the local array can be 1-dimensional so long as it has the same number
284
of elements as the requested region. Any detected inconsistencies raise a
287
global array section (lo[],hi[]) += alpha * buffer
289
This is a one-sided and atomic operation.
296
should either be 1D and len(buffer)==np.prod(hi-lo), or
297
np.all(buffer.shape == hi-lo) i.e. buffer is 1D and same size as
298
requested region or buffer is the same shape as requested region
300
lower bound patch coordinates, inclusive
302
higher bound patch coordinates, exclusive
304
multiplier (converted to appropriate type)
306
whether the call is non-blocking
308
whether the call is periodic
310
strides for each dimension
316
:returns: None, however if nb=True, the nonblocking handle is returned.
319
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd, ld_nd, shape, skip_nd
320
cdef np.ndarray buffer_nd
321
cdef int gtype=inquire_type(g_a)
324
cdef long long llalpha
327
cdef long double ldalpha
328
cdef SingleComplex fcalpha
329
cdef DoubleComplex dcalpha
330
cdef void *valpha=NULL
331
cdef ga_nbhdl_t nbhandle
332
dtype = _to_dtype[gtype]
333
lo_nd,hi_nd = _lohi(g_a,lo,hi)
334
shape = hi_nd-lo_nd+1
338
skip_nd = _inta64(skip)
339
shape = (hi_nd-lo_nd)/skip_nd+1
340
buffer_nd = np.asarray(buffer, dtype=dtype)
341
if buffer_nd.dtype != dtype:
342
raise ValueError, "buffer is wrong type :: buffer=%s != %s" % (
344
# Due to GA restrictions, buffer must not have negative strides
345
# and buffer's last stride must be same as itemsize
346
strides = [buffer_nd.strides[i]/buffer_nd.itemsize
347
for i in range(buffer_nd.ndim)]
348
if (strides and (strides[-1] != 1 or np.any(np.asarray(strides) < 0))):
349
buffer_nd = np.ascontiguousarray(buffer_nd)
350
# we allow 1-d "flat" buffers in addition to buffers matching the shape of
351
# the requested region
352
if buffer_nd.ndim == 1:
353
if buffer_nd.size != np.prod(shape):
354
raise ValueError, ('buffer size does not match shape :: '
355
'buffer.size=%s != np.prod(shape)=%s' % (
356
buffer_nd.size, np.prod(shape)))
359
buffer_shape = [buffer_nd.shape[i] for i in range(buffer_nd.ndim)]
360
if not np.all(buffer_shape == shape):
361
raise ValueError, ('buffer shape does not match request shape :: '
362
'buffer_shape=%s != shape=%s' % (
363
buffer_shape, shape))
364
ld_nd = np.asarray([strides[i]/strides[i+1]
365
for i in range(buffer_nd.ndim-1)], dtype=np.int64)
368
valpha = _convert_multiplier(gtype, alpha,
369
&ialpha, &lalpha, &llalpha,
370
&falpha, &dalpha, &ldalpha,
373
NGA_NbAcc64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
374
<void*>buffer_nd.data, <int64_t*>ld_nd.data, valpha, &nbhandle)
377
NGA_Periodic_acc64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
378
<void*>buffer_nd.data, <int64_t*>ld_nd.data, valpha)
379
elif skip is not None:
380
NGA_Strided_acc64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
381
<int64_t*>skip_nd.data,
382
<void*>buffer_nd.data, <int64_t*>ld_nd.data, valpha)
384
NGA_Acc64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
385
<void*>buffer_nd.data, <int64_t*>ld_nd.data, valpha)
387
def access(int g_a, lo=None, hi=None, int proc=-1):
388
"""Returns local array patch.
390
This routine allows to access directly, in place elements in the local
391
section of a global array. It is useful for writing new GA operations.
392
If no patch is specified, the entire local patch is returned. If this
393
process does not own any data, None is returned.
395
Note: The entire local data is always accessed, but if a smaller patch is
396
requested, an appropriately sliced ndarray is returned.
398
If proc is not specified, then ga.nodeid() is used.
400
Each call to ga.access has to be followed by a call to either ga.release
401
or ga.release_update. You can access in this fashion only local data.
402
Since the data is shared with other processes, you need to consider issues
405
This operation is local.
411
lower bound patch coordinates, inclusive
413
higher bound patch coordinates, exclusive
415
defaults to ga.nodeid(), but can specify a proc within the same
416
SMP node to access its data instead
418
:returns: ndarray representing local patch
421
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
422
cdef np.ndarray[np.int64_t, ndim=1] ld_nd, lo_dst, hi_dst, dims_nd
423
cdef int i, gtype=inquire_type(g_a)
424
cdef int dimlen=GA_Ndim(g_a), typenum=_to_dtype[gtype].num
426
cdef np.npy_intp *dims = NULL
429
# first things first, if no data is owned, return None
430
lo_dst,hi_dst = distribution(g_a, proc)
431
if lo_dst[0] < 0 or hi_dst[0] < 0:
433
# always access the entire local data
434
ld_nd = np.zeros(dimlen-1, dtype=np.int64)
435
# put hi_dst back to GA inclusive indexing convention
437
NGA_Access64(g_a, <int64_t*>lo_dst.data, <int64_t*>hi_dst.data, &ptr,
438
<int64_t*>ld_nd.data)
439
# put hi_dst back to Python exclusive indexing convention
441
dims_nd = hi_dst-lo_dst
442
# must convert int64_t ndarray shape to npy_intp array
443
dims = <np.npy_intp*>malloc(dimlen*sizeof(np.npy_intp))
444
for i in range(dimlen):
446
array = np.PyArray_SimpleNewFromData(dimlen, dims, typenum, ptr)
448
if lo is not None or hi is not None:
458
if np.sometrue(lo_nd>hi_nd):
459
raise ValueError,"lo>hi lo=%s hi=%s"%(lo_nd,hi_nd)
460
if np.sometrue(lo_nd<lo_dst):
461
raise ValueError,"lo out of bounds lo_dst=%s lo=%s"%(lo_dst,lo_nd)
462
if np.sometrue(hi_nd>hi_dst):
463
raise ValueError,"hi out of bounds hi_dst=%s hi=%s"%(hi_dst,hi_nd)
465
for i in range(dimlen):
466
slices.append(slice(lo_nd[i]-lo_dst[i],hi_nd[i]-lo_dst[i]))
470
def access_block(int g_a, int idx):
471
"""Returns local array patch for a block-cyclic distribution.
473
This routine allows to access directly, in place elements in the local
474
section of a global array. It is useful for writing new GA operations.
476
Each call to ga.access_block has to be followed by a call to either
477
ga.release_block or ga.release_update_block. You can access in this
478
fashion only local data. Since the data is shared with other processes,
479
you need to consider issues of mutual exclusion.
481
This operation is local.
489
:returns: ndarray representing local block
492
cdef np.ndarray[np.int64_t, ndim=1] ld_nd, lo_dst, hi_dst, dims_nd
493
cdef int i, gtype=inquire_type(g_a)
494
cdef int dimlen=GA_Ndim(g_a), typenum=_to_dtype[gtype].num
496
cdef np.npy_intp *dims = NULL
497
# first things first, if no data is owned, return None
498
lo_dst,hi_dst = distribution(g_a, idx)
499
if lo_dst[0] < 0 or hi_dst[0] < 0:
501
# put hi_dst back to GA inclusive indexing convention
503
# always access the entire local data
504
ld_nd = np.zeros(dimlen-1, dtype=np.int64)
505
NGA_Access_block64(g_a, idx, &ptr, <int64_t*>ld_nd.data)
506
dims_nd = hi_dst-lo_dst+1
507
# must convert int64_t ndarray shape to npy_intp array
508
dims = <np.npy_intp*>malloc(dimlen*sizeof(np.npy_intp))
509
for i in range(dimlen):
511
array = np.PyArray_SimpleNewFromData(dimlen, dims, typenum, ptr)
515
def access_block_grid(int g_a, subscript):
    """Returns local array patch for a SCALAPACK block-cyclic distribution.

    NOTE: this function is a placeholder; it always raises
    NotImplementedError. The text below describes the intended contract.

    The subscript array contains the subscript of the block in the array of
    blocks. This subscript is based on the location of the block in a grid,
    each of whose dimensions is equal to the number of blocks that fit along
    that dimension.

    Each call to ga.access_block_grid has to be followed by a call to either
    ga.release_block_grid or ga.release_update_block_grid. You can access in
    this fashion only local data. Since the data is shared with other
    processes, you need to consider issues of mutual exclusion.

    This operation is local.

    Parameters
    ----------
    g_a : int
        the array handle
    subscript : 1D array-like
        subscript of the block in the array

    :returns: ndarray representing local block

    :raises NotImplementedError: always.

    """
    raise NotImplementedError
541
def access_block_segment(int g_a, int proc):
544
This function can be used to gain access to all the locally held data
545
on a particular processor that is associated with a block-cyclic
548
The data inside this segment has a lot of additional structure so this
549
function is not generally useful to developers. It is primarily used
550
inside the GA library to implement other GA routines. Each call to
551
ga.access_block_segment should be followed by a call to either
552
ga.release_block_segment or ga.release_update_block_segment.
554
This is a local operation.
562
:returns: ndarray representing local block
566
cdef int gtype=inquire_type(g_a)
567
cdef int typenum=_to_dtype[gtype].num
569
cdef np.npy_intp *dims = NULL
570
# always access the entire local data
571
NGA_Access_block_segment64(g_a, proc, &ptr, &elems)
572
# must convert int64_t ndarray shape to npy_intp array
573
dims = <np.npy_intp*>malloc(sizeof(np.npy_intp))
575
array = np.PyArray_SimpleNewFromData(1, dims, typenum, ptr)
579
def access_ghost_element(int g_a, subscript, ld):
    """Returns a scalar ndarray representing the requested ghost element.

    NOTE: this function is a placeholder; it always raises
    NotImplementedError. The text below describes the intended contract.

    This function can be used to return a pointer to any data element in the
    locally held portion of the global array and can be used to directly
    access ghost cell data. The array subscript refers to the local index of
    the element relative to the origin of the local patch (which is assumed
    to be indexed by (0,0,...)).

    This is a local operation.

    Parameters
    ----------
    g_a : int
        the array handle
    subscript : 1D array-like of integers
        index of the desired element
    ld : object
        not described here and unused by the stub; presumably the leading
        dimensions of the local patch -- TODO confirm

    :returns: ndarray scalar representing local block

    :raises NotImplementedError: always.

    """
    raise NotImplementedError
601
def access_ghosts(int g_a):
    """Returns ndarray representing local patch with ghost cells.

    NOTE: this function is a placeholder; it always raises
    NotImplementedError. The text below describes the intended contract.

    This routine will provide access to the ghost cell data residing on each
    processor. Calls to NGA_Access_ghosts should normally follow a call to
    NGA_Distribution that returns coordinates of the visible data patch
    associated with a processor. You need to make sure that the coordinates of
    the patch are valid (test values returned from NGA_Distribution).

    You can only access local data.

    This operation is local.

    Parameters
    ----------
    g_a : int
        the array handle

    :returns: ndarray scalar representing local block with ghost cells

    :raises NotImplementedError: always.

    """
    raise NotImplementedError
623
def add(int g_a, int g_b, int g_c, alpha=None, beta=None, alo=None, ahi=None,
624
blo=None, bhi=None, clo=None, chi=None):
625
"""Element-wise addition of two arrays.
627
The arrays must be the same shape and identically aligned.
628
The result (c) may replace one of the input arrays (a/b).
629
Patches of arrays (which must have the same number of elements) may also
630
be added together element-wise, if patch coordinates are specified.
633
This is a collective operation.
643
multiplier (converted to appropriate type)
645
multiplier (converted to appropriate type)
646
alo : 1D array-like of integers
647
lower bound patch coordinates of g_a, inclusive
648
ahi : 1D array-like of integers
649
higher bound patch coordinates of g_a, exclusive
650
blo : 1D array-like of integers
651
lower bound patch coordinates of g_b, inclusive
652
bhi : 1D array-like of integers
653
higher bound patch coordinates of g_b, exclusive
654
clo : 1D array-like of integers
655
lower bound patch coordinates of g_c, inclusive
656
chi : 1D array-like of integers
657
higher bound patch coordinates of g_c, exclusive
661
cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
662
cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
663
cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
664
cdef int gtype=inquire_type(g_a)
665
cdef int ialpha, ibeta
666
cdef long lalpha, lbeta
667
cdef long long llalpha, llbeta
668
cdef float falpha, fbeta
669
cdef double dalpha, dbeta
670
cdef long double ldalpha, ldbeta
671
cdef SingleComplex fcalpha, fcbeta
672
cdef DoubleComplex dcalpha, dcbeta
673
cdef void *valpha, *vbeta
676
valpha = _convert_multiplier(gtype, alpha,
677
&ialpha, &lalpha, &llalpha,
678
&falpha, &dalpha, &ldalpha,
682
vbeta = _convert_multiplier(gtype, beta,
683
&ibeta, &lbeta, &llbeta,
684
&fbeta, &dbeta, &ldbeta,
686
if (alo is None and ahi is None
687
and blo is None and bhi is None
688
and clo is None and chi is None):
689
GA_Add(valpha, g_a, vbeta, g_b, g_c)
691
alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
692
blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
693
clo_nd,chi_nd = _lohi(g_c,clo,chi)
695
valpha, g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
696
vbeta, g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
697
g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data)
699
def add_constant(int g_a, alpha, lo=None, hi=None):
700
"""Adds the constant alpha to each element of the array.
702
This operation is collective.
708
the constant to add (converted to appropriate type)
709
lo : 1D array-like of integers
710
lower bound patch coordinates, inclusive
711
hi : 1D array-like of integers
712
higher bound patch coordinates, exclusive
715
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
716
cdef int gtype=inquire_type(g_a)
719
cdef long long llalpha
722
cdef long double ldalpha
723
cdef SingleComplex fcalpha
724
cdef DoubleComplex dcalpha
726
valpha = _convert_multiplier(gtype, alpha,
727
&ialpha, &lalpha, &llalpha,
728
&falpha, &dalpha, &ldalpha,
730
if lo is None and hi is None:
731
GA_Add_constant(g_a, valpha)
733
lo_nd,hi_nd = _lohi(g_a,lo,hi)
734
GA_Add_constant_patch64(
735
g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data, valpha)
737
def add_diagonal(int g_a, int g_v):
    """Adds the elements of the vector g_v to the diagonal of matrix g_a.

    This operation is collective.

    Parameters
    ----------
    g_a : int
        the array handle of the matrix
    g_v : int
        the array handle of the vector

    """
    GA_Add_diagonal(g_a, g_v)
751
def allocate(int g_a):
752
"""Allocates memory for the handle obtained using ga.create_handle.
754
At a minimum, the ga.set_data function must be called before the memory is
755
allocated. Other ga.set_xxx functions can also be called before invoking
758
This is a collective operation.
764
:returns: True if allocation of g_a was successful.
767
if GA_Allocate(g_a) == 1:
771
def brdcst(buffer, int root=0):
772
"""Broadcast from process root to all other processes.
774
If the buffer is not contiguous, an error is raised. This operation is
775
provided only for convenience purposes: it is available regardless of the
776
message-passing library that GA is running with.
778
This is a collective operation.
781
buffer : 1D array-like of objects
782
the ndarray message (converted to the appropriate type)
784
the process which is sending
786
:returns: The buffer in case a temporary was passed in.
789
cdef np.ndarray buffer_nd
790
buffer_nd = np.asarray(buffer)
791
if not buffer_nd.flags['C_CONTIGUOUS']:
792
raise ValueError, "the buffer must be contiguous"
793
#if buffer_nd.ndim != 1:
794
# raise ValueError, "the buffer must be one-dimensional"
795
GA_Brdcst(buffer_nd.data, buffer_nd.size*buffer_nd.itemsize, root)
798
def check_handle(int g_a, char *message):
    """Checks that the array handle g_a is valid.

    If not, calls ga.error with the provided string.

    This operation is local.

    Parameters
    ----------
    g_a : int
        the array handle to check
    message : str
        the string passed along with the check

    """
    GA_Check_handle(g_a, message)
808
def cluster_nnodes():
    """Returns the total number of nodes that the program is running on.

    On SMP architectures, this will be less than or equal to the total number
    of processors.

    This is a local operation.

    :returns: the value reported by GA_Cluster_nnodes.

    """
    return GA_Cluster_nnodes()
819
def cluster_nodeid(int proc=-1):
820
"""Returns the node ID of this process or the given process.
822
On SMP architectures with more than one processor per node, several
823
processes may return the same node id.
825
This is a local operation.
833
return GA_Cluster_proc_nodeid(proc)
834
return GA_Cluster_nodeid()
836
def cluster_proc_nodeid(int proc):
    """Returns the node ID of the specified process.

    On SMP architectures with more than one processor per node, several
    processors may return the same node id.

    This is a local operation.

    Parameters
    ----------
    proc : int
        the process whose node ID is requested

    :returns: the value reported by GA_Cluster_proc_nodeid for proc.

    """
    return GA_Cluster_proc_nodeid(proc)
847
def cluster_nprocs(int node):
    """Returns the number of processors available on the given node.

    This is a local operation.

    Parameters
    ----------
    node : int
        the node to query

    :returns: the value reported by GA_Cluster_nprocs for node.

    """
    return GA_Cluster_nprocs(node)
855
def cluster_procid(int node, int proc):
    """Returns the proc ID associated with node and local proc ID.

    If node has N processors, then the value of proc lies between 0 and
    N-1.

    This is a local operation.

    Parameters
    ----------
    node : int
        the node to query
    proc : int
        the local proc ID within that node

    :returns: the value reported by GA_Cluster_procid for (node, proc).

    """
    return GA_Cluster_procid(node, proc)
866
def compare_distr(int g_a, int g_b):
867
"""Compares the distributions of two global arrays.
869
This is a collective operation.
871
:returns: True if distributions are identical and False when they are not
874
if GA_Compare_distr(g_a, g_b) == 0:
878
def copy(int g_a, int g_b, alo=None, ahi=None, blo=None, bhi=None,
880
"""Copies elements from array g_a into array g_b.
882
For the operation over the entire arrays, the arrays must be the same
883
type, shape, and identically aligned. No transpose is allowed in this
886
For patch operations, the patches of arrays may be of different shapes but
887
must have the same number of elements. Patches must be nonoverlapping (if
888
g_a=g_b). Transposes are allowed for patch operations.
890
This is a collective operation.
894
the array handle copying from
896
the array handle copying to
897
alo : 1D array-like of integers
898
lower bound patch coordinates of g_a, inclusive
899
ahi : 1D array-like of integers
900
higher bound patch coordinates of g_a, exclusive
901
blo : 1D array-like of integers
902
lower bound patch coordinates of g_b, inclusive
903
bhi : 1D array-like of integers
904
higher bound patch coordinates of g_b, exclusive
906
whether the transpose operator should be applied True=applied
909
cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
910
cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
912
if alo is None and ahi is None and blo is None and bhi is None:
915
alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
916
blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
921
NGA_Copy_patch64(trans_c,
922
g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
923
g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
925
def create(int gtype, dims, char *name="", chunk=None, int pgroup=-1):
926
"""Creates an n-dimensional array using the regular distribution model.
928
The array can be distributed evenly or not. The control over the
929
distribution is accomplished by specifying chunk (block) size for all or
930
some of array dimensions. For example, for a 2-dimensional array, setting
931
chunk[0]=dim[0] gives distribution by vertical strips (chunk[0]*dims[0]);
932
setting chunk[1]=dim[1] gives distribution by horizontal strips
933
(chunk[1]*dims[1]). Actual chunks will be modified so that they are at
934
least the size of the minimum and each process has either zero or one
935
chunk. Specifying chunk[i] as <1 will cause that dimension to be
938
As a convenience, when chunk is omitted or None, the entire array is
943
the type of the array
944
dims : 1D array-like of integers
947
the name of the array
948
chunk : 1D array-like of integers
951
create array only as part of this processor group
953
:returns: a non-zero array handle means the call was successful.
955
This is a collective operation.
958
cdef np.ndarray[np.int64_t, ndim=1] dims_nd, chunk_nd=None
959
dims_nd = _inta64(dims)
961
pgroup = pgroup_get_default()
963
chunk_nd = _inta64(chunk)
964
return NGA_Create_config64(gtype, len(dims_nd), <int64_t*>dims_nd.data,
965
name, <int64_t*>chunk_nd.data, pgroup)
967
return NGA_Create_config64(gtype, len(dims_nd), <int64_t*>dims_nd.data,
970
def create_ghosts(int gtype, dims, width, char *name="", chunk=None,
972
"""Creates an array with a layer of ghost cells around the visible data.
974
The array can be distributed evenly or not evenly. The control over the
975
distribution is accomplished by specifying chunk (block) size for all or
976
some of the array dimensions. For example, for a 2-dimensional array,
977
setting chunk(1)=dim(1) gives distribution by vertical strips
978
(chunk(1)*dims(1)); setting chunk(2)=dim(2) gives distribution by
979
horizontal strips (chunk(2)*dims(2)). Actual chunks will be modified so
980
that they are at least the size of the minimum and each process has either
981
zero or one chunk. Specifying chunk(i) as <1 will cause that dimension
982
(i-th) to be distributed evenly. The width of the ghost cell layer in
983
each dimension is specified using the array width(). The local data of
984
the global array residing on each processor will have a layer width[n]
985
ghosts cells wide on either side of the visible data along the dimension
990
the type of the array
991
dims : 1D array-like of integers
993
width : 1D array-like of integers
996
the name of the array
997
chunk : 1D array-like of integers
1000
create array only as part of this processor group
1002
:returns: a non-zero array handle means the call was successful.
1004
This is a collective operation.
1007
cdef np.ndarray[np.int64_t, ndim=1] dims_nd, chunk_nd, width_nd
1008
dims_nd = _inta64(dims)
1009
width_nd = _inta64(width)
1011
pgroup = pgroup_get_default()
1013
chunk_nd = _inta64(chunk)
1014
return NGA_Create_ghosts_config64(gtype, len(dims_nd),
1015
<int64_t*>dims_nd.data, <int64_t*>width_nd.data, name,
1016
<int64_t*>chunk_nd.data, pgroup)
1018
return NGA_Create_ghosts_config64(gtype, len(dims_nd),
1019
<int64_t*>dims_nd.data, <int64_t*>width_nd.data, name,
1022
def create_handle():
    """Returns a global array handle that can be used to create a new array.

    The sequence of operations is to begin with a call to ga.create_handle to
    get a new array handle. The attributes of the array, such as dimension,
    size, type, etc. can then be set using successive calls to the ga.set_xxx
    subroutines. When all array attributes have been set, the ga.allocate
    subroutine is called and the global array is actually created and memory
    for it is allocated.

    This is a collective operation.

    :returns: the handle produced by GA_Create_handle.

    """
    return GA_Create_handle()
1037
def create_irreg(int gtype, dims, block, map, char *name="", int pgroup=-1):
1038
"""Creates an array by following the user-specified distribution.
1040
The distribution is specified as a Cartesian product of distributions for
1041
each dimension. The array indices start at 0. For example, the following
1042
figure demonstrates distribution of a 2-dimensional array 8x10 on 6 (or
1043
more) processors. nblock[2]=[3,2], the size of map array is s=5 and array
1044
map contains the following elements map=[0,2,6, 0, 5]. The distribution is
1045
nonuniform because, P1 and P4 get 20 elements each and processors
1046
P0,P2,P3, and P5 only 10 elements each.
1048
This is a collective operation.
1052
the type of the array
1053
dims : 1D array-like of integers
1055
block : 1D array-like of integers
1056
the number of blocks each dimension is divided into
1057
map : 1D array-like of integers
1058
starting index for each block
1059
len(map) == sum of all elements of nblock array
1061
the name of the array
1063
create array only as part of this processor group
1065
:returns: integer handle representing the array; a non-zero value indicates success
1068
cdef np.ndarray[np.int64_t, ndim=1] dims_nd, block_nd, map_nd
1069
dims_nd = _inta64(dims)
1070
block_nd = _inta64(block)
1071
map_nd = _inta64(map)
1073
pgroup = pgroup_get_default()
1074
return NGA_Create_irreg_config64(gtype, len(dims_nd),
1075
<int64_t*>dims_nd.data, name,
1076
<int64_t*>block_nd.data, <int64_t*>map_nd.data, pgroup)
1078
def create_ghosts_irreg(int gtype, dims, width, block, map, char *name="",
1080
"""Creates an array with a layer of ghost cells around the visible data.
1082
The distribution is specified as a Cartesian product of distributions for
1083
each dimension. For example, the following figure demonstrates
1084
distribution of a 2-dimensional array 8x10 on 6 (or more) processors.
1085
nblock(2)=[3,2], the size of map array is s=5 and array map contains the
1086
following elements map=[1,3,7, 1, 6]. The distribution is nonuniform
1087
because, P1 and P4 get 20 elements each and processors P0,P2,P3, and P5
1088
only 10 elements each.
1090
The array width[] is used to control the width of the ghost cell boundary
1091
around the visible data on each processor. The local data of the global
1092
array residing on each processor will have a layer width[n] ghosts cells
1093
wide on either side of the visible data along the dimension n.
1095
This is a collective operation.
1099
the type of the array
1100
dims : 1D array-like of integers
1102
width : 1D array-like of integers
1104
block : 1D array-like of integers
1105
number of blocks each dimension is divided into
1106
map : 1D array-like of integers
1107
starting index for each block
1108
len(map) == sum of all elements of nblock array
1110
the name of the array
1112
create array only as part of this processor group
1114
:returns: a non-zero array handle means the call was successful
1117
cdef np.ndarray[np.int64_t, ndim=1] dims_nd, width_nd, block_nd, map_nd
1118
dims_nd = _inta64(dims)
1119
width_nd = _inta64(width)
1120
block_nd = _inta64(block)
1121
map_nd = _inta64(map)
1123
pgroup = pgroup_get_default()
1124
return NGA_Create_ghosts_irreg_config64(gtype, len(dims_nd),
1125
<int64_t*>dims_nd.data, <int64_t*>width_nd.data, name,
1126
<int64_t*>block_nd.data, <int64_t*>map_nd.data, pgroup)
1128
def create_mutexes(int number):
    """Creates a set containing the number of mutexes.

    Mutex is a simple synchronization object used to protect Critical
    Sections. Only one set of mutexes can exist at a time. Array of mutexes
    can be created and destroyed as many times as needed.

    Mutexes are numbered: 0, ..., number -1.

    This is a collective operation.

    :Parameters:
        number : int
            the number of mutexes to create

    :returns: True on success, False on failure

    """
    # The C routine signals success with the value 1.
    return GA_Create_mutexes(number) == 1
1150
def deregister_type(int type):
    """Removes the data type previously registered using register_type.

    :Parameters:
        type : int
            the data type handle

    :returns: whatever NGA_Deregister_type returns, passed through unchanged

    """
    return NGA_Deregister_type(type)
1160
def destroy(int g_a):
    """Deallocates the array and frees any associated resources.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    """
    GA_Destroy(g_a)
1168
def destroy_mutexes():
    """Destroys the set of mutexes created with ga.create_mutexes.

    This is a collective operation.

    :returns: True if the operation succeeded; False when failed

    """
    # The C routine signals success with the value 1.
    return GA_Destroy_mutexes() == 1
1180
def diag(int g_a, int g_s, int g_v, evalues=None):
    """Solve the generalized eigen-value problem.

    The input matrices are not overwritten or destroyed.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the matrix to diagonalize
        g_s : int
            the array handle of the metric
        g_v : int
            the array handle to return evecs
        evalues : array-like, optional
            buffer that receives the eigen-values; when omitted a new
            ndarray with one entry per row of g_a is allocated

    :returns: All eigen-values as an ndarray in ascending order.

    :raises ValueError: if a supplied evalues buffer has the wrong dtype, is
        not contiguous, or is too small

    """
    # A typed ndarray is required so that .data is the raw element pointer
    # rather than a Python attribute object.
    cdef np.ndarray evalues_nd
    gtype,dims = inquire(g_a)
    # _to_dtype is a dict and has to be indexed, not called
    dtype = _to_dtype[gtype]
    if evalues is None:
        evalues_nd = np.ndarray((dims[0]), dtype=dtype)
    else:
        evalues_nd = np.asarray(evalues)
        if evalues_nd.dtype != dtype:
            raise ValueError("evalues is wrong type :: evalues=%s != %s" % (
                    evalues_nd.dtype, dtype))
        if not evalues_nd.flags['C_CONTIGUOUS']:
            raise ValueError("evalues must be contiguous")
        if evalues_nd.size < dims[0]:
            raise ValueError("evalues was not large enough")
    GA_Diag(g_a, g_s, g_v, <void*>evalues_nd.data)
    return evalues_nd
1206
def diag_reuse(int control, int g_a, int g_s, int g_v, evalues=None):
    """Solve the generalized eigen-value problem.

    Recommended for REPEATED calls if g_s is unchanged.
    The input matrices are not overwritten or destroyed.

    This is a collective operation.

    :Parameters:
        control : int
            0 indicates first call to the eigensolver;
            >0 consecutive calls (reuses factored g_s);
            <0 only erases factorized g_s; g_v and eval unchanged
            (should be called after previous use if another
            eigenproblem, i.e., different g_a and g_s, is to
            be solved)
        g_a : int
            the array handle of the matrix to diagonalize
        g_s : int
            the array handle of the metric
        g_v : int
            the array handle to return evecs
        evalues : array-like, optional
            buffer that receives the eigen-values; when omitted a new
            ndarray with one entry per row of g_a is allocated

    :returns: All eigen-values as an ndarray in ascending order.

    :raises ValueError: if a supplied evalues buffer has the wrong dtype, is
        not contiguous, or is too small

    """
    # A typed ndarray is required so that .data is the raw element pointer
    # rather than a Python attribute object.
    cdef np.ndarray evalues_nd
    gtype,dims = inquire(g_a)
    # _to_dtype is a dict and has to be indexed, not called
    dtype = _to_dtype[gtype]
    if evalues is None:
        evalues_nd = np.ndarray((dims[0]), dtype=dtype)
    else:
        evalues_nd = np.asarray(evalues)
        if evalues_nd.dtype != dtype:
            raise ValueError("evalues is wrong type :: evalues=%s != %s" % (
                    evalues_nd.dtype, dtype))
        if not evalues_nd.flags['C_CONTIGUOUS']:
            raise ValueError("evalues must be contiguous")
        if evalues_nd.size < dims[0]:
            raise ValueError("evalues was not large enough")
    GA_Diag_reuse(control, g_a, g_s, g_v, <void*>evalues_nd.data)
    return evalues_nd
1240
def diag_std(int g_a, int g_v, evalues=None):
    """Solve the standard (non-generalized) eigenvalue problem.

    The input matrix is neither overwritten nor destroyed.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the matrix to diagonalize
        g_v : int
            the array handle to return evecs
        evalues : array-like, optional
            buffer that receives the eigenvalues; when omitted a new
            ndarray with one entry per row of g_a is allocated

    :returns: all eigenvectors via the g_v global array, and eigenvalues as
        an ndarray in ascending order

    :raises ValueError: if a supplied evalues buffer has the wrong dtype, is
        not contiguous, or is too small

    """
    # A typed ndarray is required so that .data is the raw element pointer
    # rather than a Python attribute object.
    cdef np.ndarray evalues_nd
    gtype,dims = inquire(g_a)
    # _to_dtype is a dict and has to be indexed, not called
    dtype = _to_dtype[gtype]
    if evalues is None:
        evalues_nd = np.ndarray((dims[0]), dtype=dtype)
    else:
        evalues_nd = np.asarray(evalues)
        if evalues_nd.dtype != dtype:
            raise ValueError("evalues is wrong type :: evalues=%s != %s" % (
                    evalues_nd.dtype, dtype))
        if not evalues_nd.flags['C_CONTIGUOUS']:
            raise ValueError("evalues must be contiguous")
        if evalues_nd.size < dims[0]:
            raise ValueError("evalues was not large enough")
    GA_Diag_std(g_a, g_v, <void*>evalues_nd.data)
    return evalues_nd
1264
cpdef distribution(int g_a, int proc=-1):
    """Return the distribution given to proc.

    If proc is not specified, then ga.nodeid() is used. The underlying
    library reports the range as -1 for lo and -2 for hi if no elements are
    owned by proc; note that hi is incremented by one before it is returned
    (see below).

    :Parameters:
        g_a : int
            the array handle
        proc : int
            the process to query; a negative value means the calling process

    :returns: the pair (lo, hi) of int64 ndarrays, where lo is inclusive and
        hi has been converted to the exclusive Python convention

    """
    cdef int ndim = GA_Ndim(g_a)
    cdef np.ndarray[np.int64_t, ndim=1] lo = np.zeros((ndim), dtype=np.int64)
    cdef np.ndarray[np.int64_t, ndim=1] hi = np.zeros((ndim), dtype=np.int64)
    if proc < 0:
        proc = GA_Nodeid()
    NGA_Distribution64(g_a, proc, <int64_t*>lo.data, <int64_t*>hi.data)
    # convert hi to python exclusive indexing convention
    hi += 1
    return lo,hi
1282
def dot(int g_a, int g_b, alo=None, ahi=None, blo=None, bhi=None,
        bint ta=False, bint tb=False):
    """Computes the element-wise dot product of two arrays.

    Arrays must be of the same type and same number of elements.
    Patch operation allows for possibly transposed patches.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        g_b : int
            the array handle
        alo : 1D array-like of integers
            lower bound patch coordinates of g_a, inclusive
        ahi : 1D array-like of integers
            higher bound patch coordinates of g_a, exclusive
        blo : 1D array-like of integers
            lower bound patch coordinates of g_b, inclusive
        bhi : 1D array-like of integers
            higher bound patch coordinates of g_b, exclusive
        ta : bool
            whether the transpose operator should be applied to g_a True=applied
        tb : bool
            whether the transpose operator should be applied to g_b True=applied

    :returns: SUM_ij a(i,j)*b(i,j)

    :raises TypeError: if the element type of g_a has no dot routine

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    cdef char ta_c, tb_c
    cdef int gtype=inquire_type(g_a)
    cdef float complex pfcv
    cdef double complex pdcv
    cdef SingleComplex gfcv
    cdef DoubleComplex gdcv
    # The whole-array routines take no transpose flags, so they may only be
    # used when neither a patch nor a transpose was requested.
    if (alo is None and ahi is None and blo is None and bhi is None
            and not ta and not tb):
        if gtype == C_INT:
            return GA_Idot(g_a, g_b)
        elif gtype == C_LONG:
            return GA_Ldot(g_a, g_b)
        elif gtype == C_LONGLONG:
            return GA_Lldot(g_a, g_b)
        elif gtype == C_FLOAT:
            return GA_Fdot(g_a, g_b)
        elif gtype == C_DBL:
            return GA_Ddot(g_a, g_b)
        elif gtype == C_SCPL:
            # copy the GA complex struct into a native complex value
            gfcv = GA_Cdot(g_a, g_b)
            pfcv.real = gfcv.real
            pfcv.imag = gfcv.imag
            return pfcv
        elif gtype == C_DCPL:
            gdcv = GA_Zdot(g_a, g_b)
            pdcv.real = gdcv.real
            pdcv.imag = gdcv.imag
            return pdcv
        else:
            raise TypeError("type not supported by ga.dot %s" % gtype)
    else:
        alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
        blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
        if ta:
            ta_c = "T"
        else:
            ta_c = "N"
        if tb:
            tb_c = "T"
        else:
            tb_c = "N"
        if gtype == C_INT:
            return NGA_Idot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
        elif gtype == C_LONG:
            return NGA_Ldot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
        elif gtype == C_LONGLONG:
            return NGA_Lldot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
        elif gtype == C_FLOAT:
            return NGA_Fdot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
        elif gtype == C_DBL:
            return NGA_Ddot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
        elif gtype == C_SCPL:
            gfcv = NGA_Cdot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
            pfcv.real = gfcv.real
            pfcv.imag = gfcv.imag
            return pfcv
        elif gtype == C_DCPL:
            gdcv = NGA_Zdot_patch64(
                    g_a, ta_c, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
                    g_b, tb_c, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
            pdcv.real = gdcv.real
            pdcv.imag = gdcv.imag
            return pdcv
        else:
            raise TypeError("type not supported by ga.dot %s" % gtype)
1391
def duplicate(int g_a, char *name=""):
    """Creates a new array by applying all the properties of another existing
    array.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the array to duplicate
        name : string
            the new name of the created array

    :returns: a non-zero array handle means the call was successful.

    """
    return GA_Duplicate(g_a, name)
1408
def elem_divide(int g_a, int g_b, int g_c, alo=None, ahi=None, blo=None,
        bhi=None, clo=None, chi=None):
    """Computes the element-wise quotient of the two arrays.

    Arrays or array patches must be of the same types and same number of
    elements. For two-dimensional arrays::

        c(i, j) = a(i,j)/b(i,j)

    The result (c) may replace one of the input arrays (a/b).
    If one of the elements of array g_b is zero, the quotient for the element
    of g_c will be set to GA_NEGATIVE_INFINITY.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        g_b : int
            the array handle
        g_c : int
            the array handle
        alo : 1D array-like of integers
            lower bound patch coordinates of g_a, inclusive
        ahi : 1D array-like of integers
            higher bound patch coordinates of g_a, exclusive
        blo : 1D array-like of integers
            lower bound patch coordinates of g_b, inclusive
        bhi : 1D array-like of integers
            higher bound patch coordinates of g_b, exclusive
        clo : 1D array-like of integers
            lower bound patch coordinates of g_c, inclusive
        chi : 1D array-like of integers
            higher bound patch coordinates of g_c, exclusive

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
    # No bounds at all: operate on the whole arrays and stop here.
    if (alo is None and ahi is None
            and blo is None and bhi is None
            and clo is None and chi is None):
        GA_Elem_divide(g_a, g_b, g_c)
        return
    alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
    blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
    clo_nd,chi_nd = _lohi(g_c,clo,chi)
    GA_Elem_divide_patch64(
            g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
            g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
            g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data)
1460
def elem_maximum(int g_a, int g_b, int g_c, alo=None, ahi=None, blo=None,
        bhi=None, clo=None, chi=None):
    """Computes the element-wise maximum of the two arrays.

    Arrays or array patches must be of the same types and same number of
    elements. For two-dimensional arrays::

        c(i,j) = max(a(i,j),b(i,j))

    If the data type is complex, then::

        c(i,j).real = max{ |a(i,j)|, |b(i,j)|} while c(i,j).imag = 0

    The result (c) may replace one of the input arrays (a/b).

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        g_b : int
            the array handle
        g_c : int
            the array handle
        alo : 1D array-like of integers
            lower bound patch coordinates of g_a, inclusive
        ahi : 1D array-like of integers
            higher bound patch coordinates of g_a, exclusive
        blo : 1D array-like of integers
            lower bound patch coordinates of g_b, inclusive
        bhi : 1D array-like of integers
            higher bound patch coordinates of g_b, exclusive
        clo : 1D array-like of integers
            lower bound patch coordinates of g_c, inclusive
        chi : 1D array-like of integers
            higher bound patch coordinates of g_c, exclusive

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
    # No bounds at all: operate on the whole arrays and stop here.
    if (alo is None and ahi is None
            and blo is None and bhi is None
            and clo is None and chi is None):
        GA_Elem_maximum(g_a, g_b, g_c)
        return
    alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
    blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
    clo_nd,chi_nd = _lohi(g_c,clo,chi)
    GA_Elem_maximum_patch64(
            g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
            g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
            g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data)
1514
def elem_minimum(int g_a, int g_b, int g_c, alo=None, ahi=None, blo=None,
        bhi=None, clo=None, chi=None):
    """Computes the element-wise minimum of the two arrays.

    Arrays or array patches must be of the same types and same number of
    elements. For two-dimensional arrays::

        c(i,j) = min(a(i,j),b(i,j))

    If the data type is complex, then::

        c(i,j).real = min{ |a(i,j)|, |b(i,j)|} while c(i,j).imag = 0

    The result (c) may replace one of the input arrays (a/b).

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        g_b : int
            the array handle
        g_c : int
            the array handle
        alo : 1D array-like of integers
            lower bound patch coordinates of g_a, inclusive
        ahi : 1D array-like of integers
            higher bound patch coordinates of g_a, exclusive
        blo : 1D array-like of integers
            lower bound patch coordinates of g_b, inclusive
        bhi : 1D array-like of integers
            higher bound patch coordinates of g_b, exclusive
        clo : 1D array-like of integers
            lower bound patch coordinates of g_c, inclusive
        chi : 1D array-like of integers
            higher bound patch coordinates of g_c, exclusive

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
    # No bounds at all: operate on the whole arrays and stop here.
    if (alo is None and ahi is None
            and blo is None and bhi is None
            and clo is None and chi is None):
        GA_Elem_minimum(g_a, g_b, g_c)
        return
    alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
    blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
    clo_nd,chi_nd = _lohi(g_c,clo,chi)
    GA_Elem_minimum_patch64(
            g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
            g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
            g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data)
1568
def elem_multiply(int g_a, int g_b, int g_c, alo=None, ahi=None, blo=None,
        bhi=None, clo=None, chi=None):
    """Computes the element-wise product of the two arrays.

    Arrays or array patches must be of the same types and same number of
    elements. For two-dimensional arrays::

        c(i, j) = a(i,j)*b(i,j)

    The result (c) may replace one of the input arrays (a/b).

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        g_b : int
            the array handle
        g_c : int
            the array handle
        alo : 1D array-like of integers
            lower bound patch coordinates of g_a, inclusive
        ahi : 1D array-like of integers
            higher bound patch coordinates of g_a, exclusive
        blo : 1D array-like of integers
            lower bound patch coordinates of g_b, inclusive
        bhi : 1D array-like of integers
            higher bound patch coordinates of g_b, exclusive
        clo : 1D array-like of integers
            lower bound patch coordinates of g_c, inclusive
        chi : 1D array-like of integers
            higher bound patch coordinates of g_c, exclusive

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
    # No bounds at all: operate on the whole arrays and stop here.
    if (alo is None and ahi is None
            and blo is None and bhi is None
            and clo is None and chi is None):
        GA_Elem_multiply(g_a, g_b, g_c)
        return
    alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
    blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
    clo_nd,chi_nd = _lohi(g_c,clo,chi)
    GA_Elem_multiply_patch64(
            g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
            g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
            g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data)
1618
def error(char *message, int code=1):
    """Prints message and aborts safely with code.

    :Parameters:
        message : string
            the text handed to GA_Error
        code : int
            the code handed to GA_Error (defaults to 1)

    """
    GA_Error(message, code)
1623
"""Blocks the calling process until all the data transfers corresponding
1624
to GA operations called after ga.init_fence() complete.
1626
For example, since ga.put might return before the data reaches the final
1627
destination, ga_init_fence and ga_fence allow processes to wait until the
1628
data tranfer is fully completed:
1634
ga.fence() must be called after ga.init_fence(). A barrier, ga.sync(),
1635
assures completion of all data transfers and implicitly cancels all
1636
outstanding ga.init_fence() calls. ga.init_fence() and ga.fence() must be
1637
used in pairs, multiple calls to ga.fence() require the same number of
1638
corresponding ga.init_fence() calls. ga.init_fence()/ga_fence() pairs can
1641
ga.fence() works for multiple GA operations. For example:
1645
ga.scatter(g_a, ...)
1649
The calling process will be blocked until data movements initiated by two
1650
calls to ga_put and one ga_scatter complete.
1655
def fill(int g_a, value, lo=None, hi=None):
    """Assign a single value to all elements in the array or patch.

    :Parameters:
        g_a : int
            the array handle
        value : scalar
            the value to assign; it is converted to the element type of g_a
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive

    """
    cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
    # One slot per C type; _convert_multiplier stores the converted value in
    # one of them and returns a pointer to it.
    cdef int ivalue
    cdef long lvalue
    cdef long long llvalue
    cdef float fvalue
    cdef double dvalue
    cdef long double ldvalue
    cdef SingleComplex fcvalue
    cdef DoubleComplex dcvalue
    cdef void *vvalue
    cdef int gtype=inquire_type(g_a)
    vvalue = _convert_multiplier(gtype, value,
            &ivalue, &lvalue, &llvalue,
            &fvalue, &dvalue, &ldvalue,
            &fcvalue, &dcvalue)
    if lo is None and hi is None:
        GA_Fill(g_a, vvalue)
    else:
        lo_nd,hi_nd = _lohi(g_a,lo,hi)
        NGA_Fill_patch64(
                g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data, vvalue)
1689
def gather(int g_a, subsarray, np.ndarray values=None):
    """Gathers array elements from a global array into a local array.

    subsarray will be converted to an ndarray if it is not one already. A
    two-dimensional array is allowed so long as its shape is (n,ndim) where n
    is the number of elements to gather and ndim is the number of dimensions
    of the target array. Also, subsarray must be contiguous.

    For example, if the subsarray were two-dimensional::

        for k in range(n):
            v[k] = g_a[subsarray[k,0],subsarray[k,1],subsarray[k,2]...]

    For example, if the subsarray were one-dimensional::

        for k in range(n):
            base = k*ndim
            v[k] = g_a[subsarray[base+0],subsarray[base+1],subsarray[base+2]...]

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        subsarray : array-like of integers
            the subscripts of the elements to gather, flat or shaped (n,ndim)
        values : ndarray, optional
            one-dimensional contiguous buffer with room for n elements; a
            new one is allocated when omitted

    :returns: the ndarray holding the gathered values

    :raises ValueError: if subsarray or values do not meet the requirements
        described above

    """
    cdef np.ndarray subs_nd
    cdef int gtype = inquire_type(g_a)
    cdef int ndim = GA_Ndim(g_a)
    cdef int64_t n
    # prepare subsarray: both layouts occupy the same contiguous memory, so a
    # single conversion serves the flat and the (n,ndim) case alike
    subs_nd = np.ascontiguousarray(subsarray, dtype=np.int64)
    if subs_nd.ndim == 1:
        if len(subs_nd) % ndim != 0:
            raise ValueError("flat subsarray length must be a multiple of ndim")
        n = len(subs_nd) // ndim
    elif subs_nd.ndim == 2:
        if subs_nd.shape[1] != ndim:
            raise ValueError("subsarray must have shape (n,ndim)")
        n = len(subs_nd) # length of first dimension
    else:
        raise ValueError("subsarray must be either 1- or 2-dimensional")
    # prepare values array
    if values is None:
        values = np.ndarray(n, dtype=_to_dtype[gtype])
    else:
        if values.ndim != 1:
            raise ValueError("values must be one-dimensional")
        if not values.flags['C_CONTIGUOUS']:
            raise ValueError("values must be contiguous")
        if len(values) < n:
            raise ValueError("values was not large enough")
    # call the wrapped function
    NGA_Gather_flat64(g_a, <void*>values.data,
            <int64_t*>subs_nd.data, n)
    return values
1748
def gemm(bint ta, bint tb, int64_t m, int64_t n, int64_t k,
        alpha, int g_a, int g_b, beta, int g_c):
    """Performs one of the matrix-matrix operations.

    C := alpha*op( A )*op( B ) + beta*C

    where op( X ) is one of op(X)=X or op(X) = X', alpha and beta are scalars,
    and A, B and C are matrices, with op(A) an m by k matrix, op(B) a k by n
    matrix, and C an m by n matrix.

    On entry, ta specifies the form of op( A ) to be used in the
    matrix multiplication as follows::

        ta = False, op(A) = A.
        ta = True, op(A) = A'.

    tb selects op( B ) in the same way.

    This is a collective operation.

    :Parameters:
        ta : bool
            transpose operator for A
        tb : bool
            transpose operator for B
        m : int
            number of rows of op(A) and of matrix C
        n : int
            number of columns of op(B) and of matrix C
        k : int
            number of columns of op(A) and rows of matrix op(B)
        alpha : object
            scale factor
        g_a : int
            handle to input array
        g_b : int
            handle to input array
        beta : object
            scale factor
        g_c : int
            handle to output array

    :raises TypeError: if the element type of g_a is not one of C_FLOAT,
        C_DBL, C_SCPL or C_DCPL

    """
    cdef int gtype=inquire_type(g_a)
    cdef float falpha=1.0, fbeta=1.0
    cdef double dalpha=1.0, dbeta=1.0
    cdef float complex fcalpha=1.0, fcbeta=1.0
    cdef double complex dcalpha=1.0, dcbeta=1.0
    cdef SingleComplex ga_fcalpha, ga_fcbeta
    cdef DoubleComplex ga_dcalpha, ga_dcbeta
    cdef char ta_char = 'N'
    cdef char tb_char = 'N'
    if ta:
        ta_char = 'T'
    if tb:
        tb_char = 'T'
    if gtype == C_INT:
        raise TypeError("C_INT not supported")
    elif gtype == C_LONG:
        raise TypeError("C_LONG not supported")
    elif gtype == C_LONGLONG:
        raise TypeError("C_LONGLONG not supported")
    elif gtype == C_FLOAT:
        falpha = alpha
        fbeta = beta
        GA_Sgemm64(ta_char, tb_char, m, n, k, falpha, g_a, g_b, fbeta, g_c)
    elif gtype == C_DBL:
        dalpha = alpha
        dbeta = beta
        GA_Dgemm64(ta_char, tb_char, m, n, k, dalpha, g_a, g_b, dbeta, g_c)
    elif gtype == C_LDBL:
        raise TypeError("C_LDBL not supported")
    elif gtype == C_SCPL:
        # convert through a native complex, then copy into the GA struct
        fcalpha = alpha
        fcbeta = beta
        ga_fcalpha.real = fcalpha.real
        ga_fcalpha.imag = fcalpha.imag
        ga_fcbeta.real = fcbeta.real
        ga_fcbeta.imag = fcbeta.imag
        GA_Cgemm64(ta_char, tb_char, m, n, k, ga_fcalpha, g_a, g_b, ga_fcbeta, g_c)
    elif gtype == C_DCPL:
        dcalpha = alpha
        dcbeta = beta
        ga_dcalpha.real = dcalpha.real
        ga_dcalpha.imag = dcalpha.imag
        ga_dcbeta.real = dcbeta.real
        ga_dcbeta.imag = dcbeta.imag
        GA_Zgemm64(ta_char, tb_char, m, n, k, ga_dcalpha, g_a, g_b, ga_dcbeta, g_c)
    elif gtype == C_LDCPL:
        raise TypeError("C_LDCPL not supported (yet)")
    else:
        raise TypeError("type not supported by ga.gemm %s" % gtype)
1844
def get(int g_a, lo=None, hi=None, np.ndarray buffer=None):
    """Copies data from global array section to the local array buffer.

    The local array is assumed to have the same number of dimensions as the
    global array. Any detected inconsistencies/errors in the input arguments
    are reported by the shared implementation, _get_common, to which this
    call is forwarded.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        buffer : ndarray
            an ndarray of the appropriate type, large enough to hold lo,hi

    :returns: The local array buffer.

    """
    return _get_common(g_a, lo, hi, buffer)
1868
cdef _get_common(int g_a, lo=None, hi=None, np.ndarray buffer=None,
        bint nb=False, bint periodic=False, skip=None):
    """Copies data from global array section to the local array buffer.

    The local array is assumed to have the same shape as the requested region,
    or the local array can be 1-dimensional so long as it has the same number
    of elements as the requested region. Any detected inconsistencies raise a
    ValueError.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        buffer : ndarray
            should either be 1D and len(buffer)==np.prod(hi-lo), or
            np.all(buffer.shape == hi-lo) i.e. buffer is 1D and same size as
            requested region or buffer is the same shape as requested region
        nb : bool
            whether this call is non-blocking (see ga.nbget)
        periodic : bool
            whether this call is periodic (see ga.periodic_get)
        skip : 1D array-like of integers
            strides for each dimension

    :returns: The local array buffer (and the nonblocking handle if nb=True.)

    """
    cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd, ld_nd, shape, skip_nd
    cdef int gtype=inquire_type(g_a)
    cdef ga_nbhdl_t nbhandle
    dtype = _to_dtype[gtype]
    lo_nd,hi_nd = _lohi(g_a,lo,hi)
    if skip is None:
        shape = hi_nd-lo_nd+1
    else:
        skip_nd = _inta64(skip)
        # floor division keeps the shape integral even when "/" means true
        # division
        shape = (hi_nd-lo_nd)//skip_nd+1
    if buffer is None:
        buffer = np.ndarray(shape, dtype=dtype)
    elif buffer.dtype != dtype:
        raise ValueError("buffer is wrong type :: buffer=%s != %s" % (
                buffer.dtype, dtype))
    # Due to GA restrictions, buffer must not have negative strides
    # and buffer's last stride must be same as itemsize
    strides = [buffer.strides[i]//buffer.itemsize for i in range(buffer.ndim)]
    if strides[-1] != 1:
        raise ValueError("last dimension of buffer cannot be strided")
    if np.any(np.asarray(strides) < 0):
        raise ValueError("buffer cannot have negative strides")
    # we allow 1-d "flat" buffers in addition to buffers matching the shape of
    # requested region
    if buffer.ndim == 1:
        if buffer.size != np.prod(shape):
            raise ValueError('buffer size does not match shape :: '
                    'buffer.size=%s != np.prod(shape)=%s' % (
                    buffer.size, np.prod(shape)))
        ld_nd = shape[1:]
    else:
        buffer_shape = [buffer.shape[i] for i in range(buffer.ndim)]
        # compare as tuples so that a rank mismatch is simply "not equal"
        if tuple(buffer_shape) != tuple(shape):
            raise ValueError('buffer shape does not match request shape :: '
                    'buffer_shape=%s != shape=%s' % (
                    buffer_shape, shape))
        # leading dimensions, in elements, derived from the buffer's strides
        ld_nd = np.asarray([strides[i]//strides[i+1]
                for i in range(buffer.ndim-1)], dtype=np.int64)
    if nb:
        NGA_NbGet64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <void*>buffer.data, <int64_t*>ld_nd.data, &nbhandle)
        return buffer,nbhandle
    elif periodic:
        NGA_Periodic_get64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <void*>buffer.data, <int64_t*>ld_nd.data)
        return buffer
    elif skip is not None:
        NGA_Strided_get64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <int64_t*>skip_nd.data,
                <void*>buffer.data, <int64_t*>ld_nd.data)
        return buffer
    else:
        NGA_Get64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <void*>buffer.data, <int64_t*>ld_nd.data)
        return buffer
1957
def get_block_info(int g_a):
    """Returns information about the block-cyclic distribution.

    The number of blocks along each of the array axes are returned in the
    array num_blocks and the dimensions of the individual blocks, specified in
    the ga.set_block_cyclic or ga.set_block_cyclic_proc_grid subroutines, are
    returned in block_dims.

    This is a local function.

    :Parameters:
        g_a : int
            the array handle

    :returns: The number of blocks along each of the array axes and the
        dimensions of the individual blocks, in that order, as ndarrays.

    """
    # Plain ndarray declarations: the arrays are allocated as C int
    # (np.intc) to match the int* arguments, which a np.int_t buffer
    # declaration would reject wherever long is wider than int.
    cdef np.ndarray num_blocks, block_dims
    cdef int ndim = GA_Ndim(g_a)
    num_blocks = np.zeros(ndim, dtype=np.intc)
    block_dims = np.zeros(ndim, dtype=np.intc)
    GA_Get_block_info(g_a, <int*>num_blocks.data, <int*>block_dims.data)
    return num_blocks,block_dims
1981
def get_diag(int g_a, int g_v):
    """Inserts the diagonal elements of this matrix g_a into the vector g_v.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the matrix
        g_v : int
            the array handle of the vector receiving the diagonal

    """
    GA_Get_diag(g_a, g_v)
1994
"""Returns the value of an internal flag in the GA library whose value can
1995
be set using the ga.set_debug() subroutine.
1997
This is a local operation.
2000
return GA_Get_debug()
2002
def gop(X, char *op):
    """Global operation.

    X(1:N) is a vector present on each process. gop 'sums' elements of X
    across all nodes using the commutative operator op. The result is
    broadcast to all nodes. Supported operations include '+', '*', 'max',
    'min', 'absmax', 'absmin'. The use of lowercase for operators is
    necessary.

    X must be a contiguous array-like. X is not guaranteed to be modified
    in-place so use as:

    >>> value = ga.gop((1,2,3), "+")

    This operation is provided only for convenience purposes: it is available
    regardless of the message-passing library that GA is running with.

    This is a collective operation.

    :Parameters:
        X : array-like
            the values contributed by the calling process
        op : string
            the name of the operator

    :returns: the ndarray holding the result of the operation

    :raises ValueError: if X is not contiguous
    :raises TypeError: if the element type of X is not supported

    """
    cdef np.ndarray X_nd = np.asarray(X)
    cdef int size
    if not X_nd.flags['C_CONTIGUOUS']:
        raise ValueError("X must be contiguous")
    # total element count: also right for 0-d and multi-dimensional input
    size = X_nd.size
    # The 'l' type code names C long on every NumPy version, unlike np.long.
    if X_nd.dtype == np.dtype(np.intc):
        GA_Igop(<int*>X_nd.data, size, op)
    elif X_nd.dtype == np.dtype('l'):
        GA_Lgop(<long*>X_nd.data, size, op)
    elif X_nd.dtype == np.dtype(np.longlong):
        GA_Llgop(<long long*>X_nd.data, size, op)
    elif X_nd.dtype == np.dtype(np.single):
        GA_Fgop(<float*>X_nd.data, size, op)
    elif X_nd.dtype == np.dtype(np.double):
        GA_Dgop(<double*>X_nd.data, size, op)
    elif X_nd.dtype == np.dtype(np.complex64):
        GA_Cgop(<SingleComplex*>X_nd.data, size, op)
    elif X_nd.dtype == np.dtype(np.complex128):
        GA_Zgop(<DoubleComplex*>X_nd.data, size, op)
    else:
        raise TypeError("type not supported by ga.gop %s" % X_nd.dtype)
    return X_nd
2051
def gop_multiply(X):
    """Global product: shorthand for ga.gop(X, "*")."""
    return gop(X, "*")

def gop_max(X):
    """Global maximum: shorthand for ga.gop(X, "max")."""
    return gop(X, "max")

def gop_min(X):
    """Global minimum: shorthand for ga.gop(X, "min")."""
    return gop(X, "min")

def gop_absmax(X):
    """Global maximum of absolute values: shorthand for ga.gop(X, "absmax")."""
    return gop(X, "absmax")

def gop_absmin(X):
    """Global minimum of absolute values: shorthand for ga.gop(X, "absmin")."""
    return gop(X, "absmin")
2066
def has_ghosts(int g_a):
    """Determines whether any dimension of the given array has ghost cells.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    :returns: True if the global array has some dimensions for which the
        ghost cell width is greater than zero, it returns False otherwise.

    """
    # The C routine answers "yes" with the value 1.
    return GA_Has_ghosts(g_a) == 1
2083
"""Initializes tracing of completion status of data movement operations.
2085
This operation is local.
2091
"""Allocates and initializes internal data structures in Global Arrays.
2093
This is a collective operation.
2099
GA_Register_stack_memory(_gapy_malloc, _gapy_free)
2100
atexit.register(terminate)
2103
def initialize_ltd(size_t limit):
    """Allocates and initializes internal data structures and sets limit for
    memory used in global arrays.

    The limit is per process: it is the amount of memory that the given
    processor can contribute to collective allocation of global arrays. It
    does not include temporary storage that GA might be allocating (and
    releasing) during execution of a particular operation.

    limit = 0 means "allow unlimited memory usage" in which case this
    operation is equivalent to GA_initialize.

    This is a collective operation.

    :Parameters:
        limit : int
            the per-process memory limit handed to GA_Initialize_ltd

    """
    global _initialized
    GA_Initialize_ltd(limit)
    # record the state in the module-level flag
    _initialized = True
2123
"""Returns whether ga has been initialized."""
2126
cpdef inquire(int g_a):
    """Returns the data type and the dimensions of the given array.

    :Parameters:
        g_a : int
            the array handle

    :returns: the pair (gtype, dims) where gtype is the GA type constant and
        dims is an int64 ndarray with one entry per dimension

    """
    cdef int gtype
    cdef int ndim = GA_Ndim(g_a)
    cdef np.ndarray[np.int64_t, ndim=1] dims=np.zeros((ndim), dtype=np.int64)
    # gtype, ndim and dims are all filled in by the C routine
    NGA_Inquire64(g_a, &gtype, &ndim, <int64_t*>dims.data)
    return gtype,dims
2133
cpdef np.ndarray[np.int64_t, ndim=1] inquire_dims(int g_a):
    """Returns the dimensions of the given array as reported by inquire.

    :Parameters:
        g_a : int
            the array handle

    :returns: an int64 ndarray with one entry per dimension

    """
    cdef int gtype
    cdef np.ndarray[np.int64_t, ndim=1] dims
    # inquire yields (type, dims); only the dims are of interest here
    gtype,dims = inquire(g_a)
    return dims
2139
def inquire_memory():
    """Returns amount of memory (in bytes) used in the allocated global arrays
    on the calling processor.

    This operation is local.

    :returns: the value reported by GA_Inquire_memory

    """
    return GA_Inquire_memory()
2148
def inquire_name(int g_a):
    """Returns the name of an array represented by the handle g_a.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle

    :returns: the name as reported by GA_Inquire_name

    """
    return GA_Inquire_name(g_a)
2160
cpdef int inquire_type(int g_a):
    """Returns the GA type constant of the given array as reported by inquire.

    :Parameters:
        g_a : int
            the array handle

    :returns: the GA type constant of the array

    """
    cdef int gtype
    cdef np.ndarray[np.int64_t, ndim=1] dims
    # inquire yields (type, dims); only the type is of interest here
    gtype,dims = inquire(g_a)
    return gtype
2166
def is_mirrored(int g_a):
    """Checks whether the array is mirrored.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle

    :returns: True if it is a mirrored array, else returns False.

    """
    # The C routine answers "yes" with the value 1.
    return GA_Is_mirrored(g_a) == 1
2178
def llt_solve(int g_a, int g_b):
    """Solves a system of linear equations

        A * X = B

    using the Cholesky factorization of an NxN double precision symmetric
    positive definite matrix A (represented by handle g_a). On successful exit
    B will contain the solution X.

    This is a collective operation.

    :Parameters:
        g_a : int
            the coefficient matrix
        g_b : int
            the rhs/solution matrix

    :returns: 0 if successful; >0 if the leading minor of this order is not
        positive definite and the factorization could not be completed

    """
    return GA_Llt_solve(g_a, g_b)
2200
def locate(int g_a, subscript):
    """Return the GA compute process id that 'owns' the data.

    If any element of subscript[] is out of bounds "-1" is returned.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle
        subscript : 1D array-like of integers
            len(subscript) should be ndim

    :returns: the id of the owning process, or -1 as described above

    :raises ValueError: if len(subscript) differs from the number of
        dimensions of g_a

    """
    cdef np.ndarray[np.int64_t, ndim=1] subscript_nd
    subscript_nd = _inta64(subscript)
    # Only a raw pointer reaches the C routine, so the length is checked here.
    if len(subscript_nd) != GA_Ndim(g_a):
        raise ValueError("len(subscript) must equal the number of dimensions")
    return NGA_Locate64(g_a, <int64_t*>subscript_nd.data)
2218
cpdef int locate_nnodes(int g_a, lo, hi):
    """Return the number of process which own the specified patch.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive

    :returns: the process count reported by NGA_Locate_nnodes64

    """
    cdef np.ndarray[np.int64_t, ndim=1] lower, upper
    cdef int owners
    # convert the bounds into the int64 arrays the C routine expects
    lower,upper = _lohi(g_a,lo,hi)
    owners = NGA_Locate_nnodes64(
            g_a, <int64_t*>lower.data, <int64_t*>upper.data)
    return owners
2229
def locate_region(int g_a, lo, hi):
    """Return the list of the GA processes id that 'own' the data.

    Parts of the specified patch might be actually 'owned' by several
    processes. If lo/hi are out of bounds the C routine reports "0" owning
    processes and empty arrays are returned; otherwise the arrays have one
    entry per process that holds part of the data::

        map[i][0] - lo[ndim]
        map[i][1] - hi[ndim]
        procs[i]  - processor id that owns data in patch lo[i]:hi[i]

    The coordinates in map are returned exactly as the C routine produced
    them; no index conversion is applied here.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive

    :returns: the pair (map, procs); map has shape (nprocs,2,ndim) and procs
        has length nprocs

    """
    cdef np.ndarray[np.int64_t, ndim=1] lo_nd
    cdef np.ndarray[np.int64_t, ndim=1] hi_nd
    cdef np.ndarray[np.int64_t, ndim=1] map
    cdef np.ndarray[np.int32_t, ndim=1] procs
    cdef int np_guess
    cdef int np_result
    cdef int ndim = GA_Ndim(g_a)
    lo_nd,hi_nd = _lohi(g_a,lo,hi)
    # locate_nnodes converts its bounds itself, so give it the caller's
    # original lo/hi rather than the already converted arrays.
    np_guess = locate_nnodes(g_a, lo, hi)
    map = np.ndarray(np_guess*ndim*2, dtype=np.int64)
    procs = np.ndarray(np_guess, dtype=np.int32)
    np_result = NGA_Locate_region64(g_a,
            <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
            <int64_t*>map.data, <int*>procs.data)
    # keep only the entries that were actually filled in before reshaping
    return (map[:np_result*2*ndim].reshape(np_result,2,ndim),
            procs[:np_result])
2260
def lock(int mutex):
    """Locks a mutex object identified by the mutex number.

    It is a fatal error for a process to attempt to lock a mutex which was
    already locked by this process.

    :Parameters:
        mutex : int
            the mutex number

    """
    GA_Lock(mutex)
2269
def lu_solve(int g_a, int g_b, bint trans=False):
2270
"""Solve the system of linear equations op(A)X = B based on the LU
2273
op(A) = A or A' depending on the parameter trans
2274
trans = False means that the transpose operator should not be applied.
2275
trans = True means that the transpose operator should be applied.
2277
Matrix A is a general real matrix. Matrix B contains possibly multiple rhs
2278
vectors. The array associated with the handle g_b is overwritten by the
2281
This is a collective operation.
2285
the array handle for the coefficient matrix
2287
the array handle for the solution matrix
2289
transpose (True) or not transpose (False)
2292
cdef char ctrans = 'N'
2295
GA_Lu_solve(ctrans, g_a, g_b)
2297
def mask_sync(bint first, bint last):
    """Control the synchronization calls placed around collective operations.

    Setting first=False removes the synchronization prior to the collective
    operation; setting last=False removes the synchronization call after the
    collective operation.  This call is applicable to all collective
    operations.  It must be invoked before each collective operation.

    This is a collective operation.

    :Parameters:
        first : bool
            mask for prior internal synchronization
        last : bool
            mask for post internal synchronization

    """
    GA_Mask_sync(first, last)
2317
def matmul_patch(bint transa, bint transb, alpha, beta,
2321
"""An n-dimensional patch version of ga_dgemm.
2323
C[clo[]:chi[]] := alpha * AA[alo[]:ahi[]] *
2324
BB[blo[]:bhi[]] ) + beta*C[clo[]:chi[]],
2326
where AA = op(A), BB = op(B), and op( X ) is one of
2328
op( X ) = X or op( X ) = X',
2330
It works for both double and DoubleComplex data tape.
2332
This is a collective operation.
2335
cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
2336
cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
2337
cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
2338
cdef int gtype=inquire_type(g_a)
2339
cdef int ialpha, ibeta
2340
cdef long lalpha, lbeta
2341
cdef long long llalpha, llbeta
2342
cdef float falpha, fbeta
2343
cdef double dalpha, dbeta
2344
cdef long double ldalpha, ldbeta
2345
cdef SingleComplex fcalpha, fcbeta
2346
cdef DoubleComplex dcalpha, dcbeta
2347
cdef void *valpha, *vbeta
2348
cdef char char_transa = 'N'
2349
cdef char char_transb = 'N'
2352
valpha = _convert_multiplier(gtype, alpha,
2353
&ialpha, &lalpha, &llalpha,
2354
&falpha, &dalpha, &ldalpha,
2358
vbeta = _convert_multiplier(gtype, beta,
2359
&ibeta, &lbeta, &llbeta,
2360
&fbeta, &dbeta, &ldbeta,
2362
alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
2363
blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
2364
clo_nd,chi_nd = _lohi(g_c,clo,chi)
2369
NGA_Matmul_patch64(char_transa, char_transb, valpha, vbeta,
2370
g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
2371
g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
2372
g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data)
2374
def median(int g_a, int g_b, int g_c, int g_m,
        alo=None, ahi=None, blo=None, bhi=None,
        clo=None, chi=None, mlo=None, mhi=None):
    """Store the componentwise median of g_a, g_b and g_c in g_m.

    Works on whole arrays or on patches.  The result (m) may replace one of
    the input arrays (a/b/c).

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the first input
        g_b : int
            the array handle of the second input
        g_c : int
            the array handle of the third input
        g_m : int
            the array handle for the result
        alo : 1D array-like of integers
            lower bound patch coordinates of g_a, inclusive
        ahi : 1D array-like of integers
            higher bound patch coordinates of g_a, exclusive
        blo : 1D array-like of integers
            lower bound patch coordinates of g_b, inclusive
        bhi : 1D array-like of integers
            higher bound patch coordinates of g_b, exclusive
        clo : 1D array-like of integers
            lower bound patch coordinates of g_c, inclusive
        chi : 1D array-like of integers
            higher bound patch coordinates of g_c, exclusive
        mlo : 1D array-like of integers
            lower bound patch coordinates of g_m, inclusive
        mhi : 1D array-like of integers
            higher bound patch coordinates of g_m, exclusive

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    cdef np.ndarray[np.int64_t, ndim=1] clo_nd, chi_nd
    cdef np.ndarray[np.int64_t, ndim=1] mlo_nd, mhi_nd
    # No bound given at all: operate on the whole arrays.
    if (alo is None and ahi is None
            and blo is None and bhi is None
            and clo is None and chi is None
            and mlo is None and mhi is None):
        GA_Median(g_a, g_b, g_c, g_m)
        return
    # At least one bound given: use the patch variant for all four arrays.
    alo_nd, ahi_nd = _lohi(g_a, alo, ahi)
    blo_nd, bhi_nd = _lohi(g_b, blo, bhi)
    clo_nd, chi_nd = _lohi(g_c, clo, chi)
    mlo_nd, mhi_nd = _lohi(g_m, mlo, mhi)
    GA_Median_patch64(
            g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
            g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data,
            g_c, <int64_t*>clo_nd.data, <int64_t*>chi_nd.data,
            g_m, <int64_t*>mlo_nd.data, <int64_t*>mhi_nd.data)
2432
"""Returns amount of memory (in bytes) left for allocation of new global
2433
arrays on the calling processor.
2435
Note: If ga.uses_ma() returns True, then ga.memory_avail() returns the
2436
lesser of the amount available under the GA limit and the amount available
2437
from MA (according to ma.inquire_avail() operation). If no GA limit has
2438
been set, it returns what MA says is available.
2440
If ( ! ga.uses_ma() && ! ga.memory_limited() ) returns < 0, indicating
2441
that the bound on currently available memory cannot be determined.
2443
This operation is local.
2446
return GA_Memory_avail()
2448
def memory_limited():
    """Report whether a limit is set on memory usage in Global Arrays on the
    calling processor.

    This operation is local.

    :returns: True for "yes", False for "no"

    """
    # GA_Memory_limited() reports 1 when a limit is in force.
    return GA_Memory_limited() == 1
2461
def merge_distr_patch(int g_a, alo, ahi, int g_b, blo, bhi):
    """Merge all copies of a patch of a mirrored array (g_a) into a patch of
    a distributed array (g_b).

    This is a collective operation.

    :Parameters:
        g_a : int
            array handle of the mirrored array
        alo : 1D array-like of integers
            g_a patch coordinate
        ahi : 1D array-like of integers
            g_a patch coordinate
        g_b : int
            array handle of the distributed array
        blo : 1D array-like of integers
            g_b patch coordinate
        bhi : 1D array-like of integers
            g_b patch coordinate

    """
    cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
    cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
    alo_nd, ahi_nd = _lohi(g_a, alo, ahi)
    blo_nd, bhi_nd = _lohi(g_b, blo, bhi)
    NGA_Merge_distr_patch64(
            g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
            g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data)
2490
def merge_mirrored(int g_a):
    """Merge mirrored arrays by adding together the contents of each copy.

    The result is that each mirrored copy of the array represented by g_a is
    the sum of the individual arrays before the merge operation.  After the
    merge, all mirrored arrays are equal.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    """
    GA_Merge_mirrored(g_a)
2507
def nbacc(int g_a, buffer, lo=None, hi=None, alpha=None):
    """Non-blocking version of ga.acc.

    Combines data from buffer with data in the global array patch:

        global array section (lo[],hi[]) += alpha * buffer

    The accumulate can be completed locally by calling ga.nbwait() on the
    returned handle.  The buffer array is assumed to have the same number of
    dimensions as the global array.  If the buffer is not contiguous, a
    contiguous copy will be made.

    This is a non-blocking and one-sided and atomic operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            must be contiguous and have same number of elements as patch
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        alpha : object
            multiplier (converted to the appropriate type)

    :returns: The non-blocking request handle.

    """
    # The trailing True selects the non-blocking path of the shared helper.
    handle = _acc_common(g_a, buffer, lo, hi, alpha, True)
    return handle
2540
def nbget(int g_a, lo=None, hi=None, np.ndarray buffer=None):
    """Non-blocking version of the blocking ga.get operation.

    Copies data from a global array section to the local array buffer.  The
    get can be completed locally by making a call to ga.nbwait().

    The local array is assumed to have the same number of dimensions as the
    global array.

    This is a non-blocking and one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        buffer : ndarray
            Fill this buffer instead of allocating a new one internally.
            Must be contiguous and have same number of elements as patch.

    :returns: The local array buffer.

    """
    # The trailing True selects the non-blocking path of the shared helper.
    result = _get_common(g_a, lo, hi, buffer, True)
    return result
2570
def nblock(int g_a):
    """Return the number of partitions of each array dimension for g_a.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle

    :returns: 1D int array with one entry per dimension of g_a

    """
    cdef int ndim = GA_Ndim(g_a)
    cdef np.ndarray[np.int32_t, ndim=1] nblock_nd
    # one slot per dimension, filled in by the C library
    nblock_nd = np.zeros(ndim, dtype=np.intc)
    GA_Nblock(g_a, <int*>nblock_nd.data)
    return nblock_nd
2586
def nbput(int g_a, buffer, lo=None, hi=None):
    """Non-blocking version of the blocking put operation.

    Copies data from the local array buffer to the global array section.
    The put can be completed locally by making a call to ga.nbwait().

    The local array is assumed to have the same number of dimensions as the
    global array.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            the data to put
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive

    :returns: whatever the shared put helper returns for a non-blocking call
        (its documentation says this is the non-blocking handle)

    """
    # nb=True, periodic=False
    handle = _put_common(g_a, buffer, lo, hi, True, False)
    return handle
2613
def nbwait(ga_nbhdl_t nbhandle):
    """Complete a non-blocking one-sided operation locally.

    Waiting on a non-blocking put or accumulate assures that the data was
    injected into the network and the user buffer can now be reused.
    Completing a get assures that the data has arrived in user memory and is
    ready for use.

    The wait ensures only local completion.  Unlike their blocking
    counterparts, the non-blocking operations are not ordered with respect to
    the destination: besides performance, enforcing ordering would add
    possibly unnecessary overhead for applications that do not need it.
    Where ordering is necessary, call a fence operation; the fence is
    provided to confirm remote completion if needed.

    :Parameters:
        nbhandle : ga_nbhdl_t
            the non-blocking request handle

    """
    NGA_NbWait(&nbhandle)
2632
"""Returns the number of dimensions in array represented by the handle g_a.
2634
This operation is local.
2640
:returns: the number of dimensions in the array g_a
2646
"""Returns the number of the GA compute (user) processes.
2648
This operation is local.
2650
:returns: the number of GA compute (user) processes
2656
"""Returns the GA process id (0, ..., ga.nnodes()-1) of the requesting
2659
This operation is local.
2661
:returns: the GA process id
2667
"""Computes the 1-norm of the matrix or vector g_a.
2669
This is a collective operation.
2675
:returns: the 1-norm of the matrix or vector g_a (as a float)
2682
def norm_infinity(int g_a):
    """Computes the infinity-norm of the matrix or vector g_a.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    :returns: the infinity-norm of the matrix or vector g_a (as a float)

    """
    cdef double nm
    # the C routine writes the result through the pointer
    GA_Norm_infinity(g_a, &nm)
    return nm
2698
def enum(int g_a, lo=None, hi=None, start=None, inc=None):
2699
"""This subroutine enumerates the values of an array between elements lo
2700
and hi starting with the value istart and incrementing each subsequent
2703
This operation is only applicable to 1-dimensional arrays.
2705
An example of its use is shown below:
2707
ga.enum(g_a, 1, n, 7, 2)
2708
# g_a: 7 9 11 13 15 17 19 21 23 ...
2710
This is a collective operation.
2715
lo : 1D array-like of integers
2717
hi : 1D array-like of integers
2720
starting value of enumeration (converted to appropriate type)
2722
increment value (converted to appropriate type)
2725
cdef np.ndarray[np.int64_t, ndim=1] hi_nd = inquire_dims(g_a)-1
2726
cdef int64_t c_lo=0, c_hi=hi_nd[0]
2727
cdef int gtype=inquire_type(g_a)
2728
cdef int istart, iinc
2729
cdef long lstart, linc
2730
cdef long long llstart, llinc
2731
cdef float fstart, finc
2732
cdef double dstart, dinc
2733
cdef long double ldstart, ldinc
2734
cdef SingleComplex fcstart, fcinc
2735
cdef DoubleComplex dcstart, dcinc
2736
cdef void *vstart=NULL, *vinc=NULL
2745
vstart = _convert_multiplier(gtype, start,
2746
&istart, &lstart, &llstart,
2747
&fstart, &dstart, &ldstart,
2749
vinc = _convert_multiplier(gtype, inc,
2750
&iinc, &linc, &llinc,
2751
&finc, &dinc, &ldinc,
2753
GA_Patch_enum64(g_a, c_lo, c_hi, vstart, vinc)
2755
def pack(int g_src, int g_dst, int g_msk, lo=None, hi=None):
    """Compress the values of the source vector g_src into the smaller
    destination array g_dst, based on the integer mask array g_msk.

    The values lo and hi denote the range of elements that should be
    compressed, and the number of values placed in the compressed array is
    returned.  This operation is the complement of the ga.unpack operation.
    An example is shown below::

        icount = ga.pack(g_src, g_dst, g_msk, 1, n);
        # g_msk:   1  0  0  0  0  0  1  0  1  0  0  1  0  0  1  1  0
        # g_src:   1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17
        # g_dst:   1  7  9 12 15 16

    This is a collective operation.

    :Parameters:
        g_src : int
            handle for source array
        g_dst : int
            handle for destination array
        g_msk : int
            handle for integer array representing mask
        lo : int
            low value of range on which operation is performed;
            defaults to 0
        hi : int
            hi value of range on which operation is performed;
            defaults to the last index of g_src's first dimension

    :returns: the number of values placed in g_dst

    """
    cdef np.ndarray[np.int64_t, ndim=1] hi_nd = inquire_dims(g_src)-1
    cdef int64_t c_lo=0, c_hi=hi_nd[0], icount
    if lo is not None:
        c_lo = lo
    if hi is not None:
        # NOTE(review): an explicit hi is forwarded unchanged, exactly as the
        # previous call did; confirm whether it should follow the module's
        # exclusive-upper-bound convention instead.
        c_hi = hi
    # Pass the resolved C bounds.  Passing the raw lo/hi objects made the
    # documented None defaults fail on conversion to int64_t.
    GA_Pack64(g_src, g_dst, g_msk, c_lo, c_hi, &icount)
    return icount
2793
def periodic_acc(int g_a, buffer, lo=None, hi=None, alpha=None):
    """Periodic version of ga.acc.

    The indices can extend beyond the array boundary/dimensions, in which
    case the library wraps them around.

    Combines data from buffer with data in the global array patch:

        global array section (lo[],hi[]) += alpha * buffer

    The buffer array is assumed to have the same number of dimensions as the
    global array.  If the buffer is not contiguous, a contiguous copy will be
    made.

    This is a one-sided and atomic operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            must be contiguous and have same number of elements as patch
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : array-like of integers
            higher bound patch coordinates, exclusive
        alpha : object
            multiplier (converted to the appropriate type)

    """
    # positional flags: nb=False, periodic=True
    _acc_common(g_a, buffer, lo, hi, alpha, False, True)
2824
def periodic_get(int g_a, lo=None, hi=None, np.ndarray buffer=None):
    """Periodic version of ga.get.

    The indices can extend beyond the array boundary/dimensions, in which
    case the library wraps them around.

    Copies data from a global array section to the local array buffer.  The
    local array is assumed to have the same number of dimensions as the
    global array.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : array-like of integers
            higher bound patch coordinates, exclusive
        buffer : ndarray
            must be contiguous and have same number of elements as patch

    :returns: The local array buffer.

    """
    # positional flags: nb=False, periodic=True
    result = _get_common(g_a, lo, hi, buffer, False, True)
    return result
2853
def periodic_put(int g_a, buffer, lo=None, hi=None):
    """Periodic version of ga.put.

    The indices can extend beyond the array boundary/dimensions, in which
    case the library wraps them around.

    Copies data from the local array buffer to the global array section.
    The local array is assumed to have the same number of dimensions as the
    global array.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            the data to put
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : array-like of integers
            higher bound patch coordinates, exclusive

    """
    # positional flags: nb=False, periodic=True
    _put_common(g_a, buffer, lo, hi, False, True)
2880
def pgroup_absolute_id(int pgroup, int pid):
    """Forward pgroup and pid to GA_Pgroup_absolute_id and return its result.

    :Parameters:
        pgroup : int
            processor group handle
        pid : int
            process id passed through to the C routine

    """
    absolute = GA_Pgroup_absolute_id(pgroup, pid)
    return absolute
2884
def pgroup_brdcst(int pgroup, buffer, int root=0):
    """Broadcast from process root to all other processes in the same group.

    If the buffer is not contiguous, an error is raised.  This operation is
    provided only for convenience purposes: it is available regardless of the
    message-passing library that GA is running with.

    This is a collective operation.

    :Parameters:
        pgroup : int
            processor group handle
        buffer : array-like
            the message
        root : int
            the process which is sending

    :returns: The buffer in case a temporary was passed in.

    """
    cdef np.ndarray buffer_nd = np.asarray(buffer)
    if not buffer_nd.flags['C_CONTIGUOUS']:
        raise ValueError("the buffer must be contiguous")
    #if buffer_nd.ndim != 1:
    #    raise ValueError, "the buffer must be one-dimensional"
    # message length is given in bytes
    nbytes = buffer_nd.size*buffer_nd.itemsize
    GA_Pgroup_brdcst(pgroup, buffer_nd.data, nbytes, root)
    return buffer_nd
2914
def pgroup_create(list):
    """Create a processor group and return its handle.

    At present, it must be invoked by all processors in the current default
    processor group.  The list of processors uses the indexing scheme of the
    default processor group.  If the default processor group is the world
    group, then these indices are the usual processor indices.  The returned
    process group handle can be used to reference this group.

    This is a collective operation on the default processor group.

    :Parameters:
        list : 1D array-like of integers
            the processors that make up the new group

    :returns: the processor group handle

    """
    cdef np.ndarray[np.int32_t, ndim=1] members = _inta32(list)
    cdef int count = len(members)
    return GA_Pgroup_create(<int*>members.data, count)
2931
def pgroup_destroy(int pgroup):
    """Free up a processor group handle.

    This is a collective operation on the default processor group.

    :Parameters:
        pgroup : int
            processor group handle

    :returns: True if the handle was previously active. False if the handle was not previously active.

    """
    # a zero status from the C routine means the handle was not active
    return GA_Pgroup_destroy(pgroup) != 0
2943
def pgroup_get_default():
    """Return a handle to the default processor group.

    The handle can then be used to create a global array using one of the
    ga.create or ga.set_pgroup calls.

    This is a local operation.

    """
    handle = GA_Pgroup_get_default()
    return handle
2954
def pgroup_get_mirror():
    """Return a handle to the mirrored processor group.

    The handle can then be used to create a global array using one of the
    ga.create or ga.set_pgroup calls.

    This is a local operation.

    """
    handle = GA_Pgroup_get_mirror()
    return handle
2965
def pgroup_get_world():
    """Return a handle to the world processor group.

    The handle can then be used to create a global array using one of the
    ga.create or ga.set_pgroup calls.

    This is a local operation.

    """
    handle = GA_Pgroup_get_world()
    return handle
2976
def pgroup_gop(int pgroup, X, char *op):
    """Global operation.

    X(1:N) is a vector present on each process in the group.  gop 'sums'
    elements of X across all nodes using the commutative operator op.  The
    result is broadcast to all nodes.  Supported operations include '+', '*',
    'max', 'min', 'absmax', 'absmin'.

    X must be a contiguous array-like.  X is not guaranteed to be modified
    in place (a temporary is created when X is not already an ndarray), so
    use the return value:

    >>> value = ga.pgroup_gop(pgroup, (1,2,3), "+")

    This operation is provided only for convenience purposes: it is available
    regardless of the message-passing library that GA is running with.

    This is a collective operation.

    :Parameters:
        pgroup : int
            processor group handle
        X : contiguous array-like
            the values to reduce; every element is reduced, whatever the
            number of dimensions
        op : str
            the reduction operator

    :returns: the reduced values as an ndarray

    :raises ValueError: if X is not C-contiguous
    :raises TypeError: if the element type of X is not supported

    """
    cdef np.ndarray X_nd = np.asarray(X)
    cdef int n
    if not X_nd.flags['C_CONTIGUOUS']:
        raise ValueError("X must be contiguous")
    # Reduce every element.  len() would only count the first dimension of an
    # n-d array and fails outright on a 0-d array.
    n = X_nd.size
    kind = X_nd.dtype
    # C long / long long are matched through their dtype character codes
    # because the np.long alias does not exist in every NumPy release.
    if kind == np.intc:
        GA_Pgroup_igop(pgroup, <int*>X_nd.data, n, op)
    elif kind == np.dtype('l'):
        GA_Pgroup_lgop(pgroup, <long*>X_nd.data, n, op)
    elif kind == np.dtype('q'):
        GA_Pgroup_llgop(pgroup, <long long*>X_nd.data, n, op)
    elif kind == np.single:
        GA_Pgroup_fgop(pgroup, <float*>X_nd.data, n, op)
    elif kind == np.double:
        GA_Pgroup_dgop(pgroup, <double*>X_nd.data, n, op)
    elif kind == np.complex64:
        GA_Pgroup_cgop(pgroup, <SingleComplex*>X_nd.data, n, op)
    elif kind == np.complex128:
        GA_Pgroup_zgop(pgroup, <DoubleComplex*>X_nd.data, n, op)
    else:
        raise TypeError("type not supported by ga.pgroup_gop %s" % X_nd.dtype)
    return X_nd
3017
def pgroup_gop_add(int pgroup, X):
    """Shorthand for ga.pgroup_gop(pgroup, X, "+")."""
    reduced = pgroup_gop(pgroup, X, "+")
    return reduced
3020
def pgroup_gop_multiply(int pgroup, X):
    """Shorthand for ga.pgroup_gop(pgroup, X, "*")."""
    reduced = pgroup_gop(pgroup, X, "*")
    return reduced
3023
def pgroup_gop_max(int pgroup, X):
    """Shorthand for ga.pgroup_gop(pgroup, X, "max")."""
    reduced = pgroup_gop(pgroup, X, "max")
    return reduced
3026
def pgroup_gop_min(int pgroup, X):
    """Shorthand for ga.pgroup_gop(pgroup, X, "min")."""
    reduced = pgroup_gop(pgroup, X, "min")
    return reduced
3029
def pgroup_gop_absmax(int pgroup, X):
    """Shorthand for ga.pgroup_gop(pgroup, X, "absmax")."""
    reduced = pgroup_gop(pgroup, X, "absmax")
    return reduced
3032
def pgroup_gop_absmin(int pgroup, X):
    """Shorthand for ga.pgroup_gop(pgroup, X, "absmin")."""
    reduced = pgroup_gop(pgroup, X, "absmin")
    return reduced
3035
def pgroup_nnodes(int pgroup=-1):
3036
"""Returns the number of processors contained in the group specified by
3039
This is a local local operation.
3047
pgroup = pgroup_get_default()
3048
return GA_Pgroup_nnodes(pgroup)
3050
def pgroup_nodeid(int pgroup=-1):
3051
"""Returns the relative index of the processor in the processor group
3052
specified by pgroup.
3054
This index will generally differ from the absolute processor index
3055
returned by ga.nodeid if the processor group is not the world group.
3057
This is a local operation.
3065
pgroup = pgroup_get_default()
3066
return GA_Pgroup_nodeid(pgroup)
3068
def pgroup_set_default(int pgroup=-1):
3069
"""Resets the default processor group on a collection of processors.
3071
All processors in the group referenced by p_handle must make a call to
3072
this function. Any standard global array call that is made after resetting
3073
the default processor group will be restricted to processors in that
3074
group. Global arrays that are created after resetting the default
3075
processor group will only be defined on that group and global operations
3076
such as ga.sync or ga.gop will be restricted to processors in that group.
3077
The ga.pgroup_set_default call can be used to rapidly convert large
3078
applications, written with GA, into routines that run on processor groups.
3080
The default processor group can be overridden by using GA calls that
3081
require an explicit group handle as one of the arguments.
3083
This is a collective operation on the group represented by the handle
3088
pgroup = pgroup_get_world()
3089
GA_Pgroup_set_default(pgroup)
3091
def pgroup_split(int pgroup, int num_group):
    """Forward pgroup and num_group to GA_Pgroup_split and return its result.

    :Parameters:
        pgroup : int
            processor group handle
        num_group : int
            passed through to the C routine

    """
    result = GA_Pgroup_split(pgroup, num_group)
    return result
3095
def pgroup_split_irreg(int pgroup, int color):
    """Forward pgroup and color to GA_Pgroup_split_irreg and return its result.

    :Parameters:
        pgroup : int
            processor group handle
        color : int
            passed through to the C routine

    """
    result = GA_Pgroup_split_irreg(pgroup, color)
    return result
3099
def pgroup_sync(int pgroup=-1):
3100
"""Executes a synchronization group across the processors in the processor
3101
group specified by pgroup.
3103
Nodes outside this group are unaffected.
3105
This is a collective operation on the processor group specified by
3110
pgroup = pgroup_get_default()
3111
GA_Pgroup_sync(pgroup)
3113
def print_distribution(int g_a):
    """Print the distribution of the array g_a.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    """
    GA_Print_distribution(g_a)
3121
def print_file(int g_a, file):
3122
"""Prints an entire array to a file.
3124
This is a collective operation.
3128
file-like object which must implement fileno(), or a string
3133
#GA_Print_file(file.fileno(), g_a)
3134
raise NotImplementedError
3136
def print_patch(int g_a, lo=None, hi=None, bint pretty=True):
3137
"""Prints a patch of g_a array to the standard output.
3139
If pretty is False then output is printed in a dense fashion. If
3140
pretty is True then output is formatted and rows/columns labeled.
3142
This is a collective operation.
3145
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
3147
lo_nd,hi_nd = _lohi(g_a,lo,hi)
3150
NGA_Print_patch64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data, apretty)
3153
"""This non-collective (MIMD) operation prints information about:
3155
* number of calls to the GA create/duplicate, destroy, get, put,
3156
* scatter, gather, and read_and_inc operations
3157
* total amount of data moved in the GA primitive operations
3158
* amount of data moved in GA primitive operations to logicaly
3160
* maximum memory consumption in global arrays, and
3161
* number of requests serviced in the interrupt-driven
3162
* implementations by the calling process.
3164
This operation is local.
3169
def print_stdout(int g_a):
3170
"""Prints an entire array to the standard output."""
3173
def proc_topology(int g_a, int proc):
    """Return the coordinates of a processor in the virtual processor grid
    that corresponds to the distribution of array g_a.

    The numbering starts from 0.  The value -1 means that the processor
    doesn't 'own' any section of the array represented by g_a.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle
        proc : int
            process id

    :returns: 1D int array with one coordinate per dimension of g_a

    """
    cdef int ndim = GA_Ndim(g_a)
    cdef np.ndarray[np.int32_t, ndim=1] coord
    # one slot per dimension, filled in by the C library
    coord = np.zeros(ndim, dtype=np.intc)
    NGA_Proc_topology(g_a, proc, <int*>coord.data)
    return coord
3190
def put(int g_a, buffer, lo=None, hi=None):
    """Copy data from the local array buffer to the global array section.

    The local array is assumed to have the same number of dimensions as the
    global array.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            the data to put
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : array-like of integers
            higher bound patch coordinates, exclusive

    """
    # blocking, non-periodic, unstrided: rely on the helper's defaults
    _put_common(g_a, buffer, lo, hi)
3212
cdef _put_common(int g_a, buffer, lo=None, hi=None,
        bint nb=False, bint periodic=False, skip=None):
    """Copies data from local array buffer to the global array section.

    The local array is assumed to have the same shape as the requested region,
    or the local array can be 1-dimensional so long as it has the same number
    of elements as the requested region.  Any detected inconsistencies raise a
    ValueError.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            should either be 1D and len(buffer)==np.prod(hi-lo), or
            np.all(buffer.shape == hi-lo) i.e. buffer is 1D and same size as
            requested region or buffer is the same shape as requested region
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : array-like of integers
            higher bound patch coordinates, exclusive
        nb : bool
            whether this call is non-blocking (see ga.nbput)
        periodic : bool
            whether this call is periodic (see ga.periodic_put)
        skip : 1D array-like of integers
            strides for each dimension

    :returns: None, usually.  However if nb=True, the nonblocking handle is returned.

    """
    cdef np.ndarray buffer_nd
    cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd, ld_nd, shape, skip_nd
    cdef int gtype=inquire_type(g_a)
    cdef ga_nbhdl_t nbhandle
    dtype = _to_dtype[gtype]
    lo_nd,hi_nd = _lohi(g_a,lo,hi)
    # hi_nd is inclusive at this point, hence the +1
    if skip is None:
        shape = hi_nd-lo_nd+1
    else:
        skip_nd = _inta64(skip)
        # floor division keeps the result integral under Python 3 semantics
        shape = (hi_nd-lo_nd)//skip_nd+1
    buffer_nd = np.asarray(buffer, dtype=dtype)
    if buffer_nd.dtype != dtype:
        # report the converted array's dtype; the caller's object may be a
        # plain sequence without a dtype attribute
        raise ValueError("buffer is wrong type :: buffer=%s != %s" % (
                buffer_nd.dtype, dtype))
    # Due to GA restrictions, buffer must not have negative strides
    # and buffer's last stride must be same as itemsize
    strides = [buffer_nd.strides[i]//buffer_nd.itemsize
            for i in range(buffer_nd.ndim)]
    if (strides and (strides[-1] != 1 or np.any(np.asarray(strides) < 0))):
        buffer_nd = np.ascontiguousarray(buffer_nd)
        # the copy has a new memory layout: refresh the element strides so
        # the leading dimensions below describe the buffer actually sent
        strides = [buffer_nd.strides[i]//buffer_nd.itemsize
                for i in range(buffer_nd.ndim)]
    # we allow 1-d "flat" buffers in addition to buffers matching the shape of
    # the requested region
    if buffer_nd.ndim == 1:
        if buffer_nd.size != np.prod(shape):
            raise ValueError('buffer size does not match shape :: '
                    'buffer.size=%s != np.prod(shape)=%s' % (
                    buffer_nd.size, np.prod(shape)))
        # a flat buffer is laid out exactly like the requested region
        ld_nd = shape[1:]
    else:
        buffer_shape = [buffer_nd.shape[i] for i in range(buffer_nd.ndim)]
        # array_equal also copes with a rank mismatch
        if not np.array_equal(buffer_shape, shape):
            raise ValueError('buffer shape does not match request shape :: '
                    'buffer_shape=%s != shape=%s' % (
                    buffer_shape, shape))
        # leading dimensions: ratio of consecutive element strides
        ld_nd = np.asarray([strides[i]//strides[i+1]
                for i in range(buffer_nd.ndim-1)], dtype=np.int64)
    if nb:
        NGA_NbPut64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <void*>buffer_nd.data, <int64_t*>ld_nd.data, &nbhandle)
        return nbhandle
    elif periodic:
        NGA_Periodic_put64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <void*>buffer_nd.data, <int64_t*>ld_nd.data)
    elif skip is not None:
        NGA_Strided_put64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <int64_t*>skip_nd.data,
                <void*>buffer_nd.data, <int64_t*>ld_nd.data)
    else:
        NGA_Put64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data,
                <void*>buffer_nd.data, <int64_t*>ld_nd.data)
3298
def randomize(int g_a, val=None):
3299
"""Fill array with random values in [0,val)."""
3300
cdef int gtype=inquire_type(g_a)
3303
cdef long long llval
3306
cdef long double ldval
3307
cdef SingleComplex fcval
3308
cdef DoubleComplex dcval
3309
cdef void *vval=NULL
3312
vval = _convert_multiplier(gtype, val,
3313
&ival, &lval, &llval,
3314
&fval, &dval, &ldval,
3316
GA_Randomize(g_a, vval)
3318
def read_inc(int g_a, subscript, long inc=1):
    """Atomically read and increment an element in an integer array.

    This is a one-sided and atomic operation.

    :Parameters:
        g_a : int
            the array handle
        subscript : 1D array-like of integers
            index for the referenced element
        inc : long
            the increment

    :returns: the value reported by NGA_Read_inc64

    """
    cdef np.ndarray[np.int64_t, ndim=1] subscript_nd = _inta64(subscript)
    value = NGA_Read_inc64(g_a, <int64_t*>subscript_nd.data, inc)
    return value
3336
def register_dtype(dtype):
    """Create a new GA data type based on the given numpy dtype.

    The new GA type is registered with the dtype's item size and recorded in
    the module's GA-type-to-dtype table.

    :Parameters:
        dtype : dtype
            the numpy dtype to register

    :returns: the new GA type

    """
    np_dtype = np.dtype(dtype) # just in case it's not really a dtype instance
    gatype = NGA_Register_type(np_dtype.itemsize)
    _to_dtype[gatype] = np_dtype
    return gatype
3350
def register_type(size_t bytes):
    """Create a new GA data type of the given size.

    :Parameters:
        bytes : size_t
            the size of the new data type

    :returns: the new GA type as reported by NGA_Register_type

    """
    gatype = NGA_Register_type(bytes)
    return gatype
3360
def recip(int g_a, lo=None, hi=None):
    """Take the element-wise reciprocal of the array or patch.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates
        hi : 1D array-like of integers
            higher bound patch coordinates

    """
    cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
    # no bounds at all: operate on the whole array
    if lo is None and hi is None:
        GA_Recip(g_a)
        return
    lo_nd, hi_nd = _lohi(g_a, lo, hi)
    GA_Recip_patch64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data)
3373
def release(int g_a, lo=None, hi=None):
    """Release access to a global array when the data was read only.

    Your code should look like:

        array = ga.access(g_a)
        # <operate on the data referenced by ptr>
        ga.release(g_a)

    NOTE: see restrictions specified for ga.access

    This operation is local.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates
        hi : 1D array-like of integers
            higher bound patch coordinates

    """
    # update=False: the data was only read
    _release_common(g_a, lo, hi, False)
3389
def release_block(int g_a, int index):
    """Release access to the block of data specified by the integer index
    when the data was accessed as read only.

    This is only applicable to block-cyclic data distributions created using
    the simple block-cyclic distribution.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        index : int
            the block index

    """
    NGA_Release_block(g_a, index)
3399
def release_block_grid(int g_a, subscript):
    """Release access to the block of data specified by the subscript array
    when the data was accessed as read only.

    This is only applicable to block-cyclic data distributions created using
    the SCALAPACK data distribution.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        subscript : 1D array-like of integers
            identifies the block

    """
    cdef np.ndarray[np.int32_t, ndim=1] subscript_nd = _inta32(subscript)
    NGA_Release_block_grid(g_a, <int*>subscript_nd.data)
3413
def release_block_segment(int g_a, int proc):
    """Releases access to the block of locally held data for a block-cyclic
    array, when data was accessed as read-only.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        proc : int
            the processor ID passed through to NGA_Release_block_segment

    """
    NGA_Release_block_segment(g_a, proc)
3422
cdef _release_common(int g_a, lo, hi, bint update):
3424
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd, lo_dst, hi_dst
3425
# first things first, if no data is owned, return silently
3426
lo_dst,hi_dst = distribution(g_a)
3427
# convert hi_dst back to GA inclusive indexing convention
3429
if lo_dst[0] < 0 or hi_dst[0] < 0:
3440
if np.sometrue(lo_nd>hi_nd):
3441
raise ValueError,"lo>hi lo=%s hi=%s"%(lo_nd,hi_nd)
3442
if np.sometrue(lo_nd<lo_dst):
3443
raise ValueError,"lo out of bounds lo_dst=%s lo=%s"%(lo_dst,lo_nd)
3444
if np.sometrue(hi_nd>hi_dst):
3445
raise ValueError,"hi out of bounds hi_dst=%s hi=%s"%(hi_dst,hi_nd)
3447
NGA_Release_update64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data)
3449
NGA_Release64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data)
3451
def release_ghost_element(int g_a, subscript):
    """Releases read-only access to locally held data of an array with ghost
    elements.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        subscript : 1D array-like of integers
            subscript of the element whose access is released

    """
    # the 64-bit C entry point expects an int64_t* buffer
    cdef np.ndarray[np.int64_t, ndim=1] element_subscript = _inta64(subscript)
    NGA_Release_ghost_element64(g_a, <int64_t*>element_subscript.data)
3462
def release_ghosts(int g_a):
    """Releases access to the locally held block of data containing ghost
    elements, when data was accessed as read-only.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle

    """
    NGA_Release_ghosts(g_a)
3471
def release_update(int g_a, lo=None, hi=None):
    """Releases access to the data.

    It must be used if the data was accessed for writing.
    NOTE: see restrictions specified for ga.access.

    This operation is local.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound of the patch being released (optional)
        hi : 1D array-like of integers
            upper bound of the patch being released (optional)

    """
    # the last argument is _release_common's `update` flag; True because the
    # data may have been modified
    _release_common(g_a, lo, hi, True)
3482
def release_update_block(int g_a, int index):
    """Releases access to the block of data specified by the integer index
    when data was accessed in read-write mode.

    This is only applicable to block-cyclic data distributions created using
    the simple block-cyclic distribution.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        index : int
            the index of the block to release

    """
    NGA_Release_update_block(g_a, index)
3494
def release_update_block_grid(int g_a, subscript):
    """Releases read-write access to the block selected by a subscript array.

    This is only applicable to block-cyclic data distributions created using
    the SCALAPACK data distribution.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        subscript : 1D array-like of integers
            subscript of the block to release

    """
    # the C routine takes a plain int*, so go through a 1D int32 array
    cdef np.ndarray[np.int32_t, ndim=1] block_subscript = _inta32(subscript)
    NGA_Release_update_block_grid(g_a, <int*>block_subscript.data)
3508
def release_update_block_segment(int g_a, int proc):
    """Releases access to the block of locally held data for a block-cyclic
    array, when data was accessed in read-write mode.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        proc : int
            the processor ID passed through to NGA_Release_update_block_segment

    """
    NGA_Release_update_block_segment(g_a, proc)
3517
def release_update_ghost_element(int g_a, subscript):
    """Releases read-write access to locally held data of an array with ghost
    elements.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle
        subscript : 1D array-like of integers
            subscript of the element whose access is released

    """
    # the 64-bit C entry point expects an int64_t* buffer
    cdef np.ndarray[np.int64_t, ndim=1] element_subscript = _inta64(subscript)
    NGA_Release_update_ghost_element64(
            g_a, <int64_t*>element_subscript.data)
3529
def release_update_ghosts(int g_a):
    """Releases access to the locally held block of data containing ghost
    elements, when data was accessed in read-write mode.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle

    """
    NGA_Release_update_ghosts(g_a)
3538
def scale(int g_a, value, lo=None, hi=None):
3539
"""Scales an array by the constant s.
3541
Note that the library is unable to detect errors when the pointed value is
3542
of different type than the array.
3544
This is a collective operation.
3547
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
3548
cdef int gtype=inquire_type(g_a)
3551
cdef long long llvalue
3554
cdef long double ldvalue
3555
cdef SingleComplex fcvalue
3556
cdef DoubleComplex dcvalue
3558
vvalue = _convert_multiplier(gtype, value,
3559
&ivalue, &lvalue, &llvalue,
3560
&fvalue, &dvalue, &ldvalue,
3562
if lo is None and hi is None:
3563
GA_Scale(g_a, vvalue)
3565
lo_nd,hi_nd = _lohi(g_a,lo,hi)
3567
g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data, vvalue)
3569
def scale_rows(int g_a, int g_v):
    """Scales the rows of this matrix g_a using the vector g_v.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the matrix
        g_v : int
            the array handle of the vector

    """
    GA_Scale_rows(g_a, g_v)
3577
def scale_cols(int g_a, int g_v):
    """Scales the columns of this matrix g_a using the vector g_v.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the matrix
        g_v : int
            the array handle of the vector

    """
    GA_Scale_cols(g_a, g_v)
3585
def scan_add(int g_src, int g_dst, int g_msk, lo=None, hi=None,
3587
"""Adds successive elements in a source vector g_src and put the results
3588
in a destination vector g_dst.
3590
The addition will restart based on the values of the integer mask vector
3591
g_msk. The scan is performed within the range specified by the integer
3592
values lo and hi. Note that this operation can only be applied to
3593
1-dimensional arrays. The excl flag determines whether the sum starts with
3594
the value in the source vector corresponding to the location of a 1 in the
3595
mask vector (excl=False) or whether the first value is set equal to 0
3596
(excl=True). Some examples of this operation are given below.
3598
ga.scan_add(g_src, g_dst, g_msk, 0, n, False);
3599
g_msk: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
3600
g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
3601
g_dst: 1 3 6 10 16 21 7 15 9 19 30 12 25 39 15 16 33
3603
ga.scan_add(g_src, g_dst, g_msk, 0, n, True);
3604
g_msk: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
3605
g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
3606
g_dst: 0 1 3 6 10 15 0 7 0 9 19 0 12 25 0 0 16
3608
This is a collective operation.
3612
handle for source array
3614
handle for destination array
3616
handle for integer array representing mask
3617
lo : 1D array-like of integers
3618
low value of range on which operation is performed
3619
hi : 1D array-like of integers
3620
hi value of range on which operation is performed
3622
whether the first value is set to 0 (see above)
3625
cdef np.ndarray[np.int64_t, ndim=1] hi_nd = inquire_dims(g_src)-1
3626
cdef int64_t c_lo=0, c_hi=hi_nd[0]
3634
GA_Scan_add64(g_src, g_dst, g_msk, c_lo, c_hi, c_excl)
3636
def scan_copy(int g_src, int g_dst, int g_msk, lo=None, hi=None):
3637
"""This subroutine does a segmented scan-copy of values in the source
3638
array g_src into a destination array g_dst with segments defined by
3639
values in the integer mask array g_msk. The scan-copy operation is only
3640
applied to the range between the lo and hi indices. This operation is
3641
restricted to 1-dimensional arrays. The resulting destination array will
3642
consist of segments of consecutive elements with the same value. An
3643
example is shown below
3645
GA_Scan_copy(g_src, g_dst, g_msk, 0, n);
3646
g_msk: 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 0
3647
g_src: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
3648
g_dst: 1 1 1 1 1 1 7 7 9 9 9 12 12 12 15 16 16
3650
This is a collective operation.
3653
cdef np.ndarray[np.int64_t, ndim=1] hi_nd = inquire_dims(g_src)-1
3654
cdef int64_t c_lo=0, c_hi=hi_nd[0]
3660
GA_Scan_copy64(g_src, g_dst, g_msk, c_lo, c_hi)
3662
def scatter(int g_a, values, subsarray):
3663
"""Scatters array elements from a local array into a global array.
3665
subsarray will be converted to an ndarray if it is not one already. A
3666
two-dimensional array is allowed so long as its shape is (n,ndim) where n
3667
is the number of elements to scatter and ndim is the number of dimensions
3668
of the target array. Also, subsarray must be contiguous.
3670
For example, if the subsarray were two-dimensional::
3673
v[k] = g_a[subsarray[k,0],subsarray[k,1],subsarray[k,2]...]
3675
For example, if the subsarray were one-dimensional::
3679
v[k] = g_a[subsarray[base+0],subsarray[base+1],subsarray[base+2]...]
3681
This is a one-sided operation.
3684
cdef np.ndarray[np.int64_t, ndim=1] subsarray1_nd = None
3685
cdef np.ndarray[np.int64_t, ndim=2] subsarray2_nd = None
3686
cdef np.ndarray values_nd = None
3687
cdef int gtype = inquire_type(g_a)
3688
cdef int ndim = GA_Ndim(g_a)
3692
subsarray1_nd = np.ascontiguousarray(subsarray, dtype=np.int64)
3693
n = len(subsarray1_nd) / ndim
3695
subsarray1_nd = None
3697
subsarray2_nd = np.ascontiguousarray(subsarray, dtype=np.int64)
3698
n = len(subsarray2_nd) # length of first dimension of subsarray2_nd
3700
raise ValueError, "subsarray must be either 1- or 2-dimensional"
3701
# prepare values array
3702
values_nd = np.asarray(values, dtype=_to_dtype[gtype])
3703
if values_nd.ndim != 1:
3704
raise ValueError, "values must be one-dimensional"
3705
if not values_nd.flags['C_CONTIGUOUS']:
3706
raise ValueError, "values must be contiguous"
3707
if len(values_nd) < n:
3708
raise ValueError, "values was not large enough"
3709
# call the wrapped function
3710
if subsarray1_nd is not None:
3711
NGA_Scatter_flat64(g_a, <void*>values_nd.data,
3712
<int64_t*>subsarray1_nd.data, n)
3713
elif subsarray2_nd is not None:
3714
NGA_Scatter_flat64(g_a, <void*>values_nd.data,
3715
<int64_t*>subsarray2_nd.data, n)
3717
raise ValueError, "how did this happen?"
3719
def scatter_acc(int g_a, values, subsarray, alpha=None):
3720
"""Scatters array elements from a local array into a global array.
3722
Like scatter, but adds values to existing values in the global array after
3723
multiplying by alpha.
3725
subsarray will be converted to an ndarray if it is not one already. A
3726
two-dimensional array is allowed so long as its shape is (n,ndim) where n
3727
is the number of elements to scatter and ndim is the number of dimensions
3728
of the target array. Also, subsarray must be contiguous.
3730
For example, if the subsarray were two-dimensional::
3733
v[k] = g_a[subsarray[k,0],subsarray[k,1],subsarray[k,2]...]
3735
For example, if the subsarray were one-dimensional::
3739
v[k] = g_a[subsarray[base+0],subsarray[base+1],subsarray[base+2]...]
3741
This is a one-sided operation.
3744
cdef np.ndarray[np.int64_t, ndim=1] subsarray1_nd = None
3745
cdef np.ndarray[np.int64_t, ndim=2] subsarray2_nd = None
3746
cdef np.ndarray values_nd = None
3747
cdef int gtype = inquire_type(g_a)
3748
cdef int ndim = GA_Ndim(g_a)
3752
cdef long long llalpha
3755
cdef long double ldalpha
3756
cdef SingleComplex fcalpha
3757
cdef DoubleComplex dcalpha
3758
cdef void *valpha=NULL
3761
subsarray1_nd = np.ascontiguousarray(subsarray, dtype=np.int64)
3762
n = len(subsarray1_nd) / ndim
3764
subsarray1_nd = None
3766
subsarray2_nd = np.ascontiguousarray(subsarray, dtype=np.int64)
3767
n = len(subsarray2_nd) # length of first dimension of subsarray2_nd
3769
raise ValueError, "subsarray must be either 1- or 2-dimensional"
3770
# prepare values array
3771
values_nd = np.asarray(values, dtype=_to_dtype[gtype])
3772
if values_nd.ndim != 1:
3773
raise ValueError, "values must be one-dimensional"
3774
if not values_nd.flags['C_CONTIGUOUS']:
3775
raise ValueError, "values must be contiguous"
3776
if len(values_nd) < n:
3777
raise ValueError, "values was not large enough"
3781
valpha = _convert_multiplier(gtype, alpha,
3782
&ialpha, &lalpha, &llalpha,
3783
&falpha, &dalpha, &ldalpha,
3785
# call the wrapped function
3786
if subsarray1_nd is not None:
3787
NGA_Scatter_acc_flat64(g_a, <void*>values_nd.data,
3788
<int64_t*>subsarray1_nd.data, n, valpha)
3789
elif subsarray2_nd is not None:
3790
NGA_Scatter_acc_flat64(g_a, <void*>values_nd.data,
3791
<int64_t*>subsarray2_nd.data, n, valpha)
3793
raise ValueError, "how did this happen?"
3795
def select_elem(int g_a, char *op):
3796
"""Returns the value and index for an element that is selected by the
3797
specified operator in a global array corresponding to g_a handle.
3799
This is a collective operation.
3801
:returns: the selected element and the array index for the selected element
3804
cdef np.ndarray[np.int64_t, ndim=1] index
3805
cdef int gtype=inquire_type(g_a)
3808
cdef long long llalpha
3811
cdef long double ldalpha
3812
cdef SingleComplex fcalpha
3813
cdef DoubleComplex dcalpha
3814
cdef void *valpha=NULL
3815
valpha = _convert_multiplier(gtype, 0,
3816
&ialpha, &lalpha, &llalpha,
3817
&falpha, &dalpha, &ldalpha,
3819
index = np.ndarray(GA_Ndim(g_a), dtype=np.int64)
3820
NGA_Select_elem64(g_a, op, valpha, <int64_t*>index.data)
3823
elif gtype == C_LONG:
3825
elif gtype == C_LONGLONG:
3826
return llalpha,index
3827
elif gtype == C_FLOAT:
3829
elif gtype == C_DBL:
3831
elif gtype == C_LDBL:
3832
return ldalpha,index
3833
elif gtype == C_SCPL:
3834
return fcalpha,index
3835
elif gtype == C_DCPL:
3836
return dcalpha,index
3838
raise TypeError, "type of g_a not recognized"
3840
def select_elem_min(int g_a):
    """Equivalent to ga.select_elem(g_a, "min").

    :Parameters:
        g_a : int
            the array handle

    :returns: whatever ga.select_elem returns for the "min" operator

    """
    return select_elem(g_a, "min")
3844
def select_elem_max(int g_a):
    """Equivalent to ga.select_elem(g_a, "max").

    :Parameters:
        g_a : int
            the array handle

    :returns: whatever ga.select_elem returns for the "max" operator

    """
    return select_elem(g_a, "max")
3848
def set_array_name(int g_a, char *name):
    """Assigns a unique character string name to a global array handle that
    was obtained using the GA_Create_handle function.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        name : string
            the array name

    """
    GA_Set_array_name(g_a, name)
3857
def set_block_cyclic(int g_a, dims):
    """Creates a global array with a simple block-cyclic data distribution.

    The array is broken up into blocks of size dims and each block is numbered
    sequentially using a column major indexing scheme. The blocks are then
    assigned in a simple round-robin fashion to processors. Blocks at the
    edge of the array may be smaller than the block size specified in dims.
    For example, for an array containing 25 blocks distributed on 4
    processors, blocks 4,9,14,19,20,21,22,23, and 24 might be smaller than
    the remaining blocks.

    Most global array operations are insensitive to whether or not a
    block-cyclic data distribution is used, although performance may be
    slower in some cases if the global array is using a block-cyclic data
    distribution. Individual data blocks can be accessed using the
    block-cyclic access functions.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        dims : 1D array-like of integers
            the block size along each dimension

    """
    # the C routine takes a plain int*, so go through a 1D int32 array
    cdef np.ndarray[np.int32_t, ndim=1] block_dims = _inta32(dims)
    GA_Set_block_cyclic(g_a, <int*>block_dims.data)
3880
def set_block_cyclic_proc_grid(int g_a, block, proc_grid):
    """Creates a global array with a SCALAPACK-type block cyclic data
    distribution.

    The user specifies the dimensions of the processor grid in the array
    proc_grid. The product of the processor grid dimensions must equal the
    total number of processors, and the number of dimensions in the processor
    grid must be the same as the number of dimensions in the global array.
    The data blocks are mapped onto the processor grid in a cyclic manner
    along each of the processor grid axes, e.g. an array consisting of 25
    data blocks distributed on 6 processors configured in a 3 by 2 processor
    grid. Blocks at the edge of the array may be smaller than the block size
    specified in block.

    Most global array operations are insensitive to whether or not a
    block-cyclic data distribution is used, although performance may be
    slower in some cases if the global array is using a block-cyclic data
    distribution. Individual data blocks can be accessed using the
    block-cyclic access functions.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        block : 1D array-like of integers
            the block size along each dimension
        proc_grid : 1D array-like of integers
            the processor grid dimensions

    """
    # both arguments reach the C routine as plain int* buffers
    cdef np.ndarray[np.int32_t, ndim=1] block_dims = _inta32(block)
    cdef np.ndarray[np.int32_t, ndim=1] grid_dims = _inta32(proc_grid)
    GA_Set_block_cyclic_proc_grid(
            g_a, <int*>block_dims.data, <int*>grid_dims.data)
3909
def set_chunk(int g_a, chunk):
    """Sets the chunk array for a global array handle that was obtained using
    the GA_Create_handle function.

    The chunk array is used to determine the minimum number of array elements
    assigned to each processor along each coordinate direction.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        chunk : 1D array-like of integers
            minimum number of elements per processor along each dimension

    """
    # the 64-bit C entry point expects an int64_t* buffer
    cdef np.ndarray[np.int64_t, ndim=1] min_chunk = _inta64(chunk)
    GA_Set_chunk64(g_a, <int64_t*>min_chunk.data)
3922
def set_data(int g_a, dims, int type):
    """Sets the array dimension, the coordinate dimensions, and the data type
    assigned to a global array handle obtained using the ga.create_handle
    function.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        dims : 1D array-like of integers
            the shape of the array; its length is passed as the number of
            dimensions
        type : int
            the GA data type of the array

    """
    # `type` shadows the builtin but is part of the public signature
    cdef np.ndarray[np.int64_t, ndim=1] shape = _inta64(dims)
    GA_Set_data64(g_a, len(shape), <int64_t*>shape.data, type)
3934
def set_debug(bint debug):
3935
"""Sets an internal flag in the GA library to either True or False.
3937
The value of this flag can be recovered at any time using the ga.get_debug
3938
function. The flag is set to false when the the GA library is initialized.
3939
This can be useful in a number of debugging situations, especially when
3940
examining the behavior of routines that are called in multiple locations
3943
This is a local operation.
3948
def set_diagonal(int g_a, int g_v):
    """Sets the diagonal elements of this matrix g_a with the elements of the
    vector g_v.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the matrix
        g_v : int
            the array handle of the vector

    """
    GA_Set_diagonal(g_a, g_v)
3957
def set_ghosts(int g_a, width):
    """Sets the ghost cell widths for a global array handle that was obtained
    using the ga.create_handle function.

    The ghost cell widths indicate how many ghost cells are used to pad the
    locally held array data along each dimension. The padding can be set
    independently for each coordinate dimension.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        width : 1D array-like of integers
            the ghost cell width along each dimension

    """
    # the 64-bit C entry point expects an int64_t* buffer
    cdef np.ndarray[np.int64_t, ndim=1] ghost_width = _inta64(width)
    GA_Set_ghosts64(g_a, <int64_t*>ghost_width.data)
3972
def set_irreg_distr(int g_a, mapc, nblock):
    """Partitions the array data among the individual processors for a global
    array handle obtained using the ga.create_handle function.

    The distribution is specified as a Cartesian product of distributions for
    each dimension. For example, a 2-dimensional 8x10 array can be
    distributed on 6 (or more) processors with nblock(2)={3,2}; the size of
    the mapc array is then s=5 and mapc contains the elements
    mapc={1,3,7, 1, 6}. That distribution is nonuniform because P1 and P4
    get 20 elements each and processors P0,P2,P3, and P5 only 10 elements
    each.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        mapc : 1D array-like of integers
            starting index for each block, concatenated over all dimensions
        nblock : 1D array-like of integers
            number of blocks each dimension is divided into

    """
    # NOTE(review): the previous docstring also described a width() array
    # controlling ghost cells; this function takes no such argument (ghost
    # widths are set through ga.set_ghosts).
    cdef np.ndarray[np.int64_t, ndim=1] block_starts = _inta64(mapc)
    cdef np.ndarray[np.int64_t, ndim=1] blocks_per_dim = _inta64(nblock)
    GA_Set_irreg_distr64(
            g_a,
            <int64_t*>block_starts.data,
            <int64_t*>blocks_per_dim.data)
4005
def set_memory_limit(size_t limit):
    """Sets the amount of memory to be used (in bytes) per process.

    This is a local operation.

    :Parameters:
        limit : size_t
            the amount of memory in bytes per process

    """
    GA_Set_memory_limit(limit)
4017
def set_pgroup(int g_a, int pgroup):
    """Sets the processor configuration assigned to a global array handle that
    was obtained using the ga.create_handle function.

    It can be used to create mirrored arrays by using the mirrored array
    processor configuration in this function call. It can also be used to
    create an array on a processor group by using a processor group handle in
    this call.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle
        pgroup : int
            the processor group handle

    """
    GA_Set_pgroup(g_a, pgroup)
4031
def shift_diagoal(int g_a, value=None):
4032
"""Adds this constant to the diagonal elements of the matrix.
4034
This is a collective operation.
4039
cdef long long llvalue
4042
cdef long double ldvalue
4043
cdef SingleComplex fcvalue
4044
cdef DoubleComplex dcvalue
4046
cdef int gtype=inquire_type(g_a)
4049
vvalue = _convert_multiplier(gtype, value,
4050
&ivalue, &lvalue, &llvalue,
4051
&fvalue, &dvalue, &ldvalue,
4053
GA_Shift_diagonal(g_a, vvalue)
4055
def solve(int g_a, int g_b):
    """Solves a system of linear equations A * X = B.

    It first calls the Cholesky factorization routine and, if successful,
    solves the system with the Cholesky solver. If Cholesky is not able to
    factorize A, it calls the LU factorization routine and solves the system
    with forward/backward substitution. On exit B will contain the solution
    X.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle for the coefficient matrix A
        g_b : int
            the array handle for the right-hand side B (overwritten with X)

    :returns: 0 if Cholesky factorization was successful. >0 if the leading minor of this order is not positive definite, Cholesky factorization could not be completed and LU factorization was used

    """
    return GA_Solve(g_a, g_b)
4071
def spd_invert(int g_a):
    """Computes the inverse of a double precision matrix using the Cholesky
    factorization of a NxN double precision symmetric positive definite matrix
    A stored in the global array represented by g_a. On successful exit, A
    will contain the inverse.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    :returns: 0 if successful exit; >0 if the leading minor of this order is not positive definite and the factorization could not be completed; <0 if it returns the index i of the (i,i) element of the factor L/U that is zero and the inverse could not be computed

    """
    return GA_Spd_invert(g_a)
4084
def step_max(int g_a, int g_b, alo=None, ahi=None, blo=None, bhi=None):
4085
"""Calculates the largest multiple of a vector g_b that can be added to
4086
this vector g_a while keeping each element of this vector non-negative.
4088
This is a collective operation.
4091
cdef np.ndarray[np.int64_t, ndim=1] alo_nd, ahi_nd
4092
cdef np.ndarray[np.int64_t, ndim=1] blo_nd, bhi_nd
4094
if (alo is None and ahi is None
4095
and blo is None and bhi is None):
4096
GA_Step_max(g_a, g_b, &step)
4098
alo_nd,ahi_nd = _lohi(g_a,alo,ahi)
4099
blo_nd,bhi_nd = _lohi(g_b,blo,bhi)
4100
GA_Step_max_patch64(g_a, <int64_t*>alo_nd.data, <int64_t*>ahi_nd.data,
4101
g_b, <int64_t*>blo_nd.data, <int64_t*>bhi_nd.data, &step)
4104
def strided_acc(int g_a, buffer, lo=None, hi=None, skip=None, alpha=None):
    """Strided version of ga.acc.

    The values corresponding to dimension n in buf are accumulated to every
    skip[n] values of the global array g_a.

    Combines data from buffer with data in the global array patch.

    The buffer array is assumed to have the same number of dimensions as
    the global array. If the buffer is not contiguous, a contiguous copy will
    be made.

        global array section (lo[],hi[]) += alpha * buffer

    This is a one-sided and atomic operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            must be contiguous and have same number of elements as patch
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        skip : 1D array-like of integers
            strides for each dimension
        alpha : object
            multiplier (converted to the appropriate type)

    """
    # all the work happens in the shared accumulate helper; skip is what makes
    # this the strided variant
    _acc_common(g_a, buffer, lo, hi, alpha, False, False, skip)
4135
def strided_get(int g_a, lo=None, hi=None, skip=None, np.ndarray buffer=None):
    """Strided version of ga.get.

    Copies data from global array section to the local array buffer.

    The local array is assumed to have the same number of dimensions as the
    global array. Any detected inconsistencies/errors in the input arguments
    are fatal.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        skip : 1D array-like of integers
            strides for each dimension
        buffer : ndarray
            an ndarray of the appropriate type, large enough to hold lo,hi

    :returns: The local array buffer.

    """
    # all the work happens in the shared get helper; skip is what makes this
    # the strided variant
    return _get_common(g_a, lo, hi, buffer, False, False, skip)
4163
def strided_put(int g_a, buffer, lo=None, hi=None, skip=None):
    """Strided version of ga.put.

    Copies data from local array buffer to the global array section.

    The local array is assumed to have the same number of dimensions as the
    global array. Any detected inconsistencies/errors in input arguments are
    fatal.

    This is a one-sided operation.

    :Parameters:
        g_a : int
            the array handle
        buffer : array-like
            the local data to copy into the global array section
        lo : 1D array-like of integers
            lower bound patch coordinates, inclusive
        hi : 1D array-like of integers
            higher bound patch coordinates, exclusive
        skip : 1D array-like of integers
            strides for each dimension

    """
    # all the work happens in the shared put helper; skip is what makes this
    # the strided variant
    _put_common(g_a, buffer, lo, hi, False, False, skip)
4189
def summarize(bint verbose):
    """Prints info about allocated arrays.

    :Parameters:
        verbose : bool
            flag forwarded unchanged to GA_Summarize

    """
    GA_Summarize(verbose)
4193
def symmetrize(int g_a):
4194
"""Symmetrizes matrix A represented with handle g_a: A:= .5 * (A+A').
4196
This is a collective operation.
4202
"""Synchronize processes (a barrier) and ensure that all GA operations
4205
This is a collective operation.
4211
"""Delete all active arrays and destroy internal data structures.
4213
This is a collective operation.
4217
_initialized = False
4220
def total_blocks(int g_a):
    """Returns the total number of blocks contained in a global
    array with a block-cyclic data distribution.

    This is a local operation.

    :Parameters:
        g_a : int
            the array handle

    :returns: the block count reported by GA_Total_blocks

    """
    return GA_Total_blocks(g_a)
4229
def transpose(int g_a, int g_b):
    """Transposes a matrix: B = A', where A and B are represented by handles
    g_a and g_b.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle of the source matrix A
        g_b : int
            the array handle of the destination matrix B

    """
    GA_Transpose(g_a, g_b)
4238
def unlock(int mutex):
4239
"""Unlocks a mutex object identified by the mutex number. It is a fatal
4240
error for a process to attempt to unlock a mutex which has not been locked
4244
def unpack(int g_src, int g_dst, int g_msk, lo=None, hi=None):
    """Expands the values in the source vector into a larger destination vector.

    The unpack subroutine is designed to expand the values in the source
    vector g_src into a larger destination array g_dst based on the values in
    an integer mask array g_msk. The values lo and hi denote the range of
    elements that should be uncompressed and icount is the number of values
    placed in the uncompressed array. This operation is the complement of
    the ga.pack operation. An example is shown below::

        icount = ga.unpack(g_src, g_dst, g_msk, 1, n)
        g_src: 1  7  9 12 15 16
        g_msk: 1  0  0  0  0  0  1  0  1  0  0  1  0  0  1  1  0
        g_dst: 1  0  0  0  0  0  7  0  9  0  0 12  0  0 15 16  0

    This is a collective operation.

    :Parameters:
        g_src : int
            handle for source array
        g_dst : int
            handle for destination array
        g_msk : int
            handle for integer array representing mask
        lo : 1D array-like of integers
            low value of range on which operation is performed
        hi : 1D array-like of integers
            hi value of range on which operation is performed

    :returns: icount, the number of values placed in the uncompressed array

    """
    # default range is the whole source vector: [0, last valid index]
    cdef np.ndarray[np.int64_t, ndim=1] hi_nd = inquire_dims(g_src)-1
    cdef int64_t c_lo=0, c_hi=hi_nd[0], icount=0
    # NOTE(review): explicit lo/hi are forwarded unchanged, exactly as the
    # previous call did; confirm whether hi should instead be converted from
    # the module's half-open convention to GA's inclusive one.
    if lo is not None:
        c_lo = lo
    if hi is not None:
        c_hi = hi
    # Pass the C-typed bounds. The previous call handed the raw Python lo/hi
    # to the C routine, so the documented None defaults could not be
    # converted to int64_t.
    GA_Unpack64(g_src, g_dst, g_msk, c_lo, c_hi, &icount)
    return icount
4285
def update_ghosts(int g_a):
    """Updates the ghost cell regions on each processor with the
    corresponding neighbor data from other processors.

    The operation assumes that all data is wrapped around using periodic
    boundary data so that ghost cell data that goes beyond an array boundary
    is wrapped around to the other end of the array. The ga.update_ghosts call
    contains two ga.sync calls before and after the actual update operation.
    For some applications these calls may be unnecessary, if so they can be
    removed using the ga.mask_sync subroutine.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    """
    GA_Update_ghosts(g_a)
4301
def update_ghost_dir(int g_a, int dimension, int dir, int flag):
    """Updates the ghost cells along an individual direction.

    It is designed for algorithms that can overlap updates with computation.
    The variable dimension indicates which coordinate direction is to be
    updated (e.g. dimension = 1 would correspond to the y axis in a two or
    three dimensional system), the variable dir can take the values +/-1 and
    indicates whether the side that is to be updated lies in the positive or
    negative direction, and flag indicates whether or not the corners on the
    side being updated are to be included in the update. The following calls
    would be equivalent to a call to ga.update_ghosts for a 2-dimensional
    system::

        ga.update_ghost_dir(g_a, 0, -1, 1)
        ga.update_ghost_dir(g_a, 0,  1, 1)
        ga.update_ghost_dir(g_a, 1, -1, 0)
        ga.update_ghost_dir(g_a, 1,  1, 0)

    The variable flag is set equal to 1 (or non-zero) in the first two calls
    so that the corner ghost cells are updated; it is set equal to 0 in the
    second two calls to avoid redundant updates of the corners. Note that
    updating the ghost cells using several independent calls to
    ga.update_ghost_dir is generally not as efficient as using
    ga.update_ghosts unless the individual calls can be effectively
    overlapped with computation.

    :Parameters:
        g_a : int
            the array handle
        dimension : int
            the coordinate direction to update
        dir : int
            +1 or -1, the side to update along that dimension
        flag : int
            non-zero to include the corner ghost cells in the update

    """
    NGA_Update_ghost_dir(g_a, dimension, dir, flag)
4332
if GA_Uses_ma() == 1:
4337
"""This function return a wall (or elapsed) time on the calling processor.
4338
Returns time in seconds representing elapsed wall-clock time since an
4339
arbitrary time in the past. Example:
4341
starttime = ga.wtime()
4342
# .... code snippet to be timed ....
4343
endtime = ga.wtime()
4344
print "Time taken = %s seconds" % (endtime-starttime)
4346
This is a local operation.
4348
This function is only available in release 4.1 or greater.
4353
def zero(int g_a, lo=None, hi=None):
4354
"""Set all the elements in the array or patch to zero."""
4355
cdef np.ndarray[np.int64_t, ndim=1] lo_nd, hi_nd
4356
if lo is None and hi is None:
4359
lo_nd,hi_nd = _lohi(g_a,lo,hi)
4360
NGA_Zero_patch64(g_a, <int64_t*>lo_nd.data, <int64_t*>hi_nd.data)
4362
def zero_diagonal(int g_a):
    """Sets the diagonal elements of this matrix g_a with zeros.

    This is a collective operation.

    :Parameters:
        g_a : int
            the array handle

    """
    GA_Zero_diagonal(g_a)