1
/* begin_generated_IBM_copyright_prolog */
3
/* ---------------------------------------------------------------- */
4
/* (C)Copyright IBM Corp. 2007, 2008 */
6
/* ---------------------------------------------------------------- */
8
/* end_generated_IBM_copyright_prolog */
10
* \file armci/src/x/armcix.h
11
* \brief ARMCI Extension interface.
13
#ifndef __armci_src_x_armcix_h
14
#define __armci_src_x_armcix_h
23
* \brief Creates a compile error if the condition is false.
25
* This macro must be used within a function for the compiler to process it.
26
* It is suggested that C++ classes and C files create an inline function
27
* similar to the following example. The inline function is never used at
28
* runtime and should be optimized out by the compiler. It exists for the sole
29
* purpose of moving runtime \c assert calls to compile-time errors.
32
* static inline void compile_time_assert ()
34
* // This compile time assert will succeed.
35
* COMPILE_TIME_ASSERT(sizeof(char) <= sizeof(double));
37
* // This compile time assert will fail.
38
* COMPILE_TIME_ASSERT(sizeof(double) <= sizeof(char));
42
* Compile time assert errors will look similar to the following:
45
* foo.h: In function compile_time_assert:
46
* foo.h:43: error: duplicate case value
47
* foo.h:43: error: previously used here
50
/*
 * Compile-time assertion. Expands to a switch statement whose labels are
 * `case 0` and `case (expr)`: when expr is false (0) the two labels collide
 * and the compiler reports a duplicate case value; when expr is true the
 * switch only reaches an empty statement. Must be used inside a function
 * body, and expr must be an integer constant expression. The argument is
 * parenthesized so the complete expression always forms the case label.
 */
#define COMPILE_TIME_ASSERT(expr) switch (0) { case 0: case (expr): ; }
53
* \brief Assert during compile if certain conditions are not met.
55
static inline void armcix_compile_time_assert ()
58
* Assert that the size of the internal armci handle data structure is less
59
* than or equal to the size of the public armci handle data structure.
61
COMPILE_TIME_ASSERT(sizeof(armci_ireq_t)<=sizeof(armci_hdl_t));
65
* \brief Initialize the ARMCI Extension.
67
* \todo Define return values.
73
* \brief Initialize the ARMCI Extension lock resources.
75
* \param[in] local_memlock_table memlock table
77
/* Initializes the ARMCIX lock resources from the supplied memlock table. */
void ARMCIX_init_memlock(memlock_t *local_memlock_table);
80
* \brief ARMCI Extension blocking memory lock operation.
82
* Send a lock request to the remote node and block until the lock has been
83
* acquired on the remote node.
85
* \param[in] pstart The start virtual address of the range of memory to lock.
86
* \param[in] pend The end virtual address of the range of memory to lock.
87
* \param[in] proc Remote process(or) ID
89
/* Requests a lock on the address range pstart..pend held by remote process
 * proc and blocks until that lock has been acquired. */
void ARMCIX_Lockmem(void *pstart, void *pend, int proc);
92
* \brief ARMCI Extension release memory lock operation.
94
* Send a lock release message to the remote node. This is a \e fire-and-forget
95
* operation because the node does not block for an acknowledgement that the
96
* lock release was successful.
98
* \param[in] proc Remote process rank
100
/* Sends a lock-release message to remote process proc; does not wait for an
 * acknowledgement that the release succeeded. */
void ARMCIX_Unlockmem(int proc);
103
* \brief ARMCI Extension blocking wait operation for a specific request
105
* The armcix_opaque_t structure is a field in the armci_ireq_t structure
106
* and is used to maintain ARMCIX state information for an operation in
109
* \param[in] cmpl_info Pointer to the ARMCIX opaque object
111
* \todo define return values
116
/* Blocking wait on the single request tracked by the ARMCIX opaque object
 * cmpl_info. Return values are not yet defined. */
int ARMCIX_Wait(armcix_opaque_t *cmpl_info);
119
* \brief ARMCI Extension blocking wait operation for all requests to a specific process
121
* All existing requests to the remote process are complete after this function returns.
123
* \param[in] proc Remote process rank
125
* \todo define return values
128
/* Blocking wait for every outstanding request to remote process proc.
 * Return values are not yet defined. */
int ARMCIX_WaitProc(int proc);
131
* \brief ARMCI Extension blocking wait operation for all requests to all processes
133
* All existing requests to all processes are completed after this function returns.
135
* \todo define return values
138
/* Blocking wait for every outstanding request to every process. Return
 * values are not yet defined. Declared with (void) so this is a true
 * prototype and calls that pass arguments are diagnosed. */
int ARMCIX_WaitAll(void);
142
* \brief Point-to-point fence operation.
144
* Blocks until all active messages between the local node and the remote
145
* node have completed and been acknowledged by the remote node.
147
* \param[in] proc Rank of the remote node to fence
149
* \see ARMCIX_AllFence
151
/* Point-to-point fence against the remote node with rank proc. */
void ARMCIX_Fence(int proc);
154
* \brief Global fence operation.
156
* Blocks until all active messages between the local node and all remote
157
* nodes have completed and been acknowledged by the remote nodes.
161
/* Global fence against all remote nodes. Declared with (void) so this is a
 * true prototype and calls that pass arguments are diagnosed. */
void ARMCIX_AllFence(void);
164
* \brief ARMCI Extension blocking read-modify-write operation.
172
* \todo define return code and input parameters; add detailed doxygen description
175
/* Blocking read-modify-write. The meaning of the parameters and of the
 * return code is still undocumented in this header. */
int ARMCIX_Rmw(int op, int *ploc, int *prem, int extra, int proc);
178
* \brief ARMCI Extension blocking get operation.
180
* \param[in] src Source buffer on the remote node
181
* \param[in] dst Destination buffer on the local node
182
* \param[in] bytes Number of bytes to transfer
183
* \param[in] proc Remote node rank
185
* \todo define return code; add detailed doxygen description
188
/* Blocking get of bytes bytes from src on remote node proc into the local
 * buffer dst. Return code is not yet defined. */
int ARMCIX_Get(void *src, void *dst, int bytes, int proc);
191
* \brief ARMCI Extension blocking vector get operation.
193
* \param[in] darr Descriptor array
194
* \param[in] len Length of descriptor array
195
* \param[in] proc Remote process(or) ID
197
* \todo define return code; add detailed doxygen description
200
/* Blocking vector get described by the len-entry descriptor array darr,
 * against remote process proc. Return code is not yet defined. */
int ARMCIX_GetV(armci_giov_t *darr, int len, int proc);
203
* \brief ARMCI Extension blocking strided get operation.
205
* \param[in] src_ptr pointer to 1st segment at source
206
* \param[in] src_stride_arr array of strides at source
207
* \param[in] dst_ptr pointer to 1st segment at destination
208
* \param[in] dst_stride_arr array of strides at destination
209
* \param[in] seg_count number of segments at each stride level: count[0]=bytes
210
* \param[in] stride_levels number of stride levels
211
* \param[in] proc remote process(or) ID
213
* \todo define return code; add detailed doxygen description
216
/* Blocking strided get. src_ptr/dst_ptr point at the first segment on each
 * side, the stride arrays describe each side's layout, and seg_count holds
 * the segment count per stride level. Return code is not yet defined. */
int ARMCIX_GetS(void *src_ptr, int *src_stride_arr,
                void *dst_ptr, int *dst_stride_arr,
                int *seg_count, int stride_levels, int proc);
221
* \brief ARMCI Extension non-blocking get operation.
223
* \param[in] src Source buffer on the remote node
224
* \param[in] dst Destination buffer on the local node
225
* \param[in] bytes Number of bytes to transfer
226
* \param[in] proc Remote node rank
227
* \param[in] nb_handle ARMCI non-blocking handle
229
* \todo define return code; add detailed doxygen description
232
/* Non-blocking get of bytes bytes from src on remote node proc into the
 * local buffer dst, associated with nb_handle. Return code is not yet defined. */
int ARMCIX_NbGet(void *src, void *dst, int bytes, int proc,
                 armci_ihdl_t nb_handle);
235
* \brief ARMCI Extension non-blocking vector get operation.
237
* \param[in] darr Descriptor array
238
* \param[in] len Length of descriptor array
239
* \param[in] proc Remote process(or) ID
240
* \param[in] nb_handle ARMCI non-blocking handle
242
* \todo define return code; add detailed doxygen description
245
/* Non-blocking vector get described by the len-entry descriptor array darr,
 * against remote process proc, associated with nb_handle. */
int ARMCIX_NbGetV(armci_giov_t *darr, int len, int proc,
                  armci_ihdl_t nb_handle);
248
* \brief ARMCI Extension non-blocking strided get operation.
250
* \param[in] src_ptr pointer to 1st segment at source
251
* \param[in] src_stride_arr array of strides at source
252
* \param[in] dst_ptr pointer to 1st segment at destination
253
* \param[in] dst_stride_arr array of strides at destination
254
* \param[in] seg_count number of segments at each stride level: count[0]=bytes
255
* \param[in] stride_levels number of stride levels
256
* \param[in] proc remote process(or) ID
257
* \param[in] nb_handle ARMCI non-blocking handle
259
* \todo define return code; add detailed doxygen description
262
int ARMCIX_NbGetS (void * src_ptr, int * src_stride_arr,
263
void * dst_ptr, int * dst_stride_arr,
264
int * seg_count, int stride_levels, int proc,
265
armci_ihdl_t nb_handle);
268
* \brief ARMCI Extension blocking put operation.
270
* \param[in] src Source buffer on the local node
271
* \param[in] dst Destination buffer on the remote node
272
* \param[in] bytes Number of bytes to transfer
273
* \param[in] proc Remote node rank
275
* \todo define return code; add detailed doxygen description
278
/* Blocking put of bytes bytes from the local buffer src to dst on remote
 * node proc. Return code is not yet defined. */
int ARMCIX_Put(void *src, void *dst, int bytes, int proc);
281
* \brief ARMCI Extension blocking vector put operation.
283
* \param[in] darr Descriptor array
284
* \param[in] len Length of descriptor array
285
* \param[in] proc Remote process(or) ID
287
* \todo define return code; add detailed doxygen description
290
/* Blocking vector put described by the len-entry descriptor array darr,
 * against remote process proc. Return code is not yet defined. */
int ARMCIX_PutV(armci_giov_t *darr, int len, int proc);
293
* \brief ARMCI Extension blocking strided put operation.
295
* \param[in] src_ptr pointer to 1st segment at source
296
* \param[in] src_stride_arr array of strides at source
297
* \param[in] dst_ptr pointer to 1st segment at destination
298
* \param[in] dst_stride_arr array of strides at destination
299
* \param[in] seg_count number of segments at each stride level: count[0]=bytes
300
* \param[in] stride_levels number of stride levels
301
* \param[in] proc remote process(or) ID
303
* \todo define return code; add detailed doxygen description
306
/* Blocking strided put. src_ptr/dst_ptr point at the first segment on each
 * side, the stride arrays describe each side's layout, and seg_count holds
 * the segment count per stride level. Return code is not yet defined. */
int ARMCIX_PutS(void *src_ptr, int *src_stride_arr,
                void *dst_ptr, int *dst_stride_arr,
                int *seg_count, int stride_levels, int proc);
311
* \brief ARMCI Extension non-blocking put operation.
313
* \param[in] src Source buffer on the local node
314
* \param[in] dst Destination buffer on the remote node
315
* \param[in] bytes Number of bytes to transfer
316
* \param[in] proc Remote node rank
317
* \param[in] nb_handle ARMCI non-blocking handle
319
* \todo define return code; add detailed doxygen description
322
/* Non-blocking put of bytes bytes from the local buffer src to dst on remote
 * node proc, associated with nb_handle. Return code is not yet defined. */
int ARMCIX_NbPut(void *src, void *dst, int bytes, int proc,
                 armci_ihdl_t nb_handle);
325
* \brief ARMCI Extension non-blocking vector put operation.
327
* \param[in] darr Descriptor array
328
* \param[in] len Length of descriptor array
329
* \param[in] proc Remote process(or) ID
330
* \param[in] nb_handle ARMCI non-blocking handle
332
* \todo define return code; add detailed doxygen description
335
/* Non-blocking vector put described by the len-entry descriptor array darr,
 * against remote process proc, associated with nb_handle. */
int ARMCIX_NbPutV(armci_giov_t *darr, int len, int proc,
                  armci_ihdl_t nb_handle);
338
* \brief ARMCI Extension non-blocking strided put operation.
340
* \param[in] src_ptr pointer to 1st segment at source
341
* \param[in] src_stride_arr array of strides at source
342
* \param[in] dst_ptr pointer to 1st segment at destination
343
* \param[in] dst_stride_arr array of strides at destination
344
* \param[in] seg_count number of segments at each stride level: count[0]=bytes
345
* \param[in] stride_levels number of stride levels
346
* \param[in] proc remote process(or) ID
347
* \param[in] nb_handle ARMCI non-blocking handle
349
* \todo define return code; add detailed doxygen description
352
int ARMCIX_NbPutS (void * src_ptr, int * src_stride_arr,
353
void * dst_ptr, int * dst_stride_arr,
354
int * seg_count, int stride_levels, int proc,
355
armci_ihdl_t nb_handle);
358
* \brief ARMCI Extension blocking accumulate operation.
360
* \param[in] datatype accumulate datatype (operation code)
361
* \param[in] scale opaque pointer to the scaling factor for accumulate
362
* \param[in] src Source buffer on the local node
363
* \param[in] dst Destination buffer on the remote node
364
* \param[in] bytes Number of bytes to transfer
365
* \param[in] proc Remote node rank
367
* \todo define return code; add detailed doxygen description
370
/* Blocking accumulate of bytes bytes from the local buffer src into dst on
 * remote node proc; datatype selects the accumulate operation and scale
 * points at the scaling factor. Return code is not yet defined. */
int ARMCIX_Acc(int datatype, void *scale, void *src, void *dst,
               int bytes, int proc);
373
* \brief ARMCI Extension blocking vector accumulate operation.
375
* \param[in] datatype accumulate datatype (operation code)
376
* \param[in] scale opaque pointer to the scaling factor for accumulate
377
* \param[in] darr descriptor array
378
* \param[in] len length of the descriptor array
379
* \param[in] proc process(or) ID
381
* \todo define return code; add detailed doxygen description
384
/* Blocking vector accumulate described by the len-entry descriptor array
 * darr; datatype selects the accumulate operation and scale points at the
 * scaling factor. Return code is not yet defined. */
int ARMCIX_AccV(int datatype, void *scale, armci_giov_t *darr,
                int len, int proc);
387
* \brief ARMCI Extension blocking strided accumulate operation.
389
* \param[in] datatype accumulate datatype (operation code)
390
* \param[in] scale opaque pointer to the scaling factor for accumulate
391
* \param[in] src_ptr pointer to 1st segment at source
392
* \param[in] src_stride_arr array of strides at source
393
* \param[in] dst_ptr pointer to 1st segment at destination
394
* \param[in] dst_stride_arr array of strides at destination
395
* \param[in] seg_count number of segments at each stride level: count[0]=bytes
396
* \param[in] stride_levels number of stride levels
397
* \param[in] proc remote process(or) ID
399
* \todo define return code; add detailed doxygen description
402
/* Blocking strided accumulate. datatype selects the accumulate operation and
 * scale points at the scaling factor; src_ptr/dst_ptr point at the first
 * segment on each side, the stride arrays describe each side's layout, and
 * seg_count holds the segment count per stride level. Return code is not yet
 * defined. */
int ARMCIX_AccS(int datatype, void *scale,
                void *src_ptr, int *src_stride_arr,
                void *dst_ptr, int *dst_stride_arr,
                int *seg_count, int stride_levels, int proc);
408
* \brief ARMCI Extension non-blocking accumulate operation.
410
* \param[in] datatype accumulate datatype (operation code)
411
* \param[in] scale opaque pointer to the scaling factor for accumulate
412
* \param[in] src Source buffer on the local node
413
* \param[in] dst Destination buffer on the remote node
414
* \param[in] bytes Number of bytes to transfer
415
* \param[in] proc Remote node rank
416
* \param[in] nb_handle ARMCI non-blocking handle
418
* \todo define return code; add detailed doxygen description
421
/* Non-blocking accumulate of bytes bytes from the local buffer src into dst
 * on remote node proc, associated with nb_handle; datatype selects the
 * accumulate operation and scale points at the scaling factor. */
int ARMCIX_NbAcc(int datatype, void *scale, void *src, void *dst,
                 int bytes, int proc, armci_ihdl_t nb_handle);
424
* \brief ARMCI Extension non-blocking vector accumulate operation.
426
* \param[in] datatype accumulate datatype (operation code)
427
* \param[in] scale opaque pointer to the scaling factor for accumulate
428
* \param[in] darr Descriptor array
429
* \param[in] len Length of descriptor array
430
* \param[in] proc Remote process(or) ID
431
* \param[in] nb_handle ARMCI non-blocking handle
433
* \todo define return code; add detailed doxygen description
436
/* Non-blocking vector accumulate described by the len-entry descriptor array
 * darr, associated with nb_handle; datatype selects the accumulate operation
 * and scale points at the scaling factor. */
int ARMCIX_NbAccV(int datatype, void *scale, armci_giov_t *darr,
                  int len, int proc, armci_ihdl_t nb_handle);
439
* \brief ARMCI Extension non-blocking strided accumulate operation.
441
* \param[in] datatype accumulate datatype (operation code)
442
* \param[in] scale opaque pointer to the scaling factor for accumulate
443
* \param[in] src_ptr pointer to 1st segment at source
444
* \param[in] src_stride_arr array of strides at source
445
* \param[in] dst_ptr pointer to 1st segment at destination
446
* \param[in] dst_stride_arr array of strides at destination
447
* \param[in] seg_count number of segments at each stride level: count[0]=bytes
448
* \param[in] stride_levels number of stride levels
449
* \param[in] proc remote process(or) ID
450
* \param[in] nb_handle ARMCI non-blocking handle
452
* \todo define return code; add detailed doxygen description
455
int ARMCIX_NbAccS (int datatype, void * scale,
456
void * src_ptr, int * src_stride_arr,
457
void * dst_ptr, int * dst_stride_arr,
458
int * seg_count, int stride_levels, int proc,
459
armci_ihdl_t nb_handle);
463
* \page get_page Get APIs
465
* This is a description of the ARMCI Extension Get APIs
467
* \section get_blocking Blocking APIs
471
* \section get_nonblocking Non-blocking APIs
478
* \page put_page Put APIs
480
* This is a description of the ARMCI Extension Put APIs
482
* \section put_blocking Blocking APIs
486
* \section put_nonblocking Non-blocking APIs
493
* \page acc_page Accumulate APIs
495
* This is a description of the ARMCI Extension Accumulate APIs
497
* \section acc_blocking Blocking APIs
501
* \section acc_nonblocking Non-blocking APIs
508
* \page blocking_page Blocking APIs
510
* This is a description of the \b blocking ARMCI Extension APIs
512
* \section transfer Data transfer APIs
523
* \section sync Synchronization APIs
525
* - ARMCIX_AllFence()
527
* - ARMCIX_WaitProc()
533
* \page nonblocking_page Non-blocking APIs
535
* This is a description of the \b non-blocking ARMCI Extension APIs
537
* \section nonblocking_transfer Data transfer APIs
550
* \page vector_page Vector APIs
552
* This is a description of the ARMCI Extension vector APIs
554
* \section vector_blocking Blocking APIs
558
* \section vector_nonblocking Non-blocking APIs
565
* \page strided_page Strided APIs
567
* This is a description of the ARMCI Extension strided APIs
569
* \section strided_blocking Blocking APIs
573
* \section strided_nonblocking Non-blocking APIs