5
/* $Id: noncont.c,v 1.3.2.2 2007-05-04 16:43:35 d3p687 Exp $
8
* Developed by Andriy Kot <andriy.kot@pnl.gov>
9
* Copyright (c) 2006 Pacific Northwest National Laboratory
11
* Alternative version of non-contiguous calls using non-blocking ones
14
* 2006-09-08 - created
26
#include "armci_portals.h"
31
/*
 * Debug print helpers.
 *
 * PRN_DBG_MSG3 writes "DBG <armci_me>: " followed by the message m, formatted
 * with the three arguments a1..a3, to stderr and then flushes stderr.
 * m is placed directly after a string literal, so it has to be a string
 * literal itself. The shorter forms forward to PRN_DBG_MSG3 and pass 0 for
 * the arguments they do not take.
 *
 * NOTE(review): the expansion is two statements and is not wrapped in
 * do { } while (0); used as the body of an unbraced if, only the fprintf
 * would be conditional.
 */
# define PRN_DBG_MSG3(m,a1,a2,a3) \
32
fprintf(stderr,"DBG %d: " m,armci_me,a1,a2,a3);fflush(stderr)
33
# define PRN_DBG_MSG(m) PRN_DBG_MSG3(m,0,0,0)
34
# define PRN_DBG_MSG1(m,a1) PRN_DBG_MSG3(m,a1,0,0)
35
# define PRN_DBG_MSG2(m,a1,a2) PRN_DBG_MSG3(m,a1,a2,0)
37
/*
 * Alternative definitions of the same four macros with empty expansions, so
 * every debug print disappears and its arguments are never evaluated.
 * NOTE(review): the preprocessor conditional that selects between the two
 * sets is not visible here - confirm which symbol controls it.
 */
# define PRN_DBG_MSG(m)
38
# define PRN_DBG_MSG1(m,a1)
39
# define PRN_DBG_MSG2(m,a1,a2)
40
# define PRN_DBG_MSG3(m,a1,a2,a3)
44
/*
 * Function entry/exit tracing. _func is printed with %s, so it is expected to
 * be a string; every use in this file passes a string literal.
 * Only the process with armci_me == 0 prints the "ENTERED"/"EXITING" line;
 * the fflush(stdout) sits outside the if and therefore runs on every process.
 */
# define CALL_IN(_func) { if (armci_me == 0) printf("ENTERED %s\n", _func); fflush(stdout); }
45
# define CALL_OUT(_func) { if (armci_me == 0) printf("EXITING %s\n", _func); fflush(stdout); }
47
/*
 * Alternative definitions with empty expansions (tracing disabled).
 * NOTE(review): the conditional choosing between the two pairs is not visible
 * here - confirm the controlling symbol.
 */
# define CALL_IN(_func)
48
# define CALL_OUT(_func)
54
/*
 * HTYPE is the element type of the put_dscr/get_dscr descriptor arrays below.
 * Two alternative definitions are visible: a pointer to ELAN_EVENT (which also
 * defines SHMEM_HANDLE_SUPPORTED) and, under CRAY_SHMEM, armci_ireq_t.
 * NOTE(review): the opening #if of this chain and whatever follows the
 * CRAY_SHMEM branch are not visible here - confirm the conditions and whether
 * SHMEM_HANDLE_SUPPORTED is also defined for the armci_ireq_t case.
 */
typedef ELAN_EVENT *HTYPE;
55
#define SHMEM_HANDLE_SUPPORTED
56
#elif defined(CRAY_SHMEM)
59
typedef armci_ireq_t HTYPE;
62
/* Number of elements in each of the put_dscr and get_dscr arrays. */
#define MAX_SLOTS_LL 64
63
/* NOTE(review): MIN_OUTSTANDING is not referenced in the visible code. */
#define MIN_OUTSTANDING 6
64
/*
 * Upper bound on the batch size in the put/get loops and the value at which
 * cur_put/cur_get wrap back to 0; also the number of slots the wait loops scan.
 */
static int max_pending = 16; /* throttle number of outstanding nb calls */
66
/* might have to use MAX_SLOTS_LL < MAX_PENDING due to throttling problem */
70
/* Per-request descriptors for outstanding puts, indexed by cur_put. */
static HTYPE put_dscr[MAX_SLOTS_LL];
71
/* Per-request descriptors for outstanding gets, indexed by cur_get. */
static HTYPE get_dscr[MAX_SLOTS_LL];
72
/* static variables are already initialized to 0 (?)
73
static HTYPE put_dscr[MAX_SLOTS_LL]= {
74
ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,
75
ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR};
77
static HTYPE get_dscr[MAX_SLOTS_LL] = {
78
ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,
79
ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR,ZR};
83
/* Memory-handle pointers defined elsewhere; not used in the visible code. */
extern ARMCI_MEMHDL_T *mhloc;
84
extern ARMCI_MEMHDL_T *mhrem;
85
/*
 * Descriptor helpers, struct-handle variant.
 * INI_HDL prepares a descriptor for a new request: tag receives the value of
 * GET_NEXT_NBTAG(), proc the target process and bufid NB_NONE.
 * NOTE(review): the line that would consume _op and the closing brace of the
 * macro are not visible here - confirm against the full file.
 */
# define INI_HDL(_hdl, _op, _proc) { \
86
(_hdl).tag = GET_NEXT_NBTAG(); \
88
(_hdl).proc = _proc; \
89
(_hdl).bufid = NB_NONE; \
91
/* A descriptor is considered free when its tag is 0. */
# define CLR_HDL(_hdl) ((_hdl).tag = 0)
/*
 * Non-zero when the descriptor holds a request.
 * NOTE(review): _hdl is not parenthesized in this expansion.
 */
92
# define CHK_HDL(_hdl) (_hdl.tag)
94
/*
 * Descriptor helpers, scalar-handle variant: the handle itself is the
 * in-use flag (ZR means free) and INI_HDL expands to nothing.
 * NOTE(review): the conditional selecting between the two variants and the
 * definition of ZR are not visible here.
 */
# define CLR_HDL(_hdl) ((_hdl) = ZR)
95
# define CHK_HDL(_hdl) (_hdl)
96
# define INI_HDL(_hdl, _op, _proc)
100
/*
 * Index of the put_dscr slot used for the next put; reset to 0 once it
 * reaches max_pending.
 * NOTE(review): the matching cur_get used by the get routines is declared on
 * a line that is not visible here.
 */
static int cur_put=0;
101
/*
 * Tested by armcill_wait_get()/armcill_wait_put(): when zero, those functions
 * return without scanning the descriptor arrays.
 * NOTE(review): the statements that modify these counters are not visible.
 */
static int pending_get=0;
102
static int pending_put=0;
104
/* strided put, nonblocking */
105
/*
 * armcill_put2D - issue count non-blocking puts of bytes bytes each to proc.
 *
 *   proc        target process, passed through to armcill_nb_put
 *   bytes       size of each individual transfer
 *   count       number of transfers to issue
 *   src_ptr     start of the local source data (ps)
 *   dst_ptr     start of the remote destination (pd)
 *   src_stride, dst_stride
 *               not used in the visible lines; presumably the byte step
 *               applied to ps/pd between transfers - TODO confirm
 *
 * Requests are issued in batches of at most max_pending. With
 * SHMEM_HANDLE_SUPPORTED every request gets the descriptor put_dscr[cur_put];
 * a descriptor that is still in use is waited on before it is reused.
 *
 * NOTE(review): the braces, the #else/#endif lines and the statements that
 * advance ps, pd, _j, issued, cur_put and pending_put are not visible here.
 * No wait on the outstanding descriptors is visible at the end of this
 * function; presumably completion is left to armcill_wait_put() - verify.
 */
void armcill_put2D(int proc, int bytes, int count, void* src_ptr,int src_stride,
106
void* dst_ptr,int dst_stride)
108
CALL_IN("armcill_put2D");
110
int _j, i, batch, issued=0;
111
char *ps=src_ptr, *pd=dst_ptr;
113
/* _j is not advanced in the for header; it must be updated inside the body */
for (_j = 0; _j < count; ){
114
/* how big a batch of requests can we issue */
115
batch = (count - _j )<max_pending ? count - _j : max_pending;
117
#ifdef SHMEM_HANDLE_SUPPORTED
118
for(i=0; i< batch; i++){
119
/* slot still holds an earlier request: complete it before reusing the slot */
if (CHK_HDL(put_dscr[cur_put])) armcill_nb_wait(put_dscr[cur_put]);
121
INI_HDL(put_dscr[cur_put], PUT, proc);
122
armcill_nb_put(pd,ps,bytes,proc,put_dscr[cur_put]);
125
/* second loop: presumably the branch without handle support, passing dummy */
for(i=0; i< batch; i++){
127
armcill_nb_put(pd,ps,bytes,proc,dummy);
133
/* wrap the slot index at max_pending */
if(cur_put>=max_pending)cur_put=0;
138
/* presumably guarded by a count/issued comparison on a line not visible here */
armci_die2("armcill_put2D: mismatch %d %d \n", count,issued);
140
CALL_OUT("armcill_put2D");
144
/* blocking vector put */
145
/*
 * armcill_putv - put count segments of bytes bytes each to proc and wait for
 * all of them to complete before continuing.
 *
 *   proc   target process, passed through to armcill_nb_put
 *   bytes  size of each segment
 *   count  number of segments
 *   src    array of local source addresses
 *   dst    array of remote destination addresses
 *
 * Segments are issued in batches of at most max_pending through
 * armcill_nb_put. With SHMEM_HANDLE_SUPPORTED each request uses
 * put_dscr[cur_put], waiting first if that descriptor is still in use, and
 * after the issue loop every in-use descriptor in the first max_pending slots
 * is waited on and cleared.
 *
 * NOTE(review): the declarations of ps/pd (presumably taken from src[]/dst[]),
 * the updates of _j, issued and cur_put, the braces and the #else/#endif lines
 * are not visible here - confirm against the full file.
 */
void armcill_putv(int proc, int bytes, int count, void* src[], void* dst[])
147
int _j, i, batch, issued=0;
150
/* _j is not advanced in the for header; it must be updated inside the body */
for (_j = 0; _j < count; ){
151
/* how big a batch of requests can we issue */
152
batch = (count - _j )<max_pending ? count - _j : max_pending;
154
#ifdef SHMEM_HANDLE_SUPPORTED
155
for(i=0; i< batch; i++){
156
/* slot still holds an earlier request: complete it before reusing the slot */
if (CHK_HDL(put_dscr[cur_put])) armcill_nb_wait(put_dscr[cur_put]);
160
INI_HDL(put_dscr[cur_put], PUT, proc);
161
armcill_nb_put(pd,ps,bytes,proc,put_dscr[cur_put]);
164
/* second loop: presumably the branch without handle support, passing dummy */
for(i=0; i< batch; i++){
166
armcill_nb_put(pd,ps,bytes,proc,dummy);
170
/* wrap the slot index at max_pending */
if(cur_put>=max_pending)cur_put=0;
174
/*
 * NOTE(review): unlike the mismatch messages in armcill_put2D, armcill_get2D
 * and armcill_getv, this string carries no %d conversions for count/issued -
 * confirm how armci_die2 reports its two integer arguments.
 */
armci_die2("armcill_putv: mismatch\n", count,issued);
176
#ifdef SHMEM_HANDLE_SUPPORTED
177
/* blocking part: complete and release every descriptor still in use */
for(i=0; i<max_pending; i++) if(CHK_HDL(put_dscr[i])){
178
armcill_nb_wait(put_dscr[i]);
179
CLR_HDL(put_dscr[i]);
189
/* strided get, nonblocking */
190
/*
 * armcill_get2D - issue count non-blocking gets of bytes bytes each from proc.
 *
 *   proc        remote process, passed through to armcill_nb_get
 *   bytes       size of each individual transfer
 *   count       number of transfers to issue
 *   src_ptr     start of the source data (ps)
 *   dst_ptr     start of the destination (pd)
 *   src_stride, dst_stride
 *               not used in the visible lines; presumably the byte step
 *               applied to ps/pd between transfers - TODO confirm
 *
 * Requests are issued in batches of at most max_pending. With
 * SHMEM_HANDLE_SUPPORTED every request gets the descriptor get_dscr[cur_get];
 * a descriptor that is still in use is waited on before it is reused.
 *
 * NOTE(review): the braces, the #else/#endif lines and the statements that
 * advance ps, pd, _j, issued, cur_get and pending_get are not visible here.
 * No wait on the outstanding descriptors is visible at the end of this
 * function; presumably completion is left to armcill_wait_get() - verify.
 */
void armcill_get2D(int proc, int bytes, int count, void* src_ptr,int src_stride,
191
void* dst_ptr,int dst_stride)
193
CALL_IN("armcill_get2D");
194
PRN_DBG_MSG3("armcill_get2D: proc=%d, bytes=%d, count=%d\n", proc, bytes, count);
196
int _j, i, batch, issued=0;
197
char *ps=src_ptr, *pd=dst_ptr;
199
/* _j is not advanced in the for header; it must be updated inside the body */
for (_j = 0; _j < count; ){
200
/* how big a batch of requests can we issue */
201
batch = (count - _j )<max_pending ? count - _j : max_pending;
203
#ifdef SHMEM_HANDLE_SUPPORTED
204
for(i=0; i< batch; i++){
205
/* the value printed under the label cur_ptr is cur_get; reading .tag requires a struct HTYPE */
PRN_DBG_MSG2("inner loop: cur_ptr=%d, tag=%d\n", cur_get, get_dscr[cur_get].tag);
206
/* slot still holds an earlier request: complete it before reusing the slot */
if (CHK_HDL(get_dscr[cur_get])) armcill_nb_wait(get_dscr[cur_get]);
208
PRN_DBG_MSG1("inner loop: pending_get=%d\n", pending_get);
209
INI_HDL(get_dscr[cur_get], GET, proc);
210
armcill_nb_get(pd,ps,bytes,proc,get_dscr[cur_get]);
211
PRN_DBG_MSG("inner loop: after get\n");
214
/* second loop: presumably the branch without handle support, passing dummy */
for(i=0; i< batch; i++){
216
armcill_nb_get(pd,ps,bytes,proc,dummy);
222
/* wrap the slot index at max_pending */
if(cur_get>=max_pending)cur_get=0;
227
/* presumably guarded by a count/issued comparison on a line not visible here */
armci_die2("armcill_get2D: mismatch %d %d \n", count,issued);
229
CALL_OUT("armcill_get2D");
233
/* blocking vector get */
234
/*
 * armcill_getv - get count segments of bytes bytes each from proc and wait
 * for all of them to complete before continuing.
 *
 *   proc   remote process, passed through to armcill_nb_get
 *   bytes  size of each segment
 *   count  number of segments
 *   src    array of source addresses
 *   dst    array of destination addresses
 *
 * Segments are issued in batches of at most max_pending through
 * armcill_nb_get. With SHMEM_HANDLE_SUPPORTED each request uses
 * get_dscr[cur_get], waiting first if that descriptor is still in use, and
 * after the issue loop every in-use descriptor in the first max_pending slots
 * is waited on and cleared.
 *
 * NOTE(review): the declarations of ps/pd (presumably taken from src[]/dst[]),
 * the updates of _j, issued and cur_get, the braces and the #else/#endif lines
 * are not visible here - confirm against the full file.
 */
void armcill_getv(int proc, int bytes, int count, void* src[], void* dst[])
236
int _j, i, batch, issued=0;
239
/* _j is not advanced in the for header; it must be updated inside the body */
for (_j = 0; _j < count; ){
240
/* how big a batch of requests can we issue */
241
batch = (count - _j )<max_pending ? count - _j : max_pending;
243
#ifdef SHMEM_HANDLE_SUPPORTED
244
for(i=0; i< batch; i++){
245
/* slot still holds an earlier request: complete it before reusing the slot */
if (CHK_HDL(get_dscr[cur_get])) armcill_nb_wait(get_dscr[cur_get]);
249
INI_HDL(get_dscr[cur_get], GET, proc);
250
armcill_nb_get(pd,ps,bytes,proc,get_dscr[cur_get]);
253
/* second loop: presumably the branch without handle support, passing dummy */
for(i=0; i< batch; i++){
255
armcill_nb_get(pd,ps,bytes,proc,dummy);
259
/* wrap the slot index at max_pending */
if(cur_get>=max_pending)cur_get=0;
263
/* presumably guarded by a count/issued comparison on a line not visible here */
armci_die2("armcill_getv: mismatch %d %d \n", count,issued);
265
#ifdef SHMEM_HANDLE_SUPPORTED
266
/* blocking part: complete and release every descriptor still in use */
for(i=0; i<max_pending; i++) if(CHK_HDL(get_dscr[i])){
267
armcill_nb_wait(get_dscr[i]);
268
CLR_HDL(get_dscr[i]);
276
/*
 * armcill_wait_get - complete all outstanding non-blocking gets.
 *
 * With SHMEM_HANDLE_SUPPORTED: returns immediately when pending_get is zero;
 * otherwise waits on and clears every in-use descriptor among the first
 * max_pending entries of get_dscr.
 *
 * NOTE(review): the early return leaves before CALL_OUT, so with tracing
 * enabled an "ENTERED" line can appear without a matching "EXITING" line.
 * The declaration of i, any reset of pending_get, the braces and the
 * #else/#endif lines are not visible here - confirm against the full file.
 */
void armcill_wait_get()
278
CALL_IN("armcill_wait_get");
279
#ifdef SHMEM_HANDLE_SUPPORTED
281
/* nothing outstanding: skip the scan */
if(!pending_get)return;
283
for(i=0; i<max_pending; i++) if(CHK_HDL(get_dscr[i])){
284
armcill_nb_wait(get_dscr[i]);
285
CLR_HDL(get_dscr[i]);
290
CALL_OUT("armcill_wait_get");
294
/*
 * armcill_wait_put - complete all outstanding non-blocking puts.
 *
 * With SHMEM_HANDLE_SUPPORTED: returns immediately when pending_put is zero;
 * otherwise waits on and clears every in-use descriptor among the first
 * max_pending entries of put_dscr.
 *
 * NOTE(review): the early return leaves before CALL_OUT, so with tracing
 * enabled an "ENTERED" line can appear without a matching "EXITING" line.
 * The declaration of i, any reset of pending_put, the braces and the
 * #else/#endif lines are not visible here - confirm against the full file.
 */
void armcill_wait_put()
296
CALL_IN("armcill_wait_put");
297
#ifdef SHMEM_HANDLE_SUPPORTED
299
/* nothing outstanding: skip the scan */
if(!pending_put)return;
301
for(i=0; i<max_pending; i++) if(CHK_HDL(put_dscr[i])){
302
armcill_nb_wait(put_dscr[i]);
303
CLR_HDL(put_dscr[i]);
308
CALL_OUT("armcill_wait_put");