11
#if (defined(__i386__) || defined(__x86_64__)) && !defined(NO_I386ASM)
12
# include "atomics-i386.h"
16
/* enable use of newer interfaces in SHMEM */
18
#ifndef LIBELAN_ATOMICS
19
/* manpages for shmem_fadd exist on the T3E but library code does not */
25
/* global scope to prevent compiler optimization of volatile code */
29
/* JAD -- DCMF implements its own rmw
30
there were linking errors with missing atomic_fetch_and_add for DCMF */
32
/*
 * armci_generic_rmw: read-modify-write on the location prem owned by
 * process proc, carried out between NATIVE_LOCK and NATIVE_UNLOCK.
 *
 *   op    - operation selector; ARMCI_FETCH_AND_ADD and
 *           ARMCI_FETCH_AND_ADD_LONG are visible below, the swap sequences
 *           follow them, and any other value reaches armci_die().
 *   ploc  - local buffer; receives the value fetched from prem and, for
 *           the swap sequences, supplies the value written to prem.
 *   prem  - location being updated on proc.
 *   extra - increment used by the fetch-and-add sequences.
 *   proc  - target process.
 *
 * NOTE(review): the enclosing switch, braces, else branches and the swap
 * case labels are not visible in this listing; comments below that refer
 * to them are assumptions to confirm against the full file.
 */
void armci_generic_rmw(int op, void *ploc, void *prem, int extra, int proc)
34
#if defined(CLUSTER) && !defined(SGIALTIX)
35
/* Lock index: proc's offset from the master of its cluster node, wrapped
   to NUM_LOCKS. */
int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
40
/* Taken before the get/modify/put sequences; released by NATIVE_UNLOCK. */
NATIVE_LOCK(lock,proc);
43
case ARMCI_FETCH_AND_ADD:
44
#if (defined(__i386__) || defined(__x86_64__)) && !defined(NO_I386ASM)
45
#if (defined(__GNUC__) || defined(__INTEL_COMPILER__) ||defined(__PGIC__)) && !defined(PORTALS) && !defined(NO_I386ASM)
46
/* Direct atomic path, used when SERVER_CONTEXT is true or armci_nclus is 1. */
if(SERVER_CONTEXT || armci_nclus == 1){
47
/* *(int*)ploc = __sync_fetch_and_add((int*)prem, extra); */
48
/* presumably leaves the previous value of *prem in *ploc -- TODO confirm
   against atomics-i386.h */
atomic_fetch_and_add(prem, ploc, extra, sizeof(int));
54
/* Get/put sequence (presumably the fallback branch): fetch the int at prem
   into ploc, add extra, and write the sum back to prem. ploc keeps the
   value that was fetched. */
armci_get(prem,ploc,sizeof(int),proc);
55
_a_temp = *(int*)ploc + extra;
56
armci_put(&_a_temp,prem,sizeof(int),proc);
59
case ARMCI_FETCH_AND_ADD_LONG:
60
/* Same fetch / add / write-back sequence with long-sized transfers. */
armci_get(prem,ploc,sizeof(long),proc);
61
_a_ltemp = *(long*)ploc + extra;
62
armci_put(&_a_ltemp,prem,sizeof(long),proc);
65
#if (defined(__i386__) || defined(__x86_64__)) && !defined(PORTALS) && !defined(NO_I386ASM)
66
/* int swap: same SERVER_CONTEXT / armci_nclus condition as the
   fetch-and-add atomic path above. */
if(SERVER_CONTEXT || armci_nclus==1){
67
atomic_exchange(ploc, prem, sizeof(int));
72
/* int swap via get/put: save the value at prem in _a_temp, write *ploc to
   prem, then hand the saved value back through ploc. */
armci_get(prem,&_a_temp,sizeof(int),proc);
73
armci_put(ploc,prem,sizeof(int),proc);
74
*(int*)ploc = _a_temp;
78
/* long swap via get/put: same three steps using _a_ltemp. */
armci_get(prem,&_a_ltemp,sizeof(long),proc);
79
armci_put(ploc,prem,sizeof(long),proc);
80
*(long*)ploc = _a_ltemp;
82
/* Any op without a case label is reported through armci_die(). */
default: armci_die("rmw: operation not supported",op);
88
/* Release the lock taken by NATIVE_LOCK above. */
NATIVE_UNLOCK(lock,proc);
93
/*
 * PARMCI_Rmw: read-modify-write entry point. The visible lines show
 * several alternative implementations chosen by preprocessor conditionals:
 * LAPI (LAPI_Rmw / LAPI_Rmw64), SHMEM and Elan (shmem_*_fadd, shmem_*_swap,
 * elan_*), BG1S_rmw, ARMCIX_Rmw, armci_rem_rmw, and armci_generic_rmw.
 *
 *   op    - ARMCI_FETCH_AND_ADD, ARMCI_FETCH_AND_ADD_LONG, ARMCI_SWAP_LONG
 *           (an int swap is also handled; its case labels are not visible).
 *   ploc  - local buffer; receives the previous value of *prem and, for
 *           swaps, supplies the new value.
 *   prem  - location on proc that is updated.
 *   extra - increment for the fetch-and-add operations.
 *   proc  - target process.
 *
 * NOTE(review): the return statements, the switch headers and most
 * #else/#endif lines are not visible in this listing; which guard each
 * fragment sits under must be confirmed against the full file.
 */
int PARMCI_Rmw(int op, void *ploc, void *prem, int extra, int proc)
96
/* Local prototype for the 64-bit LAPI read-modify-write call. */
extern int LAPI_Rmw64(lapi_handle_t hndl, RMW_ops_t op, uint tgt,
98
long long *in_val, long long *prev_tgt_val, lapi_cntr_t *org_cntr);
99
/* 64-bit temporaries for the LAPI_Rmw64 path; pllarg views ploc as
   long long. */
long long llval, *pllarg = (long long*)ploc, lltmp;
100
/* enable RMWBROKEN if RMW fails for long datatype */
105
/* int-sized temporaries for the LAPI_Rmw path; opcode defaults to SWAP and
   parg views ploc as int. */
int ival, rc, opcode=SWAP, *parg=ploc;
107
#elif defined(_CRAYMPP) || defined(QUADRICS) || defined(CRAY_SHMEM)
112
#if defined(LAPI64) && defined(RMWBROKEN)
113
/* hack for rmw64 BROKEN: we operate on least significant part of long */
114
if(op==ARMCI_FETCH_AND_ADD_LONG || op==ARMCI_SWAP_LONG){
122
/* On the configurations selected below, a target for which
   SAMECLUSNODE(proc) is false is handed to armci_rem_rmw(). */
#if defined(CLUSTER) && !defined(LAPI) && !defined(QUADRICS) &&!defined(CYGWIN)\
123
&& !defined(HITACHI) && !defined(CRAY_SHMEM) && !defined(PORTALS)
124
if(!SAMECLUSNODE(proc)){
125
armci_rem_rmw(op, ploc, prem, extra, proc);
131
/* For a same-node target, armci_region_fixup receives prem by address and
   may therefore rewrite it; its return value is discarded. */
if(SAMECLUSNODE(proc)) (void)armci_region_fixup(proc,&prem);
141
/* NOTE(review): the repeated case labels below presumably belong to
   different preprocessor branches that are not visible here -- confirm. */
case ARMCI_FETCH_AND_ADD:
142
case ARMCI_FETCH_AND_ADD_LONG:
148
case ARMCI_FETCH_AND_ADD_LONG:
149
fprintf(stderr,"adding int to longs....\n");
157
case ARMCI_SWAP_LONG:
163
case ARMCI_SWAP_LONG:
164
fprintf(stderr,"long armci_swap\n");
171
ARMCI_Error("Invalid operation for RMW", op);
174
/* int PARMCI_Rmw(int op, int *ploc, int *prem, int extra, int proc) */
175
/* assumes ploc will change
176
dstbuf=prem, input=temp(extra), output=ploc
177
val=ploc, arr[0]=prem, 1=extra */
179
int me=armci_msg_me();
182
/* BG1S_rmw is issued with cb_wait (wait_callback, &done) as its callback
   argument. NOTE(review): the code that waits on done is not visible. */
BGML_Callback_t cb_wait={wait_callback, &done};
183
BG1S_rmw(&request, proc, 0, prem, temp, ploc, oper, dt, &cb_wait, 1);
186
/* Delegates the whole operation to ARMCIX_Rmw with unchanged arguments. */
ARMCIX_Rmw(op, ploc, prem, extra, proc);
189
# if defined(QUADRICS) || defined(_CRAYMPP) || defined(CRAY_SHMEM)
190
case ARMCI_FETCH_AND_ADD:
192
/* printf(" calling intfdd arg %x %ld \n", prem, *prem); */
193
/* Library fetch-and-add; its result is stored in *ploc. */
*(int*) ploc = shmem_int_fadd(prem, extra, proc);
194
#elif defined(LIBELAN_ATOMICS)
195
*(int*) ploc = elan_int_fadd(prem, extra, proc);
197
/* Fetch-and-add built from swaps: keep swapping INT_MAX into prem until a
   value other than INT_MAX comes back (INT_MAX effectively marks the word
   as in use), then swap in ival + extra.
   NOTE(review): the store of ival into *ploc is not visible here. */
while ( (ival = shmem_int_swap(prem, INT_MAX, proc) ) == INT_MAX);
198
(void) shmem_int_swap(prem, ival +extra, proc);
202
case ARMCI_FETCH_AND_ADD_LONG:
204
*(long*) ploc = shmem_long_fadd( (long*)prem, (long) extra, proc);
205
#elif defined(LIBELAN_ATOMICS)
206
*(long*) ploc = elan_long_fadd( (long*)prem, (long) extra, proc);
208
/* Same swap-based scheme for long, using LONG_MAX as the marker value. */
while ((lval=shmem_long_swap((long*)prem,LONG_MAX,proc)) == LONG_MAX);
209
(void) shmem_long_swap((long*)prem, (lval + extra), proc);
214
/* int swap: *ploc is sent to prem and replaced by the value returned. */
#ifdef LIBELAN_ATOMICS
215
*(int*)ploc = elan_int_swap((int*)prem, *(int*)ploc, proc);
217
*(int*)ploc = shmem_int_swap((int*)prem, *(int*)ploc, proc);
220
case ARMCI_SWAP_LONG:
221
#ifdef LIBELAN_ATOMICS
222
*(long*)ploc = elan_long_swap((long*)prem, *(long*)ploc, proc);
224
*(long*)ploc = shmem_long_swap((long*)prem, *(long*)ploc, proc);
228
/* 64-bit LAPI path: long operations go through LAPI_Rmw64. */
# if defined(LAPI64) && !defined(RMWBROKEN)
229
case ARMCI_FETCH_AND_ADD_LONG:
230
opcode = FETCH_AND_ADD;
231
lltmp = (long long)extra;
233
case ARMCI_SWAP_LONG:
235
/* Debug output. NOTE(review): presumably compiled out (guard not visible).
   *prem dereferences a void pointer, %ld is given long long arguments, and
   rc is passed to PARMCI_Get before any visible assignment -- confirm
   before enabling. */
printf("before opcode=%d rem=%ld, loc=(%ld,%ld) extra=%ld\n",
236
opcode,*prem,*(long*)ploc,llval, lltmp);
238
PARMCI_Get(prem, &llval, rc, proc);
239
printf("%d:rem val before %ld\n",armci_me, llval); fflush(stdout);
241
/* Set the counter req_id to 0, issue the 64-bit RMW, then wait for the
   counter to reach 1. A nonzero return from any of the three calls is
   reported through armci_die(). */
if( rc = LAPI_Setcntr(lapi_handle,&req_id,0))
242
armci_die("rmw setcntr failed",rc);
243
if( rc = LAPI_Rmw64(lapi_handle, opcode, proc, (long long*)prem,
244
pllarg, &llval, &req_id)) armci_die("rmw failed",rc);
245
if( rc = LAPI_Waitcntr(lapi_handle, &req_id, 1, NULL))
246
armci_die("rmw wait failed",rc);
248
/* llval (the prev_tgt_val argument of LAPI_Rmw64) is returned via ploc. */
*(long*)ploc = (long)llval;
251
/* Debug re-read of the remote value. NOTE(review): same rc and %ld
   concerns as the debug output above. */
PARMCI_Get(prem, &lltmp, rc, proc);
252
printf("%d:after rmw remote val from rmw=%ld and get=%ld extra=%d\n",
253
armci_me,llval, lltmp,extra);
257
/************** here sizeof(long)= sizeof(int) **************/
258
case ARMCI_FETCH_AND_ADD:
259
# if !defined(LAPI64) || defined(RMWBROKEN)
260
case ARMCI_FETCH_AND_ADD_LONG:
262
opcode = FETCH_AND_ADD;
265
# if !defined(LAPI64) || defined(RMWBROKEN)
266
case ARMCI_SWAP_LONG:
268
/* Within SMPs LAPI_Rmw needs target's address. */
269
if(SAMECLUSNODE(proc)) proc=armci_me;
271
/* int-sized LAPI path: same set-counter / RMW / wait-counter sequence as
   the 64-bit path, using LAPI_Rmw with parg and ival. */
if( rc = LAPI_Setcntr(lapi_handle,&req_id,0))
272
armci_die("rmw setcntr failed",rc);
273
if( rc = LAPI_Rmw(lapi_handle, opcode, proc, prem,
274
parg, &ival, &req_id)) armci_die("rmw failed",rc);
275
if( rc = LAPI_Waitcntr(lapi_handle, &req_id, 1, NULL))
276
armci_die("rmw wait failed",rc);
277
/* ival, filled in by LAPI_Rmw, is returned via ploc. */
* (int *)ploc = ival;
280
/* Remaining configuration: these operations are forwarded to
   armci_generic_rmw(). */
case ARMCI_FETCH_AND_ADD:
281
case ARMCI_FETCH_AND_ADD_LONG:
283
case ARMCI_SWAP_LONG:
284
armci_generic_rmw(op, ploc, prem, extra, proc);
287
/* Any other op is reported through armci_die(). */
default: armci_die("rmw: operation not supported",op);