11
#if (defined(__i386__) || defined(__x86_64__)) && !defined(NO_I386ASM)
12
# include "atomics-i386.h"
16
/* enable use of newer interfaces in SHMEM */
18
#ifndef LIBELAN_ATOMICS
19
/* man pages for shmem_fadd exist on the T3E, but the library does not provide the code */
25
/* global scope to prevent compiler optimization of volatile code */
29
/* JAD -- DCMF implements its own rmw
30
there were linking errors with missing atomic_fetch_and_add for DCMF */
32
void armci_generic_rmw(int op, void *ploc, void *prem, int extra, int proc)
34
#if defined(CLUSTER) && !defined(SGIALTIX)
35
int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
40
NATIVE_LOCK(lock,proc);
43
case ARMCI_FETCH_AND_ADD:
44
#if (defined(__GNUC__) || defined(__INTEL_COMPILER__) ||defined(__PGIC__)) && (defined(__i386__) || defined(__x86_64__)) && !defined(PORTALS) && !defined(NO_I386ASM)
45
if(SERVER_CONTEXT || armci_nclus == 1){
46
/* *(int*)ploc = __sync_fetch_and_add((int*)prem, extra); */
47
atomic_fetch_and_add(prem, ploc, extra, sizeof(int));
52
armci_get(prem,ploc,sizeof(int),proc);
53
_a_temp = *(int*)ploc + extra;
54
armci_put(&_a_temp,prem,sizeof(int),proc);
57
case ARMCI_FETCH_AND_ADD_LONG:
58
armci_get(prem,ploc,sizeof(long),proc);
59
_a_ltemp = *(long*)ploc + extra;
60
armci_put(&_a_ltemp,prem,sizeof(long),proc);
63
#if (defined(__i386__) || defined(__x86_64__)) && !defined(PORTALS) && !defined(NO_I386ASM)
64
if(SERVER_CONTEXT || armci_nclus==1){
65
atomic_exchange(ploc, prem, sizeof(int));
70
armci_get(prem,&_a_temp,sizeof(int),proc);
71
armci_put(ploc,prem,sizeof(int),proc);
72
*(int*)ploc = _a_temp;
76
armci_get(prem,&_a_ltemp,sizeof(long),proc);
77
armci_put(ploc,prem,sizeof(long),proc);
78
*(long*)ploc = _a_ltemp;
80
default: armci_die("rmw: operation not supported",op);
86
NATIVE_UNLOCK(lock,proc);
91
int PARMCI_Rmw(int op, void *ploc, void *prem, int extra, int proc)
94
extern int LAPI_Rmw64(lapi_handle_t hndl, RMW_ops_t op, uint tgt,
96
long long *in_val, long long *prev_tgt_val, lapi_cntr_t *org_cntr);
97
long long llval, *pllarg = (long long*)ploc, lltmp;
98
/* enable RMWBROKEN if RMW fails for long datatype */
103
int ival, rc, opcode=SWAP, *parg=ploc;
105
#elif defined(_CRAYMPP) || defined(QUADRICS) || defined(CRAY_SHMEM)
110
#if defined(LAPI64) && defined(RMWBROKEN)
111
/* hack for when rmw64 is broken (RMWBROKEN): we operate on the least significant part of the long */
112
if(op==ARMCI_FETCH_AND_ADD_LONG || op==ARMCI_SWAP_LONG){
120
#if defined(CLUSTER) && !defined(LAPI) && !defined(QUADRICS) &&!defined(CYGWIN)\
121
&& !defined(HITACHI) && !defined(CRAY_SHMEM) && !defined(PORTALS)
122
if(!SAMECLUSNODE(proc)){
123
armci_rem_rmw(op, ploc, prem, extra, proc);
129
if(SAMECLUSNODE(proc)) (void)armci_region_fixup(proc,&prem);
139
case ARMCI_FETCH_AND_ADD:
140
case ARMCI_FETCH_AND_ADD_LONG:
146
case ARMCI_FETCH_AND_ADD_LONG:
147
fprintf(stderr,"adding int to longs....\n");
155
case ARMCI_SWAP_LONG:
161
case ARMCI_SWAP_LONG:
162
fprintf(stderr,"long armci_swap\n");
169
ARMCI_Error("Invalid operation for RMW", op);
172
/* int PARMCI_Rmw(int op, int *ploc, int *prem, int extra, int proc) */
173
/* assumes ploc will change
174
dstbuf=prem, input=temp(extra), output=ploc
175
val=ploc, arr[0]=prem, 1=extra */
177
int me=armci_msg_me();
180
BGML_Callback_t cb_wait={wait_callback, &done};
181
BG1S_rmw(&request, proc, 0, prem, temp, ploc, oper, dt, &cb_wait, 1);
184
ARMCIX_Rmw(op, ploc, prem, extra, proc);
187
# if defined(QUADRICS) || defined(_CRAYMPP) || defined(CRAY_SHMEM)
188
case ARMCI_FETCH_AND_ADD:
190
/* printf(" calling intfdd arg %x %ld \n", prem, *prem); */
191
*(int*) ploc = shmem_int_fadd(prem, extra, proc);
192
#elif defined(LIBELAN_ATOMICS)
193
*(int*) ploc = elan_int_fadd(prem, extra, proc);
195
while ( (ival = shmem_int_swap(prem, INT_MAX, proc) ) == INT_MAX);
196
(void) shmem_int_swap(prem, ival +extra, proc);
200
case ARMCI_FETCH_AND_ADD_LONG:
202
*(long*) ploc = shmem_long_fadd( (long*)prem, (long) extra, proc);
203
#elif defined(LIBELAN_ATOMICS)
204
*(long*) ploc = elan_long_fadd( (long*)prem, (long) extra, proc);
206
while ((lval=shmem_long_swap((long*)prem,LONG_MAX,proc)) == LONG_MAX);
207
(void) shmem_long_swap((long*)prem, (lval + extra), proc);
212
#ifdef LIBELAN_ATOMICS
213
*(int*)ploc = elan_int_swap((int*)prem, *(int*)ploc, proc);
215
*(int*)ploc = shmem_int_swap((int*)prem, *(int*)ploc, proc);
218
case ARMCI_SWAP_LONG:
219
#ifdef LIBELAN_ATOMICS
220
*(long*)ploc = elan_long_swap((long*)prem, *(long*)ploc, proc);
222
*(long*)ploc = shmem_long_swap((long*)prem, *(long*)ploc, proc);
226
# if defined(LAPI64) && !defined(RMWBROKEN)
227
case ARMCI_FETCH_AND_ADD_LONG:
228
opcode = FETCH_AND_ADD;
229
lltmp = (long long)extra;
231
case ARMCI_SWAP_LONG:
233
printf("before opcode=%d rem=%ld, loc=(%ld,%ld) extra=%ld\n",
234
opcode,*prem,*(long*)ploc,llval, lltmp);
236
PARMCI_Get(prem, &llval, rc, proc);
237
printf("%d:rem val before %ld\n",armci_me, llval); fflush(stdout);
239
if( rc = LAPI_Setcntr(lapi_handle,&req_id,0))
240
armci_die("rmw setcntr failed",rc);
241
if( rc = LAPI_Rmw64(lapi_handle, opcode, proc, (long long*)prem,
242
pllarg, &llval, &req_id)) armci_die("rmw failed",rc);
243
if( rc = LAPI_Waitcntr(lapi_handle, &req_id, 1, NULL))
244
armci_die("rmw wait failed",rc);
246
*(long*)ploc = (long)llval;
249
PARMCI_Get(prem, &lltmp, rc, proc);
250
printf("%d:after rmw remote val from rmw=%ld and get=%ld extra=%d\n",
251
armci_me,llval, lltmp,extra);
255
/************** here sizeof(long)= sizeof(int) **************/
256
case ARMCI_FETCH_AND_ADD:
257
# if !defined(LAPI64) || defined(RMWBROKEN)
258
case ARMCI_FETCH_AND_ADD_LONG:
260
opcode = FETCH_AND_ADD;
263
# if !defined(LAPI64) || defined(RMWBROKEN)
264
case ARMCI_SWAP_LONG:
266
/* Within SMPs LAPI_Rmw needs target's address. */
267
if(SAMECLUSNODE(proc)) proc=armci_me;
269
if( rc = LAPI_Setcntr(lapi_handle,&req_id,0))
270
armci_die("rmw setcntr failed",rc);
271
if( rc = LAPI_Rmw(lapi_handle, opcode, proc, prem,
272
parg, &ival, &req_id)) armci_die("rmw failed",rc);
273
if( rc = LAPI_Waitcntr(lapi_handle, &req_id, 1, NULL))
274
armci_die("rmw wait failed",rc);
275
* (int *)ploc = ival;
278
case ARMCI_FETCH_AND_ADD:
279
case ARMCI_FETCH_AND_ADD_LONG:
281
case ARMCI_SWAP_LONG:
282
armci_generic_rmw(op, ploc, prem, extra, proc);
285
default: armci_die("rmw: operation not supported",op);