5
/* preliminary implementation on top of portals */
6
/*there are 3 kinds of ARMCI memory: PARMCI_Malloc, PARMCI_Malloc_local, user
7
* allocated memory. For PARMCI_Malloc, we use region specific md that
8
* comes from completion descriptor.
9
* For PARMCI_Malloc_local, we use the MD from the lochdl
10
* For user allocated memory, we use another list of MD's
11
* which binds the user memory. We never keep track of non-armci allocated memory. */
46
static cnos_mutex_t *_mutex_array;
50
/* Global variables and data structures */
51
/* Process-wide Portals state; the code in this file reaches it through
 * the `portals` pointer declared right below. */
armci_portals_proc_t _armci_portals_proc_struct;
52
armci_portals_proc_t *portals = &_armci_portals_proc_struct;
53
/* One array of completion descriptors per region id.  The extra slot
 * [PORTALS_MEM_REGIONS] is filled by armci_init_portals() and is the one
 * requested by the transfer routines when the local buffer is user memory
 * or PARMCI_Malloc_local memory. */
comp_desc *_region_compdesc_array[PORTALS_MEM_REGIONS+1];
54
/* Starts at 0; nothing in the visible part of this file changes it. */
int ptl_initialized = 0;
55
/* Per-region index of the descriptor get_free_comp_desc() hands out next;
 * it is advanced modulo MAX_OUT. */
int free_desc_index[PORTALS_MEM_REGIONS+1];
58
/* Passed to PtlNIInit() in armci_init_portals(). */
ptl_ni_limits_t armci_ptl_nilimits;
60
/*
 * armci_init_portals - set up the Portals state kept in `portals`.
 *
 * Steps visible here: PtlInit, PtlNIInit, PtlGetId, retrieval of the
 * nid/pid map through PMI_CNOS_Get_nidpid_map, allocation of the single
 * event queue portals->eq_h (1024 entries), and allocation of the MAX_OUT
 * completion descriptors stored in
 * _region_compdesc_array[PORTALS_MEM_REGIONS].
 *
 * NOTE(review): the braces, several declarations (i, rc, num_interface,
 * md_ptr), the conditions guarding the error branches and the return
 * statement are not part of this listing.
 */
int armci_init_portals(void)
65
comp_desc *armci_comp_desc;
66
ptl_process_id_t match_id;
67
ARMCI_PR_DBG("enter",0);
69
if (PtlInit(&num_interface) != PTL_OK) {
70
fprintf(stderr, "PtlInit() failed\n");
73
portals->ptl = 37; /* our own ptl number */
74
for(i=0;i<=PORTALS_MEM_REGIONS;i++){
78
rc=PtlNIInit(IFACE_FROM_BRIDGE_AND_NALID(PTL_BRIDGE_UK,PTL_IFACE_SS), PTL_PID_ANY, NULL, &armci_ptl_nilimits, &(portals->ni_h));
84
printf( "PtlNIInit() failed %d error=%s\n",rc,ptl_err_str[rc]);
85
armci_die("NIInit Failed",0);
87
if(DEBUG_INIT || DEBUG_COMM)
88
PtlNIDebug(portals->ni_h,PTL_DEBUG_ALL);
90
PtlGetId(portals->ni_h,&portals->ptl_my_procid);
92
/* NOTE(review): ptl_my_procid is filled in by PtlGetId() above, so it is
 * presumably a ptl_process_id_t rather than an int; confirm the %d
 * conversions that print it here and in the other routines. */
printf("%d:the rank is %d, size is %d\n",armci_me,
93
portals->ptl_my_procid,portals->size);
96
if((rc=PMI_CNOS_Get_nidpid_map(&portals->ptl_pe_procid_map))!=PMI_SUCCESS){
97
printf("Getting proc map failed (npes=%d)\n", armci_nproc);
100
/* Allocate one shared event queue for all operations */
103
rc = PtlEQAlloc(portals->ni_h,1024,NULL, &(portals->eq_h));
105
printf("%d:PtlEQAlloc() failed: %s (%d)\n",
106
portals->ptl_my_procid, ptl_err_str[rc], rc);
107
armci_die("EQ Alloc failed",rc);
111
portals->num_match_entries = 0;
114
utcp_lib_out = stdout;
115
utcp_api_out = stdout;
118
/*now prepare for use of local memory*/
119
armci_comp_desc = (comp_desc *)malloc(sizeof(comp_desc)*MAX_OUT);
/* the descriptors are written right below, so fail loudly on OOM */
if(!armci_comp_desc) armci_die("armci_init_portals: malloc failed",0);
120
for(i=0; i< MAX_OUT;i++){
122
ptl_handle_md_t *md_h;
123
armci_comp_desc[i].active=0;
124
md_ptr = &armci_comp_desc[i].mem_dsc;
125
md_h = &armci_comp_desc[i].mem_dsc_hndl;
126
md_ptr->eq_handle = portals->eq_h;
128
md_ptr->threshold = 2;/*PTL_MD_THRESH_INF;*/
129
md_ptr->options = PTL_MD_OP_GET | PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
131
/* slot PORTALS_MEM_REGIONS is the pool used for user / local memory */
_region_compdesc_array[PORTALS_MEM_REGIONS]=armci_comp_desc;
132
ARMCI_PR_DBG("exit",0);
138
/*
 * armci_fini_portals - shut down the Portals network interface.
 *
 * Prints an entry and an exit banner to stdout (each followed by a flush)
 * and calls PtlNIFini() on portals->ni_h in between.
 *
 * NOTE(review): the function's braces are not part of this listing.
 */
void armci_fini_portals()
140
ARMCI_PR_DBG("enter",0);
142
printf("ENTERING ARMCI_FINI_PORTALS\n");fflush(stdout);
144
PtlNIFini(portals->ni_h);
147
printf("LEAVING ARMCI_FINI_PORTALS\n");fflush(stdout);
149
ARMCI_PR_DBG("exit",0);
154
/*
 * armci_serv_register_req - expose the memory [start, start+bytes) so it
 * can be the target of remote puts and gets.
 *
 * start   - first byte of the region
 * bytes   - length of the region
 * reg_mem - handle of the region; its cdesc.mem_dsc, cdesc.mem_dsc_hndl
 *           and match_bits fields are filled in here
 *
 * A match entry accepting any nid/pid is attached to portal index
 * portals->ptl, a memory descriptor without an event queue is attached to
 * it, and portals->num_match_entries is incremented.
 *
 * NOTE(review): the braces, the declarations of rc/md_ptr/context and the
 * conditions guarding the two error branches are not part of this listing.
 * NOTE(review): portals->me_h[] is indexed without a visible bounds check.
 */
void armci_serv_register_req(void *start,long bytes, ARMCI_MEMHDL_T *reg_mem)
159
ptl_match_bits_t *mb;
160
ptl_process_id_t match_id;
161
ptl_handle_md_t *md_h;
162
/* every bit except the low four is ignored when matching, so only
 * regid modulo 16 distinguishes regions */
ptl_match_bits_t ignbits = 0xFFFFFFFFFFFFFFF0;
164
ARMCI_PR_DBG("enter",reg_mem->regid);
166
printf("%d:armci_serv_register_req:size of mem_hndl is %d\n",
167
armci_me,(int)sizeof(region_memhdl_t));
168
printf("\n%d:armci_serv_register_req start=%p bytes=%ld\n",
169
armci_me,start,bytes);fflush(stdout);
172
md_ptr = &reg_mem->cdesc.mem_dsc;
173
mb = &reg_mem->match_bits;
174
md_h = &reg_mem->cdesc.mem_dsc_hndl;
177
md_ptr->start = start;
178
md_ptr->length = bytes;
179
md_ptr->threshold = PTL_MD_THRESH_INF;
180
md_ptr->options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE;
181
md_ptr->user_ptr = context;
182
/*eq_hdl is null for the attaches done for a remote proc*/
183
/*md_ptr->eq_handle = portals->eq_h;*/
184
md_ptr->eq_handle = PTL_EQ_NONE;
186
/* the match bits are simply the region id */
*mb = 0x0000000000000000;
187
*mb = (*mb+reg_mem->regid);
189
match_id.nid = PTL_NID_ANY;
190
match_id.pid = PTL_PID_ANY;
192
rc = PtlMEAttach(portals->ni_h,portals->ptl,match_id,*mb,ignbits,
193
PTL_RETAIN,PTL_INS_AFTER,
194
&(portals->me_h[portals->num_match_entries]));
197
printf("%d:PtlMEAttach: %s\n", portals->ptl_my_procid, ptl_err_str[rc]);
198
armci_die("portals attach error2",rc);
201
rc = PtlMDAttach(portals->me_h[portals->num_match_entries],*md_ptr,PTL_RETAIN,md_h);
204
printf("%d:PtlMDAttach: %s\n", portals->ptl_my_procid, ptl_err_str[rc]);
205
armci_die("portals attach error1",rc);
208
portals->num_match_entries++;
209
ARMCI_PR_DBG("exit",reg_mem->regid);
213
/*
 * armci_pin_contig_hndl - bind local memory descriptors for the region
 * [start, start+bytes).
 *
 * start   - first byte of the region
 * bytes   - length of the region
 * reg_mem - handle of the region
 *
 * When reg_mem->islocal is zero, MAX_OUT completion descriptors are
 * allocated, each bound over the whole region with PtlMDBind, and the
 * array is stored in _region_compdesc_array[reg_mem->regid].  The code
 * that follows fills reg_mem->cdesc and binds that single descriptor.
 *
 * NOTE(review): the braces, the declarations of i/rc/md_ptr/context, the
 * conditions guarding the error branches, the branch structure around the
 * second half and the return statements are not part of this listing.
 */
int armci_pin_contig_hndl(void *start,int bytes, ARMCI_MEMHDL_T *reg_mem)
218
ptl_process_id_t match_id;
219
ptl_handle_md_t *md_h;
220
comp_desc *armci_comp_desc;
222
ARMCI_PR_DBG("enter",reg_mem->regid);
223
/*first create comp_desc arr for this region if it is not local*/
224
if(!reg_mem->islocal){
225
armci_comp_desc = (comp_desc *)malloc(sizeof(comp_desc)*MAX_OUT);
/* the descriptors are written right below, so fail loudly on OOM */
if(!armci_comp_desc) armci_die("armci_pin_contig_hndl: malloc failed",0);
226
for(i=0; i< MAX_OUT;i++){
227
armci_comp_desc[i].active=0;
228
md_ptr = &armci_comp_desc[i].mem_dsc;
229
md_h = &armci_comp_desc[i].mem_dsc_hndl;
231
md_ptr->start = start;
232
md_ptr->length = bytes;
233
md_ptr->threshold = 2;/*PTL_MD_THRESH_INF;*/
234
md_ptr->options = PTL_MD_OP_GET | PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
235
/*md_ptr->options = PTL_MD_EVENT_START_DISABLE;*/
237
md_ptr->user_ptr = context;
238
md_ptr->eq_handle = portals->eq_h;
241
rc = PtlMDBind(portals->ni_h,*md_ptr, PTL_RETAIN, md_h);
243
printf("%d:PtlMDBind: %s\n", portals->ptl_my_procid, ptl_err_str[rc]);
244
armci_die("ptlmdbind failed",0);
248
_region_compdesc_array[reg_mem->regid]=armci_comp_desc;
249
ARMCI_PR_DBG("exit",0);
253
/* single descriptor embedded in the region handle itself */
md_ptr = &reg_mem->cdesc.mem_dsc;
254
md_h = &reg_mem->cdesc.mem_dsc_hndl;
256
md_ptr->start = start;
257
md_ptr->length = bytes;
258
md_ptr->threshold = 2;/*PTL_MD_THRESH_INF;*/
259
md_ptr->options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
261
md_ptr->user_ptr = context;
262
md_ptr->eq_handle = portals->eq_h;
265
rc = PtlMDBind(portals->ni_h,*md_ptr, PTL_RETAIN, md_h);
267
printf("%d:PtlMDBind: %s\n", portals->ptl_my_procid, ptl_err_str[rc]);
268
armci_die("ptlmdbind failed",0);
271
ARMCI_PR_DBG("exit",1);
277
/*
 * armci_client_complete - drain the shared event queue until the
 * completion descriptor `cdesc` is no longer active.
 *
 * Each event is fetched with PtlEQWait() on portals->eq_h; a failed wait
 * or an event whose ni_fail_type is not PTL_NI_OK aborts via armci_die().
 * The descriptor an event belongs to is recovered from ev->md.user_ptr:
 *   PTL_EVENT_SEND_END  - handled under PUT_LOCAL_ONLY_COMPLETION for
 *                         PUT / NBPUT descriptors
 *   PTL_EVENT_REPLY_END - marks the descriptor inactive (a get finished)
 *   PTL_EVENT_ACK       - updates the fence array for dest_id and
 *                         decrements portals->outstanding_puts
 *
 * NOTE(review): the last parameter (cdesc), the braces, the declarations
 * of rc/ev_t, parts of several statements and the return statement are
 * not part of this listing.
 */
int armci_client_complete(ptl_event_kind_t *evt,int proc_id, int nb_tag,
282
ptl_event_t *ev=&ev_t;
283
comp_desc *temp_comp = NULL;
286
ARMCI_PR_DBG("enter",0);
288
printf("\n%d:enter:client_complete active=%d tag=%d %d\n",armci_me,
289
cdesc->active,cdesc->tag,nb_tag);fflush(stdout);
291
while(cdesc->active!=0){
293
if((rc = PtlEQWait(portals->eq_h, ev)) != PTL_OK){
294
printf("%d:PtlEQWait(): %d %s\n", portals->ptl_my_procid,rc,
296
armci_die("EQWait problem",rc);
298
if (ev->ni_fail_type != PTL_NI_OK) {
299
printf("%d:NI sent %d in event.\n",
300
portals->ptl_my_procid, ev->ni_fail_type);
301
armci_die("event failure problem",0);
304
printf("\n%d:armci_client_complete:done waiting type=%d\n",armci_me,
308
if (ev->type == PTL_EVENT_SEND_END){
310
printf("\n%d:armci_client_complete:event send end\n",armci_me);
313
temp_comp = (comp_desc *)ev->md.user_ptr;
314
#ifdef PUT_LOCAL_ONLY_COMPLETION
315
/* comparison, not assignment: the second operand used to read
 * `type=ARMCI_PORTALS_NBPUT` */
if(temp_comp->type==ARMCI_PORTALS_PUT || temp_comp->type==ARMCI_PORTALS_NBPUT){
326
if (ev->type == PTL_EVENT_REPLY_END){
328
printf("\n%d:client_send_complete:reply end\n",armci_me);
331
temp_comp = (comp_desc *)ev->md.user_ptr;
332
temp_comp->active = 0; /*this was a get request, so we are done*/
336
if (ev->type == PTL_EVENT_ACK){
338
printf("\n%d:client_send_complete:event ack\n",armci_me);
341
temp_comp = (comp_desc *)ev->md.user_ptr;
344
armci_update_fence_array(temp_comp->dest_id,0);
345
portals->outstanding_puts--;
349
printf("\n%d:exit:client_complete active=%d tag=%d %d\n",armci_me,
350
cdesc->active,cdesc->tag,nb_tag);fflush(stdout);
352
ARMCI_PR_DBG("exit",0);
357
/*
 * get_free_comp_desc - pick the next completion descriptor of a region.
 *
 * region_id - index into _region_compdesc_array
 * comp_id   - out: region_id*PORTALS_MEM_REGIONS + the slot index chosen
 *
 * The slot is free_desc_index[region_id]; afterwards that index is
 * advanced modulo MAX_OUT, so descriptors are handed out round-robin.
 * If the chosen descriptor is still active and carries a positive tag,
 * armci_client_complete() is called on it first.
 *
 * NOTE(review): the braces, the declarations of c/rc/md_hdl_local, the
 * `do` that pairs with the `while` below, the matching #endif and the
 * return statement are not part of this listing.
 */
comp_desc * get_free_comp_desc(int region_id, int * comp_id)
361
ARMCI_PR_DBG("enter",region_id);
362
c = &(_region_compdesc_array[region_id][free_desc_index[region_id]]);
363
/* NOTE(review): a descriptor that is active but has tag <= 0 is not
 * waited for here - confirm that this cannot happen. */
if(c->active!=0 && c->tag>0)armci_client_complete(NULL,c->dest_id,c->tag,c);
364
#ifdef PUT_LOCAL_ONLY_COMPLETION
365
if(region_id<PORTALS_MEM_REGIONS){
367
rc = PtlMDUnlink(*md_hdl_local);
368
}while(rc==PTL_MD_IN_USE);
371
/* NOTE(review): the slot index ranges over 0..MAX_OUT-1 but the stride
 * used is PORTALS_MEM_REGIONS, so ids are unique only when
 * MAX_OUT <= PORTALS_MEM_REGIONS - confirm. */
*comp_id = (region_id*PORTALS_MEM_REGIONS+free_desc_index[region_id]);
373
printf("the value of comp_desc_id is %d\n",*comp_id);
376
free_desc_index[region_id] = ((free_desc_index[region_id] + 1) % MAX_OUT);
377
ARMCI_PR_DBG("exit",0);
382
/*
 * print_mem_desc - print the start address and length of a memory
 * descriptor to stdout, prefixed with armci_me.
 *
 * NOTE(review): md->length is printed with %d; if its type is wider than
 * int the conversion does not match - confirm against the ptl_md_t
 * declaration.  The function's braces are not part of this listing.
 */
void print_mem_desc(ptl_md_t * md)
384
printf("%d:md : start %p : length %d\n",armci_me,md->start, md->length);
389
/*
 * armci_client_direct_get - fetch `bytes` bytes from src_buf on process
 * `proc` into the local buffer dst_buf.
 *
 * proc    - target process; its nid/pid come from portals->ptl_pe_procid_map
 * src_buf - remote source address
 * dst_buf - local destination address
 * bytes   - number of bytes to transfer
 * cptr    - when tag is non-zero, receives the completion descriptor used
 * tag     - non-blocking tag, 0 for a blocking call
 * lochdl  - handle of the local region, or NULL for user memory
 * remhdl  - handle of the remote region; its regid is the match-bits value
 *
 * The local descriptor is chosen by memory kind: a descriptor from the
 * PORTALS_MEM_REGIONS pool for user memory and for PARMCI_Malloc_local
 * memory (re-pointed at dst_buf), otherwise a descriptor of the region
 * lochdl->regid.  Offsets are taken relative to the start of the local and
 * the remote descriptor.
 *
 * NOTE(review): the braces, the declarations of rc/cdesc/c_info, the
 * else/if structure between the visible branches, the `do` paired with
 * the `while` below, the tail of the PtlGetRegion call and the conditions
 * guarding the error branches are not part of this listing.
 */
void armci_client_direct_get(int proc, void *src_buf, void *dst_buf, int bytes,
390
void** cptr,int tag,ARMCI_MEMHDL_T *lochdl,
391
ARMCI_MEMHDL_T *remhdl)
393
int clus = armci_clus_id(proc);
395
ptl_size_t offset_local = 0, offset_remote=0;
396
ptl_match_bits_t mb = remhdl->regid;
397
ptl_md_t *md_remote,md, *md_local;
399
ptl_handle_md_t *md_hdl_local;
401
ptl_process_id_t dest_proc;
403
int lproc,rproc,user_memory=0;
405
ARMCI_PR_DBG("enter",remhdl->regid);
408
printf("%d:armci_client_direct_get:src_buf %p dstbuf %p loc_hd is %p\n"
409
"rem_hndl is %p, BYTES = %d\n",armci_me,src_buf,dst_buf,
410
lochdl,remhdl,bytes);
414
/*first process information*/
415
dest_proc.nid = portals->ptl_pe_procid_map[proc].nid;
416
dest_proc.pid = portals->ptl_pe_procid_map[proc].pid;
417
md_remote =&remhdl->cdesc.mem_dsc;
419
/*updating md to send*/
420
if(lochdl == NULL){ /*this is user memory (stack/heap/whatever) */
422
cdesc = get_free_comp_desc(PORTALS_MEM_REGIONS,&c_info);
423
md_local = &cdesc->mem_dsc;
424
md_hdl_local = &cdesc->mem_dsc_hndl;
425
md_local->length=bytes;
426
md_local->start=dst_buf;
429
if(lochdl->islocal){ /*PARMCI_Malloc_local memory*/
432
cdesc = get_free_comp_desc(PORTALS_MEM_REGIONS,&c_info);
433
md_local = &cdesc->mem_dsc;
434
md_hdl_local = &cdesc->mem_dsc_hndl;
435
md_local->length=bytes;
436
md_local->start=dst_buf;
438
cdesc=&lochdl->cdesc;
439
md_local = &lochdl->cdesc.mem_dsc;
440
md_hdl_local = &lochdl->cdesc.mem_dsc_hndl;
444
/*we need to pass region id to get corresponding md*/
445
cdesc = get_free_comp_desc(lochdl->regid,&c_info);
446
md_local = &cdesc->mem_dsc;
447
md_hdl_local = &cdesc->mem_dsc_hndl;
451
/*compute the local and remote offsets*/
452
offset_local = (char*)dst_buf - (char *)md_local->start;
453
offset_remote = (char*)src_buf - (char *)md_remote->start;
455
/* NOTE(review): the offsets are ptl_size_t but printed with %d - confirm */
printf("\n%d:offr=%d offl=%d %p %p\n",armci_me,offset_remote,offset_local,md_local->start,md_remote->start);
457
/*printf("\n%d:get offr=%d ptrr=%p offl=%d ptrl=%p\n",armci_me,offset_remote,md_remote->start,offset_local,md_local->start);fflush(stdout);*/
459
if(tag) *((comp_desc **)cptr) = cdesc;
460
/*if(tag){printf("\n%d:get tag=%d c_info=%d %p",armci_me,tag,c_info,cdesc);fflush(stdout);}*/
463
cdesc->dest_id = proc;
464
cdesc->type = ARMCI_PORTALS_NBGET;
468
cdesc->dest_id = proc;
469
cdesc->type = ARMCI_PORTALS_GET;
472
/* lets armci_client_complete() find the descriptor from the event */
md_local->user_ptr = (void *)cdesc;
473
md_local->options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
477
rc = PtlMDUpdate(*md_hdl_local,NULL,md_local,portals->eq_h);
478
printf("\n%d:trying to update\n",armci_me);fflush(stdout);
479
} while (rc == PTL_MD_NO_UPDATE);
481
printf("%d:PtlMDUpdate: %s\n", portals->rank, ptl_err_str[rc]);
482
/* name the call that actually failed (was "ptlmdbind failed") */
armci_die("ptlmdupdate failed",0);
487
rc = PtlMDBind(portals->ni_h,*md_local, PTL_UNLINK, md_hdl_local);
489
fprintf(stderr, "%d:PtlMDBind: %s\n", portals->rank, ptl_err_str[rc]);
490
armci_die("ptlmdbind failed",0);
496
rc = PtlGetRegion(*md_hdl_local,offset_local,bytes,dest_proc,
502
printf("%d:PtlGetRegion: %s\n", portals->rank,ptl_err_str[rc]);
503
armci_die("PtlGetRegion failed",0);
507
printf("\n%d:issued get to %d %d\n",armci_me,proc,c_info);fflush(stdout);
511
armci_client_complete(NULL,proc,0,cdesc); /* check this later */
513
/*printf("\n%d:issued get to %d %d\n",armci_me,proc,c_info);fflush(stdout);*/
514
ARMCI_PR_DBG("exit",remhdl->regid);
518
void armci_client_nb_get(int proc, void *src_buf, int *src_stride_arr,
519
void *dst_buf, int *dst_stride_arr, int bytes,
520
void** cptr,int tag,ARMCI_MEMHDL_T *lochdl,
521
ARMCI_MEMHDL_T *remhdl)
526
/*
 * armci_client_direct_send - put `bytes` bytes from the local buffer src
 * to address dst on process `proc`.
 *
 * proc   - target process; its nid/pid come from portals->ptl_pe_procid_map
 * src    - local source address
 * dst    - remote destination address
 * bytes  - number of bytes to transfer
 * cptr   - when tag is non-zero, receives the completion descriptor used
 * tag    - non-blocking tag, 0 for a blocking call
 * lochdl - handle of the local region, or NULL for user memory
 * remhdl - handle of the remote region; its regid is the match-bits value
 *
 * The local descriptor is chosen the same way as in
 * armci_client_direct_get().  After the put is issued the fence array is
 * updated for `proc` and portals->outstanding_puts is incremented.
 *
 * NOTE(review): the braces, the declarations of rc/cdesc/c_info, the
 * else/if structure between the visible branches, the `do` paired with the
 * `while` below, the #else/#endif lines, parts of the PtlPutRegion call,
 * the conditions guarding the error branches and the return statement are
 * not part of this listing.
 */
int armci_client_direct_send(int proc,void *src, void* dst, int bytes,
527
void **cptr, int tag, ARMCI_MEMHDL_T *lochdl,
528
ARMCI_MEMHDL_T *remhdl )
530
int clus = armci_clus_id(proc);
532
ptl_size_t offset_local = 0, offset_remote = 0;
533
ptl_match_bits_t mb = remhdl->regid;
534
ptl_md_t *md_remote,md, *md_local;
536
ptl_match_bits_t * mb_ptr;
537
ptl_handle_md_t *md_hdl_local;
539
ptl_process_id_t dest_proc;
541
int lproc,rproc,user_memory=0;
543
ARMCI_PR_DBG("enter",remhdl->regid);
544
dest_proc.nid = portals->ptl_pe_procid_map[proc].nid;
545
dest_proc.pid = portals->ptl_pe_procid_map[proc].pid;
546
md_remote =&remhdl->cdesc.mem_dsc;
548
if(lochdl == NULL){ /*this is user memory*/
550
cdesc = get_free_comp_desc(PORTALS_MEM_REGIONS,&c_info);
551
md_local = &cdesc->mem_dsc;
552
md_hdl_local = &cdesc->mem_dsc_hndl;
553
md_local->length=bytes;
557
if(lochdl->islocal){ /*PARMCI_Malloc_local memory*/
560
cdesc = get_free_comp_desc(PORTALS_MEM_REGIONS,&c_info);
561
md_local = &cdesc->mem_dsc;
562
md_hdl_local = &cdesc->mem_dsc_hndl;
563
md_local->length=bytes;
566
cdesc=&lochdl->cdesc;
567
md_local = &lochdl->cdesc.mem_dsc;
568
md_hdl_local = &lochdl->cdesc.mem_dsc_hndl;
572
/*we need to pass region id to get corresponding md*/
573
cdesc = get_free_comp_desc(lochdl->regid,&c_info);
574
md_local = &cdesc->mem_dsc;
575
md_hdl_local = &cdesc->mem_dsc_hndl;
580
/* offsets are relative to the start of the local / remote descriptor */
offset_local = (char *)src - (char *)md_local->start;
581
offset_remote =(char *)dst - (char *)md_remote->start;
583
/* NOTE(review): the offsets are ptl_size_t but printed with %d - confirm */
printf("\n%d:offr=%d offl=%d\n",armci_me,offset_remote,offset_local);
585
/*printf("\n%d:offr=%d ptrr=%p offl=%d ptrl=%p\n",armci_me,offset_remote,md_remote->start,offset_local,md_local->start);fflush(stdout);*/
587
if(tag) *((comp_desc **)cptr) = cdesc; /*TOED*/
589
if(tag){printf("\n%d:put tag=%d c_info=%d %p",armci_me,tag,c_info,cdesc);fflush(stdout);}
593
cdesc->dest_id = proc;
594
cdesc->type = ARMCI_PORTALS_NBPUT;
598
cdesc->dest_id = proc;
599
cdesc->type = ARMCI_PORTALS_PUT;
601
/* NOTE(review): armci_client_complete() and get_free_comp_desc() test
 * PUT_LOCAL_ONLY_COMPLETION, while this function tests
 * PUT_LOCAL_COMPLETION_ONLY - confirm which spelling the build defines. */
#ifdef PUT_LOCAL_COMPLETION_ONLY
607
/* lets armci_client_complete() find the descriptor from the event */
md_local->user_ptr = (void *)cdesc;
608
md_local->options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
612
rc = PtlMDUpdate(*md_hdl_local,NULL,md_local,portals->eq_h);
613
} while (rc == PTL_MD_NO_UPDATE);
615
printf("%d:PtlMDUpdate: %s\n", portals->rank, ptl_err_str[rc]);
616
armci_die("ptlmdupdate failed",0);
621
rc = PtlMDBind(portals->ni_h,*md_local, PTL_UNLINK, md_hdl_local);
623
fprintf(stderr, "%d:PtlMDBind: %s\n", portals->rank, ptl_err_str[rc]);
624
armci_die("ptlmdbind failed",0);
630
rc = PtlPutRegion(*md_hdl_local,offset_local,bytes,
631
#ifdef PUT_LOCAL_COMPLETION_ONLY
638
0, mb,offset_remote, 0);
640
fprintf(stderr, "%d:PtlPutRegion: %s\n", portals->rank,ptl_err_str[rc]);
641
armci_die("PtlPutRegion failed",0);
644
printf("\n%d:issued put to %d\n",armci_me,proc);fflush(stdout);
647
armci_update_fence_array(proc, 1);
649
armci_client_complete(NULL,proc,0,cdesc); /* check this later */
652
/* NOTE(review): the matching decrement happens in armci_client_complete()
 * on PTL_EVENT_ACK; whether it can run before this increment depends on
 * conditions that are not visible here. */
portals->outstanding_puts++;
653
ARMCI_PR_DBG("exit",remhdl->regid);
657
void armci_client_nb_send(int proc, void *src_buf, int *src_stride_arr,
658
void *dst_buf, int *dst_stride_arr, int bytes,
659
void** cptr,int tag,ARMCI_MEMHDL_T *lochdl,
660
ARMCI_MEMHDL_T *remhdl)
664
/*
 * armci_network_strided (variant using non-blocking transfers for the
 * 1-D pieces of a strided request).
 *
 * op               - operation code (get / put / accumulate)
 * scale            - scale factor, used by accumulate only
 * proc             - remote process
 * src_ptr, dst_ptr - base addresses of source and destination
 * src_stride_arr, dst_stride_arr - byte strides per stride level
 * count            - count[0] is the byte length of one contiguous piece,
 *                    count[k] the number of pieces along level k
 * stride_levels    - number of stride levels
 * nb_handle        - non-blocking handle or NULL
 *
 * For each of the n1dim contiguous pieces the source and destination
 * offsets are accumulated from the per-level counters bvalues/bvalued,
 * and the piece is moved with armci_client_direct_get() or
 * armci_client_direct_send().
 *
 * NOTE(review): the braces, the declarations of i/j/tag/sptr/dptr/cptr/
 * loc/rem, the computation of n1dim, the resetting of idxs/idxd, the
 * conditions selecting between the visible calls and their trailing
 * arguments are not part of this listing.
 * NOTE(review): bvalues/bvalued/bunit hold MAX_STRIDE_LEVEL entries and
 * are indexed up to stride_levels inclusive - confirm that callers keep
 * stride_levels < MAX_STRIDE_LEVEL.
 */
665
void armci_network_strided(int op, void* scale, int proc,void *src_ptr,
666
int src_stride_arr[], void* dst_ptr, int dst_stride_arr[],
667
int count[], int stride_levels, armci_ihdl_t nb_handle)
670
long idxs,idxd; /* index offset of current block position to ptr */
671
int n1dim; /* number of 1 dim block */
672
int bvalues[MAX_STRIDE_LEVEL], bunit[MAX_STRIDE_LEVEL];
673
int bvalued[MAX_STRIDE_LEVEL];
674
int bytes = count[0];
677
ARMCI_MEMHDL_T *loc_memhdl=NULL,*rem_memhdl=NULL;
679
int armci_region_both_found_hndl(void *loc, void *rem, int size, int node,
680
ARMCI_MEMHDL_T **loc_memhdl,ARMCI_MEMHDL_T **rem_memhdl);
681
if(nb_handle)tag=nb_handle->tag;
690
armci_region_both_found_hndl(loc,rem,bytes,armci_clus_id(proc),
691
&loc_memhdl,&rem_memhdl);
692
/* number of n-element of the first dimension */
694
for(i=1; i<=stride_levels; i++)
696
/* calculate the destination indices */
697
bvalues[0] = 0; bvalues[1] = 0; bunit[0] = 1;
698
bvalued[0] = 0; bvalued[1] = 0; bunit[1] = 1;
699
for(i=2; i<=stride_levels; i++) {
700
bvalues[i] = bvalued[i] = 0;
701
bunit[i] = bunit[i-1] * count[i-1];
705
/* the format has no conversion, so the stray armci_me argument is gone;
 * the printed text is unchanged */
printf("\nSHOULD NOT DO NETWORK_STRIDED FOR ACCS \n");
707
armci_die("network_strided called for acc",proc);
709
for(i=0; i<n1dim; i++) {
710
tag = GET_NEXT_NBTAG();
713
for(j=1; j<=stride_levels; j++) {
714
idxs += bvalues[j] * src_stride_arr[j-1];
715
idxd += bvalued[j] * dst_stride_arr[j-1];
716
/* advance the level-j counter once every bunit[j] pieces, wrapping
 * at count[j] */
if((i+1) % bunit[j] == 0) {bvalues[j]++;bvalued[j]++;}
717
if(bvalues[j] > (count[j]-1)) bvalues[j] = 0;
718
if(bvalued[j] > (count[j]-1)) bvalued[j] = 0;
721
sptr = ((char *)src_ptr)+idxs;
722
dptr = ((char *)dst_ptr)+idxd;
724
armci_client_direct_get(proc,sptr,dptr,bytes,&cptr,tag,loc_memhdl,
728
armci_client_direct_send(proc,sptr,dptr,bytes,&cptr,tag,loc_memhdl,
731
else if(ARMCI_ACC(op)){
732
armci_client_direct_get(proc,sptr,dptr,bytes,&cptr,tag,loc_memhdl,
735
armci_client_direct_send(proc,sptr,dptr,bytes,&cptr,tag,loc_memhdl,
739
armci_die("in network_strided unknown opcode",op);
746
nb_handle->cmpl_info=cptr;
749
armci_client_complete(NULL,proc,tag,cptr); /* check this later */
752
#else /*using blocking for multiple 1ds inside a 2d*/
753
/*
 * armci_network_strided (variant using the blocking PARMCI calls for the
 * 1-D pieces of a strided request).
 *
 * Parameters are the same as in the non-blocking variant.  For each of
 * the n1dim contiguous pieces of count[0] bytes the source and
 * destination offsets are accumulated from the per-level counters
 * bvalues/bvalued.  Every piece but the last, or every piece when
 * nb_handle is NULL, goes through PARMCI_Get / PARMCI_Put / PARMCI_AccS;
 * the other visible calls are the non-blocking PARMCI_Nb* forms, which
 * receive nb_handle.
 *
 * NOTE(review): the braces, the declarations of i/j/sptr/dptr/loc/rem,
 * the computation of n1dim, the resetting of idxs/idxd and the conditions
 * selecting get vs. put are not part of this listing.
 * NOTE(review): bvalues/bvalued/bunit hold MAX_STRIDE_LEVEL entries and
 * are indexed up to stride_levels inclusive - confirm that callers keep
 * stride_levels < MAX_STRIDE_LEVEL.
 */
void armci_network_strided(int op, void* scale, int proc,void *src_ptr,
754
int src_stride_arr[], void* dst_ptr, int dst_stride_arr[],
755
int count[], int stride_levels, armci_ihdl_t nb_handle)
758
long idxs,idxd; /* index offset of current block position to ptr */
759
int n1dim; /* number of 1 dim block */
760
int bvalues[MAX_STRIDE_LEVEL], bunit[MAX_STRIDE_LEVEL];
761
int bvalued[MAX_STRIDE_LEVEL];
762
int bytes = count[0];
766
ARMCI_MEMHDL_T *loc_memhdl=NULL,*rem_memhdl=NULL;
767
int armci_region_both_found_hndl(void *loc, void *rem, int size, int node,
768
ARMCI_MEMHDL_T **loc_memhdl,ARMCI_MEMHDL_T **rem_memhdl);
779
armci_region_both_found_hndl(loc,rem,count[0],armci_clus_id(proc),
780
&loc_memhdl,&rem_memhdl);
782
/* number of n-element of the first dimension */
784
for(i=1; i<=stride_levels; i++)
787
/* calculate the destination indices */
788
bvalues[0] = 0; bvalues[1] = 0; bunit[0] = 1;
789
bvalued[0] = 0; bvalued[1] = 0; bunit[1] = 1;
790
for(i=2; i<=stride_levels; i++) {
791
bvalues[i] = bvalued[i] = 0;
792
bunit[i] = bunit[i-1] * count[i-1];
795
for(i=0; i<n1dim; i++) {
798
for(j=1; j<=stride_levels; j++) {
799
idxs += bvalues[j] * src_stride_arr[j-1];
800
idxd += bvalued[j] * dst_stride_arr[j-1];
801
/* advance the level-j counter once every bunit[j] pieces, wrapping
 * at count[j] */
if((i+1) % bunit[j] == 0) {bvalues[j]++;bvalued[j]++;}
802
if(bvalues[j] > (count[j]-1)) bvalues[j] = 0;
803
if(bvalued[j] > (count[j]-1)) bvalued[j] = 0;
806
sptr = ((char *)src_ptr)+idxs;
807
dptr = ((char *)dst_ptr)+idxd;
808
if((i<(n1dim-1)) || nb_handle==NULL){
810
PARMCI_Get(sptr,dptr,bytes,proc);
812
PARMCI_Put(sptr,dptr,bytes,proc);
813
else if(ARMCI_ACC(op))
814
/* NOTE(review): stride arrays are NULL while the stride-level argument
 * is 1 - confirm that the accumulate call accepts this for a contiguous
 * piece. */
PARMCI_AccS(op,scale,sptr,NULL,dptr,NULL,count,1,proc);
816
armci_die("in network_strided unknown opcode",op);
821
PARMCI_NbGet(sptr,dptr,bytes,proc,(armci_hdl_t *)nb_handle);
823
PARMCI_NbPut(sptr,dptr,bytes,proc,(armci_hdl_t *)nb_handle);
824
else if(ARMCI_ACC(op))
825
PARMCI_NbAccS(op,scale,sptr,NULL,dptr,NULL,count,1,proc,(armci_hdl_t *)nb_handle);
827
armci_die("in network_strided unknown opcode",op);
832
/*
 * armci_client_direct_getput - swap `bytes` bytes with remote memory: the
 * remote value at dst is fetched into getinto while the value at putfrom
 * is stored at dst, through one PtlGetPutRegion call.
 *
 * proc    - target process; its nid/pid come from portals->ptl_pe_procid_map
 * getinto - local buffer receiving the previous remote contents
 * putfrom - local buffer holding the new contents
 * dst     - remote address
 * bytes   - number of bytes to swap
 * remhdl  - remote handle; offset_remote is taken from its descriptor start
 * cptr, tag, lochdl - not referenced in the visible code
 *
 * Two descriptors are taken from the PORTALS_MEM_REGIONS pool, one for the
 * get side and one for the put side; only the second one is passed to
 * armci_client_complete().  The match bits are fixed at 100, the value
 * armcill_allocate_locks() attaches its lock match entry with.
 *
 * NOTE(review): the braces, the declarations of rc/cdesc/c_info, the
 * conditions guarding the error branches, the tail of the PtlGetPutRegion
 * call and the return statement are not part of this listing.
 */
int armci_client_direct_getput(int proc,void *getinto, void *putfrom, void* dst,
833
int bytes, void **cptr, int tag, ARMCI_MEMHDL_T *lochdl,
834
ARMCI_MEMHDL_T *remhdl )
837
int clus = armci_clus_id(proc);
839
ptl_size_t offset_get = 0, offset_put=0, offset_remote = 0;
840
ptl_match_bits_t mb = 100;
841
ptl_md_t *md_remote,md, *md_local_put, *md_local_get;
843
ptl_match_bits_t * mb_ptr;
844
ptl_handle_md_t *md_hdl_local_put,*md_hdl_local_get;
846
ptl_process_id_t dest_proc;
849
printf("\n%d:****************getput*********\n",armci_me);
850
dest_proc.nid = portals->ptl_pe_procid_map[proc].nid;
851
dest_proc.pid = portals->ptl_pe_procid_map[proc].pid;
852
md_remote =&remhdl->cdesc.mem_dsc;
854
/* descriptor for the get side */
cdesc = get_free_comp_desc(PORTALS_MEM_REGIONS,&c_info);
855
md_local_get = &cdesc->mem_dsc;
856
md_hdl_local_get = &cdesc->mem_dsc_hndl;
857
md_local_get->length=bytes;
858
md_local_get->start=getinto;
859
/* start was just set to getinto, so this offset is 0 */
offset_get = (char *)getinto - (char *)md_local_get->start;
860
offset_remote =(char *)dst - (char *)md_remote->start;
862
cdesc->dest_id = proc;
863
cdesc->type = ARMCI_PORTALS_PUT;
865
md_local_get->user_ptr = (void *)cdesc;
866
md_local_get->options = PTL_MD_OP_GET | PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE | PTL_MD_EVENT_END_DISABLE;
867
rc = PtlMDBind(portals->ni_h,*md_local_get, PTL_UNLINK, md_hdl_local_get);
869
fprintf(stderr, "%d:PtlMDBind: %s\n", portals->rank, ptl_err_str[rc]);
870
armci_die("ptlmdbind failed",0);
873
/* descriptor for the put side; this is the one waited on at the end */
cdesc = get_free_comp_desc(PORTALS_MEM_REGIONS,&c_info);
874
md_local_put = &cdesc->mem_dsc;
875
md_hdl_local_put = &cdesc->mem_dsc_hndl;
876
md_local_put->length=bytes;
877
md_local_put->start=putfrom;
878
/* start was just set to putfrom, so this offset is 0 */
offset_put = (char *)putfrom - (char *)md_local_put->start;
880
cdesc->dest_id = proc;
881
cdesc->type = ARMCI_PORTALS_GET;
883
md_local_put->user_ptr = (void *)cdesc;
884
md_local_put->options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
885
rc = PtlMDBind(portals->ni_h,*md_local_put, PTL_UNLINK, md_hdl_local_put);
887
fprintf(stderr, "%d:PtlMDBind: %s\n", portals->rank, ptl_err_str[rc]);
888
armci_die("ptlmdbind failed",0);
891
rc = PtlGetPutRegion(*md_hdl_local_get,offset_get,*md_hdl_local_put,
892
offset_put,bytes,dest_proc, portals->ptl,0,mb,offset_remote,
895
/* name the call that actually failed (both messages said PtlPutRegion) */
printf("%d:PtlGetPutRegion: %s\n", portals->rank,ptl_err_str[rc]);
896
armci_die("PtlGetPutRegion failed",0);
899
printf("\n%d:issued getput to %d\n",armci_me,proc);fflush(stdout);
902
armci_client_complete(NULL,proc,0,cdesc); /* check this later */
906
void armci_network_client_deregister_memory(ARMCI_MEMHDL_T *mh)
911
void armci_network_server_deregister_memory(ARMCI_MEMHDL_T *mh)
916
/* State used by the lock routines below. */
static int num_locks=0;
917
/* all_locks[p] is the address of process p's lock array; the table is
 * filled in armcill_allocate_locks(). */
static long **all_locks;
918
/* Reported as the limit in the "too many locks" message of
 * armcill_allocate_locks(). */
#define ARMCI_PORTALS_MAX_LOCKS 16
920
/* NOTE(review): the next three declarations are accessed below as members
 * of armci_portals_lock_st (mem_dsc_h, me_lock_h,
 * armci_portal_lock_memhdl), so they are presumably the body of the
 * armci_lock_struct type whose opening and closing lines are not part of
 * this listing. */
ptl_handle_md_t mem_dsc_h;
921
ptl_handle_me_t me_lock_h;
922
region_memhdl_t armci_portal_lock_memhdl;
924
/* The single lock-state object used by the lock routines in this file. */
armci_lock_struct armci_portals_lock_st;
925
/*
 * armcill_allocate_locks - allocate `num` locks on this process and make
 * them reachable through Portals.
 *
 * num - number of locks (longs) to allocate locally
 *
 * A table with one `long *` per process (all_locks) and the local lock
 * array (my_locks) are allocated and zeroed, the local address is
 * published through armci_exchange_address(), an access-control entry is
 * installed, and a match entry (match bits 100, any nid/pid) with a memory
 * descriptor is attached on portal index portals->ptl.
 *
 * NOTE(review): the braces, the declarations of rc/my_locks/elems/ace_any,
 * the right-hand sides of the length/threshold/eq_handle assignments, the
 * tails of several calls and the conditions guarding the error branches
 * are not part of this listing.
 */
void armcill_allocate_locks(int num)
928
ptl_match_bits_t *mb;
929
ptl_process_id_t match_id;
930
ptl_handle_md_t *md_h;
935
armci_lock_struct *armci_portals_lock=&armci_portals_lock_st;
939
printf("%d:armci_allocate_locks num=%d\n",
942
/* NOTE(review): the test uses MAX_LOCKS while the message reports
 * ARMCI_PORTALS_MAX_LOCKS - confirm which limit is intended. */
if(MAX_LOCKS<num)armci_die2("too many locks",ARMCI_PORTALS_MAX_LOCKS,num);
944
/* allocate memory to hold lock info for all the processors */
945
all_locks = malloc(armci_nproc*sizeof(long *));
946
if(!all_locks) armci_die("armcill_init_locks: malloc failed",0);
947
/* the table holds pointers, so size the clear by the pointer type */
bzero(all_locks,armci_nproc*sizeof(long *));
948
/* initialize local locks */
949
my_locks = malloc(num*sizeof(long));
950
if(!my_locks) armci_die("armcill_init_locks: malloc failed",0);
951
/* clear the freshly allocated lock array itself; this used to clear
 * all_locks a second time and left the locks uninitialized */
bzero(my_locks,num*sizeof(long));
953
all_locks[armci_me]=my_locks;
954
/* now we use all-reduce to exchange locks info among everybody */
956
armci_exchange_address((void **)all_locks,elems);
958
rc = PtlACEntry(portals->ni_h, ace_any,
959
(ptl_process_id_t){PTL_NID_ANY, PTL_PID_ANY},
962
printf("%d: PtlACEntry() failed: %s\n",
963
armci_me, ptl_err_str[rc]);
964
armci_die("PtlACEntry failed",0);
967
/* NOTE(review): start is the address of the my_locks pointer variable,
 * not of the lock array it points to - confirm this is intended together
 * with the offset computed in armci_client_direct_getput(). */
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.start =&my_locks;
968
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.length =
970
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.threshold =
972
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.options =
973
PTL_MD_OP_PUT | PTL_MD_OP_GET |
974
PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE |
975
PTL_MD_EVENT_START_DISABLE;
976
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.max_size = 0;
977
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.user_ptr = NULL;
978
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc.eq_handle =
980
match_id.nid = PTL_NID_ANY;
981
match_id.pid = PTL_PID_ANY;
983
/* Lockmaster needs a match entry for clients to access lock value. */
985
rc = PtlMEAttach(portals->ni_h, portals->ptl,
986
match_id, /* source address */
987
100, /* expected match bits */
988
0, /* ignore bits to mask */
989
PTL_RETAIN, /* PTL_RETAIN is passed as the unlink option */
991
&armci_portals_lock->me_lock_h);
993
printf("%d: PtlMEAttach(): %s\n",
994
armci_me, ptl_err_str[rc]);
995
armci_die("PtlMEAttach in init_locks failed",0);
997
rc = PtlMDAttach(armci_portals_lock->me_lock_h,
998
armci_portals_lock->armci_portal_lock_memhdl.cdesc.mem_dsc,
1000
&armci_portals_lock->mem_dsc_h);
1002
printf("%d: PtlMDAttach(): %s\n",
1003
armci_me, ptl_err_str[rc]);
1004
armci_die("PtlMDAttach in init_locks failed",0);
1008
/*
 * armcill_lock - acquire lock number `mutex` owned by process `proc`.
 *
 * mutex - index of the lock inside proc's lock array
 * proc  - process owning the lock
 *
 * Issues one armci_client_direct_getput() of sizeof(long) bytes against
 * all_locks[proc]+mutex: the value 1 (putfrom) is stored remotely and the
 * previous remote value is fetched into getinto.
 *
 * NOTE(review): no visible line inspects getinto or retries when the lock
 * was already held; the braces and possibly a retry loop are not part of
 * this listing - confirm.
 */
void armcill_lock(int mutex, int proc)
1010
long getinto=0,putfrom=1;
1011
armci_lock_struct *armci_portal_lock=&armci_portals_lock_st;
1012
region_memhdl_t *rem_lock_hdl=&armci_portal_lock->armci_portal_lock_memhdl;
1013
printf("\n%d:in lock before\n",armci_me);fflush(stdout);
1015
armci_client_direct_getput(proc,&getinto,&putfrom,(all_locks[proc]+mutex),
1016
sizeof(long), NULL, 0, NULL,rem_lock_hdl);
1018
printf("\n%d:in lock after\n",armci_me);fflush(stdout);
1022
/*\ unlock specified mutex on node where process proc is running
1024
void armcill_unlock(int mutex, int proc)
1026
long getinto=0,putfrom=0;
1027
armci_lock_struct *armci_portal_lock=&armci_portals_lock_st;
1028
region_memhdl_t *rem_lock_hdl=&armci_portal_lock->armci_portal_lock_memhdl;
1029
armci_client_direct_getput(proc,&getinto,&putfrom,(all_locks[proc]+mutex),
1030
sizeof(long), NULL, 0, NULL,rem_lock_hdl);
1033
int armci_portals_rmw_(int op, int *ploc, int *prem, int extra, int proc)
1039
void armci_portals_shmalloc_allocate_mem(int num_lks)
1043
armci_size_t bytes = 128;
1046
ptr_arr = (void**)malloc(armci_nproc*sizeof(void*));
1047
if(!ptr_arr) armci_die("armci_shmalloc_get_offsets: malloc failed", 0);
1048
bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
1050
PARMCI_Malloc(ptr_arr,bytes);