5
/* $Id: memory.c,v 1.56.2.3 2007-04-25 23:49:55 d3p687 Exp $ */
11
#include "kr_malloc.h"
18
static context_t ctx_localmem;
20
static context_t ctx_mlocalmem;
22
#if defined(SYSV) || defined(WIN32) || defined(MMAP) || defined(HITACHI)
25
#if !defined(USE_SHMEM) && (defined(HITACHI) || defined(MULTI_CTX))
29
#if !(defined(LAPI)||defined(QUADRICS)||defined(SERVER_THREAD)) ||\
31
#define RMA_NEEDS_SHMEM
37
kr_malloc_print_stats(&ctx_localmem);
39
kr_malloc_verify(&ctx_localmem);
42
/*
 * Debug helper: prints allocation bookkeeping to stdout and flushes it.
 * The output contains armci_me, armci_master, the local node's process count
 * (armci_clus_info[armci_clus_me].nslave), off, bytes, myptr, size and every
 * entry ptr_arr[0..armci_nproc-1].
 * The printing sits inside a loop over all armci_nproc ranks; the lines between
 * the loop header and the first printf are not visible in this excerpt, so how
 * the per-rank output is gated is not confirmed here.
 */
void armci_print_ptr(void **ptr_arr, int bytes, int size, void* myptr, int off)
45
int nproc = armci_clus_info[armci_clus_me].nslave;
47
ARMCI_PR_DBG("enter",0);
48
for(i=0; i< armci_nproc; i++){
51
printf("%d master =%d nproc=%d off=%d\n",armci_me,
52
armci_master,nproc, off);
53
printf("%d:bytes=%d mptr=%p s=%d ",armci_me, bytes, myptr,size);
54
for(j = 0; j< armci_nproc; j++)printf(" %p",ptr_arr[j]);
55
printf("\n"); fflush(stdout);
59
ARMCI_PR_DBG("exit",0);
63
/*\ master exports its address of shmem region at the beginning of that region
65
/*
 * Writes the value of ptr into the first pointer-sized word of the memory that
 * ptr refers to (through a volatile-qualified lvalue).
 * Calls armci_die when ptr is NULL.
 */
static void armci_master_exp_attached_ptr(void* ptr)
67
ARMCI_PR_DBG("enter",0);
68
if(!ptr) armci_die("armci_master_exp_att_ptr: null ptr",0);
69
*(volatile void**)ptr = ptr;
70
ARMCI_PR_DBG("exit",0);
74
/*\ Collective Memory Allocation on shared memory systems
76
void armci_shmem_malloc(void *ptr_arr[], armci_size_t bytes)
78
void *myptr=NULL, *ptr=NULL;
79
long idlist[SHMIDLEN];
80
long size=0, offset=0;
84
int nproc = armci_clus_info[armci_clus_me].nslave;
85
ARMCI_PR_DBG("enter",0);
86
bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
88
/* allocate work arrays */
89
size_arr = (long*)calloc(armci_nproc,sizeof(long));
90
if(!size_arr)armci_die("armci_malloc:calloc failed",armci_nproc);
91
/* allocate arrays for cluster address translations */
93
ptr_ref_arr = calloc(armci_nclus,sizeof(void*)); /* must be zero */
94
if(!ptr_ref_arr)armci_die("armci_malloc:calloc 2 failed",armci_nclus);
96
/* combine all memory requests into size_arr */
97
size_arr[armci_me] = bytes;
98
armci_msg_lgop(size_arr, armci_nproc, "+");
100
/* determine aggregate request size on the cluster node */
101
for(i=0, size=0; i< nproc; i++) size += size_arr[i+armci_master];
103
/* master process creates shmem region and then others attach to it */
104
if(armci_me == armci_master ){
105
/* can malloc if there is no data server process and has 1 process/node*/
106
# ifndef RMA_NEEDS_SHMEM
108
myptr = kr_malloc(size, &ctx_localmem, 0, NULL, NULL);
111
myptr = Create_Shared_Region(idlist+1,size,idlist);
112
if(!myptr && size>0 )armci_die("armci_malloc: could not create", (int)(size>>10));
114
/* place its address at beginning of attached region for others to see */
115
if(size)armci_master_exp_attached_ptr(myptr);
118
printf("%d:armci_malloc addr mptr=%p size=%ld\n",armci_me,myptr,size);
123
/* broadcast shmem id to other processes on the same cluster node */
124
armci_msg_clus_brdcst(idlist, SHMIDLEN*sizeof(long));
126
if(armci_me != armci_master){
127
myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
128
if(!myptr)armci_die("armci_malloc: could not attach", (int)(size>>10));
130
/* now every process in a SMP node needs to find out its offset
131
* w.r.t. master - this offset is necessary to use memlock table
133
if(size) armci_set_mem_offset(myptr);
135
printf("%d:armci_malloc attached addr mptr=%p ref=%p size=%ld\n",
136
armci_me,myptr, *(void**)myptr,size); fflush(stdout);
139
# if defined(DATA_SERVER)
140
/* get server reference address for every cluster node to perform
141
* remote address translation for global address space */
143
if(armci_me == armci_master){
144
# ifdef SERVER_THREAD
145
ptr_ref_arr[armci_clus_me]=myptr;
148
extern int _armci_server_started;
149
if(_armci_server_started) {
150
armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
151
&ptr, sizeof(void*));
152
ptr_ref_arr[armci_clus_me]= myptr; /* from server*/
154
else /* server not yet started */
155
ptr_ref_arr[armci_clus_me]=myptr;
159
printf("%d:addresses server=%p myptr=%p\n",armci_me,ptr,myptr);
164
/* exchange ref addr of shared memory region on every cluster node*/
165
armci_exchange_address(ptr_ref_arr, armci_nclus);
166
# ifdef ARMCI_REGISTER_SHMEM
167
armci_register_shmem(myptr,size,idlist+1,idlist[0],ptr_ref_arr[armci_clus_me]);
170
ptr_ref_arr[armci_master] = myptr;
172
/* translate addresses for all cluster nodes */
173
for(cn = 0; cn < armci_nclus; cn++){
174
int master = armci_clus_info[cn].master;
176
/* on local cluster node use myptr directly */
177
ptr = (armci_clus_me == cn) ? myptr: ptr_ref_arr[cn];
178
/* compute addresses pointing to the memory regions on cluster node*/
179
for(i=0; i< armci_clus_info[cn].nslave; i++){
180
/* NULL if request size is 0*/
181
ptr_arr[i+master] = (size_arr[i+master])? ((char*)ptr)+offset : NULL;
182
offset += size_arr[i+master];
186
/* compute addresses for local cluster node */
188
for(i=0; i< nproc; i++) {
189
ptr_ref_arr[i] = (size_arr[i+armci_master])? ((char*)myptr)+offset : 0L;
190
offset += size_arr[i+armci_master];
192
/* exchange addresses with all other processes */
193
ptr_arr[armci_me] = (char*)ptr_ref_arr[armci_me-armci_master];
194
armci_exchange_address(ptr_arr, armci_nproc);
195
/* overwrite entries for local cluster node with ptr_ref_arr */
196
bcopy((char*)ptr_ref_arr, (char*)(ptr_arr+armci_master), nproc*sizeof(void*));
197
/* armci_print_ptr(ptr_arr, bytes, size, myptr, offset);*/
202
/* free work arrays */
205
ARMCI_PR_DBG("exit",0);
209
/********************************************************************
210
* Non-collective Memory Allocation on shared memory systems
212
/*
 * Non-collective allocation of `size` bytes, recorded in *meminfo.
 *
 * Visible steps:
 *  - myptr is obtained either from kr_malloc on ctx_localmem (guarded by
 *    !RMA_NEEDS_SHMEM and a single process on the local node) or from
 *    Create_Shared_Region(idlist+1, size, idlist); the else/#else lines that
 *    join the two are not visible in this excerpt.
 *  - armci_die is called when myptr is NULL and size > 0.
 *  - With DATA_SERVER, armci_serv_attach_req is issued to obtain armci_ptr
 *    (for SERVER_THREAD only when this process is not armci_master; in the
 *    other visible path only when _armci_server_started is set).
 *  - meminfo gets armci_addr, addr, size, cpid (= armci_me) and a copy of idlist.
 *
 * NOTE(review): the debug printf formats `size` (a size_t) with %ld.
 */
void armci_shmem_memget(armci_meminfo_t *meminfo, size_t size) {
214
void *armci_ptr=NULL; /* legal ARMCI ptr used in ARMCI data xfer ops */
215
long idlist[SHMIDLEN];
217
/* can malloc if there is no data server process & has 1 process/node*/
218
#ifndef RMA_NEEDS_SHMEM
219
if( armci_clus_info[armci_clus_me].nslave == 1)
220
myptr = kr_malloc(size, &ctx_localmem, 0, NULL, NULL);
223
myptr = Create_Shared_Region(idlist+1,size,idlist);
225
if(!myptr && size>0 )
226
armci_die("armci_shmem_memget: create failed", (int)(size>>10));
230
printf("%d: armci_shmem_memget: addr=%p size=%ld %ld %ld \n", armci_me,
231
myptr, size, idlist[0], idlist[1]);
237
#if defined(DATA_SERVER)
239
/* get server reference address to perform
240
* remote address translation for global address space */
243
# ifdef SERVER_THREAD
245
/* data server thread runs on master process */
246
if(armci_me != armci_master) {
247
armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
248
&armci_ptr, sizeof(void*));
252
/* ask dataserver process to attach to region and get ptr*/
254
extern int _armci_server_started;
255
if(_armci_server_started) {
256
armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
257
&armci_ptr, sizeof(void*));
264
/* fill the meminfo structure */
265
meminfo->armci_addr = armci_ptr;
266
meminfo->addr = myptr;
267
meminfo->size = size;
268
meminfo->cpid = armci_me;
269
bcopy(idlist, meminfo->idlist, SHMIDLEN*sizeof(long));
273
/*
 * Obtains a pointer to the segment described by meminfo.
 * Visible behavior: under SAMECLUSNODE(meminfo->cpid) the region is attached
 * with Attach_Shared_Region(idlist+1, size, idlist[0]); armci_die is called if
 * that yields NULL, and armci_set_mem_offset(ptr) runs when size is nonzero.
 * ptr is also assigned meminfo->armci_addr (commented "remote address").
 * NOTE(review): presumably that assignment is the branch for a creator on a
 * different node; the braces/else and the return statement are not visible in
 * this excerpt.
 */
void* armci_shmem_memat(armci_meminfo_t *meminfo) {
275
long size = (long) meminfo->size;
276
long *idlist = (long*) meminfo->idlist;
278
if(SAMECLUSNODE(meminfo->cpid))
280
/* Attach to the shared memory segment */
281
ptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
282
if(!ptr)armci_die("ARMCi_Memat: could not attach", (int)(size>>10));
284
/* CHECK: now every process in a SMP node needs to find out its offset
285
* w.r.t. master - this offset is necessary to use memlock table
287
if(size) armci_set_mem_offset(ptr);
291
ptr = meminfo->armci_addr; /* remote address */
297
/*
 * Releases the segment recorded in meminfo, but only when the calling process
 * created it (meminfo->cpid == armci_me).
 * With RMA_NEEDS_SHMEM the pointer goes to Free_Shmem_Ptr(0,0,ptr). The other
 * visible path uses Free_Shmem_Ptr when the local node has more than one
 * process and kr_free on ctx_localmem otherwise; the #else separating the two
 * paths is not visible in this excerpt.
 */
void armci_shmem_memctl(armci_meminfo_t *meminfo) {
299
/* only the creator can delete the segment */
300
if(meminfo->cpid == armci_me) {
301
void *ptr = meminfo->addr;
303
#ifdef RMA_NEEDS_SHMEM
304
Free_Shmem_Ptr(0,0,ptr);
306
if(armci_clus_info[armci_clus_me].nslave>1)
307
Free_Shmem_Ptr(0,0,ptr);
308
else kr_free(ptr, &ctx_localmem);
313
/****** End: Non-collective memory allocation on shared memory systems *****/
316
/********************************************************************
317
* Group Memory Allocation on shared memory systems for ARMCI Groups
319
void armci_shmem_malloc_group(void *ptr_arr[], armci_size_t bytes,
322
void *myptr=NULL, *ptr=NULL;
323
long idlist[SHMIDLEN];
324
long size=0, offset=0;
328
/* int nproc = armci_clus_info[armci_clus_me].nslave; ? change ? */
329
int grp_me, grp_nproc, grp_nclus, grp_master, grp_clus_nproc, grp_clus_me;
330
armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
331
ARMCI_PR_DBG("enter",0);
333
/* Get the group info: group size & group rank */
334
ARMCI_Group_size(group, &grp_nproc);
335
ARMCI_Group_rank(group, &grp_me);
336
if(grp_me == MPI_UNDEFINED) { /* check if the process is in this group */
337
armci_die("armci_malloc_group: process is not a member in this group",
341
grp_nclus = grp_attr->grp_nclus;
342
grp_clus_me = grp_attr->grp_clus_me;
343
grp_master = grp_attr->grp_clus_info[grp_clus_me].master;
344
grp_clus_nproc = grp_attr->grp_clus_info[grp_clus_me].nslave;
346
bzero((char*)ptr_arr,grp_nproc*sizeof(void*));
348
/* allocate work arrays */
349
size_arr = (long*)calloc(grp_nproc,sizeof(long));
350
if(!size_arr)armci_die("armci_malloc_group:calloc failed",grp_nproc);
352
/* allocate arrays for cluster address translations */
353
# if defined(DATA_SERVER)
356
len = grp_clus_nproc;
359
ptr_ref_arr = calloc(len,sizeof(void*)); /* must be zero */
360
if(!ptr_ref_arr)armci_die("armci_malloc_group:calloc 2 failed",len);
362
/* combine all memory requests into size_arr */
363
size_arr[grp_me] = bytes;
364
armci_msg_group_gop_scope(SCOPE_ALL, size_arr, grp_nproc, "+", ARMCI_LONG,
367
/* determine aggregate request size on the cluster node */
368
for(i=0, size=0; i< grp_clus_nproc; i++) size += size_arr[i+grp_master];
370
/* master process creates shmem region and then others attach to it */
371
if(grp_me == grp_master ){
374
/* can malloc if there is no data server process and has 1 process/node*/
375
# ifndef RMA_NEEDS_SHMEM
376
if( armci_clus_info[armci_clus_me].nslave == 1)
377
myptr = kr_malloc(size, &ctx_localmem, 0, NULL, NULL);
380
myptr = Create_Shared_Region(idlist+1,size,idlist);
381
if(!myptr && size>0 )
382
armci_die("armci_malloc_group: could not create", (int)(size>>10));
384
/* place its address at beginning of attached region for others to see */
385
if(size)armci_master_exp_attached_ptr(myptr);
388
printf("%d:armci_malloc_group addr mptr=%p ref=%p size=%ld %ld %ld \n",armci_me,myptr,*(void**)myptr, size,idlist[0],idlist[1]);
393
/* broadcast shmem id to other processes (in the same group) on the
395
armci_grp_clus_brdcst(idlist, SHMIDLEN*sizeof(long), grp_master,
396
grp_clus_nproc, group);
398
if(grp_me != grp_master){
399
myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
400
if(!myptr)armci_die("armci_malloc_group: could not attach", (int)(size>>10));
402
/* now every process in a SMP node needs to find out its offset
403
* w.r.t. master - this offset is necessary to use memlock table
405
if(size) armci_set_mem_offset(myptr);
407
printf("%d:armci_malloc_group attached addr mptr=%p ref=%p size=%ld\n",
408
armci_me,myptr, *(void**)myptr,size); fflush(stdout);
412
# if defined(DATA_SERVER)
414
/* get server reference address for every cluster node in the group
415
* to perform remote address translation for global address space */
417
if(grp_me == grp_master){
419
# ifdef SERVER_THREAD
421
/* data server thread runs on master process */
422
if(ARMCI_Absolute_id(group,grp_master)!=armci_master){
423
/*printf("\n%d: grp_master=%d %ld %ld \n",armci_me,ARMCI_Absolute_id(group,grp_master),idlist[0],idlist[1]);*/
424
armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
425
&ptr, sizeof(void*));
426
ptr_ref_arr[grp_clus_me]= ptr; /* from server*/
429
ptr_ref_arr[grp_clus_me]=myptr;
432
/* ask data server process to attach to the region and get ptr */
434
extern int _armci_server_started;
435
if(_armci_server_started) {
436
armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
437
&ptr, sizeof(void*));
438
ptr_ref_arr[grp_clus_me]= ptr; /* from server*/
440
else /* server not yet started */
441
ptr_ref_arr[grp_clus_me]=myptr;
445
printf("%d:addresses server=%p myptr=%p\n",grp_me,ptr,myptr);
450
/* exchange ref addr of shared memory region on every cluster node*/
452
int ratio = sizeof(void*)/sizeof(int);
453
if(DEBUG_)printf("%d: exchanging %ld ratio=%d\n",armci_me,
454
(long)ptr_arr[grp_me], ratio);
455
armci_msg_group_gop_scope(SCOPE_ALL, ptr_ref_arr, grp_nclus*ratio,
456
"+", ARMCI_INT, group);
457
# ifdef ARMCI_REGISTER_SHMEM
458
armci_register_shmem_grp(myptr,size,idlist+1,idlist[0],ptr_ref_arr[armci_clus_me],group);
463
ptr_ref_arr[grp_master] = myptr;
467
/* translate addresses for all cluster nodes */
468
for(cn = 0; cn < grp_nclus; cn++){
470
int master = grp_attr->grp_clus_info[cn].master;
473
/* on local cluster node use myptr directly */
474
ptr = (grp_clus_me == cn) ? myptr: ptr_ref_arr[cn];
476
/* compute addresses pointing to the memory regions on cluster node*/
477
for(i=0; i< grp_attr->grp_clus_info[cn].nslave; i++){
479
/* NULL if request size is 0*/
480
ptr_arr[i+master] =(size_arr[i+master])? ((char*)ptr)+offset: NULL;
481
offset += size_arr[i+master];
487
/* compute addresses for local cluster node */
489
for(i=0; i< grp_clus_nproc; i++) {
491
ptr_ref_arr[i] = (size_arr[i+grp_master])? ((char*)myptr)+offset : 0L;
492
offset += size_arr[i+grp_master];
496
/* exchange addresses with all other processes */
497
ptr_arr[grp_me] = (char*)ptr_ref_arr[grp_me-grp_master];
498
armci_exchange_address_grp(ptr_arr, grp_nproc, group);
500
/* overwrite entries for local cluster node with ptr_ref_arr */
501
bcopy((char*)ptr_ref_arr, (char*)(ptr_arr+grp_master), grp_clus_nproc*sizeof(void*));
505
/* armci_print_ptr(ptr_arr, bytes, size, myptr, offset);*/
507
armci_msg_group_barrier(group);
509
/* free work arrays */
512
ARMCI_PR_DBG("exit",0);
514
#endif /* ifdef MPI */
518
/*
 * Stub variant of armci_shmem_malloc: unconditionally calls armci_die.
 * NOTE(review): `bytes` is declared int here while the other definition of
 * armci_shmem_malloc in this file takes armci_size_t - confirm which prototype
 * the header declares.
 */
void armci_shmem_malloc(void* ptr_arr[], int bytes)
520
armci_die("armci_shmem_malloc should never be called on this system",0);
522
/* Stub variant of armci_shmem_memget: unconditionally calls armci_die. */
void armci_shmem_memget(armci_meminfo_t *meminfo, size_t size) {
523
armci_die("armci_shmem_memget should never be called on this system",0);
525
/*
 * Stub variant of armci_shmem_memat: unconditionally calls armci_die.
 * No return statement is visible in this excerpt.
 */
void* armci_shmem_memat(armci_meminfo_t *meminfo) {
526
armci_die("armci_shmem_memat should never be called on this system",0);
528
/* Stub variant of armci_shmem_memctl: unconditionally calls armci_die. */
void armci_shmem_memctl(armci_meminfo_t *meminfo) {
529
armci_die("armci_shmem_memctl should never be called on this system",0);
532
/* Stub variant of armci_shmem_malloc_group: unconditionally calls armci_die. */
void armci_shmem_malloc_group(void *ptr_arr[], armci_size_t bytes,
533
ARMCI_Group *group) {
534
armci_die("armci_shmem_malloc_group should never be called on this system",0);
541
/* public constructor to initialize the kr_malloc context */
542
/*
 * Sets up the kr_malloc contexts used for local allocations.
 *  - ALLOW_PIN: ctx_localmem is initialized with reg_malloc and ctx_mlocalmem
 *    with malloc; ctx_mlocalmem.ctx_type is set to KR_CTX_LOCALMEM.
 *  - CRAY_SHMEM && CRAY_XT: units_avail is the heap size (cnos_shmem_size() or
 *    get_xt_heapsize(), selected by a condition not visible here) minus 1 MiB,
 *    divided by SHM_UNIT; a context is initialized with shmalloc and
 *    armci_shmalloc_exchange_offsets(&ctx_localmem) is called.
 *  - A further kr_malloc_init of ctx_localmem with malloc is presumably the
 *    #else branch; the directive itself is not visible in this excerpt.
 * ctx_localmem.ctx_type is set to KR_CTX_LOCALMEM; which branches that
 * assignment belongs to cannot be determined from the visible lines.
 */
void armci_krmalloc_init_localmem() {
543
#if defined(ALLOW_PIN)
544
kr_malloc_init(0, 0, 0, reg_malloc, 0, &ctx_localmem);
545
kr_malloc_init(0, 0, 0, malloc, 0, &ctx_mlocalmem);
546
ctx_mlocalmem.ctx_type = KR_CTX_LOCALMEM;
547
#elif defined(CRAY_SHMEM) && defined(CRAY_XT)
549
int units_avail = (cnos_shmem_size() - 1024 * 1024) / SHM_UNIT;
551
extern size_t get_xt_heapsize();
552
int units_avail = (get_xt_heapsize() - 1024 * 1024) / SHM_UNIT;
557
fprintf(stderr,"%d:krmalloc_init_localmem: symheap=%llu,units(%d)=%d\n",
558
armci_me, SHM_UNIT*units_avail, SHM_UNIT, units_avail);
560
kr_malloc_init(SHM_UNIT, units_avail, units_avail, shmalloc, 0,
562
armci_shmalloc_exchange_offsets(&ctx_localmem);
565
kr_malloc_init(0, 0, 0, malloc, 0, &ctx_localmem);
569
ctx_localmem.ctx_type = KR_CTX_LOCALMEM;
573
* Local Memory Allocation and Free
575
/*
 * Allocates `bytes` bytes from the ctx_localmem kr_malloc context and stores
 * the result in rptr; the declaration of rptr and the return statement are not
 * visible in this excerpt.
 * NOTE(review): the "exit" debug trace is emitted before the allocation call.
 */
void *PARMCI_Malloc_local(armci_size_t bytes) {
577
ARMCI_PR_DBG("enter",0);
578
ARMCI_PR_DBG("exit",0);
579
rptr = (void *)kr_malloc((size_t)bytes, &ctx_localmem, 0, NULL, NULL);
580
//printf("\n%d:%s:%d:%p\n",armci_me,FUNCTION_NAME,bytes,rptr);
584
/*
 * Returns ptr to the ctx_localmem kr_malloc context via kr_free.
 * The function's return statement is not visible in this excerpt.
 */
int PARMCI_Free_local(void *ptr) {
585
ARMCI_PR_DBG("enter",0);
586
kr_free((char *)ptr, &ctx_localmem);
587
ARMCI_PR_DBG("exit",0);
592
/*\ Collective Memory Allocation
593
* returns array of pointers to blocks of memory allocated by everybody
594
* Note: as the same shared memory region can be mapped at different locations
595
* in each process address space, the array might hold different values
596
* on every process. However, the addresses are legitimate
597
* and can be used in the ARMCI data transfer operations.
600
/*
 * Collective allocation entry point; fills ptr_arr with one address per rank.
 * Paths visible in this excerpt:
 *  - armci_nproc == 1: kr_malloc from ctx_localmem, armci_die on failure for a
 *    nonzero request, result stored in ptr_arr[armci_me].
 *  - ARMCI_Uses_shm() true: delegates to armci_shmem_malloc(ptr_arr, bytes).
 *  - Remaining path: kr_malloc from ctx_localmem (also requesting new_base and
 *    new_size), zeroes ptr_arr, stores the local pointer, then combines all
 *    ranks' entries with armci_exchange_address. Under ARMCI_REGISTER_SHMEM
 *    the memory is passed to armci_register_shmem; the condition choosing
 *    between the two visible register calls is not visible here.
 * Declarations, closing braces and return statements are not visible in this
 * excerpt.
 */
int PARMCI_Malloc(void *ptr_arr[], armci_size_t bytes)
605
ARMCI_PR_DBG("enter",0);
607
fprintf(stderr,"%d bytes in armci_malloc %d\n",armci_me, (int)bytes);
613
if(armci_nproc == 1) {
614
ptr = kr_malloc((size_t) bytes, &ctx_localmem, 0, NULL, NULL);
615
if(bytes) if(!ptr) armci_die("armci_malloc:malloc 1 failed",(int)bytes);
616
ptr_arr[armci_me] = ptr;
617
ARMCI_PR_DBG("exit",0);
622
// static int one_time = 0;
624
if( ARMCI_Uses_shm() ) {
625
// if(one_time++ == 0 && armci_me==0) printf("%d: ARMCI_Uses_shm = true\n",armci_me);
626
armci_shmem_malloc(ptr_arr,bytes);
628
/* on distributed-memory systems just malloc & collect all addresses */
629
ptr = kr_malloc(bytes, &ctx_localmem, 1, &new_base, &new_size);
630
if(bytes) if(!ptr) armci_die("armci_malloc:malloc 2 failed",bytes);
632
bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
633
ptr_arr[armci_me] = ptr;
635
/* now combine individual addresses into a single array */
636
armci_exchange_address(ptr_arr, armci_nproc);
637
# ifdef ARMCI_REGISTER_SHMEM
639
armci_register_shmem(new_base,new_size,NULL,0,new_base);
641
armci_register_shmem(ptr,bytes,NULL,0,ptr);
644
ARMCI_PR_DBG("exit",0);
645
//printf("\n%d:%s:%d:%p\n",armci_me,FUNCTION_NAME,bytes,ptr_arr[armci_me]);
651
/*\ shared memory is released to kr_malloc only on process 0
652
* with data server malloc cannot be used
654
/*
 * Releases memory for ptr.
 * In builds with SYSV/WIN32/MMAP and without NO_SHM, when ARMCI_Uses_shm() is
 * true only the cluster master (armci_me == armci_master) releases it: via
 * Free_Shmem_Ptr under RMA_NEEDS_SHMEM; in the other visible path via
 * Free_Shmem_Ptr when the local node has more than one process, else via
 * kr_free on ctx_localmem.
 * A further kr_free(ptr, &ctx_localmem) is visible after that section;
 * NOTE(review): presumably it is the non-shared-memory path - the
 * #else/#endif lines, braces and return statements are not visible in this
 * excerpt.
 */
int PARMCI_Free(void *ptr)
656
ARMCI_PR_DBG("enter",0);
659
# if (defined(SYSV) || defined(WIN32) || defined(MMAP)) && !defined(NO_SHM)
663
if(ARMCI_Uses_shm()){
664
if(armci_me==armci_master){
665
# ifdef RMA_NEEDS_SHMEM
666
Free_Shmem_Ptr(0,0,ptr);
668
if(armci_clus_info[armci_clus_me].nslave>1)
669
Free_Shmem_Ptr(0,0,ptr);
670
else kr_free(ptr, &ctx_localmem);
677
kr_free(ptr, &ctx_localmem);
678
//armci_unregister_shmem(ptr,0);
680
ARMCI_PR_DBG("exit",0);
689
#if (defined(SYSV) || defined(WIN32) || defined(MMAP) ||defined(HITACHI)) \
691
# ifdef RMA_NEEDS_SHMEM
692
if(armci_nproc >1) uses= 1; /* always unless serial mode */
694
if(armci_nproc != armci_nclus)uses= 1; /* only when > 1 node used */
697
if(DEBUG_) fprintf(stderr,"%d:uses shmem %d\n",armci_me, uses);
702
/*
 * Computes the flag `uses` (initially 0) saying whether shared memory is used
 * for `group`.
 * Reads the group size, this process's group rank and the group's cluster-node
 * count (grp_attr->grp_nclus). Within the SYSV/WIN32/MMAP/HITACHI section:
 *  - under RMA_NEEDS_SHMEM, uses = 1 when the group has more than one process;
 *  - the other visible assignment sets uses = 1 when the group's process count
 *    differs from its cluster-node count.
 * The #else/#endif lines and the return statement are not visible in this
 * excerpt.
 */
int ARMCI_Uses_shm_grp(ARMCI_Group *group)
704
int uses=0, grp_me, grp_nproc, grp_nclus;
705
ARMCI_PR_DBG("enter",0);
706
armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
708
ARMCI_Group_size(group, &grp_nproc);
709
ARMCI_Group_rank(group, &grp_me);
710
grp_nclus = grp_attr->grp_nclus;
712
#if (defined(SYSV) || defined(WIN32) || defined(MMAP) ||defined(HITACHI)) \
714
# ifdef RMA_NEEDS_SHMEM
715
if(grp_nproc >1) uses= 1; /* always unless serial mode */
717
if(grp_nproc != grp_nclus)uses= 1; /* only when > 1 node used */
720
if(DEBUG_) fprintf(stderr,"%d (grp_id=%d):uses shmem %d\n",armci_me, grp_me, uses);
721
ARMCI_PR_DBG("exit",0);
725
/*\ ************** Begin Group Collective Memory Allocation ******************
726
* returns array of pointers to blocks of memory allocated by everybody
727
* Note: as the same shared memory region can be mapped at different locations
728
* in each process address space, the array might hold different values
729
* on every process. However, the addresses are legitimate
730
* and can be used in the ARMCI data transfer operations.
733
/*
 * Group-collective allocation; fills ptr_arr with one address per group rank.
 * Paths visible in this excerpt:
 *  - a direct kr_malloc from ctx_localmem whose result goes to ptr_arr[grp_me]
 *    (the guarding condition is not visible here);
 *  - ARMCI_Uses_shm_grp(group) true: delegates to armci_altix_shm_malloc_group
 *    or armci_shmem_malloc_group (the selecting directive is not visible);
 *  - remaining path: kr_malloc from ctx_localmem, zero ptr_arr, store the local
 *    pointer and combine entries with armci_exchange_address_grp.
 * NOTE(review): `size_t new_size=NULL;` initializes an integer with NULL; 0 is
 * presumably intended.
 * NOTE(review): the second failure message names "armci_malloc" rather than
 * the group routine.
 */
int ARMCI_Malloc_group(void *ptr_arr[], armci_size_t bytes,
737
int grp_me, grp_nproc;
738
ARMCI_PR_DBG("enter",0);
739
ARMCI_Group_size(group, &grp_nproc);
740
ARMCI_Group_rank(group, &grp_me);
741
if(DEBUG_)fprintf(stderr,"%d (grp_id=%d) bytes in armci_malloc_group %d\n",
742
armci_me, grp_me, (int)bytes);
745
ptr = kr_malloc((size_t) bytes, &ctx_localmem, 0, NULL, NULL);
746
if(bytes) if(!ptr) armci_die("armci_malloc_group:malloc 1 failed",(int)bytes);
747
ptr_arr[grp_me] = ptr;
748
ARMCI_PR_DBG("exit",0);
753
if( ARMCI_Uses_shm_grp(group) ) {
755
armci_altix_shm_malloc_group(ptr_arr,bytes,group);
757
armci_shmem_malloc_group(ptr_arr,bytes,group);
762
size_t new_size=NULL;
763
ptr = kr_malloc(bytes, &ctx_localmem, 1, &new_base, &new_size);
764
if(bytes) if(!ptr) armci_die("armci_malloc:malloc 2 failed",bytes);
766
bzero((char*)ptr_arr,grp_nproc*sizeof(void*));
767
ptr_arr[grp_me] = ptr;
769
/* now combine individual addresses into a single array */
770
armci_exchange_address_grp(ptr_arr, grp_nproc, group);
773
ARMCI_PR_DBG("exit",0);
778
/*\ shared memory is released to kr_malloc only on process 0
779
* with data server malloc cannot be used
781
/*
 * Releases memory for ptr within `group`.
 * Calls armci_die when the caller's group rank is MPI_UNDEFINED (not a group
 * member). NOTE(review): that message is prefixed "armci_malloc_group" although
 * this is the free routine.
 * In builds with SYSV/WIN32/MMAP and without NO_SHM, when
 * ARMCI_Uses_shm_grp(group) is true only the group's master on this cluster
 * node (grp_me == grp_master) releases: Free_Shmem_Ptr under RMA_NEEDS_SHMEM;
 * in the other visible path Free_Shmem_Ptr when the local node has more than
 * one process, else kr_free on ctx_localmem.
 * A further kr_free(ptr, &ctx_localmem) follows; presumably it is the
 * non-shared-memory path - the #else/#endif lines, braces and return
 * statements are not visible in this excerpt.
 */
int ARMCI_Free_group(void *ptr, ARMCI_Group *group)
783
int grp_me, grp_nproc, grp_master, grp_clus_me;
784
armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
785
ARMCI_PR_DBG("enter",0);
789
ARMCI_Group_size(group, &grp_nproc);
790
ARMCI_Group_rank(group, &grp_me);
791
if(grp_me == MPI_UNDEFINED) { /* check if the process is in this group */
792
armci_die("armci_malloc_group: process is not a member in this group",
795
/* get the group cluster info */
796
grp_clus_me = grp_attr->grp_clus_me;
797
grp_master = grp_attr->grp_clus_info[grp_clus_me].master;
799
# if (defined(SYSV) || defined(WIN32) || defined(MMAP)) && !defined(NO_SHM)
803
if(ARMCI_Uses_shm_grp(group)){
804
if(grp_me == grp_master) {
805
# ifdef RMA_NEEDS_SHMEM
806
Free_Shmem_Ptr(0,0,ptr);
808
if(armci_clus_info[armci_clus_me].nslave>1) Free_Shmem_Ptr(0,0,ptr);
809
else kr_free(ptr, &ctx_localmem);
813
ARMCI_PR_DBG("exit",0);
817
kr_free(ptr, &ctx_localmem);
820
ARMCI_PR_DBG("exit",0);
823
/* ***************** End Group Collective Memory Allocation ******************/
825
/* ************** Begin Non-Collective Memory Allocation ******************
826
* Prototype similar to SysV shared memory.
830
* CHECK: On Altix we are forced to use SysV as shmalloc is collective. We
831
* may use a preallocated shmalloc memory, however, it may NOT still solve
833
* NOTE: "int memflg" option for future optimizations.
835
/*
 * Non-collective allocation entry point; describes the new region in *meminfo.
 * Argument checks (each calls armci_die): size <= 0, meminfo == NULL,
 * memflg != 0. `size` is a local whose declaration is not visible in this
 * excerpt (presumably derived from `bytes`).
 * When ARMCI_Uses_shm() is false, memory comes from kr_malloc on ctx_localmem
 * and meminfo's armci_addr, addr, size and cpid (= armci_me) are filled here.
 * The armci_shmem_memget(meminfo, size) call is presumably the shared-memory
 * branch; the else/braces are not visible in this excerpt.
 */
void PARMCI_Memget(size_t bytes, armci_meminfo_t *meminfo, int memflg) {
838
void *armci_ptr=NULL; /* legal ARMCI ptr used in ARMCI data xfer ops*/
841
if(size<=0) armci_die("ARMCI_Memget: size must be > 0", (int)size);
842
if(meminfo==NULL) armci_die("ARMCI_Memget: Invalid arg #2 (NULL ptr)",0);
843
if(memflg!=0) armci_die("ARMCI_Memget: Invalid memflg", memflg);
845
if( !ARMCI_Uses_shm() )
847
armci_ptr = myptr = kr_malloc(size, &ctx_localmem, 0, NULL, NULL);
848
if(size) if(!myptr) armci_die("ARMCI_Memget failed", (int)size);
850
/* fill the meminfo structure */
851
meminfo->armci_addr = armci_ptr;
852
meminfo->addr = myptr;
853
meminfo->size = size;
854
meminfo->cpid = armci_me;
855
/* meminfo->attr = NULL; */
859
armci_shmem_memget(meminfo, size);
863
printf("%d: ARMCI_Memget: addresses server=%p myptr=%p bytes=%ld\n",
864
armci_me, meminfo->armci_addr, meminfo->addr, bytes);
869
/*
 * Returns a pointer through which the caller can access the region described
 * by meminfo.
 * Calls armci_die for a NULL meminfo or a nonzero offset.
 * NOTE(review): both messages are prefixed "ARMCI_Memget" although this is
 * Memat, and `offset` (long) is passed straight to armci_die.
 * The creator (meminfo->cpid == armci_me) gets meminfo->addr back immediately.
 * Otherwise the visible code tests !ARMCI_Uses_shm() and calls
 * armci_shmem_memat(meminfo); the statements under the !ARMCI_Uses_shm() test
 * and the final return are not visible in this excerpt.
 */
void* PARMCI_Memat(armci_meminfo_t *meminfo, long offset) {
872
if(meminfo==NULL) armci_die("ARMCI_Memget: Invalid arg #2 (NULL ptr)",0);
873
if(offset!=0) armci_die("ARMCI_Memget: Invalid offset", offset);
875
if(meminfo->cpid==armci_me) { ptr = meminfo->addr; return ptr; }
877
if( !ARMCI_Uses_shm())
883
ptr = armci_shmem_memat(meminfo);
888
printf("%d:ARMCI_Memat: attached addr mptr=%p size=%ld\n",
889
armci_me, ptr, meminfo->size); fflush(stdout);
895
/*
 * Detach counterpart of the Memat call. The remark below states that it does
 * nothing; no executable statement of the body is visible in this excerpt.
 */
void ARMCI_Memdt(armci_meminfo_t *meminfo, long offset) {
897
* Do nothing. May be we need to have reference counting in future. This
898
* is to avoid the case of dangling pointers when the creator of shm
899
* segment calls Memctl and other processes are still attached to this
904
/*
 * Releases the region described by meminfo and clears its address fields.
 * Calls armci_die for a NULL meminfo. NOTE(review): that message is prefixed
 * "ARMCI_Memget".
 * Only the creator (meminfo->cpid == armci_me) frees: without shared memory
 * via kr_free(meminfo->addr) on ctx_localmem; the armci_shmem_memctl(meminfo)
 * call is presumably the shared-memory branch (else/braces are not visible in
 * this excerpt).
 * meminfo->addr and meminfo->armci_addr are set to NULL; whether that happens
 * for non-creators as well cannot be determined from the visible lines.
 */
void ARMCI_Memctl(armci_meminfo_t *meminfo) {
906
if(meminfo==NULL) armci_die("ARMCI_Memget: Invalid arg #2 (NULL ptr)",0);
908
/* only the creator can delete the segment */
909
if(meminfo->cpid == armci_me)
911
if( !ARMCI_Uses_shm() )
913
void *ptr = meminfo->addr;
914
kr_free(ptr, &ctx_localmem);
918
armci_shmem_memctl(meminfo);
922
meminfo->addr = NULL;
923
meminfo->armci_addr = NULL;
924
/* if(meminfo->attr!=NULL) free(meminfo->attr); */
927
/* ***************** End Non-Collective Memory Allocation ******************/