55
48
MPIU_ERR_CHKANDJUMP((*win_ptr)->epoch_state != MPIDI_EPOCH_NONE,
56
49
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
51
mpi_errno = MPIDI_CH3I_Wait_for_pt_ops_finish(*win_ptr);
52
if(mpi_errno) MPIU_ERR_POP(mpi_errno);
58
54
comm_ptr = (*win_ptr)->comm_ptr;
59
MPIU_INSTR_DURATION_START(winfree_rs);
60
mpi_errno = MPIR_Reduce_scatter_block_impl((*win_ptr)->pt_rma_puts_accs,
61
&total_pt_rma_puts_accs, 1,
62
MPI_INT, MPI_SUM, comm_ptr, &errflag);
63
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
64
MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
65
MPIU_INSTR_DURATION_END(winfree_rs);
67
if (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
69
MPID_Progress_state progress_state;
71
/* poke the progress engine until the two are equal */
72
MPIU_INSTR_DURATION_START(winfree_complete);
73
MPID_Progress_start(&progress_state);
74
while (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
76
mpi_errno = MPID_Progress_wait(&progress_state);
77
/* --BEGIN ERROR HANDLING-- */
78
if (mpi_errno != MPI_SUCCESS)
80
MPID_Progress_end(&progress_state);
81
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
83
/* --END ERROR HANDLING-- */
85
MPID_Progress_end(&progress_state);
86
MPIU_INSTR_DURATION_END(winfree_complete);
89
55
mpi_errno = MPIR_Comm_free_impl(comm_ptr);
90
56
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
210
rank = win_ptr->myrank;
148
rank = win_ptr->comm_ptr->rank;
150
if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
151
/* check if target is local and shared memory is allocated on window,
152
if so, we directly perform this operation on shared memory region. */
154
/* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
155
the same node. However, in ch3:sock, even if origin and target are on the same node, they are
156
not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
157
which is only set to TRUE when SHM region is allocated in nemesis.
158
In future we need to figure out a way to check if origin and target are in the same "SHM comm".
160
MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
161
MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
212
164
/* If the put is a local operation, do it here */
213
if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
165
if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
166
(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
218
if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
219
base = win_ptr->shm_base_addrs[target_rank];
220
disp_unit = win_ptr->disp_units[target_rank];
223
base = win_ptr->base;
224
disp_unit = win_ptr->disp_unit;
227
mpi_errno = MPIR_Localcopy(origin_addr, origin_count, origin_datatype,
228
(char *) base + disp_unit * target_disp,
229
target_count, target_datatype);
230
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
168
mpi_errno = MPIDI_CH3I_Shm_put_op(origin_addr, origin_count, origin_datatype, target_rank,
169
target_disp, target_count, target_datatype, win_ptr);
170
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
321
rank = win_ptr->myrank;
260
rank = win_ptr->comm_ptr->rank;
262
if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
263
/* check if target is local and shared memory is allocated on window,
264
if so, we directly perform this operation on shared memory region. */
266
/* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
267
the same node. However, in ch3:sock, even if origin and target are on the same node, they are
268
not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
269
which is only set to TRUE when SHM region is allocated in nemesis.
270
In future we need to figure out a way to check if origin and target are in the same "SHM comm".
272
MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
273
MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
323
276
/* If the get is a local operation, do it here */
324
if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
277
if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
278
(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
329
if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
330
base = win_ptr->shm_base_addrs[target_rank];
331
disp_unit = win_ptr->disp_units[target_rank];
334
base = win_ptr->base;
335
disp_unit = win_ptr->disp_unit;
338
mpi_errno = MPIR_Localcopy((char *) base + disp_unit * target_disp,
339
target_count, target_datatype, origin_addr,
340
origin_count, origin_datatype);
341
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
280
mpi_errno = MPIDI_CH3I_Shm_get_op(origin_addr, origin_count, origin_datatype, target_rank,
281
target_disp, target_count, target_datatype, win_ptr);
282
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
431
rank = win_ptr->myrank;
370
rank = win_ptr->comm_ptr->rank;
433
MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
434
MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);
372
if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
373
/* check if target is local and shared memory is allocated on window,
374
if so, we directly perform this operation on shared memory region. */
376
/* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
377
the same node. However, in ch3:sock, even if origin and target are on the same node, they are
378
not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
379
which is only set to TRUE when SHM region is allocated in nemesis.
380
In future we need to figure out a way to check if origin and target are in the same "SHM comm".
382
MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
383
MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
436
386
/* Do != rank first (most likely branch?) */
437
if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
387
if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
388
(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
439
MPI_User_function *uop;
441
int disp_unit, shm_op = 0;
443
if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
445
base = win_ptr->shm_base_addrs[target_rank];
446
disp_unit = win_ptr->disp_units[target_rank];
449
base = win_ptr->base;
450
disp_unit = win_ptr->disp_unit;
453
if (op == MPI_REPLACE)
455
if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
456
mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
458
(char *) base + disp_unit * target_disp,
459
target_count, target_datatype);
460
if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
461
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
465
MPIU_ERR_CHKANDJUMP1((HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN),
466
mpi_errno, MPI_ERR_OP, "**opnotpredefined",
467
"**opnotpredefined %d", op );
469
/* get the function by indexing into the op table */
470
uop = MPIR_OP_HDL_TO_FN(op);
472
if (origin_predefined && target_predefined)
474
/* Cast away const'ness for origin_address in order to
475
* avoid changing the prototype for MPI_User_function */
476
if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
477
(*uop)((void *) origin_addr, (char *) base + disp_unit*target_disp,
478
&target_count, &target_datatype);
479
if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
483
/* derived datatype */
486
DLOOP_VECTOR *dloop_vec;
487
MPI_Aint first, last;
488
int vec_len, i, type_size, count;
490
MPI_Aint true_lb, true_extent, extent;
491
void *tmp_buf=NULL, *target_buf;
492
const void *source_buf;
494
if (origin_datatype != target_datatype)
496
/* first copy the data into a temporary buffer with
497
the same datatype as the target. Then do the
498
accumulate operation. */
500
MPIR_Type_get_true_extent_impl(target_datatype, &true_lb, &true_extent);
501
MPID_Datatype_get_extent_macro(target_datatype, extent);
503
MPIU_CHKLMEM_MALLOC(tmp_buf, void *,
504
target_count * (MPIR_MAX(extent,true_extent)),
505
mpi_errno, "temporary buffer");
506
/* adjust for potential negative lower bound in datatype */
507
tmp_buf = (void *)((char*)tmp_buf - true_lb);
509
mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
510
origin_datatype, tmp_buf,
511
target_count, target_datatype);
512
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
515
if (target_predefined) {
516
/* target predefined type, origin derived datatype */
518
if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
519
(*uop)(tmp_buf, (char *) base + disp_unit * target_disp,
520
&target_count, &target_datatype);
521
if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
525
segp = MPID_Segment_alloc();
526
MPIU_ERR_CHKANDJUMP1((!segp), mpi_errno, MPI_ERR_OTHER,
527
"**nomem","**nomem %s","MPID_Segment_alloc");
528
MPID_Segment_init(NULL, target_count, target_datatype, segp, 0);
530
last = SEGMENT_IGNORE_LAST;
532
MPID_Datatype_get_ptr(target_datatype, dtp);
533
vec_len = dtp->max_contig_blocks * target_count + 1;
534
/* +1 needed because Rob says so */
535
MPIU_CHKLMEM_MALLOC(dloop_vec, DLOOP_VECTOR *,
536
vec_len * sizeof(DLOOP_VECTOR),
537
mpi_errno, "dloop vector");
539
MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);
541
source_buf = (tmp_buf != NULL) ? tmp_buf : origin_addr;
542
target_buf = (char *) base + disp_unit * target_disp;
544
type_size = MPID_Datatype_get_basic_size(type);
545
if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
546
for (i=0; i<vec_len; i++)
548
count = (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size;
549
(*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
550
(char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
553
if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
555
MPID_Segment_free(segp);
390
mpi_errno = MPIDI_CH3I_Shm_acc_op(origin_addr, origin_count, origin_datatype,
391
target_rank, target_disp, target_count, target_datatype,
393
if (mpi_errno) MPIU_ERR_POP(mpi_errno);