 * The FIFO is implemented as a circular queue with head and tail pointers
 * (integer indices).  For efficient wraparound indexing, the size of the
 * queue is constrained to be a power of two and we "&" indices with a "mask".
 *
 * More than one process can write to the FIFO head.  Therefore, there is a head
 * lock.  One cannot write until the head slot is empty, indicated by the special
 * queue entry SM_FIFO_FREE.
 *
 * Only the receiver can read the FIFO tail.  Therefore, the tail lock is
 * required only in multithreaded applications.  If a tail read returns the
 * SM_FIFO_FREE value, that means the FIFO is empty.  Once a non-FREE value
 * has been read, the queue slot is *not* automatically reset to SM_FIFO_FREE.
 * Rather, read tail slots are reset "lazily" (see "lazy_free" and "num_to_clear")
 * to reduce the number of memory barriers and improve performance.
 *
 * Since the FIFO lives in shared memory that is mapped differently into
 * each address space, the "queue" pointer is relative (each process must
 * add its own offset) and the queue_recv pointer is meaningful only in the
 * receiver's address space.
 *
 * Since multiple processes access different parts of the FIFO structure in
 * different ways, we introduce padding to keep different parts on different
 * cache lines.
 */
#define SM_FIFO_FREE (void *) (-2)
87
/* This queue pointer is used only by the heads. */
88
volatile void **queue; char pad0[CACHE_LINE_SIZE - sizeof(void **) ];
89
/* This lock is used by the heads. */
90
opal_atomic_lock_t head_lock; char pad1[CACHE_LINE_SIZE - sizeof(opal_atomic_lock_t)];
91
/* This index is used by the head holding the head lock. */
92
volatile int head; char pad2[CACHE_LINE_SIZE - sizeof(int) ];
93
/* This mask is used "read only" by all processes. */
94
unsigned int mask; char pad3[CACHE_LINE_SIZE - sizeof(int) ];
95
/* The following are used only by the tail. */
96
volatile void **queue_recv;
97
opal_atomic_lock_t tail_lock;
100
int lazy_free; char pad4[CACHE_LINE_SIZE - sizeof(void **)
101
- sizeof(opal_atomic_lock_t)
104
typedef struct sm_fifo_t sm_fifo_t;
58
107
* Shared Memory resource managment
87
136
mca_common_sm_mmap_t *mmap_file; /**< description of mmap'ed file */
88
137
mca_common_sm_file_header_t *sm_ctl_header; /* control header in
90
ompi_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */
139
sm_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */
91
140
char **shm_bases; /**< pointer to base pointers in shared memory */
92
141
uint16_t *shm_mem_nodes; /**< pointer to mem noded in shared memory */
93
ompi_fifo_t **fifo; /**< cached copy of the pointer to the 2D
142
sm_fifo_t **fifo; /**< cached copy of the pointer to the 2D
94
143
fifo array. The address in the shared
95
144
memory segment sm_ctl_header is a relative,
96
145
but this one, in process private memory, is
97
146
a real virtual address */
98
uint16_t *mem_nodes; /**< cached copy of mem nodes of each local rank */
99
size_t size_of_cb_queue; /**< size of each circular buffer queue array */
100
size_t cb_lazy_free_freq; /**< frequency of lazy free */
101
int cb_max_num; /**< max number of circular buffers for each peer */
147
uint16_t *mem_nodes; /**< cached copy of mem nodes of each local rank */
148
size_t fifo_size; /**< number of FIFO queue entries */
149
size_t fifo_lazy_free; /**< number of reads before lazy fifo free is triggered */
150
int nfifos; /**< number of FIFOs per receiver */
102
151
ptrdiff_t *sm_offset; /**< offset to be applied to shared memory
103
152
addresses, per local process value */
104
153
int32_t num_smp_procs; /**< current number of smp procs on this host */
132
181
typedef struct btl_sm_pending_send_item_t btl_sm_pending_send_item_t;
/*
 * FIFO support for sm BTL.
 *
 * One or more FIFO components may be a pointer that must be
 * accessed by multiple processes.  Since the shared region may
 * be mmapped differently into each process's address space,
 * these pointers will be relative to some base address.  Here,
 * we define macros to translate between relative addresses and
 * virtual addresses.
 */
#define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank])
196
#define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank])
198
/* ================================================== */
199
/* ================================================== */
200
/* ================================================== */
202
static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
203
sm_fifo_t *fifo, int lazy_free)
207
/* figure out the queue size (a power of two that is at least 1) */
209
while ( qsize < fifo_size )
212
/* allocate the queue in the receiver's address space */
213
fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
214
mpool, sizeof(void *) * qsize, CACHE_LINE_SIZE, 0, NULL);
215
if(NULL == fifo->queue_recv) {
216
return OMPI_ERR_OUT_OF_RESOURCE;
219
/* initialize the queue */
220
for ( i = 0; i < qsize; i++ )
221
fifo->queue_recv[i] = SM_FIFO_FREE;
223
/* shift queue address to be relative */
224
fifo->queue = (volatile void **) VIRTUAL2RELATIVE(fifo->queue_recv);
226
/* initialize the locks */
227
opal_atomic_init(&(fifo->head_lock), OPAL_ATOMIC_UNLOCKED);
228
opal_atomic_init(&(fifo->tail_lock), OPAL_ATOMIC_UNLOCKED);
229
opal_atomic_unlock(&(fifo->head_lock)); /* should be unnecessary */
230
opal_atomic_unlock(&(fifo->tail_lock)); /* should be unnecessary */
232
/* other initializations */
234
fifo->mask = qsize - 1;
236
fifo->num_to_clear = 0;
237
fifo->lazy_free = lazy_free;
243
static inline int sm_fifo_write(void *value, sm_fifo_t *fifo)
245
volatile void **q = (volatile void **) RELATIVE2VIRTUAL(fifo->queue);
247
/* if there is no free slot to write, report exhausted resource */
248
if ( SM_FIFO_FREE != q[fifo->head] )
249
return OMPI_ERR_OUT_OF_RESOURCE;
251
/* otherwise, write to the slot and advance the head index */
253
q[fifo->head] = value;
254
fifo->head = (fifo->head + 1) & fifo->mask;
260
static inline void *sm_fifo_read(sm_fifo_t *fifo)
264
/* read the next queue entry */
265
value = (void *) fifo->queue_recv[fifo->tail];
269
/* if you read a non-empty slot, advance the tail pointer */
270
if ( SM_FIFO_FREE != value ) {
272
fifo->tail = ( fifo->tail + 1 ) & fifo->mask;
273
fifo->num_to_clear += 1;
275
/* check if it's time to free slots, which we do lazily */
276
if ( fifo->num_to_clear >= fifo->lazy_free ) {
277
int i = (fifo->tail - fifo->num_to_clear ) & fifo->mask;
279
while ( fifo->num_to_clear > 0 ) {
280
fifo->queue_recv[i] = SM_FIFO_FREE;
281
i = (i+1) & fifo->mask;
282
fifo->num_to_clear -= 1;
/*
 * Register shared memory module parameters with the MCA framework