struct channel *chan = shmp(handle, buf->backend.chan);
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
int ret, finalized, nr_retry = LTTNG_UST_RING_BUFFER_GET_RETRY;
finalized = CMM_ACCESS_ONCE(buf->finalized);
	/*
	 * Check that the subbuffer we are trying to consume has been
	 * already fully committed. There are a few causes that can make
	 * this unavailability situation occur:
	 *
	 * Temporary (short-term) situation:
	 * - Application is running on a different CPU, between reserve
	 *   and commit ring buffer operations,
	 * - Application is preempted between reserve and commit ring
	 *   buffer operations,
	 *
	 * Long-term situation:
	 * - Application is stopped (SIGSTOP) between reserve and commit
	 *   ring buffer operations. Could eventually be resumed by
	 *   SIGCONT,
	 * - Application is killed (SIGTERM, SIGINT, SIGKILL) between
	 *   reserve and commit ring buffer operation.
	 *
	 * From a consumer perspective, handling short-term
	 * unavailability situations is performed by retrying a few
	 * times after a delay. Handling long-term unavailability
	 * situations is handled by failing to get the sub-buffer.
	 *
	 * In all of those situations, if the application is taking a
	 * long time to perform its commit after ring buffer space
	 * reservation, we can end up in a situation where the producer
	 * will fill the ring buffer and try to write into the same
	 * sub-buffer again (which has a missing commit). This is
	 * handled by the producer in the sub-buffer switch handling
	 * code of the reserve routine by detecting unbalanced
	 * reserve/commit counters and discarding all further events
	 * until the situation is resolved in those situations. Two
	 * scenarios can occur:
	 *
	 * 1) The application causing the reserve/commit counters to be
	 *    unbalanced has been terminated. In this situation, all
	 *    further events will be discarded in the buffers, and no
	 *    further buffer data will be readable by the consumer
	 *    daemon. Tearing down the UST tracing session and starting
	 *    anew is a work-around for those situations. Note that this
	 *    only affects per-UID tracing. In per-PID tracing, the
	 *    application vanishes with the termination, and therefore
	 *    no more data needs to be written to the buffers.
	 * 2) The application causing the unbalance has been delayed for
	 *    a long time, but will eventually try to increment the
	 *    commit counter after eventually writing to the sub-buffer.
	 *    This situation can cause events to be discarded until the
	 *    application resumes its operations.
	 */
if (((commit_count - chan->backend.subbuf_size)
& chan->commit_count_mask)
- (buf_trunc(consumed, chan)
>> chan->backend.num_subbuf_order)
if (nr_retry-- > 0) {
if (nr_retry <= (LTTNG_UST_RING_BUFFER_GET_RETRY >> 1))
(void) poll(NULL, 0, LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS);
	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */

	/*
	 * Failure to get the subbuffer causes a busy-loop retry without going
	 * to a wait queue. These are caused by short-lived race windows where
	 * the writer is getting access to a subbuffer we were trying to get
	 * access to. Also checks that the "consumed" buffer count we are
	 * looking for matches the one contained in the subbuffer id.
	 *
	 * The short-lived race window described here can be affected by
	 * application signals and preemption, thus requiring to bound
	 * the loop to a maximum number of retry.
	 */
ret = update_read_sb_index(config, &buf->backend, &chan->backend,
consumed_idx, buf_trunc_val(consumed, chan),
if (nr_retry-- > 0) {
if (nr_retry <= (LTTNG_UST_RING_BUFFER_GET_RETRY >> 1))
(void) poll(NULL, 0, LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS);
subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
buf->get_subbuf_consumed = consumed;