 STATIC void xlog_ungrant_log_space(xlog_t	 *log,
				    xlog_ticket_t *ticket);

-/* local ticket functions */
-STATIC xlog_ticket_t	*xlog_ticket_alloc(xlog_t *log,

 #if defined(DEBUG)
-STATIC void	xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
-STATIC void	xlog_verify_grant_head(xlog_t *log, int equals);
+STATIC void	xlog_verify_dest_ptr(xlog_t *log, char *ptr);
+STATIC void	xlog_verify_grant_tail(struct log *log);
 STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
				  int count, boolean_t syncing);
 STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
				     xfs_lsn_t tail_lsn);
 #else
 #define xlog_verify_dest_ptr(a,b)
-#define xlog_verify_grant_head(a,b)
+#define xlog_verify_grant_tail(a)
 #define xlog_verify_iclog(a,b,c,d)
 #define xlog_verify_tail_lsn(a,b,c)
 #endif

 STATIC int	xlog_iclogs_empty(xlog_t *log);
-#if defined(XFS_LOG_TRACE)
-
-#define XLOG_TRACE_LOGGRANT_SIZE	2048
-#define XLOG_TRACE_ICLOG_SIZE		256
-
-STATIC void
-xlog_trace_loggrant_alloc(xlog_t *log)
-{
-	log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
-}
-
-STATIC void
-xlog_trace_loggrant_dealloc(xlog_t *log)
-{
-	ktrace_free(log->l_grant_trace);
-}
-
-STATIC void
-xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
-{
-	unsigned long cnts;
-
-	/* ticket counts are 1 byte each */
-	cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
-
-	ktrace_enter(log->l_grant_trace,
-		     (void *)tic,
-		     (void *)log->l_reserve_headq,
-		     (void *)log->l_write_headq,
-		     (void *)((unsigned long)log->l_grant_reserve_cycle),
-		     (void *)((unsigned long)log->l_grant_reserve_bytes),
-		     (void *)((unsigned long)log->l_grant_write_cycle),
-		     (void *)((unsigned long)log->l_grant_write_bytes),
-		     (void *)((unsigned long)log->l_curr_cycle),
-		     (void *)((unsigned long)log->l_curr_block),
-		     (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
-		     (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
-		     (void *)string,
-		     (void *)((unsigned long)tic->t_trans_type),
-		     (void *)((unsigned long)cnts),
-		     (void *)((unsigned long)tic->t_curr_res),
-		     (void *)((unsigned long)tic->t_unit_res));
-}
-
-STATIC void
-xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
-{
-	iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
-}
-
-STATIC void
-xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
-{
-	ktrace_free(iclog->ic_trace);
-}
-
-STATIC void
-xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
-{
-	ktrace_enter(iclog->ic_trace,
-		     (void *)((unsigned long)state),
-		     (void *)((unsigned long)current_pid()),
-		     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-		     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-		     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-		     (void *)NULL, (void *)NULL);
-}
-
-#else
-
-#define	xlog_trace_loggrant_alloc(log)
-#define	xlog_trace_loggrant_dealloc(log)
-#define	xlog_trace_loggrant(log,tic,string)
-
-#define	xlog_trace_iclog_alloc(iclog)
-#define	xlog_trace_iclog_dealloc(iclog)
-#define	xlog_trace_iclog(iclog,state)
-
-#endif /* XFS_LOG_TRACE */
-STATIC void
-xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
-	if (*qp) {
-		/* insert onto queue */
-		tic->t_next	      = (*qp);
-		tic->t_prev	      = (*qp)->t_prev;
-		(*qp)->t_prev->t_next = tic;
-		(*qp)->t_prev	      = tic;
-	} else {
-		tic->t_prev = tic->t_next = tic;
-		*qp = tic;
-	}
-
-	tic->t_flags |= XLOG_TIC_IN_Q;
-}
-
-STATIC void
-xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
-	if (tic == tic->t_next) {
-		*qp = NULL;
-	} else {
-		*qp = tic->t_next;
-		tic->t_next->t_prev = tic->t_prev;
-		tic->t_prev->t_next = tic->t_next;
-	}
-
-	tic->t_next = tic->t_prev = NULL;
-	tic->t_flags &= ~XLOG_TIC_IN_Q;
-}
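
The open-coded circular queue above is what the list_head based l_reserveq/l_writeq code later in this diff replaces. A minimal sketch of the same two operations with a standard kernel list (t_queue being the list node the replacement adds to struct xlog_ticket):

static void example_ticket_queueing(struct log *log, struct xlog_ticket *tic)
{
	/* insert at the tail, as xlog_ins_ticketq() did */
	if (list_empty(&tic->t_queue))
		list_add_tail(&tic->t_queue, &log->l_reserveq);

	/* unlink and reinitialise, as xlog_del_ticketq() did */
	list_del_init(&tic->t_queue);
}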
-static void
-xlog_grant_sub_space(struct log *log, int bytes)
-{
-	log->l_grant_write_bytes -= bytes;
-	if (log->l_grant_write_bytes < 0) {
-		log->l_grant_write_bytes += log->l_logsize;
-		log->l_grant_write_cycle--;
-	}
-
-	log->l_grant_reserve_bytes -= bytes;
-	if ((log)->l_grant_reserve_bytes < 0) {
-		log->l_grant_reserve_bytes += log->l_logsize;
-		log->l_grant_reserve_cycle--;
-	}
-}
-
-static void
-xlog_grant_add_space_write(struct log *log, int bytes)
-{
-	int tmp = log->l_logsize - log->l_grant_write_bytes;
-	if (tmp > bytes)
-		log->l_grant_write_bytes += bytes;
-	else {
-		log->l_grant_write_cycle++;
-		log->l_grant_write_bytes = bytes - tmp;
-	}
-}
-
-static void
-xlog_grant_add_space_reserve(struct log *log, int bytes)
-{
-	int tmp = log->l_logsize - log->l_grant_reserve_bytes;
-	if (tmp > bytes)
-		log->l_grant_reserve_bytes += bytes;
-	else {
-		log->l_grant_reserve_cycle++;
-		log->l_grant_reserve_bytes = bytes - tmp;
-	}
-}
-
-static inline void
-xlog_grant_add_space(struct log *log, int bytes)
-{
-	xlog_grant_add_space_write(log, bytes);
-	xlog_grant_add_space_reserve(log, bytes);
-}
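
A worked example of the wraparound in the add path, with assumed numbers: a 1 MiB log whose write head sits 1048000 bytes into cycle 5 when a 1000-byte grant arrives.

static void example_grant_wrap(void)
{
	int logsize = 1048576;		/* 1 MiB log */
	int space = 1048000;		/* head offset within cycle 5 */
	int bytes = 1000;		/* grant being added */
	int cycle = 5;
	int tmp = logsize - space;	/* 576 bytes left before the end */

	if (tmp > bytes)
		space += bytes;
	else {
		cycle++;		/* taken here: head wraps to cycle 6 */
		space = bytes - tmp;	/* 424 bytes into the new cycle */
	}
}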
+static void
+xlog_grant_sub_space(
+	struct log	*log,
+	atomic64_t	*head,
+	int		bytes)
+{
+	int64_t	head_val = atomic64_read(head);
+	int64_t new, old;
+
+	do {
+		int	cycle, space;
+
+		xlog_crack_grant_head_val(head_val, &cycle, &space);
+
+		space -= bytes;
+		if (space < 0) {
+			space += log->l_logsize;
+			cycle--;
+		}
+
+		old = head_val;
+		new = xlog_assign_grant_head_val(cycle, space);
+		head_val = atomic64_cmpxchg(head, old, new);
+	} while (head_val != old);
+}
+
+static void
+xlog_grant_add_space(
+	struct log	*log,
+	atomic64_t	*head,
+	int		bytes)
+{
+	int64_t	head_val = atomic64_read(head);
+	int64_t new, old;
+
+	do {
+		int	tmp;
+		int	cycle, space;
+
+		xlog_crack_grant_head_val(head_val, &cycle, &space);
+
+		tmp = log->l_logsize - space;
+		if (tmp > bytes)
+			space += bytes;
+		else {
+			space = bytes - tmp;
+			cycle++;
+		}
+
+		old = head_val;
+		new = xlog_assign_grant_head_val(cycle, space);
+		head_val = atomic64_cmpxchg(head, old, new);
+	} while (head_val != old);
+}
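
The crack/assign helpers used by the compare-and-exchange loops above pack the cycle number and the byte count into a single 64-bit value so both can be updated atomically. A minimal sketch consistent with that usage (in the real tree these helpers live in xfs_log_priv.h):

static inline void
xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
{
	*cycle = val >> 32;		/* high 32 bits: cycle */
	*space = val & 0xffffffff;	/* low 32 bits: bytes into the cycle */
}

static inline int64_t
xlog_assign_grant_head_val(int cycle, int space)
{
	return ((int64_t)cycle << 32) | space;
}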
 * reservation, we prevent over allocation problems.
 */
 int
-xfs_log_reserve(xfs_mount_t	 *mp,
-		int		 unit_bytes,
-		int		 cnt,
-		xfs_log_ticket_t *ticket,
-		__uint8_t	 client,
-		uint		 flags,
-		uint		 t_type)
+xfs_log_reserve(
+	struct xfs_mount	*mp,
+	int			unit_bytes,
+	int			cnt,
+	struct xlog_ticket	**ticket,
+	__uint8_t		client,
+	uint			flags,
+	uint			t_type)
 {
-	xlog_t		*log = mp->m_log;
-	xlog_ticket_t	*internal_ticket;
+	struct log		*log = mp->m_log;
+	struct xlog_ticket	*internal_ticket;
	int			retval = 0;

 	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
-	ASSERT((flags & XFS_LOG_NOSLEEP) == 0);

 	if (XLOG_FORCED_SHUTDOWN(log))
 		return XFS_ERROR(EIO);

 	XFS_STATS_INC(xs_try_logspace);

 	if (*ticket != NULL) {
 		ASSERT(flags & XFS_LOG_PERM_RESERV);
-		internal_ticket = (xlog_ticket_t *)*ticket;
-		xlog_trace_loggrant(log, internal_ticket,
-			"xfs_log_reserve: existing ticket (permanent trans)");
-		xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
+		internal_ticket = *ticket;
+
+		/*
+		 * this is a new transaction on the ticket, so we need to
+		 * change the transaction ID so that the next transaction has a
+		 * different TID in the log. Just add one to the existing tid
+		 * so that we can see chains of rolling transactions in the log
+		 * correctly.
+		 */
+		internal_ticket->t_tid++;
+
+		trace_xfs_log_reserve(log, internal_ticket);
+
+		xlog_grant_push_ail(log, internal_ticket->t_unit_res);
 		retval = xlog_regrant_write_log_space(log, internal_ticket);
 	} else {
 		/* may sleep if need to allocate more tickets */
 		internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
-						  client, flags);
+						  client, flags,
+						  KM_SLEEP|KM_MAYFAIL);
 		if (!internal_ticket)
 			return XFS_ERROR(ENOMEM);
 		internal_ticket->t_trans_type = t_type;
 		*ticket = internal_ticket;
-		xlog_trace_loggrant(log, internal_ticket,
-			(internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
-			"xfs_log_reserve: create new ticket (permanent trans)" :
-			"xfs_log_reserve: create new ticket");
-		xlog_grant_push_ail(mp,
+		trace_xfs_log_reserve(log, internal_ticket);
+
+		xlog_grant_push_ail(log,
 				    (internal_ticket->t_unit_res *
 				     internal_ticket->t_cnt));
 		retval = xlog_grant_log_space(log, internal_ticket);
 	}
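
A hypothetical caller sketch (not from this diff) of the reservation flow above: the first call with a NULL ticket allocates one; each roll of a permanent transaction passes the same ticket back in and takes the regrant path.

struct xlog_ticket *tic = NULL;
int error;

error = xfs_log_reserve(mp, unit_bytes, cnt, &tic, XFS_TRANSACTION,
			XFS_LOG_PERM_RESERV, XFS_TRANS_CREATE);
/* ... commit and roll the transaction ... */
error = xfs_log_reserve(mp, unit_bytes, cnt, &tic, XFS_TRANSACTION,
			XFS_LOG_PERM_RESERV, XFS_TRANS_CREATE);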
 	xlog_ticket_t	*tic;
 	xlog_t		*log = mp->m_log;
-	int		need_bytes, free_bytes, cycle, bytes;
+	int		need_bytes, free_bytes;

 	if (XLOG_FORCED_SHUTDOWN(log))
 		return;

-	if (tail_lsn == 0) {
-		/* needed since sync_lsn is 64 bits */
-		spin_lock(&log->l_icloglock);
-		tail_lsn = log->l_last_sync_lsn;
-		spin_unlock(&log->l_icloglock);
-	}
-
-	spin_lock(&log->l_grant_lock);
-
-	/* Also an invalid lsn.  1 implies that we aren't passing in a valid
-	 * tail_lsn.
-	 */
-	if (tail_lsn != 1)
-		log->l_tail_lsn = tail_lsn;
-
-	if ((tic = log->l_write_headq)) {
+	if (tail_lsn == 0)
+		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+
+	/* tail_lsn == 1 implies that we weren't passed a valid value. */
+	if (tail_lsn != 1)
+		atomic64_set(&log->l_tail_lsn, tail_lsn);
+
+	if (!list_empty_careful(&log->l_writeq)) {
 #ifdef DEBUG
 		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
 			panic("Recovery problem");
 #endif
-		cycle = log->l_grant_write_cycle;
-		bytes = log->l_grant_write_bytes;
-		free_bytes = xlog_space_left(log, cycle, bytes);
-		do {
+		spin_lock(&log->l_grant_write_lock);
+		free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+		list_for_each_entry(tic, &log->l_writeq, t_queue) {
 			ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);

 			if (free_bytes < tic->t_unit_res && tail_lsn != 1)
 				break;
 			tail_lsn = 0;
 			free_bytes -= tic->t_unit_res;
-			sv_signal(&tic->t_wait);
-			tic = tic->t_next;
-		} while (tic != log->l_write_headq);
+			trace_xfs_log_regrant_write_wake_up(log, tic);
+			wake_up(&tic->t_wait);
+		}
+		spin_unlock(&log->l_grant_write_lock);
 	}

-	if ((tic = log->l_reserve_headq)) {
+	if (!list_empty_careful(&log->l_reserveq)) {
 #ifdef DEBUG
 		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
 			panic("Recovery problem");
 #endif
-		cycle = log->l_grant_reserve_cycle;
-		bytes = log->l_grant_reserve_bytes;
-		free_bytes = xlog_space_left(log, cycle, bytes);
-		do {
+		spin_lock(&log->l_grant_reserve_lock);
+		free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+		list_for_each_entry(tic, &log->l_reserveq, t_queue) {
 			if (tic->t_flags & XLOG_TIC_PERM_RESERV)
 				need_bytes = tic->t_unit_res*tic->t_cnt;
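
Both wakeup loops budget against xlog_space_left(), the distance from a grant head back round to the tail. A simplified sketch consistent with the new calling convention above (the real function also reports the error case where the head has passed the tail):

static int example_space_left(struct log *log, atomic64_t *head)
{
	int tail_cycle, tail_bytes;
	int head_cycle, head_bytes;

	xlog_crack_grant_head(head, &head_cycle, &head_bytes);
	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
	tail_bytes = BBTOB(tail_bytes);		/* tail block -> bytes */

	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
		return log->l_logsize - (head_bytes - tail_bytes);
	if (tail_cycle + 1 < head_cycle)	/* should not happen */
		return 0;
	/* head is exactly one cycle ahead of the tail */
	return tail_bytes - head_bytes;
}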
 	log->l_flags	   |= XLOG_ACTIVE_RECOVERY;

 	log->l_prev_block  = -1;
-	log->l_tail_lsn	   = xlog_assign_lsn(1, 0);
 	/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
-	log->l_last_sync_lsn = log->l_tail_lsn;
+	xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
+	xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
 	log->l_curr_cycle  = 1;	    /* 0 is bad since this is initial value */
-	log->l_grant_reserve_cycle = 1;
-	log->l_grant_write_cycle = 1;
+	xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
+	xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
+	INIT_LIST_HEAD(&log->l_reserveq);
+	INIT_LIST_HEAD(&log->l_writeq);
+	spin_lock_init(&log->l_grant_reserve_lock);
+	spin_lock_init(&log->l_grant_write_lock);

 	error = EFSCORRUPTED;
 	if (xfs_sb_version_hassector(&mp->m_sb)) {
-		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
-		if (log->l_sectbb_log < 0 ||
-		    log->l_sectbb_log > mp->m_sectbb_log) {
-			xlog_warn("XFS: Log sector size (0x%x) out of range.",
-						log->l_sectbb_log);
+		log2_size = mp->m_sb.sb_logsectlog;
+		if (log2_size < BBSHIFT) {
+			xlog_warn("XFS: Log sector size too small "
+				"(0x%x < 0x%x)", log2_size, BBSHIFT);
+			goto out_free_log;
+		}
+
+		log2_size -= BBSHIFT;
+		if (log2_size > mp->m_sectbb_log) {
+			xlog_warn("XFS: Log sector size too large "
+				"(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
 			goto out_free_log;
 		}

 		/* for larger sector sizes, must have v2 or external log */
-		if (log->l_sectbb_log != 0 &&
-		    (log->l_logBBstart != 0 &&
-		     !xfs_sb_version_haslogv2(&mp->m_sb))) {
+		if (log2_size && log->l_logBBstart > 0 &&
+		    !xfs_sb_version_haslogv2(&mp->m_sb)) {
 			xlog_warn("XFS: log sector size (0x%x) invalid "
-				  "for configuration.", log->l_sectbb_log);
-			goto out_free_log;
-		}
-		if (mp->m_sb.sb_logsectlog < BBSHIFT) {
-			xlog_warn("XFS: Log sector log (0x%x) too small.",
-						mp->m_sb.sb_logsectlog);
+				  "for configuration.", log2_size);
 			goto out_free_log;
 		}
 	}
-	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
+	log->l_sectBBsize = 1 << log2_size;

 	xlog_get_iclog_buffer_size(mp, log);
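
A worked example of the sector-size computation above, with assumed values: a 4096-byte log sector (sb_logsectlog = 12), where BBSHIFT is 9 because a basic block is 512 bytes.

static int example_sect_bb_size(void)
{
	int log2_size = 12;		/* 2^12 = 4096 byte log sectors */

	log2_size -= 9;			/* subtract BBSHIFT: shift in basic blocks */
	return 1 << log2_size;		/* l_sectBBsize = 8 basic blocks = 4096 bytes */
}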
 * water mark. In this manner, we would be creating a low water mark.
 */
 STATIC void
-xlog_grant_push_ail(xfs_mount_t	*mp,
-		    int		need_bytes)
+xlog_grant_push_ail(
+	struct log	*log,
+	int		need_bytes)
 {
-	xlog_t		*log = mp->m_log;	/* pointer to the log */
-	xfs_lsn_t	tail_lsn;		/* lsn of the log tail */
-	xfs_lsn_t	threshold_lsn = 0;	/* lsn we'd like to be at */
-	int		free_blocks;		/* free blocks left to write to */
-	int		free_bytes;		/* free bytes left to write to */
-	int		threshold_block;	/* block in lsn we'd like to be at */
-	int		threshold_cycle;	/* lsn cycle we'd like to be at */
-	int		free_threshold;
-
-	ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
-
-	spin_lock(&log->l_grant_lock);
-	free_bytes = xlog_space_left(log,
-				log->l_grant_reserve_cycle,
-				log->l_grant_reserve_bytes);
-	tail_lsn = log->l_tail_lsn;
-	free_blocks = BTOBBT(free_bytes);
-
-	/*
-	 * Set the threshold for the minimum number of free blocks in the
-	 * log to the maximum of what the caller needs, one quarter of the
-	 * log, and 256 blocks.
-	 */
-	free_threshold = BTOBB(need_bytes);
-	free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
-	free_threshold = MAX(free_threshold, 256);
-	if (free_blocks < free_threshold) {
-		threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
-		threshold_cycle = CYCLE_LSN(tail_lsn);
-		if (threshold_block >= log->l_logBBsize) {
-			threshold_block -= log->l_logBBsize;
-			threshold_cycle += 1;
-		}
-		threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block);
-
-		/* Don't pass in an lsn greater than the lsn of the last
-		 * log record known to be on disk.
-		 */
-		if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0)
-			threshold_lsn = log->l_last_sync_lsn;
-	}
-	spin_unlock(&log->l_grant_lock);
-
-	/*
-	 * Get the transaction layer to kick the dirty buffers out to
-	 * disk asynchronously. No point in trying to do this if
-	 * the filesystem is shutting down.
-	 */
-	if (threshold_lsn &&
-	    !XLOG_FORCED_SHUTDOWN(log))
-		xfs_trans_ail_push(log->l_ailp, threshold_lsn);
-}	/* xlog_grant_push_ail */
+	xfs_lsn_t	threshold_lsn = 0;
+	xfs_lsn_t	last_sync_lsn;
+	int		free_blocks;
+	int		free_bytes;
+	int		threshold_block;
+	int		threshold_cycle;
+	int		free_threshold;
+
+	ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
+
+	free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+	free_blocks = BTOBBT(free_bytes);
+
+	/*
+	 * Set the threshold for the minimum number of free blocks in the
+	 * log to the maximum of what the caller needs, one quarter of the
+	 * log, and 256 blocks.
+	 */
+	free_threshold = BTOBB(need_bytes);
+	free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
+	free_threshold = MAX(free_threshold, 256);
+	if (free_blocks >= free_threshold)
+		return;
+
+	xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
+						&threshold_block);
+	threshold_block += free_threshold;
+	if (threshold_block >= log->l_logBBsize) {
+		threshold_block -= log->l_logBBsize;
+		threshold_cycle += 1;
+	}
+	threshold_lsn = xlog_assign_lsn(threshold_cycle,
+					threshold_block);
+	/*
+	 * Don't pass in an lsn greater than the lsn of the last
+	 * log record known to be on disk. Use a snapshot of the last sync lsn
+	 * so that it doesn't change between the compare and the set.
+	 */
+	last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
+	if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
+		threshold_lsn = last_sync_lsn;
+
+	/*
+	 * Get the transaction layer to kick the dirty buffers out to
+	 * disk asynchronously. No point in trying to do this if
+	 * the filesystem is shutting down.
+	 */
+	if (!XLOG_FORCED_SHUTDOWN(log))
+		xfs_trans_ail_push(log->l_ailp, threshold_lsn);
+}
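
A worked example of the push threshold, with assumed sizes: a log of 131072 basic blocks (64 MiB) and a 64 KiB reservation. The quarter-log term dominates, so the AIL is pushed whenever fewer than 32768 blocks remain free.

static int example_push_threshold(void)
{
	int log_bb_size = 131072;		/* l_logBBsize, assumed */
	int free_threshold = 65536 >> 9;	/* BTOBB(need_bytes) = 128 */

	if (free_threshold < (log_bb_size >> 2))
		free_threshold = log_bb_size >> 2;	/* quarter log: 32768 */
	if (free_threshold < 256)
		free_threshold = 256;
	return free_threshold;			/* 32768 blocks here */
}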
+/*
+ * The bdstrat callback function for log bufs. This gives us a central
+ * place to trap bufs in case we get hit by a log I/O error and need to
+ * shutdown. Actually, in practice, even when we didn't get a log error,
+ * we transition the iclogs to IOERROR state *after* flushing all existing
+ * iclogs to disk. This is because we don't want anymore new transactions to be
+ * started or completed afterwards.
+ */
+STATIC int
+xlog_bdstrat(
+	struct xfs_buf		*bp)
+{
+	struct xlog_in_core	*iclog;
+
+	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
+	if (iclog->ic_state & XLOG_STATE_IOERROR) {
+		XFS_BUF_ERROR(bp, EIO);
+		XFS_BUF_STALE(bp);
+		xfs_buf_ioend(bp, 0);
+		/*
+		 * It would seem logical to return EIO here, but we rely on
+		 * the log state machine to propagate I/O errors instead of
+		 * doing it here.
+		 */
+		return 0;
+	}
+
+	bp->b_flags |= _XBF_RUN_QUEUES;
+	xfs_buf_iorequest(bp);
+	return 0;
+}
 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous

 			    "bad-rtype" : res_type_str[r_type-1]),
 			    ticket->t_res_arr[i].r_len);
+	}
+
+	xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
+		"xfs_log_write: reservation ran out. Need to up reservation");
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+}
+/*
+ * Calculate the potential space needed by the log vector.  Each region gets
+ * its own xlog_op_header_t and may need to be double word aligned.
+ */
+static int
+xlog_write_calc_vec_length(
+	struct xlog_ticket	*ticket,
+	struct xfs_log_vec	*log_vector)
+{
+	struct xfs_log_vec	*lv;
+	int			headers = 0;
+	int			len = 0;
+	int			i;
+
+	/* acct for start rec of xact */
+	if (ticket->t_flags & XLOG_TIC_INITED)
+		headers++;
+
+	for (lv = log_vector; lv; lv = lv->lv_next) {
+		headers += lv->lv_niovecs;
+
+		for (i = 0; i < lv->lv_niovecs; i++) {
+			struct xfs_log_iovec	*vecp = &lv->lv_iovecp[i];
+
+			len += vecp->i_len;
+			xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
+		}
+	}
+
+	ticket->t_res_num_ophdrs += headers;
+	len += headers * sizeof(struct xlog_op_header);
+
+	return len;
+}
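
For instance, with assumed sizes: one vector carrying two regions of 128 and 64 bytes on a freshly initialised ticket. The start record and each region get an op header, which is 12 bytes on disk.

static int example_vec_length(void)
{
	int headers = 1 + 2;		/* start record + two regions */

	return 128 + 64 + headers * 12;	/* sizeof(xlog_op_header_t) == 12 */
}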
+/*
+ * If first write for transaction, insert start record.  We can't be trying to
+ * commit if we are inited.  We can't have any "partial_copy" if we are inited.
+ */
+static int
+xlog_write_start_rec(
+	struct xlog_op_header	*ophdr,
+	struct xlog_ticket	*ticket)
+{
+	if (!(ticket->t_flags & XLOG_TIC_INITED))
+		return 0;
+
+	ophdr->oh_tid	= cpu_to_be32(ticket->t_tid);
+	ophdr->oh_clientid = ticket->t_clientid;
+	ophdr->oh_len = 0;
+	ophdr->oh_flags = XLOG_START_TRANS;
+	ophdr->oh_res2 = 0;
+
+	ticket->t_flags &= ~XLOG_TIC_INITED;
+
+	return sizeof(struct xlog_op_header);
+}
+static xlog_op_header_t *
+xlog_write_setup_ophdr(
+	struct log		*log,
+	struct xlog_op_header	*ophdr,
+	struct xlog_ticket	*ticket,
+	uint			flags)
+{
+	ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+	ophdr->oh_clientid = ticket->t_clientid;
+	ophdr->oh_res2 = 0;
+
+	/* are we copying a commit or unmount record? */
+	ophdr->oh_flags = flags;
+
+	/*
+	 * We've seen logs corrupted with bad transaction client ids.  This
+	 * makes sure that XFS doesn't generate them on.  Turn this into an EIO
+	 * and shut down the filesystem.
+	 */
+	switch (ophdr->oh_clientid)  {
+	case XFS_TRANSACTION:
+	case XFS_VOLUME:
+	case XFS_LOG:
+		break;
+	default:
+		xfs_fs_cmn_err(CE_WARN, log->l_mp,
+			"Bad XFS transaction clientid 0x%x in ticket 0x%p",
+			ophdr->oh_clientid, ticket);
+		return NULL;
+	}
+
+	return ophdr;
+}
+/*
+ * Set up the parameters of the region copy into the log. This has
+ * to handle region write split across multiple log buffers - this
+ * state is kept external to this function so that this code can
+ * be written in an obvious, self documenting manner.
+ */
+static int
+xlog_write_setup_copy(
+	struct xlog_ticket	*ticket,
+	struct xlog_op_header	*ophdr,
+	int			space_available,
+	int			space_required,
+	int			*copy_off,
+	int			*copy_len,
+	int			*last_was_partial_copy,
+	int			*bytes_consumed)
+{
+	int			still_to_copy;
+
+	still_to_copy = space_required - *bytes_consumed;
+	*copy_off = *bytes_consumed;
+
+	if (still_to_copy <= space_available) {
+		/* write of region completes here */
+		*copy_len = still_to_copy;
+		ophdr->oh_len = cpu_to_be32(*copy_len);
+		if (*last_was_partial_copy)
+			ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
+		*last_was_partial_copy = 0;
+		*bytes_consumed = 0;
+		return 0;
+	}
+
+	/* partial write of region, needs extra log op header reservation */
+	*copy_len = space_available;
+	ophdr->oh_len = cpu_to_be32(*copy_len);
+	ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
+	if (*last_was_partial_copy)
+		ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
+	*bytes_consumed += *copy_len;
+	(*last_was_partial_copy)++;
+
+	/* account for new log op header */
+	ticket->t_curr_res -= sizeof(struct xlog_op_header);
+	ticket->t_res_num_ophdrs++;
+
+	return sizeof(struct xlog_op_header);
+}
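
A hypothetical driver for the split-region case above (ticket, ophdr1 and ophdr2 are assumed to exist): a 600-byte region meets a 256-byte hole in one iclog and completes in the next.

static void example_region_split(struct xlog_ticket *ticket,
				 struct xlog_op_header *ophdr1,
				 struct xlog_op_header *ophdr2)
{
	int copy_off, copy_len;
	int partial = 0, consumed = 0;

	/* only 256 bytes left: partial copy, ophdr1 gets XLOG_CONTINUE_TRANS
	 * and one extra op header is charged to the ticket */
	xlog_write_setup_copy(ticket, ophdr1, 256, 600,
			      &copy_off, &copy_len, &partial, &consumed);
	/* copy_len == 256, partial == 1, consumed == 256 */

	/* next iclog has room: the remaining 344 bytes complete the region,
	 * and ophdr2 is flagged XLOG_END_TRANS | XLOG_WAS_CONT_TRANS */
	xlog_write_setup_copy(ticket, ophdr2, 32000, 600,
			      &copy_off, &copy_len, &partial, &consumed);
	/* copy_off == 256, copy_len == 344, partial == 0, consumed == 0 */
}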
+static int
+xlog_write_copy_finish(
+	struct log		*log,
+	struct xlog_in_core	*iclog,
+	uint			flags,
+	int			*record_cnt,
+	int			*data_cnt,
+	int			*partial_copy,
+	int			*partial_copy_len,
+	int			log_offset,
+	struct xlog_in_core	**commit_iclog)
+{
+	if (*partial_copy) {
+		/*
+		 * This iclog has already been marked WANT_SYNC by
+		 * xlog_state_get_iclog_space.
+		 */
+		xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+		*record_cnt = 0;
+		*data_cnt = 0;
+		return xlog_state_release_iclog(log, iclog);
+	}
+
+	*partial_copy = 0;
+	*partial_copy_len = 0;
+
+	if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
+		/* no more space in this iclog - push it. */
+		xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+		*record_cnt = 0;
+		*data_cnt = 0;
+
+		spin_lock(&log->l_icloglock);
+		xlog_state_want_sync(log, iclog);
+		spin_unlock(&log->l_icloglock);
+
+		if (!commit_iclog)
+			return xlog_state_release_iclog(log, iclog);
+		ASSERT(flags & XLOG_COMMIT_TRANS);
+		*commit_iclog = iclog;
+	}
+
+	return 0;
+}
 * we don't update ic_offset until the end when we know exactly how many
 * bytes have been written out.
 */
 int
-xlog_write(xfs_mount_t *	mp,
-	   xfs_log_iovec_t	reg[],
-	   int			nentries,
-	   xfs_log_ticket_t	tic,
-	   xfs_lsn_t		*start_lsn,
-	   xlog_in_core_t	**commit_iclog,
-	   uint			flags)
+xlog_write(
+	struct log		*log,
+	struct xfs_log_vec	*log_vector,
+	struct xlog_ticket	*ticket,
+	xfs_lsn_t		*start_lsn,
+	struct xlog_in_core	**commit_iclog,
+	uint			flags)
 {
-	xlog_t		*log = mp->m_log;
-	xlog_ticket_t	*ticket = (xlog_ticket_t *)tic;
-	xlog_in_core_t	*iclog = NULL;	/* ptr to current in-core log */
-	xlog_op_header_t *logop_head;	/* ptr to log operation header */
-	__psint_t	ptr;		/* copy address into data region */
-	int		len;		/* # xlog_write() bytes 2 still copy */
-	int		index;		/* region index currently copying */
-	int		log_offset;	/* offset (from 0) into data region */
-	int		start_rec_copy;	/* # bytes to copy for start record */
-	int		partial_copy;	/* did we split a region? */
-	int		partial_copy_len;/* # bytes copied if split region */
-	int		need_copy;	/* # bytes need to memcpy this region */
-	int		copy_len;	/* # bytes actually memcpy'ing */
-	int		copy_off;	/* # bytes from entry start */
-	int		contwr;		/* continued write of in-core log? */
-	int		error;
-	int		record_cnt = 0, data_cnt = 0;
-
-	partial_copy_len = partial_copy = 0;
-
-	/* Calculate potential maximum space.  Each region gets its own
-	 * xlog_op_header_t and may need to be double word aligned.
-	 */
-	len = 0;
-	if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */
-		len += sizeof(xlog_op_header_t);
-		ticket->t_res_num_ophdrs++;
-	}
-
-	for (index = 0; index < nentries; index++) {
-		len += sizeof(xlog_op_header_t);    /* each region gets >= 1 */
-		ticket->t_res_num_ophdrs++;
-		len += reg[index].i_len;
-		xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type);
-	}
-	contwr = *start_lsn = 0;
-
-	if (ticket->t_curr_res < len) {
-		xlog_print_tic_res(mp, ticket);
-#ifdef DEBUG
-		xlog_panic(
-	"xfs_log_write: reservation ran out. Need to up reservation");
-#else
-		/* Customer configurable panic */
-		xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
-	"xfs_log_write: reservation ran out. Need to up reservation");
-		/* If we did not panic, shutdown the filesystem */
-		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-#endif
-	} else
-		ticket->t_curr_res -= len;
-
-	for (index = 0; index < nentries; ) {
-		if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
-						       &contwr, &log_offset)))
-			return error;
-
-		ASSERT(log_offset <= iclog->ic_size - 1);
-		ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset);
-
-		/* start_lsn is the first lsn written to. That's all we need. */
-		if (!*start_lsn)
-			*start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
-
-		/* This loop writes out as many regions as can fit in the amount
-		 * of space which was allocated by xlog_state_get_iclog_space().
-		 */
-		while (index < nentries) {
-			ASSERT(reg[index].i_len % sizeof(__int32_t) == 0);
-			ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0);
-			start_rec_copy = 0;
-
-			/* If first write for transaction, insert start record.
-			 * We can't be trying to commit if we are inited.  We can't
-			 * have any "partial_copy" if we are inited.
-			 */
-			if (ticket->t_flags & XLOG_TIC_INITED) {
-				logop_head		= (xlog_op_header_t *)ptr;
-				logop_head->oh_tid	= cpu_to_be32(ticket->t_tid);
-				logop_head->oh_clientid = ticket->t_clientid;
-				logop_head->oh_len	= 0;
-				logop_head->oh_flags	= XLOG_START_TRANS;
-				logop_head->oh_res2	= 0;
-				ticket->t_flags		&= ~XLOG_TIC_INITED; /* clear bit */
-				record_cnt++;
-
-				start_rec_copy = sizeof(xlog_op_header_t);
-				xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy);
-			}
-
-			/* Copy log operation header directly into data section */
-			logop_head		= (xlog_op_header_t *)ptr;
-			logop_head->oh_tid	= cpu_to_be32(ticket->t_tid);
-			logop_head->oh_clientid	= ticket->t_clientid;
-			logop_head->oh_res2	= 0;
-
-			/* header copied directly */
-			xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t));
-
-			/* are we copying a commit or unmount record? */
-			logop_head->oh_flags = flags;
-
-			/*
-			 * We've seen logs corrupted with bad transaction client
-			 * ids.  This makes sure that XFS doesn't generate them on.
-			 * Turn this into an EIO and shut down the filesystem.
-			 */
-			switch (logop_head->oh_clientid)  {
-			case XFS_TRANSACTION:
-			case XFS_VOLUME:
-			case XFS_LOG:
-				break;
-			default:
-				xfs_fs_cmn_err(CE_WARN, mp,
-				    "Bad XFS transaction clientid 0x%x in ticket 0x%p",
-				    logop_head->oh_clientid, tic);
-				return XFS_ERROR(EIO);
-			}
-
-			/* Partial write last time? => (partial_copy != 0)
-			 * need_copy is the amount we'd like to copy if everything could
-			 * fit in the current memcpy.
-			 */
-			need_copy = reg[index].i_len - partial_copy_len;
-
-			copy_off = partial_copy_len;
-			if (need_copy <= iclog->ic_size - log_offset) { /*complete write */
-				copy_len = need_copy;
-				logop_head->oh_len = cpu_to_be32(copy_len);
-				if (partial_copy)
-					logop_head->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
-				partial_copy_len = partial_copy = 0;
-			} else {					/* partial write */
-				copy_len = iclog->ic_size - log_offset;
-				logop_head->oh_len = cpu_to_be32(copy_len);
-				logop_head->oh_flags |= XLOG_CONTINUE_TRANS;
-				if (partial_copy)
-					logop_head->oh_flags |= XLOG_WAS_CONT_TRANS;
-				partial_copy_len += copy_len;
-				partial_copy++;
-				len += sizeof(xlog_op_header_t); /* from splitting of region */
-				/* account for new log op header */
-				ticket->t_curr_res -= sizeof(xlog_op_header_t);
-				ticket->t_res_num_ophdrs++;
-			}
-			xlog_verify_dest_ptr(log, ptr);
-
-			/* copy region */
-			ASSERT(copy_len >= 0);
-			memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
-			xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
-
-			/* make copy_len total bytes copied, including headers */
-			copy_len += start_rec_copy + sizeof(xlog_op_header_t);
-			record_cnt++;
-			data_cnt += contwr ? copy_len : 0;
-			if (partial_copy) {		/* copied partial region */
-				/* already marked WANT_SYNC by xlog_state_get_iclog_space */
-				xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-				record_cnt = data_cnt = 0;
-				if ((error = xlog_state_release_iclog(log, iclog)))
-					return error;
-				break;			/* don't increment index */
-			} else {			/* copied entire region */
-				index++;
-				partial_copy_len = partial_copy = 0;
-
-				if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
-					xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-					record_cnt = data_cnt = 0;
-					spin_lock(&log->l_icloglock);
-					xlog_state_want_sync(log, iclog);
-					spin_unlock(&log->l_icloglock);
-					if (commit_iclog) {
-						ASSERT(flags & XLOG_COMMIT_TRANS);
-						*commit_iclog = iclog;
-					} else if ((error = xlog_state_release_iclog(log, iclog)))
-						return error;
-					if (index == nentries)
-						return 0;	/* we are done */
-				}
-			}
-		}
-	}
+	struct xlog_in_core	*iclog = NULL;
+	struct xfs_log_iovec	*vecp;
+	struct xfs_log_vec	*lv;
+	int			len;
+	int			index;
+	int			partial_copy = 0;
+	int			partial_copy_len = 0;
+	int			contwr = 0;
+	int			record_cnt = 0;
+	int			data_cnt = 0;
+	int			error;
+
+	*start_lsn = 0;
+
+	len = xlog_write_calc_vec_length(ticket, log_vector);
+
+	/*
+	 * Region headers and bytes are already accounted for.
+	 * We only need to take into account start records and
+	 * split regions in this function.
+	 */
+	if (ticket->t_flags & XLOG_TIC_INITED)
+		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+
+	/*
+	 * Commit record headers need to be accounted for. These
+	 * come in as separate writes so are easy to detect.
+	 */
+	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
+		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+
+	ticket->t_curr_res -= len;
+
+	if (ticket->t_curr_res < 0)
+		xlog_print_tic_res(log->l_mp, ticket);
+
+	index = 0;
+	lv = log_vector;
+	vecp = lv->lv_iovecp;
+	while (lv && index < lv->lv_niovecs) {
+		void		*ptr;
+		int		log_offset;
+
+		error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
+						   &contwr, &log_offset);
+		if (error)
+			return error;
+
+		ASSERT(log_offset <= iclog->ic_size - 1);
+		ptr = iclog->ic_datap + log_offset;
+
+		/* start_lsn is the first lsn written to. That's all we need. */
+		if (!*start_lsn)
+			*start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+
+		/*
+		 * This loop writes out as many regions as can fit in the amount
+		 * of space which was allocated by xlog_state_get_iclog_space().
+		 */
+		while (lv && index < lv->lv_niovecs) {
+			struct xfs_log_iovec	*reg = &vecp[index];
+			struct xlog_op_header	*ophdr;
+			int			start_rec_copy;
+			int			copy_len;
+			int			copy_off;
+
+			ASSERT(reg->i_len % sizeof(__int32_t) == 0);
+			ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+
+			start_rec_copy = xlog_write_start_rec(ptr, ticket);
+			if (start_rec_copy) {
+				record_cnt++;
+				xlog_write_adv_cnt(&ptr, &len, &log_offset,
+						   start_rec_copy);
+			}
+
+			ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
+			if (!ophdr)
+				return XFS_ERROR(EIO);
+
+			xlog_write_adv_cnt(&ptr, &len, &log_offset,
+					   sizeof(struct xlog_op_header));
+
+			len += xlog_write_setup_copy(ticket, ophdr,
+						     iclog->ic_size-log_offset,
+						     reg->i_len,
+						     &copy_off, &copy_len,
+						     &partial_copy,
+						     &partial_copy_len);
+			xlog_verify_dest_ptr(log, ptr);
+
+			/* copy region */
+			ASSERT(copy_len >= 0);
+			memcpy(ptr, reg->i_addr + copy_off, copy_len);
+			xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
+
+			copy_len += start_rec_copy + sizeof(xlog_op_header_t);
+			record_cnt++;
+			data_cnt += contwr ? copy_len : 0;
+
+			error = xlog_write_copy_finish(log, iclog, flags,
+						       &record_cnt, &data_cnt,
+						       &partial_copy,
+						       &partial_copy_len,
+						       log_offset,
+						       commit_iclog);
+			if (error)
+				return error;
+
+			/*
+			 * if we had a partial copy, we need to get more iclog
+			 * space but we don't want to increment the region
+			 * index because there is still more is this region to
+			 * write.
+			 *
+			 * If we completed writing this region, and we flushed
+			 * the iclog (indicated by resetting of the record
+			 * count), then we also need to get more log space. If
+			 * this was the last record, though, we are done and
+			 * can just return.
+			 */
+			if (partial_copy)
+				break;
+
+			if (++index == lv->lv_niovecs) {
+				lv = lv->lv_next;
+				index = 0;
+				if (lv)
+					vecp = lv->lv_iovecp;
+			}
+			if (record_cnt == 0) {
+				if (!lv)
+					return 0;
+				break;
+			}
-		} /* if (partial_copy) */
-		} /* while (index < nentries) */
-	} /* for (index = 0; index < nentries; ) */
+		}
+	}
+
+	ASSERT(len == 0);

-	xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
+	xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
+	if (!commit_iclog)
+		return xlog_state_release_iclog(log, iclog);

 	ASSERT(flags & XLOG_COMMIT_TRANS);
 	*commit_iclog = iclog;
-	return xlog_state_release_iclog(log, iclog);
+	return 0;
+}
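
A hypothetical caller sketch for the vector-based xlog_write() above (payload, payload_len, log and ticket are assumed; error handling elided): one region is wrapped in a single log vector and written with no commit iclog requested.

struct xfs_log_iovec iovec = {
	.i_addr		= payload,
	.i_len		= payload_len,		/* must be 32-bit aligned */
	.i_type		= XLOG_REG_TYPE_TRANSHDR,
};
struct xfs_log_vec lv = {
	.lv_niovecs	= 1,
	.lv_iovecp	= &iovec,
	.lv_next	= NULL,
};
xfs_lsn_t start_lsn = 0;
int error = xlog_write(log, &lv, ticket, &start_lsn, NULL, 0);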
 /*****************************************************************************
 		if (XLOG_FORCED_SHUTDOWN(log))
 			goto error_return;

 		XFS_STATS_INC(xs_sleep_logspace);
-		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+		xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);

 		/*
 		 * If we got an error, and the filesystem is shutting down,
 		 * we'll catch it down below. So just continue...
 		 */
-		xlog_trace_loggrant(log, tic,
-				    "xlog_grant_log_space: wake 1");
-		spin_lock(&log->l_grant_lock);
+		trace_xfs_log_grant_wake1(log, tic);
 	}

 	if (tic->t_flags & XFS_LOG_PERM_RESERV)
 		need_bytes = tic->t_unit_res*tic->t_ocnt;
 	else
 		need_bytes = tic->t_unit_res;

 redo:
 	if (XLOG_FORCED_SHUTDOWN(log))
-		goto error_return;
+		goto error_return_unlocked;

-	free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
-				     log->l_grant_reserve_bytes);
+	free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
 	if (free_bytes < need_bytes) {
-		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			xlog_ins_ticketq(&log->l_reserve_headq, tic);
-		xlog_trace_loggrant(log, tic,
-				    "xlog_grant_log_space: sleep 2");
-		spin_unlock(&log->l_grant_lock);
-		xlog_grant_push_ail(log->l_mp, need_bytes);
-		spin_lock(&log->l_grant_lock);
-
-		XFS_STATS_INC(xs_sleep_logspace);
-		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
-
-		spin_lock(&log->l_grant_lock);
+		spin_lock(&log->l_grant_reserve_lock);
+		if (list_empty(&tic->t_queue))
+			list_add_tail(&tic->t_queue, &log->l_reserveq);
+
+		trace_xfs_log_grant_sleep2(log, tic);
+
 		if (XLOG_FORCED_SHUTDOWN(log))
 			goto error_return;

-		xlog_trace_loggrant(log, tic,
-				    "xlog_grant_log_space: wake 2");
+		xlog_grant_push_ail(log, need_bytes);
+
+		XFS_STATS_INC(xs_sleep_logspace);
+		xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+
+		trace_xfs_log_grant_wake2(log, tic);
 		goto redo;
-	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		xlog_del_ticketq(&log->l_reserve_headq, tic);
+	}
+
+	if (!list_empty(&tic->t_queue)) {
+		spin_lock(&log->l_grant_reserve_lock);
+		list_del_init(&tic->t_queue);
+		spin_unlock(&log->l_grant_reserve_lock);
+	}

 	/* we've got enough space */
-	xlog_grant_add_space(log, need_bytes);
-#ifdef DEBUG
-	tail_lsn = log->l_tail_lsn;
-	/*
-	 * Check to make sure the grant write head didn't just over lap the
-	 * tail.  If the cycles are the same, we can't be overlapping.
-	 * Otherwise, make sure that the cycles differ by exactly one and
-	 * check the byte count.
-	 */
-	if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
-		ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
-		ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
-	}
-#endif
-	xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit");
-	xlog_verify_grant_head(log, 1);
-	spin_unlock(&log->l_grant_lock);
+	xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
+	xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
+	trace_xfs_log_grant_exit(log, tic);
+	xlog_verify_grant_tail(log);
 	return 0;

-error_return:
-	if (tic->t_flags & XLOG_TIC_IN_Q)
-		xlog_del_ticketq(&log->l_reserve_headq, tic);
-	xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
+error_return_unlocked:
+	spin_lock(&log->l_grant_reserve_lock);
+error_return:
+	list_del_init(&tic->t_queue);
+	spin_unlock(&log->l_grant_reserve_lock);
+	trace_xfs_log_grant_error(log, tic);

 	/*
 	 * If we are failing, make sure the ticket doesn't have any
 	 * current reservations. We don't want to add this back when
 	 * this transaction.
 	 */
 	need_bytes = tic->t_unit_res;
-	if ((ntic = log->l_write_headq)) {
-		free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
-					     log->l_grant_write_bytes);
-		do {
+	if (!list_empty_careful(&log->l_writeq)) {
+		struct xlog_ticket *ntic;
+
+		spin_lock(&log->l_grant_write_lock);
+		free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+		list_for_each_entry(ntic, &log->l_writeq, t_queue) {
 			ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);

 			if (free_bytes < ntic->t_unit_res)
 				break;
 			free_bytes -= ntic->t_unit_res;
-			sv_signal(&ntic->t_wait);
-			ntic = ntic->t_next;
-		} while (ntic != log->l_write_headq);
-
-		if (ntic != log->l_write_headq) {
-			if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-				xlog_ins_ticketq(&log->l_write_headq, tic);
-
-			xlog_trace_loggrant(log, tic,
-				    "xlog_regrant_write_log_space: sleep 1");
-			spin_unlock(&log->l_grant_lock);
-			xlog_grant_push_ail(log->l_mp, need_bytes);
-			spin_lock(&log->l_grant_lock);
+			wake_up(&ntic->t_wait);
+		}
+
+		if (ntic != list_first_entry(&log->l_writeq,
+						struct xlog_ticket, t_queue)) {
+			if (list_empty(&tic->t_queue))
+				list_add_tail(&tic->t_queue, &log->l_writeq);
+			trace_xfs_log_regrant_write_sleep1(log, tic);
+
+			xlog_grant_push_ail(log, need_bytes);

 			XFS_STATS_INC(xs_sleep_logspace);
-			sv_wait(&tic->t_wait, PINOD|PLTWAIT,
-				&log->l_grant_lock, s);
-
-			/* If we're shutting down, this tic is already
-			 * off the queue */
-			spin_lock(&log->l_grant_lock);
-			if (XLOG_FORCED_SHUTDOWN(log))
-				goto error_return;
-
-			xlog_trace_loggrant(log, tic,
-				    "xlog_regrant_write_log_space: wake 1");
-		}
+			xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
+			trace_xfs_log_regrant_write_wake1(log, tic);
+		} else
+			spin_unlock(&log->l_grant_write_lock);
 	}

 redo:
 	if (XLOG_FORCED_SHUTDOWN(log))
-		goto error_return;
+		goto error_return_unlocked;

-	free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
-				     log->l_grant_write_bytes);
+	free_bytes = xlog_space_left(log, &log->l_grant_write_head);
 	if (free_bytes < need_bytes) {
-		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			xlog_ins_ticketq(&log->l_write_headq, tic);
-		spin_unlock(&log->l_grant_lock);
-		xlog_grant_push_ail(log->l_mp, need_bytes);
-		spin_lock(&log->l_grant_lock);
-
-		XFS_STATS_INC(xs_sleep_logspace);
-		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
-
-		/* If we're shutting down, this tic is already off the queue */
-		spin_lock(&log->l_grant_lock);
+		spin_lock(&log->l_grant_write_lock);
+		if (list_empty(&tic->t_queue))
+			list_add_tail(&tic->t_queue, &log->l_writeq);
+
 		if (XLOG_FORCED_SHUTDOWN(log))
 			goto error_return;

-		xlog_trace_loggrant(log, tic,
-				    "xlog_regrant_write_log_space: wake 2");
+		xlog_grant_push_ail(log, need_bytes);
+
+		XFS_STATS_INC(xs_sleep_logspace);
+		trace_xfs_log_regrant_write_sleep2(log, tic);
+		xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
+
+		trace_xfs_log_regrant_write_wake2(log, tic);
 		goto redo;
-	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		xlog_del_ticketq(&log->l_write_headq, tic);
+	}
+
+	if (!list_empty(&tic->t_queue)) {
+		spin_lock(&log->l_grant_write_lock);
+		list_del_init(&tic->t_queue);
+		spin_unlock(&log->l_grant_write_lock);
+	}

 	/* we've got enough space */
-	xlog_grant_add_space_write(log, need_bytes);
-#ifdef DEBUG
-	tail_lsn = log->l_tail_lsn;
-	if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
-		ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
-		ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
-	}
-#endif
-	xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit");
-	xlog_verify_grant_head(log, 1);
-	spin_unlock(&log->l_grant_lock);
+	xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
+	trace_xfs_log_regrant_write_exit(log, tic);
+	xlog_verify_grant_tail(log);
 	return 0;

+error_return_unlocked:
+	spin_lock(&log->l_grant_write_lock);
 error_return:
-	if (tic->t_flags & XLOG_TIC_IN_Q)
-		xlog_del_ticketq(&log->l_reserve_headq, tic);
-	xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
+	list_del_init(&tic->t_queue);
+	spin_unlock(&log->l_grant_write_lock);
+	trace_xfs_log_regrant_write_error(log, tic);

 	/*
 	 * If we are failing, make sure the ticket doesn't have any
 	 * current reservations. We don't want to add this back when
 *		state and go to sleep or return.
 *	If it is in any other state, go to sleep or return.
 *
-* If filesystem activity goes to zero, the iclog will get flushed only by
-* bdflush().
+* Synchronous forces are implemented with a signal variable. All callers
+* to force a given lsn to disk will wait on the sv attached to the
+* specific in-core log. When the given in-core log finally completes its
+* write to disk, that thread will wake up all threads waiting on the
+* queue.
 */
-STATIC int
-xlog_state_sync(xlog_t		*log,
-		xfs_lsn_t	lsn,
-		uint		flags,
-		int		*log_flushed)
+int
+_xfs_log_force_lsn(
+	struct xfs_mount	*mp,
+	xfs_lsn_t		lsn,
+	uint			flags,
+	int			*log_flushed)
 {
-	xlog_in_core_t	*iclog;
-	int		already_slept = 0;
+	struct log		*log = mp->m_log;
+	struct xlog_in_core	*iclog;
+	int			already_slept = 0;
+
+	XFS_STATS_INC(xs_log_force);
+
+	lsn = xlog_cil_force_lsn(log, lsn);
+	if (lsn == NULLCOMMITLSN)
+		return 0;

-try_again:
-	spin_lock(&log->l_icloglock);
-	iclog = log->l_iclog;
-	if (iclog->ic_state & XLOG_STATE_IOERROR) {
-		spin_unlock(&log->l_icloglock);
-		return XFS_ERROR(EIO);
-	}
-
-	do {
-		if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
-			iclog = iclog->ic_next;
-			continue;
-		}
-
-		if (iclog->ic_state == XLOG_STATE_DIRTY) {
-			spin_unlock(&log->l_icloglock);
-			return 0;
-		}
-
-		if (iclog->ic_state == XLOG_STATE_ACTIVE) {
-			/*
-			 * We sleep here if we haven't already slept (e.g.
-			 * this is the first time we've looked at the correct
-			 * iclog buf) and the buffer before us is going to
-			 * be sync'ed. The reason for this is that if we
-			 * are doing sync transactions here, by waiting for
-			 * the previous I/O to complete, we can allow a few
-			 * more transactions into this iclog before we close
-			 * it down.
-			 *
-			 * Otherwise, we mark the buffer WANT_SYNC, and bump
-			 * up the refcnt so we can release the log (which drops
-			 * the ref count). The state switch keeps new transaction
-			 * commits from using this buffer. When the current commits
-			 * finish writing into the buffer, the refcount will drop to
-			 * zero and the buffer will go out then.
-			 */
-			if (!already_slept &&
-			    (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
-							 XLOG_STATE_SYNCING))) {
-				ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-				XFS_STATS_INC(xs_log_force_sleep);
-				sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
-					&log->l_icloglock, s);
+try_again:
+	spin_lock(&log->l_icloglock);
+	iclog = log->l_iclog;
+	if (iclog->ic_state & XLOG_STATE_IOERROR) {
+		spin_unlock(&log->l_icloglock);
+		return XFS_ERROR(EIO);
+	}
+
+	do {
+		if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
+			iclog = iclog->ic_next;
+			continue;
+		}
+
+		if (iclog->ic_state == XLOG_STATE_DIRTY) {
+			spin_unlock(&log->l_icloglock);
+			return 0;
+		}
+
+		if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+			/*
+			 * We sleep here if we haven't already slept (e.g.
+			 * this is the first time we've looked at the correct
+			 * iclog buf) and the buffer before us is going to
+			 * be sync'ed. The reason for this is that if we
+			 * are doing sync transactions here, by waiting for
+			 * the previous I/O to complete, we can allow a few
+			 * more transactions into this iclog before we close
+			 * it down.
+			 *
+			 * Otherwise, we mark the buffer WANT_SYNC, and bump
+			 * up the refcnt so we can release the log (which
+			 * drops the ref count). The state switch keeps new
+			 * transaction commits from using this buffer. When
+			 * the current commits finish writing into the buffer,
+			 * the refcount will drop to zero and the buffer will
+			 * go out then.
+			 */
+			if (!already_slept &&
+			    (iclog->ic_prev->ic_state &
+			     (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
+				ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+
+				XFS_STATS_INC(xs_log_force_sleep);
+
+				xlog_wait(&iclog->ic_prev->ic_write_wait,
+					  &log->l_icloglock);
+				already_slept = 1;
+				goto try_again;
+			}
 			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
 				return XFS_ERROR(EIO);
 			if (log_flushed)
 				*log_flushed = 1;
 			spin_lock(&log->l_icloglock);
 		}

-		if ((flags & XFS_LOG_SYNC) && /* sleep */
-		    !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
-			/*
-			 * Don't wait on completion if we know that we've
-			 * gotten a log write error.
-			 */
-			if (iclog->ic_state & XLOG_STATE_IOERROR) {
-				spin_unlock(&log->l_icloglock);
-				return XFS_ERROR(EIO);
-			}
-			XFS_STATS_INC(xs_log_force_sleep);
-			sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
-			/*
-			 * No need to grab the log lock here since we're
-			 * only deciding whether or not to return EIO
-			 * and the memory read should be atomic.
-			 */
-			if (iclog->ic_state & XLOG_STATE_IOERROR)
-				return XFS_ERROR(EIO);
-		} else {		/* just return */
-			spin_unlock(&log->l_icloglock);
-		}
+		if ((flags & XFS_LOG_SYNC) && /* sleep */
+		    !(iclog->ic_state &
+		      (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
+			/*
+			 * Don't wait on completion if we know that we've
+			 * gotten a log write error.
+			 */
+			if (iclog->ic_state & XLOG_STATE_IOERROR) {
+				spin_unlock(&log->l_icloglock);
+				return XFS_ERROR(EIO);
+			}
+			XFS_STATS_INC(xs_log_force_sleep);
+			xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+			/*
+			 * No need to grab the log lock here since we're
+			 * only deciding whether or not to return EIO
+			 * and the memory read should be atomic.
+			 */
+			if (iclog->ic_state & XLOG_STATE_IOERROR)
+				return XFS_ERROR(EIO);
+			if (log_flushed)
+				*log_flushed = 1;
+		} else {		/* just return */
+			spin_unlock(&log->l_icloglock);
+		}

 		return 0;
 	} while (iclog != log->l_iclog);

 	spin_unlock(&log->l_icloglock);
 	return 0;
-}	/* xlog_state_sync */
+}
+/*
+ * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force_lsn(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	lsn,
+	uint		flags)
+{
+	int	error;
+
+	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
+	if (error) {
+		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+			"error %d returned.", error);
+	}
+}
 /*
  * Called when we want to mark the current iclog as being ready to sync to
  * disk.
  */