~ubuntu-branches/debian/wheezy/linux-2.6/wheezy

Viewing changes to fs/xfs/xfs_log.c

  • Committer: Bazaar Package Importer
  • Author(s): Ben Hutchings, Ben Hutchings, Aurelien Jarno, Martin Michlmayr
  • Date: 2011-04-06 13:53:30 UTC
  • mfrom: (43.1.5 sid)
  • Revision ID: james.westby@ubuntu.com-20110406135330-wjufxhd0tvn3zx4z
Tags: 2.6.38-3
[ Ben Hutchings ]
* [ppc64] Add to linux-tools package architectures (Closes: #620124)
* [amd64] Save cr4 to mmu_cr4_features at boot time (Closes: #620284)
* appletalk: Fix bugs introduced when removing use of BKL
* ALSA: Fix yet another race in disconnection
* cciss: Fix lost command issue
* ath9k: Fix kernel panic in AR2427
* ses: Avoid kernel panic when lun 0 is not mapped
* PCI/ACPI: Report ASPM support to BIOS if not disabled from command line

[ Aurelien Jarno ]
* rtlwifi: fix build when PCI is not enabled.

[ Martin Michlmayr ]
* rtlwifi: Eliminate udelay calls with too large values (Closes: #620204)

@@ -24 +24 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
 #include "xfs_log_priv.h"
@@ -35 +33 @@
 #include "xfs_ialloc_btree.h"
 #include "xfs_log_recover.h"
 #include "xfs_trans_priv.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_rw.h"
+#include "xfs_trace.h"
 
 kmem_zone_t     *xfs_log_ticket_zone;
 
-#define xlog_write_adv_cnt(ptr, len, off, bytes) \
-        { (ptr) += (bytes); \
-          (len) -= (bytes); \
-          (off) += (bytes);}
-
 /* Local miscellaneous function prototypes */
-STATIC int       xlog_bdstrat_cb(struct xfs_buf *);
-STATIC int       xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
+STATIC int       xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
                                     xlog_in_core_t **, xfs_lsn_t *);
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t     *mp,
                                 xfs_buftarg_t   *log_target,
                                 xfs_daddr_t     blk_offset,
                                 int             num_bblks);
-STATIC int       xlog_space_left(xlog_t *log, int cycle, int bytes);
+STATIC int       xlog_space_left(struct log *log, atomic64_t *head);
 STATIC int       xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void      xlog_dealloc_log(xlog_t *log);
-STATIC int       xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
-                            int nentries, xfs_log_ticket_t tic,
-                            xfs_lsn_t *start_lsn,
-                            xlog_in_core_t **commit_iclog,
-                            uint flags);
 
 /* local state machine functions */
 STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
@@ -79 +65 @@
 STATIC void xlog_state_switch_iclogs(xlog_t             *log,
                                      xlog_in_core_t *iclog,
                                      int                eventual_size);
-STATIC int  xlog_state_sync(xlog_t                      *log,
-                            xfs_lsn_t                   lsn,
-                            uint                        flags,
-                            int                         *log_flushed);
-STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
 STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
 
 /* local functions to manipulate grant head */
 STATIC int  xlog_grant_log_space(xlog_t         *log,
                                  xlog_ticket_t  *xtic);
-STATIC void xlog_grant_push_ail(xfs_mount_t     *mp,
+STATIC void xlog_grant_push_ail(struct log      *log,
                                 int             need_bytes);
 STATIC void xlog_regrant_reserve_log_space(xlog_t        *log,
                                            xlog_ticket_t *ticket);
@@ -98 +79 @@
 STATIC void xlog_ungrant_log_space(xlog_t        *log,
                                    xlog_ticket_t *ticket);
 
-
-/* local ticket functions */
-STATIC xlog_ticket_t    *xlog_ticket_alloc(xlog_t *log,
-                                         int    unit_bytes,
-                                         int    count,
-                                         char   clientid,
-                                         uint   flags);
-
 #if defined(DEBUG)
-STATIC void     xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
-STATIC void     xlog_verify_grant_head(xlog_t *log, int equals);
+STATIC void     xlog_verify_dest_ptr(xlog_t *log, char *ptr);
+STATIC void     xlog_verify_grant_tail(struct log *log);
 STATIC void     xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
                                   int count, boolean_t syncing);
 STATIC void     xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
                                      xfs_lsn_t tail_lsn);
 #else
 #define xlog_verify_dest_ptr(a,b)
-#define xlog_verify_grant_head(a,b)
+#define xlog_verify_grant_tail(a)
 #define xlog_verify_iclog(a,b,c,d)
 #define xlog_verify_tail_lsn(a,b,c)
 #endif
 
 STATIC int      xlog_iclogs_empty(xlog_t *log);
 
-#if defined(XFS_LOG_TRACE)
-
-#define XLOG_TRACE_LOGGRANT_SIZE        2048
-#define XLOG_TRACE_ICLOG_SIZE           256
-
-void
-xlog_trace_loggrant_alloc(xlog_t *log)
-{
-        log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
-}
-
-void
-xlog_trace_loggrant_dealloc(xlog_t *log)
-{
-        ktrace_free(log->l_grant_trace);
-}
-
-void
-xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
-{
-        unsigned long cnts;
-
-        /* ticket counts are 1 byte each */
-        cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
-
-        ktrace_enter(log->l_grant_trace,
-                     (void *)tic,
-                     (void *)log->l_reserve_headq,
-                     (void *)log->l_write_headq,
-                     (void *)((unsigned long)log->l_grant_reserve_cycle),
-                     (void *)((unsigned long)log->l_grant_reserve_bytes),
-                     (void *)((unsigned long)log->l_grant_write_cycle),
-                     (void *)((unsigned long)log->l_grant_write_bytes),
-                     (void *)((unsigned long)log->l_curr_cycle),
-                     (void *)((unsigned long)log->l_curr_block),
-                     (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
-                     (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
-                     (void *)string,
-                     (void *)((unsigned long)tic->t_trans_type),
-                     (void *)cnts,
-                     (void *)((unsigned long)tic->t_curr_res),
-                     (void *)((unsigned long)tic->t_unit_res));
-}
-
-void
-xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
-{
-        iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
-}
-
-void
-xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
-{
-        ktrace_free(iclog->ic_trace);
-}
-
-void
-xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
-{
-        ktrace_enter(iclog->ic_trace,
-                     (void *)((unsigned long)state),
-                     (void *)((unsigned long)current_pid()),
-                     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-                     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-                     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-                     (void *)NULL, (void *)NULL);
-}
-#else
-
-#define xlog_trace_loggrant_alloc(log)
-#define xlog_trace_loggrant_dealloc(log)
-#define xlog_trace_loggrant(log,tic,string)
-
-#define xlog_trace_iclog_alloc(iclog)
-#define xlog_trace_iclog_dealloc(iclog)
-#define xlog_trace_iclog(iclog,state)
-
-#endif /* XFS_LOG_TRACE */
-
-
-static void
-xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
-        if (*qp) {
-                tic->t_next         = (*qp);
-                tic->t_prev         = (*qp)->t_prev;
-                (*qp)->t_prev->t_next = tic;
-                (*qp)->t_prev       = tic;
-        } else {
-                tic->t_prev = tic->t_next = tic;
-                *qp = tic;
-        }
-
-        tic->t_flags |= XLOG_TIC_IN_Q;
-}
-
-static void
-xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
-        if (tic == tic->t_next) {
-                *qp = NULL;
-        } else {
-                *qp = tic->t_next;
-                tic->t_next->t_prev = tic->t_prev;
-                tic->t_prev->t_next = tic->t_next;
-        }
-
-        tic->t_next = tic->t_prev = NULL;
-        tic->t_flags &= ~XLOG_TIC_IN_Q;
-}
-
-static void
-xlog_grant_sub_space(struct log *log, int bytes)
-{
-        log->l_grant_write_bytes -= bytes;
-        if (log->l_grant_write_bytes < 0) {
-                log->l_grant_write_bytes += log->l_logsize;
-                log->l_grant_write_cycle--;
-        }
-
-        log->l_grant_reserve_bytes -= bytes;
-        if ((log)->l_grant_reserve_bytes < 0) {
-                log->l_grant_reserve_bytes += log->l_logsize;
-                log->l_grant_reserve_cycle--;
-        }
-
-}
-
-static void
-xlog_grant_add_space_write(struct log *log, int bytes)
-{
-        int tmp = log->l_logsize - log->l_grant_write_bytes;
-        if (tmp > bytes)
-                log->l_grant_write_bytes += bytes;
-        else {
-                log->l_grant_write_cycle++;
-                log->l_grant_write_bytes = bytes - tmp;
-        }
-}
-
-static void
-xlog_grant_add_space_reserve(struct log *log, int bytes)
-{
-        int tmp = log->l_logsize - log->l_grant_reserve_bytes;
-        if (tmp > bytes)
-                log->l_grant_reserve_bytes += bytes;
-        else {
-                log->l_grant_reserve_cycle++;
-                log->l_grant_reserve_bytes = bytes - tmp;
-        }
-}
-
-static inline void
-xlog_grant_add_space(struct log *log, int bytes)
-{
-        xlog_grant_add_space_write(log, bytes);
-        xlog_grant_add_space_reserve(log, bytes);
+static void
+xlog_grant_sub_space(
+        struct log      *log,
+        atomic64_t      *head,
+        int             bytes)
+{
+        int64_t head_val = atomic64_read(head);
+        int64_t new, old;
+
+        do {
+                int     cycle, space;
+
+                xlog_crack_grant_head_val(head_val, &cycle, &space);
+
+                space -= bytes;
+                if (space < 0) {
+                        space += log->l_logsize;
+                        cycle--;
+                }
+
+                old = head_val;
+                new = xlog_assign_grant_head_val(cycle, space);
+                head_val = atomic64_cmpxchg(head, old, new);
+        } while (head_val != old);
+}
+
+static void
+xlog_grant_add_space(
+        struct log      *log,
+        atomic64_t      *head,
+        int             bytes)
+{
+        int64_t head_val = atomic64_read(head);
+        int64_t new, old;
+
+        do {
+                int             tmp;
+                int             cycle, space;
+
+                xlog_crack_grant_head_val(head_val, &cycle, &space);
+
+                tmp = log->l_logsize - space;
+                if (tmp > bytes)
+                        space += bytes;
+                else {
+                        space = bytes - tmp;
+                        cycle++;
+                }
+
+                old = head_val;
+                new = xlog_assign_grant_head_val(cycle, space);
+                head_val = atomic64_cmpxchg(head, old, new);
+        } while (head_val != old);
 }
 
 static void
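
A note on the grant-head rework in the hunk above (this example is not part of the patch): the new xlog_grant_add_space()/xlog_grant_sub_space() drop the old per-field updates in favour of a lockless retry loop over a single 64-bit value packing the (cycle, space) pair, committed with atomic64_cmpxchg(). As a rough user-space sketch of the same compare-and-swap pattern using C11 atomics -- the packing, names and helpers below are illustrative assumptions, not the XFS definitions:

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative packing: high 32 bits hold the cycle, low 32 bits the byte
 * offset ("space") within the log. */
static inline int64_t pack_head(int cycle, int space)
{
        return ((int64_t)cycle << 32) | (uint32_t)space;
}

static inline void crack_head(int64_t val, int *cycle, int *space)
{
        *cycle = (int)(val >> 32);
        *space = (int)(val & 0xffffffff);
}

/* Add 'bytes' to the head, wrapping into the next cycle when the end of a
 * 'logsize'-byte log is passed; retried until no other thread raced us. */
static void grant_add_space(_Atomic int64_t *head, int logsize, int bytes)
{
        int64_t old = atomic_load(head);
        int64_t new;
        int cycle, space;

        do {
                crack_head(old, &cycle, &space);
                if (logsize - space > bytes) {
                        space += bytes;
                } else {
                        space = bytes - (logsize - space);
                        cycle++;
                }
                new = pack_head(cycle, space);
                /* On failure, atomic_compare_exchange_weak reloads 'old'. */
        } while (!atomic_compare_exchange_weak(head, &old, new));
}

The kernel version differs in detail (it re-reads the head from atomic64_cmpxchg()'s return value), but the retry-until-unchanged structure is the same.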
@@ -327 +196 @@
  * out when the next write occurs.
  */
 xfs_lsn_t
-xfs_log_done(xfs_mount_t        *mp,
-             xfs_log_ticket_t   xtic,
-             void               **iclog,
-             uint               flags)
+xfs_log_done(
+        struct xfs_mount        *mp,
+        struct xlog_ticket      *ticket,
+        struct xlog_in_core     **iclog,
+        uint                    flags)
 {
-        xlog_t          *log    = mp->m_log;
-        xlog_ticket_t   *ticket = (xfs_log_ticket_t) xtic;
-        xfs_lsn_t       lsn     = 0;
+        struct log              *log = mp->m_log;
+        xfs_lsn_t               lsn = 0;
 
         if (XLOG_FORCED_SHUTDOWN(log) ||
             /*
@@ -342 +211 @@
              * If we get an error, just continue and give back the log ticket.
              */
             (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
-             (xlog_commit_record(mp, ticket,
-                                 (xlog_in_core_t **)iclog, &lsn)))) {
+             (xlog_commit_record(log, ticket, iclog, &lsn)))) {
                 lsn = (xfs_lsn_t) -1;
                 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
                         flags |= XFS_LOG_REL_PERM_RESERV;
@@ -353 +221 @@
 
         if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
             (flags & XFS_LOG_REL_PERM_RESERV)) {
+                trace_xfs_log_done_nonperm(log, ticket);
+
                 /*
                  * Release ticket if not permanent reservation or a specific
                  * request has been made to release a permanent reservation.
                  */
-                xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
                 xlog_ungrant_log_space(log, ticket);
                 xfs_log_ticket_put(ticket);
         } else {
-                xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
+                trace_xfs_log_done_perm(log, ticket);
+
                 xlog_regrant_reserve_log_space(log, ticket);
                 /* If this ticket was a permanent reservation and we aren't
                  * trying to release it, reset the inited flags; so next time
@@ -371 +241 @@
         }
 
         return lsn;
-}       /* xfs_log_done */
-
-
-/*
- * Force the in-core log to disk.  If flags == XFS_LOG_SYNC,
- *      the force is done synchronously.
- *
- * Asynchronous forces are implemented by setting the WANT_SYNC
- * bit in the appropriate in-core log and then returning.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log.  When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force(
-        xfs_mount_t     *mp,
-        xfs_lsn_t       lsn,
-        uint            flags,
-        int             *log_flushed)
-{
-        xlog_t          *log = mp->m_log;
-        int             dummy;
-
-        if (!log_flushed)
-                log_flushed = &dummy;
-
-        ASSERT(flags & XFS_LOG_FORCE);
-
-        XFS_STATS_INC(xs_log_force);
-
-        if (log->l_flags & XLOG_IO_ERROR)
-                return XFS_ERROR(EIO);
-        if (lsn == 0)
-                return xlog_state_sync_all(log, flags, log_flushed);
-        else
-                return xlog_state_sync(log, lsn, flags, log_flushed);
-}       /* _xfs_log_force */
-
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
-        xfs_mount_t     *mp,
-        xfs_lsn_t       lsn,
-        uint            flags)
-{
-        int     error;
-        error = _xfs_log_force(mp, lsn, flags, NULL);
-        if (error) {
-                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
-                        "error %d returned.", error);
-        }
 }
 
-
 /*
  * Attaches a new iclog I/O completion callback routine during
  * transaction commit.  If the log is in error state, a non-zero
@@ -439 +250 @@
  * executing the callback at an appropriate time.
  */
 int
-xfs_log_notify(xfs_mount_t        *mp,          /* mount of partition */
-               void               *iclog_hndl,  /* iclog to hang callback off */
-               xfs_log_callback_t *cb)
+xfs_log_notify(
+        struct xfs_mount        *mp,
+        struct xlog_in_core     *iclog,
+        xfs_log_callback_t      *cb)
 {
-        xlog_in_core_t    *iclog = (xlog_in_core_t *)iclog_hndl;
         int     abortflg;
 
         spin_lock(&iclog->ic_callback_lock);
@@ -457 +268 @@
         }
         spin_unlock(&iclog->ic_callback_lock);
         return abortflg;
-}       /* xfs_log_notify */
+}
 
 int
-xfs_log_release_iclog(xfs_mount_t *mp,
-                      void        *iclog_hndl)
+xfs_log_release_iclog(
+        struct xfs_mount        *mp,
+        struct xlog_in_core     *iclog)
 {
-        xlog_t *log = mp->m_log;
-        xlog_in_core_t    *iclog = (xlog_in_core_t *)iclog_hndl;
-
-        if (xlog_state_release_iclog(log, iclog)) {
+        if (xlog_state_release_iclog(mp->m_log, iclog)) {
                 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
                 return EIO;
         }
@@ -485 +294 @@
  * reservation, we prevent over allocation problems.
  */
 int
-xfs_log_reserve(xfs_mount_t      *mp,
-                int              unit_bytes,
-                int              cnt,
-                xfs_log_ticket_t *ticket,
-                __uint8_t        client,
-                uint             flags,
-                uint             t_type)
+xfs_log_reserve(
+        struct xfs_mount        *mp,
+        int                     unit_bytes,
+        int                     cnt,
+        struct xlog_ticket      **ticket,
+        __uint8_t               client,
+        uint                    flags,
+        uint                    t_type)
 {
-        xlog_t          *log = mp->m_log;
-        xlog_ticket_t   *internal_ticket;
-        int             retval = 0;
+        struct log              *log = mp->m_log;
+        struct xlog_ticket      *internal_ticket;
+        int                     retval = 0;
 
         ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
-        ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
 
         if (XLOG_FORCED_SHUTDOWN(log))
                 return XFS_ERROR(EIO);
 
         XFS_STATS_INC(xs_try_logspace);
 
+
         if (*ticket != NULL) {
                 ASSERT(flags & XFS_LOG_PERM_RESERV);
-                internal_ticket = (xlog_ticket_t *)*ticket;
-                xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
-                xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
+                internal_ticket = *ticket;
+
+                /*
+                 * this is a new transaction on the ticket, so we need to
+                 * change the transaction ID so that the next transaction has a
+                 * different TID in the log. Just add one to the existing tid
+                 * so that we can see chains of rolling transactions in the log
+                 * easily.
+                 */
+                internal_ticket->t_tid++;
+
+                trace_xfs_log_reserve(log, internal_ticket);
+
+                xlog_grant_push_ail(log, internal_ticket->t_unit_res);
                 retval = xlog_regrant_write_log_space(log, internal_ticket);
         } else {
                 /* may sleep if need to allocate more tickets */
                 internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
-                                                  client, flags);
+                                                  client, flags,
+                                                  KM_SLEEP|KM_MAYFAIL);
                 if (!internal_ticket)
                         return XFS_ERROR(ENOMEM);
                 internal_ticket->t_trans_type = t_type;
                 *ticket = internal_ticket;
-                xlog_trace_loggrant(log, internal_ticket, 
-                        (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
-                        "xfs_log_reserve: create new ticket (permanent trans)" :
-                        "xfs_log_reserve: create new ticket");
-                xlog_grant_push_ail(mp,
+
+                trace_xfs_log_reserve(log, internal_ticket);
+
+                xlog_grant_push_ail(log,
                                     (internal_ticket->t_unit_res *
                                      internal_ticket->t_cnt));
                 retval = xlog_grant_log_space(log, internal_ticket);
@@ -556 +377 @@
                 cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname);
         else {
                 cmn_err(CE_NOTE,
-                        "!Mounting filesystem \"%s\" in no-recovery mode.  Filesystem will be inconsistent.",
+                        "Mounting filesystem \"%s\" in no-recovery mode.  Filesystem will be inconsistent.",
                         mp->m_fsname);
                 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
         }
@@ -600 +421 @@
         /* Normal transactions can now occur */
         mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
 
+        /*
+         * Now the log has been fully initialised and we know were our
+         * space grant counters are, we can initialise the permanent ticket
+         * needed for delayed logging to work.
+         */
+        xlog_cil_init_post_recovery(mp->m_log);
+
         return 0;
 
 out_destroy_ail:
@@ -657 +485 @@
 #ifdef DEBUG
         xlog_in_core_t   *first_iclog;
 #endif
-        xfs_log_iovec_t  reg[1];
-        xfs_log_ticket_t tic = NULL;
+        xlog_ticket_t   *tic = NULL;
         xfs_lsn_t        lsn;
         int              error;
 
-        /* the data section must be 32 bit size aligned */
-        struct {
-            __uint16_t magic;
-            __uint16_t pad1;
-            __uint32_t pad2; /* may as well make it 64 bits */
-        } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
-
         /*
          * Don't write out unmount record on read-only mounts.
          * Or, if we are doing a forced umount (typically because of IO errors).
@@ -676 +496 @@
         if (mp->m_flags & XFS_MOUNT_RDONLY)
                 return 0;
 
-        error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+        error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
         ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 
 #ifdef DEBUG
@@ -690 +510 @@
         } while (iclog != first_iclog);
 #endif
         if (! (XLOG_FORCED_SHUTDOWN(log))) {
-                reg[0].i_addr = (void*)&magic;
-                reg[0].i_len  = sizeof(magic);
-                XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
-
                 error = xfs_log_reserve(mp, 600, 1, &tic,
                                         XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
                 if (!error) {
+                        /* the data section must be 32 bit size aligned */
+                        struct {
+                            __uint16_t magic;
+                            __uint16_t pad1;
+                            __uint32_t pad2; /* may as well make it 64 bits */
+                        } magic = {
+                                .magic = XLOG_UNMOUNT_TYPE,
+                        };
+                        struct xfs_log_iovec reg = {
+                                .i_addr = &magic,
+                                .i_len = sizeof(magic),
+                                .i_type = XLOG_REG_TYPE_UNMOUNT,
+                        };
+                        struct xfs_log_vec vec = {
+                                .lv_niovecs = 1,
+                                .lv_iovecp = &reg,
+                        };
+
                         /* remove inited flag */
-                        ((xlog_ticket_t *)tic)->t_flags = 0;
-                        error = xlog_write(mp, reg, 1, tic, &lsn,
+                        tic->t_flags = 0;
+                        error = xlog_write(log, &vec, tic, &lsn,
                                            NULL, XLOG_UNMOUNT_TRANS);
                         /*
                          * At this point, we're umounting anyway,
@@ -725 +559 @@
                 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
                       iclog->ic_state == XLOG_STATE_DIRTY)) {
                         if (!XLOG_FORCED_SHUTDOWN(log)) {
-                                sv_wait(&iclog->ic_force_wait, PMEM,
-                                        &log->l_icloglock, s);
+                                xlog_wait(&iclog->ic_force_wait,
+                                                        &log->l_icloglock);
                         } else {
                                 spin_unlock(&log->l_icloglock);
                         }
@@ -734 +568 @@
                         spin_unlock(&log->l_icloglock);
                 }
                 if (tic) {
-                        xlog_trace_loggrant(log, tic, "unmount rec");
+                        trace_xfs_log_umount_write(log, tic);
                         xlog_ungrant_log_space(log, tic);
                         xfs_log_ticket_put(tic);
                 }
@@ -766 +600 @@
                         || iclog->ic_state == XLOG_STATE_DIRTY
                         || iclog->ic_state == XLOG_STATE_IOERROR) ) {
 
-                                sv_wait(&iclog->ic_force_wait, PMEM,
-                                        &log->l_icloglock, s);
+                                xlog_wait(&iclog->ic_force_wait,
+                                                        &log->l_icloglock);
                 } else {
                         spin_unlock(&log->l_icloglock);
                 }
@@ -789 +623 @@
         xlog_dealloc_log(mp->m_log);
 }
 
+void
+xfs_log_item_init(
+        struct xfs_mount        *mp,
+        struct xfs_log_item     *item,
+        int                     type,
+        struct xfs_item_ops     *ops)
+{
+        item->li_mountp = mp;
+        item->li_ailp = mp->m_ail;
+        item->li_type = type;
+        item->li_ops = ops;
+        item->li_lv = NULL;
+
+        INIT_LIST_HEAD(&item->li_ail);
+        INIT_LIST_HEAD(&item->li_cil);
+}
+
 /*
  * Write region vectors to log.  The write happens using the space reservation
  * of the ticket (tic).  It is not a requirement that all writes for a given
- * transaction occur with one call to xfs_log_write().
+ * transaction occur with one call to xfs_log_write(). However, it is important
+ * to note that the transaction reservation code makes an assumption about the
+ * number of log headers a transaction requires that may be violated if you
+ * don't pass all the transaction vectors in one call....
  */
 int
-xfs_log_write(xfs_mount_t *     mp,
-              xfs_log_iovec_t   reg[],
-              int               nentries,
-              xfs_log_ticket_t  tic,
-              xfs_lsn_t         *start_lsn)
+xfs_log_write(
+        struct xfs_mount        *mp,
+        struct xfs_log_iovec    reg[],
+        int                     nentries,
+        struct xlog_ticket      *tic,
+        xfs_lsn_t               *start_lsn)
 {
-        int     error;
-        xlog_t *log = mp->m_log;
+        struct log              *log = mp->m_log;
+        int                     error;
+        struct xfs_log_vec      vec = {
+                .lv_niovecs = nentries,
+                .lv_iovecp = reg,
+        };
 
         if (XLOG_FORCED_SHUTDOWN(log))
                 return XFS_ERROR(EIO);
 
-        if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
+        error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
+        if (error)
                 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-        }
         return error;
-}       /* xfs_log_write */
-
+}
 
 void
 xfs_log_move_tail(xfs_mount_t   *mp,
@@ -820 +678 @@
 {
         xlog_ticket_t   *tic;
         xlog_t          *log = mp->m_log;
-        int             need_bytes, free_bytes, cycle, bytes;
+        int             need_bytes, free_bytes;
 
         if (XLOG_FORCED_SHUTDOWN(log))
                 return;
 
-        if (tail_lsn == 0) {
-                /* needed since sync_lsn is 64 bits */
-                spin_lock(&log->l_icloglock);
-                tail_lsn = log->l_last_sync_lsn;
-                spin_unlock(&log->l_icloglock);
-        }
-
-        spin_lock(&log->l_grant_lock);
-
-        /* Also an invalid lsn.  1 implies that we aren't passing in a valid
-         * tail_lsn.
-         */
-        if (tail_lsn != 1) {
-                log->l_tail_lsn = tail_lsn;
-        }
-
-        if ((tic = log->l_write_headq)) {
+        if (tail_lsn == 0)
+                tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+
+        /* tail_lsn == 1 implies that we weren't passed a valid value.  */
+        if (tail_lsn != 1)
+                atomic64_set(&log->l_tail_lsn, tail_lsn);
+
+        if (!list_empty_careful(&log->l_writeq)) {
 #ifdef DEBUG
                 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
                         panic("Recovery problem");
 #endif
-                cycle = log->l_grant_write_cycle;
-                bytes = log->l_grant_write_bytes;
-                free_bytes = xlog_space_left(log, cycle, bytes);
-                do {
+                spin_lock(&log->l_grant_write_lock);
+                free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+                list_for_each_entry(tic, &log->l_writeq, t_queue) {
                         ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
 
                         if (free_bytes < tic->t_unit_res && tail_lsn != 1)
                                 break;
                         tail_lsn = 0;
                         free_bytes -= tic->t_unit_res;
-                        sv_signal(&tic->t_wait);
-                        tic = tic->t_next;
-                } while (tic != log->l_write_headq);
+                        trace_xfs_log_regrant_write_wake_up(log, tic);
+                        wake_up(&tic->t_wait);
+                }
+                spin_unlock(&log->l_grant_write_lock);
         }
-        if ((tic = log->l_reserve_headq)) {
+
+        if (!list_empty_careful(&log->l_reserveq)) {
 #ifdef DEBUG
                 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
                         panic("Recovery problem");
 #endif
-                cycle = log->l_grant_reserve_cycle;
-                bytes = log->l_grant_reserve_bytes;
-                free_bytes = xlog_space_left(log, cycle, bytes);
-                do {
+                spin_lock(&log->l_grant_reserve_lock);
+                free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+                list_for_each_entry(tic, &log->l_reserveq, t_queue) {
                         if (tic->t_flags & XLOG_TIC_PERM_RESERV)
                                 need_bytes = tic->t_unit_res*tic->t_cnt;
                         else
@@ -877 +726 @@
                                 break;
                         tail_lsn = 0;
                         free_bytes -= need_bytes;
-                        sv_signal(&tic->t_wait);
-                        tic = tic->t_next;
-                } while (tic != log->l_reserve_headq);
+                        trace_xfs_log_grant_wake_up(log, tic);
+                        wake_up(&tic->t_wait);
+                }
+                spin_unlock(&log->l_grant_reserve_lock);
         }
-        spin_unlock(&log->l_grant_lock);
-}       /* xfs_log_move_tail */
+}
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurrs avoids replay
+ * of transactions where the metadata is already on disk.
  */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -900 +756 @@
                 return 0;
 
         spin_lock(&log->l_icloglock);
-        if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-                (log->l_covered_state == XLOG_STATE_COVER_NEED2))
-                        && !xfs_trans_ail_tail(log->l_ailp)
-                        && xlog_iclogs_empty(log)) {
-                if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-                        log->l_covered_state = XLOG_STATE_COVER_DONE;
-                else {
-                        ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-                        log->l_covered_state = XLOG_STATE_COVER_DONE2;
+        switch (log->l_covered_state) {
+        case XLOG_STATE_COVER_DONE:
+        case XLOG_STATE_COVER_DONE2:
+        case XLOG_STATE_COVER_IDLE:
+                break;
+        case XLOG_STATE_COVER_NEED:
+        case XLOG_STATE_COVER_NEED2:
+                if (!xfs_trans_ail_tail(log->l_ailp) &&
+                    xlog_iclogs_empty(log)) {
+                        if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+                                log->l_covered_state = XLOG_STATE_COVER_DONE;
+                        else
+                                log->l_covered_state = XLOG_STATE_COVER_DONE2;
                 }
+                /* FALLTHRU */
+        default:
                 needed = 1;
+                break;
         }
         spin_unlock(&log->l_icloglock);
         return needed;
@@ -934 +797 @@
  * We may be holding the log iclog lock upon entering this routine.
  */
 xfs_lsn_t
-xlog_assign_tail_lsn(xfs_mount_t *mp)
+xlog_assign_tail_lsn(
+        struct xfs_mount        *mp)
 {
-        xfs_lsn_t tail_lsn;
-        xlog_t    *log = mp->m_log;
+        xfs_lsn_t               tail_lsn;
+        struct log              *log = mp->m_log;
 
         tail_lsn = xfs_trans_ail_tail(mp->m_ail);
-        spin_lock(&log->l_grant_lock);
-        if (tail_lsn != 0) {
-                log->l_tail_lsn = tail_lsn;
-        } else {
-                tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
-        }
-        spin_unlock(&log->l_grant_lock);
+        if (!tail_lsn)
+                tail_lsn = atomic64_read(&log->l_last_sync_lsn);
 
+        atomic64_set(&log->l_tail_lsn, tail_lsn);
         return tail_lsn;
-}       /* xlog_assign_tail_lsn */
-
+}
 
 /*
  * Return the space in the log between the tail and the head.  The head
@@ -967 +826 @@
  * result is that we return the size of the log as the amount of space left.
  */
 STATIC int
-xlog_space_left(xlog_t *log, int cycle, int bytes)
+xlog_space_left(
+        struct log      *log,
+        atomic64_t      *head)
 {
-        int free_bytes;
-        int tail_bytes;
-        int tail_cycle;
+        int             free_bytes;
+        int             tail_bytes;
+        int             tail_cycle;
+        int             head_cycle;
+        int             head_bytes;
 
-        tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn));
-        tail_cycle = CYCLE_LSN(log->l_tail_lsn);
-        if ((tail_cycle == cycle) && (bytes >= tail_bytes)) {
-                free_bytes = log->l_logsize - (bytes - tail_bytes);
-        } else if ((tail_cycle + 1) < cycle) {
+        xlog_crack_grant_head(head, &head_cycle, &head_bytes);
+        xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
+        tail_bytes = BBTOB(tail_bytes);
+        if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
+                free_bytes = log->l_logsize - (head_bytes - tail_bytes);
+        else if (tail_cycle + 1 < head_cycle)
                 return 0;
-        } else if (tail_cycle < cycle) {
-                ASSERT(tail_cycle == (cycle - 1));
-                free_bytes = tail_bytes - bytes;
+        else if (tail_cycle < head_cycle) {
+                ASSERT(tail_cycle == (head_cycle - 1));
+                free_bytes = tail_bytes - head_bytes;
         } else {
                 /*
                  * The reservation head is behind the tail.
@@ -992 +856 @@
                         "xlog_space_left: head behind tail\n"
                         "  tail_cycle = %d, tail_bytes = %d\n"
                         "  GH   cycle = %d, GH   bytes = %d",
-                        tail_cycle, tail_bytes, cycle, bytes);
+                        tail_cycle, tail_bytes, head_cycle, head_bytes);
                 ASSERT(0);
                 free_bytes = log->l_logsize;
         }
         return free_bytes;
-}       /* xlog_space_left */
+}
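
A quick worked check of the xlog_space_left() arithmetic above (not part of the patch): with a 1000-byte log, a tail at cycle 3, byte 800 and a grant head at cycle 4, byte 200, the head is exactly one cycle ahead, so 800 - 200 = 600 bytes remain; when head and tail share a cycle, the free space is the log size minus the in-use span (head bytes - tail bytes). A minimal stand-alone sketch of that calculation, with illustrative names rather than the kernel helpers:

/* Free bytes in a circular log of 'logsize' bytes, given (cycle, bytes)
 * positions of the tail and of the reservation head.  Returns 0 when the
 * head has lapped the tail, and treats "head behind tail in the same
 * cycle" as a should-not-happen case, as the kernel code does. */
static int log_space_left(int logsize, int tail_cycle, int tail_bytes,
                          int head_cycle, int head_bytes)
{
        if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
                return logsize - (head_bytes - tail_bytes);
        if (tail_cycle + 1 < head_cycle)
                return 0;
        if (tail_cycle < head_cycle)
                return tail_bytes - head_bytes;   /* head wrapped, tail has not */
        return logsize;                           /* head behind tail: error */
}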
 
 
 /*
@@ -1020 +884 @@
         l = iclog->ic_log;
 
         /*
-         * If the _XFS_BARRIER_FAILED flag was set by a lower
-         * layer, it means the underlying device no longer supports
-         * barrier I/O. Warn loudly and turn off barriers.
-         */
-        if (bp->b_flags & _XFS_BARRIER_FAILED) {
-                bp->b_flags &= ~_XFS_BARRIER_FAILED;
-                l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                xfs_fs_cmn_err(CE_WARN, l->l_mp,
-                                "xlog_iodone: Barriers are no longer supported"
-                                " by device. Disabling barriers\n");
-                xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
-        }
-
-        /*
          * Race to shutdown the filesystem if we see an error.
          */
         if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
@@ -1063 +913 @@
 }       /* xlog_iodone */
 
 /*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- */
-STATIC int
-xlog_bdstrat_cb(struct xfs_buf *bp)
-{
-        xlog_in_core_t *iclog;
-
-        iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
-
-        if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
-          /* note for irix bstrat will need  struct bdevsw passed
-           * Fix the following macro if the code ever is merged
-           */
-            XFS_bdstrat(bp);
-                return 0;
-        }
-
-        xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
-        XFS_BUF_ERROR(bp, EIO);
-        XFS_BUF_STALE(bp);
-        xfs_biodone(bp);
-        return XFS_ERROR(EIO);
-
-
-}
-
-/*
  * Return size of each in-core log record buffer.
  *
  * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1178 +996 @@
         xlog_in_core_t          *iclog, *prev_iclog=NULL;
         xfs_buf_t               *bp;
         int                     i;
-        int                     iclogsize;
         int                     error = ENOMEM;
+        uint                    log2_size = 0;
 
         log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
         if (!log) {
@@ -1196 +1014 @@
         log->l_flags       |= XLOG_ACTIVE_RECOVERY;
 
         log->l_prev_block  = -1;
-        log->l_tail_lsn    = xlog_assign_lsn(1, 0);
         /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
-        log->l_last_sync_lsn = log->l_tail_lsn;
+        xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
+        xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
         log->l_curr_cycle  = 1;     /* 0 is bad since this is initial value */
-        log->l_grant_reserve_cycle = 1;
-        log->l_grant_write_cycle = 1;
+        xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
+        xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
+        INIT_LIST_HEAD(&log->l_reserveq);
+        INIT_LIST_HEAD(&log->l_writeq);
+        spin_lock_init(&log->l_grant_reserve_lock);
+        spin_lock_init(&log->l_grant_write_lock);
 
         error = EFSCORRUPTED;
         if (xfs_sb_version_hassector(&mp->m_sb)) {
-                log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
-                if (log->l_sectbb_log < 0 ||
-                    log->l_sectbb_log > mp->m_sectbb_log) {
-                        xlog_warn("XFS: Log sector size (0x%x) out of range.",
-                                                log->l_sectbb_log);
+                log2_size = mp->m_sb.sb_logsectlog;
+                if (log2_size < BBSHIFT) {
+                        xlog_warn("XFS: Log sector size too small "
+                                "(0x%x < 0x%x)", log2_size, BBSHIFT);
+                        goto out_free_log;
+                }
+
+                log2_size -= BBSHIFT;
+                if (log2_size > mp->m_sectbb_log) {
+                        xlog_warn("XFS: Log sector size too large "
+                                "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
                         goto out_free_log;
                 }
 
                 /* for larger sector sizes, must have v2 or external log */
-                if (log->l_sectbb_log != 0 &&
-                    (log->l_logBBstart != 0 &&
-                     !xfs_sb_version_haslogv2(&mp->m_sb))) {
+                if (log2_size && log->l_logBBstart > 0 &&
+                            !xfs_sb_version_haslogv2(&mp->m_sb)) {
+
                         xlog_warn("XFS: log sector size (0x%x) invalid "
-                                  "for configuration.", log->l_sectbb_log);
-                        goto out_free_log;
-                }
-                if (mp->m_sb.sb_logsectlog < BBSHIFT) {
-                        xlog_warn("XFS: Log sector log (0x%x) too small.",
-                                                mp->m_sb.sb_logsectlog);
+                                  "for configuration.", log2_size);
                         goto out_free_log;
                 }
         }
-        log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
+        log->l_sectBBsize = 1 << log2_size;
 
         xlog_get_iclog_buffer_size(mp, log);
 
@@ -1236 +1059 @@
         if (!bp)
                 goto out_free_log;
         XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-        XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
         XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
         ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
         log->l_xbuf = bp;
 
         spin_lock_init(&log->l_icloglock);
-        spin_lock_init(&log->l_grant_lock);
-        sv_init(&log->l_flush_wait, 0, "flush_wait");
+        init_waitqueue_head(&log->l_flush_wait);
 
-        xlog_trace_loggrant_alloc(log);
         /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
         ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
 
@@ -1258 +1078 @@
          * with different amounts of memory.  See the definition of
          * xlog_in_core_t in xfs_log_priv.h for details.
          */
-        iclogsize = log->l_iclog_size;
         ASSERT(log->l_iclog_size >= 4096);
         for (i=0; i < log->l_iclog_bufs; i++) {
                 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
@@ -1269 +1088 @@
                 iclog->ic_prev = prev_iclog;
                 prev_iclog = iclog;
 
-                bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+                bp = xfs_buf_get_uncached(mp->m_logdev_targp,
+                                                log->l_iclog_size, 0);
                 if (!bp)
                         goto out_free_iclog;
                 if (!XFS_BUF_CPSEMA(bp))
                         ASSERT(0);
                 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-                XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
                 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
                 iclog->ic_bp = bp;
                 iclog->ic_data = bp->b_addr;
@@ -1302 +1121 @@
 
                 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
                 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
-                sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
-                sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
-
-                xlog_trace_iclog_alloc(iclog);
+                init_waitqueue_head(&iclog->ic_force_wait);
+                init_waitqueue_head(&iclog->ic_write_wait);
 
                 iclogp = &iclog->ic_next;
         }
         *iclogp = log->l_iclog;                 /* complete ring */
         log->l_iclog->ic_prev = prev_iclog;     /* re-write 1st prev ptr */
 
+        error = xlog_cil_init(log);
+        if (error)
+                goto out_free_iclog;
         return log;
 
 out_free_iclog:
         for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
                 prev_iclog = iclog->ic_next;
-                if (iclog->ic_bp) {
-                        sv_destroy(&iclog->ic_force_wait);
-                        sv_destroy(&iclog->ic_write_wait);
+                if (iclog->ic_bp)
                         xfs_buf_free(iclog->ic_bp);
-                        xlog_trace_iclog_dealloc(iclog);
-                }
                 kmem_free(iclog);
         }
         spinlock_destroy(&log->l_icloglock);
-        spinlock_destroy(&log->l_grant_lock);
-        xlog_trace_loggrant_dealloc(log);
         xfs_buf_free(log->l_xbuf);
 out_free_log:
         kmem_free(log);
@@ -1341 +1155 @@
  * ticket.  Return the lsn of the commit record.
  */
 STATIC int
-xlog_commit_record(xfs_mount_t  *mp,
-                   xlog_ticket_t *ticket,
-                   xlog_in_core_t **iclog,
-                   xfs_lsn_t    *commitlsnp)
+xlog_commit_record(
+        struct log              *log,
+        struct xlog_ticket      *ticket,
+        struct xlog_in_core     **iclog,
+        xfs_lsn_t               *commitlsnp)
 {
-        int             error;
-        xfs_log_iovec_t reg[1];
-
-        reg[0].i_addr = NULL;
-        reg[0].i_len = 0;
-        XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
+        struct xfs_mount *mp = log->l_mp;
+        int     error;
+        struct xfs_log_iovec reg = {
+                .i_addr = NULL,
+                .i_len = 0,
+                .i_type = XLOG_REG_TYPE_COMMIT,
+        };
+        struct xfs_log_vec vec = {
+                .lv_niovecs = 1,
+                .lv_iovecp = &reg,
+        };
 
         ASSERT_ALWAYS(iclog);
-        if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
-                               iclog, XLOG_COMMIT_TRANS))) {
+        error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
+                                        XLOG_COMMIT_TRANS);
+        if (error)
                 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-        }
         return error;
-}       /* xlog_commit_record */
-
+}
 
 /*
  * Push on the buffer cache code if we ever use more than 75% of the on-disk
1370
1189
 * water mark.  In this manner, we would be creating a low water mark.
1371
1190
 */
1372
1191
STATIC void
1373
 
xlog_grant_push_ail(xfs_mount_t *mp,
1374
 
                    int         need_bytes)
 
1192
xlog_grant_push_ail(
 
1193
        struct log      *log,
 
1194
        int             need_bytes)
1375
1195
{
1376
 
    xlog_t      *log = mp->m_log;       /* pointer to the log */
1377
 
    xfs_lsn_t   tail_lsn;               /* lsn of the log tail */
1378
 
    xfs_lsn_t   threshold_lsn = 0;      /* lsn we'd like to be at */
1379
 
    int         free_blocks;            /* free blocks left to write to */
1380
 
    int         free_bytes;             /* free bytes left to write to */
1381
 
    int         threshold_block;        /* block in lsn we'd like to be at */
1382
 
    int         threshold_cycle;        /* lsn cycle we'd like to be at */
1383
 
    int         free_threshold;
1384
 
 
1385
 
    ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
1386
 
 
1387
 
    spin_lock(&log->l_grant_lock);
1388
 
    free_bytes = xlog_space_left(log,
1389
 
                                 log->l_grant_reserve_cycle,
1390
 
                                 log->l_grant_reserve_bytes);
1391
 
    tail_lsn = log->l_tail_lsn;
1392
 
    free_blocks = BTOBBT(free_bytes);
1393
 
 
1394
 
    /*
1395
 
     * Set the threshold for the minimum number of free blocks in the
1396
 
     * log to the maximum of what the caller needs, one quarter of the
1397
 
     * log, and 256 blocks.
1398
 
     */
1399
 
    free_threshold = BTOBB(need_bytes);
1400
 
    free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
1401
 
    free_threshold = MAX(free_threshold, 256);
1402
 
    if (free_blocks < free_threshold) {
1403
 
        threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
1404
 
        threshold_cycle = CYCLE_LSN(tail_lsn);
 
1196
        xfs_lsn_t       threshold_lsn = 0;
 
1197
        xfs_lsn_t       last_sync_lsn;
 
1198
        int             free_blocks;
 
1199
        int             free_bytes;
 
1200
        int             threshold_block;
 
1201
        int             threshold_cycle;
 
1202
        int             free_threshold;
 
1203
 
 
1204
        ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
 
1205
 
 
1206
        free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
 
1207
        free_blocks = BTOBBT(free_bytes);
 
1208
 
 
1209
        /*
 
1210
         * Set the threshold for the minimum number of free blocks in the
 
1211
         * log to the maximum of what the caller needs, one quarter of the
 
1212
         * log, and 256 blocks.
 
1213
         */
 
1214
        free_threshold = BTOBB(need_bytes);
 
1215
        free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
 
1216
        free_threshold = MAX(free_threshold, 256);
 
1217
        if (free_blocks >= free_threshold)
 
1218
                return;
 
1219
 
 
1220
        xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
 
1221
                                                &threshold_block);
 
1222
        threshold_block += free_threshold;
1405
1223
        if (threshold_block >= log->l_logBBsize) {
1406
 
            threshold_block -= log->l_logBBsize;
1407
 
            threshold_cycle += 1;
1408
 
        }
1409
 
        threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block);
1410
 
 
1411
 
        /* Don't pass in an lsn greater than the lsn of the last
1412
 
         * log record known to be on disk.
1413
 
         */
1414
 
        if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0)
1415
 
            threshold_lsn = log->l_last_sync_lsn;
1416
 
    }
1417
 
    spin_unlock(&log->l_grant_lock);
1418
 
 
1419
 
    /*
1420
 
     * Get the transaction layer to kick the dirty buffers out to
1421
 
     * disk asynchronously. No point in trying to do this if
1422
 
     * the filesystem is shutting down.
1423
 
     */
1424
 
    if (threshold_lsn &&
1425
 
        !XLOG_FORCED_SHUTDOWN(log))
1426
 
            xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1427
 
}       /* xlog_grant_push_ail */
1428
 
 
 
1224
                threshold_block -= log->l_logBBsize;
 
1225
                threshold_cycle += 1;
 
1226
        }
 
1227
        threshold_lsn = xlog_assign_lsn(threshold_cycle,
 
1228
                                        threshold_block);
 
1229
        /*
 
1230
         * Don't pass in an lsn greater than the lsn of the last
 
1231
         * log record known to be on disk. Use a snapshot of the last sync lsn
 
1232
         * so that it doesn't change between the compare and the set.
 
1233
         */
 
1234
        last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
 
1235
        if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
 
1236
                threshold_lsn = last_sync_lsn;
 
1237
 
 
1238
        /*
 
1239
         * Get the transaction layer to kick the dirty buffers out to
 
1240
         * disk asynchronously. No point in trying to do this if
 
1241
         * the filesystem is shutting down.
 
1242
         */
 
1243
        if (!XLOG_FORCED_SHUTDOWN(log))
 
1244
                xfs_trans_ail_push(log->l_ailp, threshold_lsn);
 
1245
}
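The push target computed above is pure LSN arithmetic: keep at least max(need_bytes, a quarter of the log, 256 blocks) of free space, wrap the target block around the end of the log, and never push past the last LSN known to be on disk. Below is a minimal standalone C sketch of that calculation; lsn_cycle(), lsn_block(), lsn_assign() and the parameter names are invented stand-ins for the kernel's LSN macros, not code from this file.

#include <stdint.h>

/* Illustrative stand-ins for the kernel LSN helpers: an LSN packs a 32-bit
 * cycle number in the high word and a block number in the low word. */
static inline uint32_t lsn_cycle(uint64_t lsn) { return (uint32_t)(lsn >> 32); }
static inline uint32_t lsn_block(uint64_t lsn) { return (uint32_t)lsn; }
static inline uint64_t lsn_assign(uint32_t cycle, uint32_t block)
{
	return ((uint64_t)cycle << 32) | block;
}

/* Compute the LSN the AIL should be pushed to: free space must stay above
 * max(need, log_size/4, 256) blocks, the target wraps around the end of
 * the log, and it is clamped to the last LSN known to be on disk. */
static uint64_t push_threshold(uint64_t tail_lsn, uint64_t last_sync_lsn,
			       uint32_t log_size_blocks, uint32_t need_blocks,
			       uint32_t free_blocks)
{
	uint32_t threshold = need_blocks;

	if (threshold < (log_size_blocks >> 2))
		threshold = log_size_blocks >> 2;
	if (threshold < 256)
		threshold = 256;
	if (free_blocks >= threshold)
		return 0;			/* enough space, nothing to push */

	uint32_t cycle = lsn_cycle(tail_lsn);
	uint32_t block = lsn_block(tail_lsn) + threshold;
	if (block >= log_size_blocks) {		/* target wrapped past the end */
		block -= log_size_blocks;
		cycle++;
	}
	uint64_t target = lsn_assign(cycle, block);
	return (target > last_sync_lsn) ? last_sync_lsn : target;
}

With a large log the quarter-of-log term usually dominates, so a push is only issued once less than a quarter of the log remains free.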
 
1246
 
 
1247
/*
 
1248
 * The bdstrat callback function for log bufs. This gives us a central
 
1249
 * place to trap bufs in case we get hit by a log I/O error and need to
 
1250
 * shutdown. Actually, in practice, even when we didn't get a log error,
 
1251
 * we transition the iclogs to IOERROR state *after* flushing all existing
 
1252
 * iclogs to disk. This is because we don't want anymore new transactions to be
 * iclogs to disk. This is because we don't want any more new transactions to be
 
1253
 * started or completed afterwards.
 
1254
 */
 
1255
STATIC int
 
1256
xlog_bdstrat(
 
1257
        struct xfs_buf          *bp)
 
1258
{
 
1259
        struct xlog_in_core     *iclog;
 
1260
 
 
1261
        iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
 
1262
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
 
1263
                XFS_BUF_ERROR(bp, EIO);
 
1264
                XFS_BUF_STALE(bp);
 
1265
                xfs_buf_ioend(bp, 0);
 
1266
                /*
 
1267
                 * It would seem logical to return EIO here, but we rely on
 
1268
                 * the log state machine to propagate I/O errors instead of
 
1269
                 * doing it here.
 
1270
                 */
 
1271
                return 0;
 
1272
        }
 
1273
 
 
1274
        bp->b_flags |= _XBF_RUN_QUEUES;
 
1275
        xfs_buf_iorequest(bp);
 
1276
        return 0;
 
1277
}
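The error trap in the new callback is easiest to see stripped of the buffer API details. The sketch below is illustrative only: log_buffer, buffer_complete() and buffer_submit() are invented stand-ins, and the point is merely the shape of the trap: fail the buffer locally, run the normal completion path, and return 0 so the log state machine (not the caller) propagates the error.

#include <errno.h>

enum { STATE_ACTIVE = 1, STATE_IOERROR = 2 };

struct log_buffer {
	int state;	/* owning in-core log state */
	int error;	/* completion error code */
	int stale;	/* contents must not be reused */
};

/* Hypothetical completion hook standing in for the real I/O-done path. */
static void buffer_complete(struct log_buffer *bp) { (void)bp; }
/* Hypothetical submission hook standing in for the real I/O request. */
static void buffer_submit(struct log_buffer *bp) { (void)bp; }

/* If the log is already in the error state, fail the buffer without
 * touching the disk and run the completion path; the caller still sees
 * success because errors are reported through the state machine. */
static int log_buffer_strat(struct log_buffer *bp)
{
	if (bp->state & STATE_IOERROR) {
		bp->error = EIO;
		bp->stale = 1;
		buffer_complete(bp);
		return 0;
	}
	buffer_submit(bp);
	return 0;
}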
1429
1278
 
1430
1279
/*
1431
1280
 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous 
1488
1337
                 roundoff < BBTOB(1)));
1489
1338
 
1490
1339
        /* move grant heads by roundoff in sync */
1491
 
        spin_lock(&log->l_grant_lock);
1492
 
        xlog_grant_add_space(log, roundoff);
1493
 
        spin_unlock(&log->l_grant_lock);
 
1340
        xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
 
1341
        xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
1494
1342
 
1495
1343
        /* put cycle number in every block */
1496
1344
        xlog_pack_data(log, iclog, roundoff); 
1524
1372
        XFS_BUF_ZEROFLAGS(bp);
1525
1373
        XFS_BUF_BUSY(bp);
1526
1374
        XFS_BUF_ASYNC(bp);
1527
 
        /*
1528
 
         * Do an ordered write for the log block.
1529
 
         * Its unnecessary to flush the first split block in the log wrap case.
1530
 
         */
1531
 
        if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
 
1375
        bp->b_flags |= XBF_LOG_BUFFER;
 
1376
 
 
1377
        if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1532
1378
                XFS_BUF_ORDERED(bp);
1533
1379
 
1534
1380
        ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1544
1390
         */
1545
1391
        XFS_BUF_WRITE(bp);
1546
1392
 
1547
 
        if ((error = XFS_bwrite(bp))) {
 
1393
        if ((error = xlog_bdstrat(bp))) {
1548
1394
                xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
1549
1395
                                  XFS_BUF_ADDR(bp));
1550
1396
                return error;
1561
1407
                XFS_BUF_ZEROFLAGS(bp);
1562
1408
                XFS_BUF_BUSY(bp);
1563
1409
                XFS_BUF_ASYNC(bp);
 
1410
                bp->b_flags |= XBF_LOG_BUFFER;
1564
1411
                if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1565
1412
                        XFS_BUF_ORDERED(bp);
1566
1413
                dptr = XFS_BUF_PTR(bp);
1583
1430
                /* account for internal log which doesn't start at block #0 */
1584
1431
                XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1585
1432
                XFS_BUF_WRITE(bp);
1586
 
                if ((error = XFS_bwrite(bp))) {
 
1433
                if ((error = xlog_bdstrat(bp))) {
1587
1434
                        xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
1588
1435
                                          bp, XFS_BUF_ADDR(bp));
1589
1436
                        return error;
1602
1449
        xlog_in_core_t  *iclog, *next_iclog;
1603
1450
        int             i;
1604
1451
 
 
1452
        xlog_cil_destroy(log);
 
1453
 
1605
1454
        iclog = log->l_iclog;
1606
1455
        for (i=0; i<log->l_iclog_bufs; i++) {
1607
 
                sv_destroy(&iclog->ic_force_wait);
1608
 
                sv_destroy(&iclog->ic_write_wait);
1609
1456
                xfs_buf_free(iclog->ic_bp);
1610
 
                xlog_trace_iclog_dealloc(iclog);
1611
1457
                next_iclog = iclog->ic_next;
1612
1458
                kmem_free(iclog);
1613
1459
                iclog = next_iclog;
1614
1460
        }
1615
1461
        spinlock_destroy(&log->l_icloglock);
1616
 
        spinlock_destroy(&log->l_grant_lock);
1617
1462
 
1618
1463
        xfs_buf_free(log->l_xbuf);
1619
 
        xlog_trace_loggrant_dealloc(log);
1620
1464
        log->l_mp->m_log = NULL;
1621
1465
        kmem_free(log);
1622
1466
}       /* xlog_dealloc_log */
1646
1490
 * print out info relating to regions written which consume
1647
1491
 * the reservation
1648
1492
 */
1649
 
STATIC void
1650
 
xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 
1493
void
 
1494
xlog_print_tic_res(
 
1495
        struct xfs_mount        *mp,
 
1496
        struct xlog_ticket      *ticket)
1651
1497
{
1652
1498
        uint i;
1653
1499
        uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
1747
1593
                            "bad-rtype" : res_type_str[r_type-1]),
1748
1594
                            ticket->t_res_arr[i].r_len);
1749
1595
        }
 
1596
 
 
1597
        xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
 
1598
                "xfs_log_write: reservation ran out. Need to up reservation");
 
1599
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 
1600
}
 
1601
 
 
1602
/*
 
1603
 * Calculate the potential space needed by the log vector.  Each region gets
 
1604
 * its own xlog_op_header_t and may need to be double word aligned.
 
1605
 */
 
1606
static int
 
1607
xlog_write_calc_vec_length(
 
1608
        struct xlog_ticket      *ticket,
 
1609
        struct xfs_log_vec      *log_vector)
 
1610
{
 
1611
        struct xfs_log_vec      *lv;
 
1612
        int                     headers = 0;
 
1613
        int                     len = 0;
 
1614
        int                     i;
 
1615
 
 
1616
        /* acct for start rec of xact */
 
1617
        if (ticket->t_flags & XLOG_TIC_INITED)
 
1618
                headers++;
 
1619
 
 
1620
        for (lv = log_vector; lv; lv = lv->lv_next) {
 
1621
                headers += lv->lv_niovecs;
 
1622
 
 
1623
                for (i = 0; i < lv->lv_niovecs; i++) {
 
1624
                        struct xfs_log_iovec    *vecp = &lv->lv_iovecp[i];
 
1625
 
 
1626
                        len += vecp->i_len;
 
1627
                        xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
 
1628
                }
 
1629
        }
 
1630
 
 
1631
        ticket->t_res_num_ophdrs += headers;
 
1632
        len += headers * sizeof(struct xlog_op_header);
 
1633
 
 
1634
        return len;
 
1635
}
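The calculation above can be read as a small formula: total length = sum of all region lengths + (number of regions, plus one if a start record is needed) * op-header size. A self-contained sketch follows, with hypothetical types standing in for xfs_log_vec and xfs_log_iovec.

#include <stddef.h>

struct iovec_entry { size_t len; };

struct log_vec {
	struct log_vec *next;
	int niovecs;
	struct iovec_entry *iovecs;
};

/* Total log space a chain of vectors can consume: every region is preceded
 * by one operation header, plus one extra header when a start record must
 * still be written for this transaction. */
static size_t calc_vec_length(struct log_vec *head, int need_start_rec,
			      size_t ophdr_size)
{
	size_t len = 0;
	int headers = need_start_rec ? 1 : 0;

	for (struct log_vec *lv = head; lv; lv = lv->next) {
		headers += lv->niovecs;
		for (int i = 0; i < lv->niovecs; i++)
			len += lv->iovecs[i].len;
	}
	return len + headers * ophdr_size;
}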
 
1636
 
 
1637
/*
 
1638
 * If first write for transaction, insert start record.  We can't be trying to
 
1639
 * commit if we are inited.  We can't have any "partial_copy" if we are inited.
 
1640
 */
 
1641
static int
 
1642
xlog_write_start_rec(
 
1643
        struct xlog_op_header   *ophdr,
 
1644
        struct xlog_ticket      *ticket)
 
1645
{
 
1646
        if (!(ticket->t_flags & XLOG_TIC_INITED))
 
1647
                return 0;
 
1648
 
 
1649
        ophdr->oh_tid   = cpu_to_be32(ticket->t_tid);
 
1650
        ophdr->oh_clientid = ticket->t_clientid;
 
1651
        ophdr->oh_len = 0;
 
1652
        ophdr->oh_flags = XLOG_START_TRANS;
 
1653
        ophdr->oh_res2 = 0;
 
1654
 
 
1655
        ticket->t_flags &= ~XLOG_TIC_INITED;
 
1656
 
 
1657
        return sizeof(struct xlog_op_header);
 
1658
}
 
1659
 
 
1660
static xlog_op_header_t *
 
1661
xlog_write_setup_ophdr(
 
1662
        struct log              *log,
 
1663
        struct xlog_op_header   *ophdr,
 
1664
        struct xlog_ticket      *ticket,
 
1665
        uint                    flags)
 
1666
{
 
1667
        ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
 
1668
        ophdr->oh_clientid = ticket->t_clientid;
 
1669
        ophdr->oh_res2 = 0;
 
1670
 
 
1671
        /* are we copying a commit or unmount record? */
 
1672
        ophdr->oh_flags = flags;
 
1673
 
 
1674
        /*
 
1675
         * We've seen logs corrupted with bad transaction client ids.  This
 
1676
	 * makes sure that XFS doesn't generate them.  Turn this into an EIO
 
1677
         * and shut down the filesystem.
 
1678
         */
 
1679
        switch (ophdr->oh_clientid)  {
 
1680
        case XFS_TRANSACTION:
 
1681
        case XFS_VOLUME:
 
1682
        case XFS_LOG:
 
1683
                break;
 
1684
        default:
 
1685
                xfs_fs_cmn_err(CE_WARN, log->l_mp,
 
1686
                        "Bad XFS transaction clientid 0x%x in ticket 0x%p",
 
1687
                        ophdr->oh_clientid, ticket);
 
1688
                return NULL;
 
1689
        }
 
1690
 
 
1691
        return ophdr;
 
1692
}
 
1693
 
 
1694
/*
 
1695
 * Set up the parameters of the region copy into the log. This has
 
1696
 * to handle region write split across multiple log buffers - this
 
1697
 * state is kept external to this function so that this code can
 
1698
 * be written in an obvious, self-documenting manner.
 
1699
 */
 
1700
static int
 
1701
xlog_write_setup_copy(
 
1702
        struct xlog_ticket      *ticket,
 
1703
        struct xlog_op_header   *ophdr,
 
1704
        int                     space_available,
 
1705
        int                     space_required,
 
1706
        int                     *copy_off,
 
1707
        int                     *copy_len,
 
1708
        int                     *last_was_partial_copy,
 
1709
        int                     *bytes_consumed)
 
1710
{
 
1711
        int                     still_to_copy;
 
1712
 
 
1713
        still_to_copy = space_required - *bytes_consumed;
 
1714
        *copy_off = *bytes_consumed;
 
1715
 
 
1716
        if (still_to_copy <= space_available) {
 
1717
                /* write of region completes here */
 
1718
                *copy_len = still_to_copy;
 
1719
                ophdr->oh_len = cpu_to_be32(*copy_len);
 
1720
                if (*last_was_partial_copy)
 
1721
                        ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
 
1722
                *last_was_partial_copy = 0;
 
1723
                *bytes_consumed = 0;
 
1724
                return 0;
 
1725
        }
 
1726
 
 
1727
        /* partial write of region, needs extra log op header reservation */
 
1728
        *copy_len = space_available;
 
1729
        ophdr->oh_len = cpu_to_be32(*copy_len);
 
1730
        ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
 
1731
        if (*last_was_partial_copy)
 
1732
                ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
 
1733
        *bytes_consumed += *copy_len;
 
1734
        (*last_was_partial_copy)++;
 
1735
 
 
1736
        /* account for new log op header */
 
1737
        ticket->t_curr_res -= sizeof(struct xlog_op_header);
 
1738
        ticket->t_res_num_ophdrs++;
 
1739
 
 
1740
        return sizeof(struct xlog_op_header);
 
1741
}
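The split logic above reduces to comparing what is left of the region with what is left of the log buffer. The standalone sketch below mirrors that decision; the parameter names are illustrative and the flag handling (XLOG_CONTINUE_TRANS and friends) is omitted.

#include <stddef.h>

/* One step of splitting a region across log buffers.  Given the bytes left
 * in the current buffer and the bytes of this region already written,
 * decide how much to copy now and whether the region is still partial.
 * Returns the extra header bytes the caller must add to its running length
 * (non-zero only when the region is split and a continuation header will
 * be needed in the next buffer). */
static size_t setup_copy(size_t space_available, size_t space_required,
			 size_t *bytes_consumed, size_t *copy_off,
			 size_t *copy_len, int *partial, size_t ophdr_size)
{
	size_t still_to_copy = space_required - *bytes_consumed;

	*copy_off = *bytes_consumed;
	if (still_to_copy <= space_available) {
		/* region finishes in this buffer */
		*copy_len = still_to_copy;
		*bytes_consumed = 0;
		*partial = 0;
		return 0;
	}
	/* region is split: fill the buffer and pay for one more op header */
	*copy_len = space_available;
	*bytes_consumed += *copy_len;
	(*partial)++;
	return ophdr_size;
}

For example, a 600-byte region with 256 bytes left in the buffer is written as a 256-byte continuation now and a 344-byte completion in the next buffer, at the cost of one extra op header charged to the ticket.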
 
1742
 
 
1743
static int
 
1744
xlog_write_copy_finish(
 
1745
        struct log              *log,
 
1746
        struct xlog_in_core     *iclog,
 
1747
        uint                    flags,
 
1748
        int                     *record_cnt,
 
1749
        int                     *data_cnt,
 
1750
        int                     *partial_copy,
 
1751
        int                     *partial_copy_len,
 
1752
        int                     log_offset,
 
1753
        struct xlog_in_core     **commit_iclog)
 
1754
{
 
1755
        if (*partial_copy) {
 
1756
                /*
 
1757
                 * This iclog has already been marked WANT_SYNC by
 
1758
                 * xlog_state_get_iclog_space.
 
1759
                 */
 
1760
                xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
 
1761
                *record_cnt = 0;
 
1762
                *data_cnt = 0;
 
1763
                return xlog_state_release_iclog(log, iclog);
 
1764
        }
 
1765
 
 
1766
        *partial_copy = 0;
 
1767
        *partial_copy_len = 0;
 
1768
 
 
1769
        if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
 
1770
                /* no more space in this iclog - push it. */
 
1771
                xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
 
1772
                *record_cnt = 0;
 
1773
                *data_cnt = 0;
 
1774
 
 
1775
                spin_lock(&log->l_icloglock);
 
1776
                xlog_state_want_sync(log, iclog);
 
1777
                spin_unlock(&log->l_icloglock);
 
1778
 
 
1779
                if (!commit_iclog)
 
1780
                        return xlog_state_release_iclog(log, iclog);
 
1781
                ASSERT(flags & XLOG_COMMIT_TRANS);
 
1782
                *commit_iclog = iclog;
 
1783
        }
 
1784
 
 
1785
        return 0;
1750
1786
}
1751
1787
 
1752
1788
/*
1789
1825
 *      we don't update ic_offset until the end when we know exactly how many
1790
1826
 *      bytes have been written out.
1791
1827
 */
1792
 
STATIC int
1793
 
xlog_write(xfs_mount_t *        mp,
1794
 
           xfs_log_iovec_t      reg[],
1795
 
           int                  nentries,
1796
 
           xfs_log_ticket_t     tic,
1797
 
           xfs_lsn_t            *start_lsn,
1798
 
           xlog_in_core_t       **commit_iclog,
1799
 
           uint                 flags)
 
1828
int
 
1829
xlog_write(
 
1830
        struct log              *log,
 
1831
        struct xfs_log_vec      *log_vector,
 
1832
        struct xlog_ticket      *ticket,
 
1833
        xfs_lsn_t               *start_lsn,
 
1834
        struct xlog_in_core     **commit_iclog,
 
1835
        uint                    flags)
1800
1836
{
1801
 
    xlog_t           *log = mp->m_log;
1802
 
    xlog_ticket_t    *ticket = (xlog_ticket_t *)tic;
1803
 
    xlog_in_core_t   *iclog = NULL;  /* ptr to current in-core log */
1804
 
    xlog_op_header_t *logop_head;    /* ptr to log operation header */
1805
 
    __psint_t        ptr;            /* copy address into data region */
1806
 
    int              len;            /* # xlog_write() bytes 2 still copy */
1807
 
    int              index;          /* region index currently copying */
1808
 
    int              log_offset;     /* offset (from 0) into data region */
1809
 
    int              start_rec_copy; /* # bytes to copy for start record */
1810
 
    int              partial_copy;   /* did we split a region? */
1811
 
    int              partial_copy_len;/* # bytes copied if split region */
1812
 
    int              need_copy;      /* # bytes need to memcpy this region */
1813
 
    int              copy_len;       /* # bytes actually memcpy'ing */
1814
 
    int              copy_off;       /* # bytes from entry start */
1815
 
    int              contwr;         /* continued write of in-core log? */
1816
 
    int              error;
1817
 
    int              record_cnt = 0, data_cnt = 0;
1818
 
 
1819
 
    partial_copy_len = partial_copy = 0;
1820
 
 
1821
 
    /* Calculate potential maximum space.  Each region gets its own
1822
 
     * xlog_op_header_t and may need to be double word aligned.
1823
 
     */
1824
 
    len = 0;
1825
 
    if (ticket->t_flags & XLOG_TIC_INITED) {    /* acct for start rec of xact */
1826
 
        len += sizeof(xlog_op_header_t);
1827
 
        ticket->t_res_num_ophdrs++;
1828
 
    }
1829
 
 
1830
 
    for (index = 0; index < nentries; index++) {
1831
 
        len += sizeof(xlog_op_header_t);            /* each region gets >= 1 */
1832
 
        ticket->t_res_num_ophdrs++;
1833
 
        len += reg[index].i_len;
1834
 
        xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type);
1835
 
    }
1836
 
    contwr = *start_lsn = 0;
1837
 
 
1838
 
    if (ticket->t_curr_res < len) {
1839
 
        xlog_print_tic_res(mp, ticket);
1840
 
#ifdef DEBUG
1841
 
        xlog_panic(
1842
 
                "xfs_log_write: reservation ran out. Need to up reservation");
1843
 
#else
1844
 
        /* Customer configurable panic */
1845
 
        xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
1846
 
                "xfs_log_write: reservation ran out. Need to up reservation");
1847
 
        /* If we did not panic, shutdown the filesystem */
1848
 
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1849
 
#endif
1850
 
    } else
1851
 
        ticket->t_curr_res -= len;
1852
 
 
1853
 
    for (index = 0; index < nentries; ) {
1854
 
        if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
1855
 
                                               &contwr, &log_offset)))
1856
 
                return error;
1857
 
 
1858
 
        ASSERT(log_offset <= iclog->ic_size - 1);
1859
 
        ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset);
1860
 
 
1861
 
        /* start_lsn is the first lsn written to. That's all we need. */
1862
 
        if (! *start_lsn)
1863
 
            *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
1864
 
 
1865
 
        /* This loop writes out as many regions as can fit in the amount
1866
 
         * of space which was allocated by xlog_state_get_iclog_space().
1867
 
         */
1868
 
        while (index < nentries) {
1869
 
            ASSERT(reg[index].i_len % sizeof(__int32_t) == 0);
1870
 
            ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0);
1871
 
            start_rec_copy = 0;
1872
 
 
1873
 
            /* If first write for transaction, insert start record.
1874
 
             * We can't be trying to commit if we are inited.  We can't
1875
 
             * have any "partial_copy" if we are inited.
1876
 
             */
1877
 
            if (ticket->t_flags & XLOG_TIC_INITED) {
1878
 
                logop_head              = (xlog_op_header_t *)ptr;
1879
 
                logop_head->oh_tid      = cpu_to_be32(ticket->t_tid);
1880
 
                logop_head->oh_clientid = ticket->t_clientid;
1881
 
                logop_head->oh_len      = 0;
1882
 
                logop_head->oh_flags    = XLOG_START_TRANS;
1883
 
                logop_head->oh_res2     = 0;
1884
 
                ticket->t_flags         &= ~XLOG_TIC_INITED;    /* clear bit */
1885
 
                record_cnt++;
1886
 
 
1887
 
                start_rec_copy = sizeof(xlog_op_header_t);
1888
 
                xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy);
1889
 
            }
1890
 
 
1891
 
            /* Copy log operation header directly into data section */
1892
 
            logop_head                  = (xlog_op_header_t *)ptr;
1893
 
            logop_head->oh_tid          = cpu_to_be32(ticket->t_tid);
1894
 
            logop_head->oh_clientid     = ticket->t_clientid;
1895
 
            logop_head->oh_res2         = 0;
1896
 
 
1897
 
            /* header copied directly */
1898
 
            xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t));
1899
 
 
1900
 
            /* are we copying a commit or unmount record? */
1901
 
            logop_head->oh_flags = flags;
1902
 
 
1903
 
            /*
1904
 
             * We've seen logs corrupted with bad transaction client
1905
 
             * ids.  This makes sure that XFS doesn't generate them on.
1906
 
             * Turn this into an EIO and shut down the filesystem.
1907
 
             */
1908
 
            switch (logop_head->oh_clientid)  {
1909
 
            case XFS_TRANSACTION:
1910
 
            case XFS_VOLUME:
1911
 
            case XFS_LOG:
1912
 
                break;
1913
 
            default:
1914
 
                xfs_fs_cmn_err(CE_WARN, mp,
1915
 
                    "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1916
 
                    logop_head->oh_clientid, tic);
1917
 
                return XFS_ERROR(EIO);
1918
 
            }
1919
 
 
1920
 
            /* Partial write last time? => (partial_copy != 0)
1921
 
             * need_copy is the amount we'd like to copy if everything could
1922
 
             * fit in the current memcpy.
1923
 
             */
1924
 
            need_copy = reg[index].i_len - partial_copy_len;
1925
 
 
1926
 
            copy_off = partial_copy_len;
1927
 
            if (need_copy <= iclog->ic_size - log_offset) { /*complete write */
1928
 
                copy_len = need_copy;
1929
 
                logop_head->oh_len = cpu_to_be32(copy_len);
1930
 
                if (partial_copy)
1931
 
                    logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
1932
 
                partial_copy_len = partial_copy = 0;
1933
 
            } else {                                        /* partial write */
1934
 
                copy_len = iclog->ic_size - log_offset;
1935
 
                logop_head->oh_len = cpu_to_be32(copy_len);
1936
 
                logop_head->oh_flags |= XLOG_CONTINUE_TRANS;
1937
 
                if (partial_copy)
1938
 
                        logop_head->oh_flags |= XLOG_WAS_CONT_TRANS;
1939
 
                partial_copy_len += copy_len;
1940
 
                partial_copy++;
1941
 
                len += sizeof(xlog_op_header_t); /* from splitting of region */
1942
 
                /* account for new log op header */
1943
 
                ticket->t_curr_res -= sizeof(xlog_op_header_t);
1944
 
                ticket->t_res_num_ophdrs++;
1945
 
            }
1946
 
            xlog_verify_dest_ptr(log, ptr);
1947
 
 
1948
 
            /* copy region */
1949
 
            ASSERT(copy_len >= 0);
1950
 
            memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
1951
 
            xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
1952
 
 
1953
 
            /* make copy_len total bytes copied, including headers */
1954
 
            copy_len += start_rec_copy + sizeof(xlog_op_header_t);
1955
 
            record_cnt++;
1956
 
            data_cnt += contwr ? copy_len : 0;
1957
 
            if (partial_copy) {                 /* copied partial region */
1958
 
                    /* already marked WANT_SYNC by xlog_state_get_iclog_space */
1959
 
                    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1960
 
                    record_cnt = data_cnt = 0;
1961
 
                    if ((error = xlog_state_release_iclog(log, iclog)))
1962
 
                            return error;
1963
 
                    break;                      /* don't increment index */
1964
 
            } else {                            /* copied entire region */
1965
 
                index++;
1966
 
                partial_copy_len = partial_copy = 0;
1967
 
 
1968
 
                if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1969
 
                    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1970
 
                    record_cnt = data_cnt = 0;
1971
 
                    spin_lock(&log->l_icloglock);
1972
 
                    xlog_state_want_sync(log, iclog);
1973
 
                    spin_unlock(&log->l_icloglock);
1974
 
                    if (commit_iclog) {
1975
 
                        ASSERT(flags & XLOG_COMMIT_TRANS);
1976
 
                        *commit_iclog = iclog;
1977
 
                    } else if ((error = xlog_state_release_iclog(log, iclog)))
1978
 
                           return error;
1979
 
                    if (index == nentries)
1980
 
                            return 0;           /* we are done */
1981
 
                    else
1982
 
                            break;
 
1837
        struct xlog_in_core     *iclog = NULL;
 
1838
        struct xfs_log_iovec    *vecp;
 
1839
        struct xfs_log_vec      *lv;
 
1840
        int                     len;
 
1841
        int                     index;
 
1842
        int                     partial_copy = 0;
 
1843
        int                     partial_copy_len = 0;
 
1844
        int                     contwr = 0;
 
1845
        int                     record_cnt = 0;
 
1846
        int                     data_cnt = 0;
 
1847
        int                     error;
 
1848
 
 
1849
        *start_lsn = 0;
 
1850
 
 
1851
        len = xlog_write_calc_vec_length(ticket, log_vector);
 
1852
        if (log->l_cilp) {
 
1853
                /*
 
1854
                 * Region headers and bytes are already accounted for.
 
1855
                 * We only need to take into account start records and
 
1856
                 * split regions in this function.
 
1857
                 */
 
1858
                if (ticket->t_flags & XLOG_TIC_INITED)
 
1859
                        ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
1860
 
 
1861
                /*
 
1862
                 * Commit record headers need to be accounted for. These
 
1863
                 * come in as separate writes so are easy to detect.
 
1864
                 */
 
1865
                if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
 
1866
                        ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
1867
        } else
 
1868
                ticket->t_curr_res -= len;
 
1869
 
 
1870
        if (ticket->t_curr_res < 0)
 
1871
                xlog_print_tic_res(log->l_mp, ticket);
 
1872
 
 
1873
        index = 0;
 
1874
        lv = log_vector;
 
1875
        vecp = lv->lv_iovecp;
 
1876
        while (lv && index < lv->lv_niovecs) {
 
1877
                void            *ptr;
 
1878
                int             log_offset;
 
1879
 
 
1880
                error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
 
1881
                                                   &contwr, &log_offset);
 
1882
                if (error)
 
1883
                        return error;
 
1884
 
 
1885
                ASSERT(log_offset <= iclog->ic_size - 1);
 
1886
                ptr = iclog->ic_datap + log_offset;
 
1887
 
 
1888
                /* start_lsn is the first lsn written to. That's all we need. */
 
1889
                if (!*start_lsn)
 
1890
                        *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 
1891
 
 
1892
                /*
 
1893
                 * This loop writes out as many regions as can fit in the amount
 
1894
                 * of space which was allocated by xlog_state_get_iclog_space().
 
1895
                 */
 
1896
                while (lv && index < lv->lv_niovecs) {
 
1897
                        struct xfs_log_iovec    *reg = &vecp[index];
 
1898
                        struct xlog_op_header   *ophdr;
 
1899
                        int                     start_rec_copy;
 
1900
                        int                     copy_len;
 
1901
                        int                     copy_off;
 
1902
 
 
1903
                        ASSERT(reg->i_len % sizeof(__int32_t) == 0);
 
1904
                        ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
 
1905
 
 
1906
                        start_rec_copy = xlog_write_start_rec(ptr, ticket);
 
1907
                        if (start_rec_copy) {
 
1908
                                record_cnt++;
 
1909
                                xlog_write_adv_cnt(&ptr, &len, &log_offset,
 
1910
                                                   start_rec_copy);
 
1911
                        }
 
1912
 
 
1913
                        ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
 
1914
                        if (!ophdr)
 
1915
                                return XFS_ERROR(EIO);
 
1916
 
 
1917
                        xlog_write_adv_cnt(&ptr, &len, &log_offset,
 
1918
                                           sizeof(struct xlog_op_header));
 
1919
 
 
1920
                        len += xlog_write_setup_copy(ticket, ophdr,
 
1921
                                                     iclog->ic_size-log_offset,
 
1922
                                                     reg->i_len,
 
1923
                                                     &copy_off, &copy_len,
 
1924
                                                     &partial_copy,
 
1925
                                                     &partial_copy_len);
 
1926
                        xlog_verify_dest_ptr(log, ptr);
 
1927
 
 
1928
                        /* copy region */
 
1929
                        ASSERT(copy_len >= 0);
 
1930
                        memcpy(ptr, reg->i_addr + copy_off, copy_len);
 
1931
                        xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
 
1932
 
 
1933
                        copy_len += start_rec_copy + sizeof(xlog_op_header_t);
 
1934
                        record_cnt++;
 
1935
                        data_cnt += contwr ? copy_len : 0;
 
1936
 
 
1937
                        error = xlog_write_copy_finish(log, iclog, flags,
 
1938
                                                       &record_cnt, &data_cnt,
 
1939
                                                       &partial_copy,
 
1940
                                                       &partial_copy_len,
 
1941
                                                       log_offset,
 
1942
                                                       commit_iclog);
 
1943
                        if (error)
 
1944
                                return error;
 
1945
 
 
1946
                        /*
 
1947
                         * if we had a partial copy, we need to get more iclog
 
1948
                         * space but we don't want to increment the region
 
1949
                         * index because there is still more in this region to
 
1950
                         * write.
 
1951
                         *
 
1952
                         * If we completed writing this region, and we flushed
 
1953
                         * the iclog (indicated by resetting of the record
 
1954
                         * count), then we also need to get more log space. If
 
1955
                         * this was the last record, though, we are done and
 
1956
                         * can just return.
 
1957
                         */
 
1958
                        if (partial_copy)
 
1959
                                break;
 
1960
 
 
1961
                        if (++index == lv->lv_niovecs) {
 
1962
                                lv = lv->lv_next;
 
1963
                                index = 0;
 
1964
                                if (lv)
 
1965
                                        vecp = lv->lv_iovecp;
 
1966
                        }
 
1967
                        if (record_cnt == 0) {
 
1968
                                if (!lv)
 
1969
                                        return 0;
 
1970
                                break;
 
1971
                        }
1983
1972
                }
1984
 
            } /* if (partial_copy) */
1985
 
        } /* while (index < nentries) */
1986
 
    } /* for (index = 0; index < nentries; ) */
1987
 
    ASSERT(len == 0);
1988
 
 
1989
 
    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1990
 
    if (commit_iclog) {
 
1973
        }
 
1974
 
 
1975
        ASSERT(len == 0);
 
1976
 
 
1977
        xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
 
1978
        if (!commit_iclog)
 
1979
                return xlog_state_release_iclog(log, iclog);
 
1980
 
1991
1981
        ASSERT(flags & XLOG_COMMIT_TRANS);
1992
1982
        *commit_iclog = iclog;
1993
1983
        return 0;
1994
 
    }
1995
 
    return xlog_state_release_iclog(log, iclog);
1996
 
}       /* xlog_write */
 
1984
}
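One detail of the rewritten xlog_write() worth calling out is the reservation charge at the top: with the CIL (delayed logging) in use, the per-region bytes and headers were already charged when the items were inserted, so only the start record and, for commit or unmount writes, one extra op header are charged here; without the CIL the full formatted length is charged. A hedged sketch of that decision follows; have_cil, need_start_rec and the other names are illustrative, not the file's identifiers.

#include <stddef.h>

/* How much of the ticket's current reservation this write must consume.
 * need_start_rec corresponds to a ticket that has not written its start
 * record yet; is_commit_or_unmount to a commit or unmount record write. */
static size_t reservation_charge(int have_cil, int need_start_rec,
				 int is_commit_or_unmount,
				 size_t total_len, size_t ophdr_size)
{
	if (!have_cil)
		return total_len;

	size_t charge = 0;
	if (need_start_rec)
		charge += ophdr_size;
	if (is_commit_or_unmount)
		charge += ophdr_size;
	return charge;
}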
1997
1985
 
1998
1986
 
1999
1987
/*****************************************************************************
2205
2193
                                lowest_lsn = xlog_get_lowest_lsn(log);
2206
2194
                                if (lowest_lsn &&
2207
2195
                                    XFS_LSN_CMP(lowest_lsn,
2208
 
                                                be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
 
2196
                                                be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
2209
2197
                                        iclog = iclog->ic_next;
2210
2198
                                        continue; /* Leave this iclog for
2211
2199
                                                   * another thread */
2213
2201
 
2214
2202
                                iclog->ic_state = XLOG_STATE_CALLBACK;
2215
2203
 
2216
 
                                spin_unlock(&log->l_icloglock);
2217
2204
 
2218
 
                                /* l_last_sync_lsn field protected by
2219
 
                                 * l_grant_lock. Don't worry about iclog's lsn.
2220
 
                                 * No one else can be here except us.
 
2205
                                /*
 
2206
                                 * update the last_sync_lsn before we drop the
 
2207
                                 * icloglock to ensure we are the only one that
 
2208
                                 * can update it.
2221
2209
                                 */
2222
 
                                spin_lock(&log->l_grant_lock);
2223
 
                                ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn,
2224
 
                                       be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2225
 
                                log->l_last_sync_lsn =
2226
 
                                        be64_to_cpu(iclog->ic_header.h_lsn);
2227
 
                                spin_unlock(&log->l_grant_lock);
 
2210
                                ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
 
2211
                                        be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
 
2212
                                atomic64_set(&log->l_last_sync_lsn,
 
2213
                                        be64_to_cpu(iclog->ic_header.h_lsn));
2228
2214
 
2229
 
                        } else {
2230
 
                                spin_unlock(&log->l_icloglock);
 
2215
                        } else
2231
2216
                                ioerrors++;
2232
 
                        }
 
2217
 
 
2218
                        spin_unlock(&log->l_icloglock);
2233
2219
 
2234
2220
                        /*
2235
2221
                         * Keep processing entries in the callback list until
2270
2256
                        xlog_state_clean_log(log);
2271
2257
 
2272
2258
                        /* wake up threads waiting in xfs_log_force() */
2273
 
                        sv_broadcast(&iclog->ic_force_wait);
 
2259
                        wake_up_all(&iclog->ic_force_wait);
2274
2260
 
2275
2261
                        iclog = iclog->ic_next;
2276
2262
                } while (first_iclog != iclog);
2317
2303
        spin_unlock(&log->l_icloglock);
2318
2304
 
2319
2305
        if (wake)
2320
 
                sv_broadcast(&log->l_flush_wait);
 
2306
                wake_up_all(&log->l_flush_wait);
2321
2307
}
2322
2308
 
2323
2309
 
2368
2354
         * iclog buffer, we wake them all, one will get to do the
2369
2355
         * I/O, the others get to wait for the result.
2370
2356
         */
2371
 
        sv_broadcast(&iclog->ic_write_wait);
 
2357
        wake_up_all(&iclog->ic_write_wait);
2372
2358
        spin_unlock(&log->l_icloglock);
2373
2359
        xlog_state_do_callback(log, aborted, iclog);    /* also cleans log */
2374
2360
}       /* xlog_state_done_syncing */
2414
2400
 
2415
2401
        iclog = log->l_iclog;
2416
2402
        if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2417
 
                xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2418
2403
                XFS_STATS_INC(xs_log_noiclogs);
2419
2404
 
2420
2405
                /* Wait for log writes to have flushed */
2421
 
                sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
 
2406
                xlog_wait(&log->l_flush_wait, &log->l_icloglock);
2422
2407
                goto restart;
2423
2408
        }
2424
2409
 
2501
2486
 *
2502
2487
 * Once a ticket gets put onto the reserveq, it will only return after
2503
2488
 * the needed reservation is satisfied.
 
2489
 *
 
2490
 * This function is structured so that it has a lock-free fast path. This is
 
2491
 * necessary because every new transaction reservation will come through this
 
2492
 * path. Hence any lock will be globally hot if we take it unconditionally on
 
2493
 * every pass.
 
2494
 *
 
2495
 * As tickets are only ever moved on and off the reserveq under the
 
2496
 * l_grant_reserve_lock, we only need to take that lock if we are going
 
2497
 * to add the ticket to the queue and sleep. We can avoid taking the lock if the
 
2498
 * ticket was never added to the reserveq because the t_queue list head will be
 
2499
 * empty and we hold the only reference to it so it can safely be checked
 
2500
 * unlocked.
2504
2501
 */
2505
2502
STATIC int
2506
2503
xlog_grant_log_space(xlog_t        *log,
2508
2505
{
2509
2506
        int              free_bytes;
2510
2507
        int              need_bytes;
2511
 
#ifdef DEBUG
2512
 
        xfs_lsn_t        tail_lsn;
2513
 
#endif
2514
 
 
2515
2508
 
2516
2509
#ifdef DEBUG
2517
2510
        if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2518
2511
                panic("grant Recovery problem");
2519
2512
#endif
2520
2513
 
2521
 
        /* Is there space or do we need to sleep? */
2522
 
        spin_lock(&log->l_grant_lock);
2523
 
        xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter");
 
2514
        trace_xfs_log_grant_enter(log, tic);
 
2515
 
 
2516
        need_bytes = tic->t_unit_res;
 
2517
        if (tic->t_flags & XFS_LOG_PERM_RESERV)
 
2518
                need_bytes *= tic->t_ocnt;
2524
2519
 
2525
2520
        /* something is already sleeping; insert new transaction at end */
2526
 
        if (log->l_reserve_headq) {
2527
 
                xlog_ins_ticketq(&log->l_reserve_headq, tic);
2528
 
                xlog_trace_loggrant(log, tic,
2529
 
                                    "xlog_grant_log_space: sleep 1");
 
2521
        if (!list_empty_careful(&log->l_reserveq)) {
 
2522
                spin_lock(&log->l_grant_reserve_lock);
 
2523
                /* recheck the queue now we are locked */
 
2524
                if (list_empty(&log->l_reserveq)) {
 
2525
                        spin_unlock(&log->l_grant_reserve_lock);
 
2526
                        goto redo;
 
2527
                }
 
2528
                list_add_tail(&tic->t_queue, &log->l_reserveq);
 
2529
 
 
2530
                trace_xfs_log_grant_sleep1(log, tic);
 
2531
 
2530
2532
                /*
2531
2533
                 * Gotta check this before going to sleep, while we're
2532
2534
                 * holding the grant lock.
2535
2537
                        goto error_return;
2536
2538
 
2537
2539
                XFS_STATS_INC(xs_sleep_logspace);
2538
 
                sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
 
2540
                xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
 
2541
 
2539
2542
                /*
2540
2543
                 * If we got an error, and the filesystem is shutting down,
2541
2544
                 * we'll catch it down below. So just continue...
2542
2545
                 */
2543
 
                xlog_trace_loggrant(log, tic,
2544
 
                                    "xlog_grant_log_space: wake 1");
2545
 
                spin_lock(&log->l_grant_lock);
 
2546
                trace_xfs_log_grant_wake1(log, tic);
2546
2547
        }
2547
 
        if (tic->t_flags & XFS_LOG_PERM_RESERV)
2548
 
                need_bytes = tic->t_unit_res*tic->t_ocnt;
2549
 
        else
2550
 
                need_bytes = tic->t_unit_res;
2551
2548
 
2552
2549
redo:
2553
2550
        if (XLOG_FORCED_SHUTDOWN(log))
2554
 
                goto error_return;
 
2551
                goto error_return_unlocked;
2555
2552
 
2556
 
        free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
2557
 
                                     log->l_grant_reserve_bytes);
 
2553
        free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2558
2554
        if (free_bytes < need_bytes) {
2559
 
                if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2560
 
                        xlog_ins_ticketq(&log->l_reserve_headq, tic);
2561
 
                xlog_trace_loggrant(log, tic,
2562
 
                                    "xlog_grant_log_space: sleep 2");
2563
 
                spin_unlock(&log->l_grant_lock);
2564
 
                xlog_grant_push_ail(log->l_mp, need_bytes);
2565
 
                spin_lock(&log->l_grant_lock);
2566
 
 
2567
 
                XFS_STATS_INC(xs_sleep_logspace);
2568
 
                sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2569
 
 
2570
 
                spin_lock(&log->l_grant_lock);
 
2555
                spin_lock(&log->l_grant_reserve_lock);
 
2556
                if (list_empty(&tic->t_queue))
 
2557
                        list_add_tail(&tic->t_queue, &log->l_reserveq);
 
2558
 
 
2559
                trace_xfs_log_grant_sleep2(log, tic);
 
2560
 
2571
2561
                if (XLOG_FORCED_SHUTDOWN(log))
2572
2562
                        goto error_return;
2573
2563
 
2574
 
                xlog_trace_loggrant(log, tic,
2575
 
                                    "xlog_grant_log_space: wake 2");
 
2564
                xlog_grant_push_ail(log, need_bytes);
 
2565
 
 
2566
                XFS_STATS_INC(xs_sleep_logspace);
 
2567
                xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
 
2568
 
 
2569
                trace_xfs_log_grant_wake2(log, tic);
2576
2570
                goto redo;
2577
 
        } else if (tic->t_flags & XLOG_TIC_IN_Q)
2578
 
                xlog_del_ticketq(&log->l_reserve_headq, tic);
 
2571
        }
 
2572
 
 
2573
        if (!list_empty(&tic->t_queue)) {
 
2574
                spin_lock(&log->l_grant_reserve_lock);
 
2575
                list_del_init(&tic->t_queue);
 
2576
                spin_unlock(&log->l_grant_reserve_lock);
 
2577
        }
2579
2578
 
2580
2579
        /* we've got enough space */
2581
 
        xlog_grant_add_space(log, need_bytes);
2582
 
#ifdef DEBUG
2583
 
        tail_lsn = log->l_tail_lsn;
2584
 
        /*
2585
 
         * Check to make sure the grant write head didn't just over lap the
2586
 
         * tail.  If the cycles are the same, we can't be overlapping.
2587
 
         * Otherwise, make sure that the cycles differ by exactly one and
2588
 
         * check the byte count.
2589
 
         */
2590
 
        if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
2591
 
                ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
2592
 
                ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2593
 
        }
2594
 
#endif
2595
 
        xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit");
2596
 
        xlog_verify_grant_head(log, 1);
2597
 
        spin_unlock(&log->l_grant_lock);
 
2580
        xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
 
2581
        xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
 
2582
        trace_xfs_log_grant_exit(log, tic);
 
2583
        xlog_verify_grant_tail(log);
2598
2584
        return 0;
2599
2585
 
2600
 
 error_return:
2601
 
        if (tic->t_flags & XLOG_TIC_IN_Q)
2602
 
                xlog_del_ticketq(&log->l_reserve_headq, tic);
2603
 
        xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
 
2586
error_return_unlocked:
 
2587
        spin_lock(&log->l_grant_reserve_lock);
 
2588
error_return:
 
2589
        list_del_init(&tic->t_queue);
 
2590
        spin_unlock(&log->l_grant_reserve_lock);
 
2591
        trace_xfs_log_grant_error(log, tic);
 
2592
 
2604
2593
        /*
2605
2594
         * If we are failing, make sure the ticket doesn't have any
2606
2595
         * current reservations. We don't want to add this back when
2608
2597
         */
2609
2598
        tic->t_curr_res = 0;
2610
2599
        tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2611
 
        spin_unlock(&log->l_grant_lock);
2612
2600
        return XFS_ERROR(EIO);
2613
2601
}       /* xlog_grant_log_space */
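The queue handling above follows a double-checked pattern: test the wait queue unlocked on the fast path, then retest under the lock before queueing and sleeping, so that a wake-up racing between the two checks cannot be lost. A minimal pthread illustration of the same shape follows; it is not XFS code (the kernel path uses list_empty_careful(), a spinlock and its own wait primitives).

#include <pthread.h>

struct wait_queue {
	pthread_mutex_t lock;
	pthread_cond_t	wake;
	int		nwaiters;	/* stand-in for the list of sleeping tickets */
};

/* Unlocked check keeps the common no-contention case lock free; the check
 * is repeated under the lock so a concurrent wake-up between the two tests
 * cannot strand the caller on the queue. */
static void sleep_if_contended(struct wait_queue *q)
{
	if (q->nwaiters == 0)		/* racy fast-path check, no lock taken */
		return;

	pthread_mutex_lock(&q->lock);
	if (q->nwaiters == 0) {		/* recheck now that we hold the lock */
		pthread_mutex_unlock(&q->lock);
		return;
	}
	q->nwaiters++;			/* queue ourselves and sleep */
	pthread_cond_wait(&q->wake, &q->lock);
	q->nwaiters--;
	pthread_mutex_unlock(&q->lock);
}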
2614
2602
 
2616
2604
/*
2617
2605
 * Replenish the byte reservation required by moving the grant write head.
2618
2606
 *
2619
 
 *
 
2607
 * Similar to xlog_grant_log_space, the function is structured to have a lock
 
2608
 * free fast path.
2620
2609
 */
2621
2610
STATIC int
2622
2611
xlog_regrant_write_log_space(xlog_t        *log,
2623
2612
                             xlog_ticket_t *tic)
2624
2613
{
2625
2614
        int             free_bytes, need_bytes;
2626
 
        xlog_ticket_t   *ntic;
2627
 
#ifdef DEBUG
2628
 
        xfs_lsn_t       tail_lsn;
2629
 
#endif
2630
2615
 
2631
2616
        tic->t_curr_res = tic->t_unit_res;
2632
2617
        xlog_tic_reset_res(tic);
2639
2624
                panic("regrant Recovery problem");
2640
2625
#endif
2641
2626
 
2642
 
        spin_lock(&log->l_grant_lock);
2643
 
        xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter");
2644
 
 
 
2627
        trace_xfs_log_regrant_write_enter(log, tic);
2645
2628
        if (XLOG_FORCED_SHUTDOWN(log))
2646
 
                goto error_return;
 
2629
                goto error_return_unlocked;
2647
2630
 
2648
2631
        /* If there are other waiters on the queue then give them a
2649
2632
         * chance at logspace before us. Wake up the first waiters,
2652
2635
         * this transaction.
2653
2636
         */
2654
2637
        need_bytes = tic->t_unit_res;
2655
 
        if ((ntic = log->l_write_headq)) {
2656
 
                free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
2657
 
                                             log->l_grant_write_bytes);
2658
 
                do {
 
2638
        if (!list_empty_careful(&log->l_writeq)) {
 
2639
                struct xlog_ticket *ntic;
 
2640
 
 
2641
                spin_lock(&log->l_grant_write_lock);
 
2642
                free_bytes = xlog_space_left(log, &log->l_grant_write_head);
 
2643
                list_for_each_entry(ntic, &log->l_writeq, t_queue) {
2659
2644
                        ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
2660
2645
 
2661
2646
                        if (free_bytes < ntic->t_unit_res)
2662
2647
                                break;
2663
2648
                        free_bytes -= ntic->t_unit_res;
2664
 
                        sv_signal(&ntic->t_wait);
2665
 
                        ntic = ntic->t_next;
2666
 
                } while (ntic != log->l_write_headq);
2667
 
 
2668
 
                if (ntic != log->l_write_headq) {
2669
 
                        if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2670
 
                                xlog_ins_ticketq(&log->l_write_headq, tic);
2671
 
 
2672
 
                        xlog_trace_loggrant(log, tic,
2673
 
                                    "xlog_regrant_write_log_space: sleep 1");
2674
 
                        spin_unlock(&log->l_grant_lock);
2675
 
                        xlog_grant_push_ail(log->l_mp, need_bytes);
2676
 
                        spin_lock(&log->l_grant_lock);
 
2649
                        wake_up(&ntic->t_wait);
 
2650
                }
 
2651
 
 
2652
                if (ntic != list_first_entry(&log->l_writeq,
 
2653
                                                struct xlog_ticket, t_queue)) {
 
2654
                        if (list_empty(&tic->t_queue))
 
2655
                                list_add_tail(&tic->t_queue, &log->l_writeq);
 
2656
                        trace_xfs_log_regrant_write_sleep1(log, tic);
 
2657
 
 
2658
                        xlog_grant_push_ail(log, need_bytes);
2677
2659
 
2678
2660
                        XFS_STATS_INC(xs_sleep_logspace);
2679
 
                        sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2680
 
                                &log->l_grant_lock, s);
2681
 
 
2682
 
                        /* If we're shutting down, this tic is already
2683
 
                         * off the queue */
2684
 
                        spin_lock(&log->l_grant_lock);
2685
 
                        if (XLOG_FORCED_SHUTDOWN(log))
2686
 
                                goto error_return;
2687
 
 
2688
 
                        xlog_trace_loggrant(log, tic,
2689
 
                                    "xlog_regrant_write_log_space: wake 1");
2690
 
                }
 
2661
                        xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
 
2662
                        trace_xfs_log_regrant_write_wake1(log, tic);
 
2663
                } else
 
2664
                        spin_unlock(&log->l_grant_write_lock);
2691
2665
        }
2692
2666
 
2693
2667
redo:
2694
2668
        if (XLOG_FORCED_SHUTDOWN(log))
2695
 
                goto error_return;
 
2669
                goto error_return_unlocked;
2696
2670
 
2697
 
        free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
2698
 
                                     log->l_grant_write_bytes);
 
2671
        free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2699
2672
        if (free_bytes < need_bytes) {
2700
 
                if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2701
 
                        xlog_ins_ticketq(&log->l_write_headq, tic);
2702
 
                spin_unlock(&log->l_grant_lock);
2703
 
                xlog_grant_push_ail(log->l_mp, need_bytes);
2704
 
                spin_lock(&log->l_grant_lock);
2705
 
 
2706
 
                XFS_STATS_INC(xs_sleep_logspace);
2707
 
                sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2708
 
 
2709
 
                /* If we're shutting down, this tic is already off the queue */
2710
 
                spin_lock(&log->l_grant_lock);
 
2673
                spin_lock(&log->l_grant_write_lock);
 
2674
                if (list_empty(&tic->t_queue))
 
2675
                        list_add_tail(&tic->t_queue, &log->l_writeq);
 
2676
 
2711
2677
                if (XLOG_FORCED_SHUTDOWN(log))
2712
2678
                        goto error_return;
2713
2679
 
2714
 
                xlog_trace_loggrant(log, tic,
2715
 
                                    "xlog_regrant_write_log_space: wake 2");
 
2680
                xlog_grant_push_ail(log, need_bytes);
 
2681
 
 
2682
                XFS_STATS_INC(xs_sleep_logspace);
 
2683
                trace_xfs_log_regrant_write_sleep2(log, tic);
 
2684
                xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
 
2685
 
 
2686
                trace_xfs_log_regrant_write_wake2(log, tic);
2716
2687
                goto redo;
2717
 
        } else if (tic->t_flags & XLOG_TIC_IN_Q)
2718
 
                xlog_del_ticketq(&log->l_write_headq, tic);
 
2688
        }
 
2689
 
 
2690
        if (!list_empty(&tic->t_queue)) {
 
2691
                spin_lock(&log->l_grant_write_lock);
 
2692
                list_del_init(&tic->t_queue);
 
2693
                spin_unlock(&log->l_grant_write_lock);
 
2694
        }
2719
2695
 
2720
2696
        /* we've got enough space */
2721
 
        xlog_grant_add_space_write(log, need_bytes);
2722
 
#ifdef DEBUG
2723
 
        tail_lsn = log->l_tail_lsn;
2724
 
        if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
2725
 
                ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
2726
 
                ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2727
 
        }
2728
 
#endif
2729
 
 
2730
 
        xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit");
2731
 
        xlog_verify_grant_head(log, 1);
2732
 
        spin_unlock(&log->l_grant_lock);
 
2697
        xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
 
2698
        trace_xfs_log_regrant_write_exit(log, tic);
 
2699
        xlog_verify_grant_tail(log);
2733
2700
        return 0;
2734
2701
 
2735
2702
 
 
2703
 error_return_unlocked:
 
2704
        spin_lock(&log->l_grant_write_lock);
2736
2705
 error_return:
2737
 
        if (tic->t_flags & XLOG_TIC_IN_Q)
2738
 
                xlog_del_ticketq(&log->l_reserve_headq, tic);
2739
 
        xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
 
2706
        list_del_init(&tic->t_queue);
 
2707
        spin_unlock(&log->l_grant_write_lock);
 
2708
        trace_xfs_log_regrant_write_error(log, tic);
 
2709
 
2740
2710
        /*
2741
2711
         * If we are failing, make sure the ticket doesn't have any
2742
2712
         * current reservations. We don't want to add this back when
2744
2714
         */
2745
2715
        tic->t_curr_res = 0;
2746
2716
        tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2747
 
        spin_unlock(&log->l_grant_lock);
2748
2717
        return XFS_ERROR(EIO);
2749
2718
}       /* xlog_regrant_write_log_space */
2750
2719
 
2760
2729
xlog_regrant_reserve_log_space(xlog_t        *log,
2761
2730
                               xlog_ticket_t *ticket)
2762
2731
{
2763
 
        xlog_trace_loggrant(log, ticket,
2764
 
                            "xlog_regrant_reserve_log_space: enter");
 
2732
        trace_xfs_log_regrant_reserve_enter(log, ticket);
 
2733
 
2765
2734
        if (ticket->t_cnt > 0)
2766
2735
                ticket->t_cnt--;
2767
2736
 
2768
 
        spin_lock(&log->l_grant_lock);
2769
 
        xlog_grant_sub_space(log, ticket->t_curr_res);
 
2737
        xlog_grant_sub_space(log, &log->l_grant_reserve_head,
 
2738
                                        ticket->t_curr_res);
 
2739
        xlog_grant_sub_space(log, &log->l_grant_write_head,
 
2740
                                        ticket->t_curr_res);
2770
2741
        ticket->t_curr_res = ticket->t_unit_res;
2771
2742
        xlog_tic_reset_res(ticket);
2772
 
        xlog_trace_loggrant(log, ticket,
2773
 
                            "xlog_regrant_reserve_log_space: sub current res");
2774
 
        xlog_verify_grant_head(log, 1);
 
2743
 
 
2744
        trace_xfs_log_regrant_reserve_sub(log, ticket);
2775
2745
 
2776
2746
        /* just return if we still have some of the pre-reserved space */
2777
 
        if (ticket->t_cnt > 0) {
2778
 
                spin_unlock(&log->l_grant_lock);
 
2747
        if (ticket->t_cnt > 0)
2779
2748
                return;
2780
 
        }
2781
 
 
2782
 
        xlog_grant_add_space_reserve(log, ticket->t_unit_res);
2783
 
        xlog_trace_loggrant(log, ticket,
2784
 
                            "xlog_regrant_reserve_log_space: exit");
2785
 
        xlog_verify_grant_head(log, 0);
2786
 
        spin_unlock(&log->l_grant_lock);
 
2749
 
 
2750
        xlog_grant_add_space(log, &log->l_grant_reserve_head,
 
2751
                                        ticket->t_unit_res);
 
2752
 
 
2753
        trace_xfs_log_regrant_reserve_exit(log, ticket);
 
2754
 
2787
2755
        ticket->t_curr_res = ticket->t_unit_res;
2788
2756
        xlog_tic_reset_res(ticket);
2789
2757
}       /* xlog_regrant_reserve_log_space */
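The regrant path above is pure bookkeeping, so a standalone model of the same arithmetic may help (made-up names and numbers, not kernel code): the unused part of the old reservation comes off both heads, the ticket is topped back up to a full unit, and only a ticket whose count has run out re-adds that unit to the reserve head.

#include <stdio.h>

struct ticket {
        int curr_res;   /* unused part of the current reservation */
        int unit_res;   /* reservation needed by one transaction */
        int cnt;        /* remaining transactions in a permanent reservation */
};

static void regrant_reserve(long *reserve_head, long *write_head, struct ticket *t)
{
        if (t->cnt > 0)
                t->cnt--;

        *reserve_head -= t->curr_res;   /* hand back the unused reservation */
        *write_head   -= t->curr_res;
        t->curr_res = t->unit_res;      /* ticket holds a full unit again */

        if (t->cnt > 0)                 /* pre-reserved space still available */
                return;

        *reserve_head += t->unit_res;   /* count exhausted: reserve the next unit */
}

int main(void)
{
        long reserve = 100000, write = 100000;
        struct ticket t = { .curr_res = 4000, .unit_res = 8000, .cnt = 2 };

        regrant_reserve(&reserve, &write, &t);
        printf("reserve=%ld write=%ld cnt=%d curr=%d\n", reserve, write, t.cnt, t.curr_res);
        return 0;
}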
2807
2775
xlog_ungrant_log_space(xlog_t        *log,
2808
2776
                       xlog_ticket_t *ticket)
2809
2777
{
 
2778
        int     bytes;
 
2779
 
2810
2780
        if (ticket->t_cnt > 0)
2811
2781
                ticket->t_cnt--;
2812
2782
 
2813
 
        spin_lock(&log->l_grant_lock);
2814
 
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
2815
 
 
2816
 
        xlog_grant_sub_space(log, ticket->t_curr_res);
2817
 
 
2818
 
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
2819
 
 
2820
 
        /* If this is a permanent reservation ticket, we may be able to free
 
2783
        trace_xfs_log_ungrant_enter(log, ticket);
 
2784
        trace_xfs_log_ungrant_sub(log, ticket);
 
2785
 
 
2786
        /*
 
2787
         * If this is a permanent reservation ticket, we may be able to free
2821
2788
         * up more space based on the remaining count.
2822
2789
         */
 
2790
        bytes = ticket->t_curr_res;
2823
2791
        if (ticket->t_cnt > 0) {
2824
2792
                ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
2825
 
                xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
 
2793
                bytes += ticket->t_unit_res*ticket->t_cnt;
2826
2794
        }
2827
2795
 
2828
 
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
2829
 
        xlog_verify_grant_head(log, 1);
2830
 
        spin_unlock(&log->l_grant_lock);
 
2796
        xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
 
2797
        xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
 
2798
 
 
2799
        trace_xfs_log_ungrant_exit(log, ticket);
 
2800
 
2831
2801
        xfs_log_move_tail(log->l_mp, 1);
2832
2802
}       /* xlog_ungrant_log_space */
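The ungrant path above follows the same pattern; here is a small arithmetic-only sketch (illustrative numbers, not kernel code) of how many bytes are handed back: the unused current reservation plus, for a permanent ticket, one full unit per remaining count, subtracted from both grant heads.

#include <assert.h>
#include <stdio.h>

struct ticket {
        int curr_res;   /* unused part of the current reservation */
        int unit_res;   /* reservation for one transaction */
        int cnt;        /* remaining count of a permanent reservation */
        int permanent;  /* stands in for XLOG_TIC_PERM_RESERV */
};

static int ungrant_bytes(struct ticket *t)
{
        int bytes;

        if (t->cnt > 0)
                t->cnt--;

        bytes = t->curr_res;            /* whatever this transaction left unused */
        if (t->cnt > 0) {
                assert(t->permanent);   /* only permanent tickets carry a count */
                bytes += t->unit_res * t->cnt;
        }
        return bytes;
}

int main(void)
{
        long reserve_head = 100000, write_head = 100000;
        struct ticket t = { .curr_res = 3000, .unit_res = 8000, .cnt = 3, .permanent = 1 };
        int bytes = ungrant_bytes(&t);

        reserve_head -= bytes;          /* both heads shrink by the same amount */
        write_head   -= bytes;
        printf("released %d bytes, reserve=%ld write=%ld\n", bytes, reserve_head, write_head);
        return 0;
}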
2833
2803
 
2864
2834
 
2865
2835
        if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
2866
2836
                /* update tail before writing to iclog */
2867
 
                xlog_assign_tail_lsn(log->l_mp);
 
2837
                xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
2868
2838
                sync++;
2869
2839
                iclog->ic_state = XLOG_STATE_SYNCING;
2870
 
                iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
2871
 
                xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
 
2840
                iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
 
2841
                xlog_verify_tail_lsn(log, iclog, tail_lsn);
2872
2842
                /* cycle incremented when incrementing curr_block */
2873
2843
        }
2874
2844
        spin_unlock(&log->l_icloglock);
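One detail in the hunk above: the tail LSN is now taken from xlog_assign_tail_lsn()'s return value instead of being re-read from log->l_tail_lsn, presumably so the header stamp and the verification see a single consistent snapshot once the tail is updated without the icloglock held. A minimal userspace model of that read-once pattern (hypothetical names):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t shared_tail_lsn = 42;   /* updated concurrently elsewhere */

static uint64_t assign_tail_lsn(void)           /* stand-in for xlog_assign_tail_lsn() */
{
        return atomic_load(&shared_tail_lsn);
}

int main(void)
{
        uint64_t tail_lsn = assign_tail_lsn();  /* read the shared value exactly once */
        uint64_t header_tail_lsn = tail_lsn;    /* stamp the header from the snapshot */

        /* any later verification uses the same snapshot, not a second read */
        printf("stamped %llu, verifying against %llu\n",
               (unsigned long long)header_tail_lsn,
               (unsigned long long)tail_lsn);
        return 0;
}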
2927
2897
        log->l_iclog = iclog->ic_next;
2928
2898
}       /* xlog_state_switch_iclogs */
2929
2899
 
2930
 
 
2931
2900
/*
2932
2901
 * Write out all data in the in-core log as of this exact moment in time.
2933
2902
 *
2955
2924
 *         b) when we return from flushing out this iclog, it is still
2956
2925
 *              not in the active nor dirty state.
2957
2926
 */
2958
 
STATIC int
2959
 
xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 
2927
int
 
2928
_xfs_log_force(
 
2929
        struct xfs_mount        *mp,
 
2930
        uint                    flags,
 
2931
        int                     *log_flushed)
2960
2932
{
2961
 
        xlog_in_core_t  *iclog;
2962
 
        xfs_lsn_t       lsn;
 
2933
        struct log              *log = mp->m_log;
 
2934
        struct xlog_in_core     *iclog;
 
2935
        xfs_lsn_t               lsn;
 
2936
 
 
2937
        XFS_STATS_INC(xs_log_force);
 
2938
 
 
2939
        if (log->l_cilp)
 
2940
                xlog_cil_force(log);
2963
2941
 
2964
2942
        spin_lock(&log->l_icloglock);
2965
2943
 
3005
2983
 
3006
2984
                                if (xlog_state_release_iclog(log, iclog))
3007
2985
                                        return XFS_ERROR(EIO);
3008
 
                                *log_flushed = 1;
 
2986
 
 
2987
                                if (log_flushed)
 
2988
                                        *log_flushed = 1;
3009
2989
                                spin_lock(&log->l_icloglock);
3010
2990
                                if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
3011
2991
                                    iclog->ic_state != XLOG_STATE_DIRTY)
3041
3021
                        return XFS_ERROR(EIO);
3042
3022
                }
3043
3023
                XFS_STATS_INC(xs_log_force_sleep);
3044
 
                sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
 
3024
                xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3045
3025
                /*
3046
3026
                 * No need to grab the log lock here since we're
3047
3027
                 * only deciding whether or not to return EIO
3048
3028
                 * and the memory read should be atomic.
3049
3029
                 */
3050
3030
                if (iclog->ic_state & XLOG_STATE_IOERROR)
3051
3031
                        return XFS_ERROR(EIO);
3052
 
                *log_flushed = 1;
3053
 
 
 
3032
                if (log_flushed)
 
3033
                        *log_flushed = 1;
3054
3034
        } else {
3055
3035
 
3056
3036
no_sleep:
3057
3037
                spin_unlock(&log->l_icloglock);
3058
3038
        }
3059
3039
        return 0;
3060
 
}       /* xlog_state_sync_all */
3061
 
 
3062
 
 
3063
 
/*
3064
 
 * Used by code which implements synchronous log forces.
 
3040
}
 
3041
 
 
3042
/*
 
3043
 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
 
3044
 * about errors or whether the log was flushed or not. This is the normal
 
3045
 * interface to use when trying to unpin items or move the log forward.
 
3046
 */
 
3047
void
 
3048
xfs_log_force(
 
3049
        xfs_mount_t     *mp,
 
3050
        uint            flags)
 
3051
{
 
3052
        int     error;
 
3053
 
 
3054
        error = _xfs_log_force(mp, flags, NULL);
 
3055
        if (error) {
 
3056
                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
 
3057
                        "error %d returned.", error);
 
3058
        }
 
3059
}
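The new _xfs_log_force()/xfs_log_force() split amounts to an optional out-parameter plus an error-logging wrapper. A minimal userspace analogue of the pattern (hypothetical names, not the kernel API):

#include <stdio.h>

static int do_force(int *flushed)
{
        /* ...push the in-core log out here... */
        if (flushed)
                *flushed = 1;   /* only dereference when the caller asked */
        return 0;               /* 0 on success, error code otherwise */
}

static void force(void)
{
        int error = do_force(NULL);     /* caller ignores the details */

        if (error)
                fprintf(stderr, "force: error %d returned.\n", error);
}

int main(void)
{
        int flushed = 0;

        force();                        /* fire and forget */
        if (do_force(&flushed) == 0)    /* caller wants to know */
                printf("flushed=%d\n", flushed);
        return 0;
}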
 
3060
 
 
3061
/*
 
3062
 * Force the in-core log to disk for a specific LSN.
3065
3063
 *
3066
3064
 * Find in-core log with lsn.
3067
3065
 *      If it is in the DIRTY state, just return.
3068
3066
 *      If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
3069
3067
 *              state and go to sleep or return.
3070
3068
 *      If it is in any other state, go to sleep or return.
3071
3069
 *
3072
 
 * If filesystem activity goes to zero, the iclog will get flushed only by
3073
 
 * bdflush().
 
3070
 * Synchronous forces are implemented with a signal variable. All callers
 
3071
 * to force a given lsn to disk will wait on the sv attached to the
 
3072
 * specific in-core log.  When given in-core log finally completes its
 
3073
 * write to disk, that thread will wake up all threads waiting on the
 
3074
 * sv.
3074
3075
 */
3075
 
STATIC int
3076
 
xlog_state_sync(xlog_t    *log,
3077
 
                xfs_lsn_t lsn,
3078
 
                uint      flags,
3079
 
                int       *log_flushed)
 
3076
int
 
3077
_xfs_log_force_lsn(
 
3078
        struct xfs_mount        *mp,
 
3079
        xfs_lsn_t               lsn,
 
3080
        uint                    flags,
 
3081
        int                     *log_flushed)
3080
3082
{
3081
 
    xlog_in_core_t      *iclog;
3082
 
    int                 already_slept = 0;
 
3083
        struct log              *log = mp->m_log;
 
3084
        struct xlog_in_core     *iclog;
 
3085
        int                     already_slept = 0;
 
3086
 
 
3087
        ASSERT(lsn != 0);
 
3088
 
 
3089
        XFS_STATS_INC(xs_log_force);
 
3090
 
 
3091
        if (log->l_cilp) {
 
3092
                lsn = xlog_cil_force_lsn(log, lsn);
 
3093
                if (lsn == NULLCOMMITLSN)
 
3094
                        return 0;
 
3095
        }
3083
3096
 
3084
3097
try_again:
3085
 
    spin_lock(&log->l_icloglock);
3086
 
    iclog = log->l_iclog;
3087
 
 
3088
 
    if (iclog->ic_state & XLOG_STATE_IOERROR) {
3089
 
            spin_unlock(&log->l_icloglock);
3090
 
            return XFS_ERROR(EIO);
3091
 
    }
3092
 
 
3093
 
    do {
3094
 
        if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3095
 
                iclog = iclog->ic_next;
3096
 
                continue;
3097
 
        }
3098
 
 
3099
 
        if (iclog->ic_state == XLOG_STATE_DIRTY) {
 
3098
        spin_lock(&log->l_icloglock);
 
3099
        iclog = log->l_iclog;
 
3100
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
3100
3101
                spin_unlock(&log->l_icloglock);
3101
 
                return 0;
 
3102
                return XFS_ERROR(EIO);
3102
3103
        }
3103
3104
 
3104
 
        if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3105
 
                /*
3106
 
                 * We sleep here if we haven't already slept (e.g.
3107
 
                 * this is the first time we've looked at the correct
3108
 
                 * iclog buf) and the buffer before us is going to
3109
 
                 * be sync'ed. The reason for this is that if we
3110
 
                 * are doing sync transactions here, by waiting for
3111
 
                 * the previous I/O to complete, we can allow a few
3112
 
                 * more transactions into this iclog before we close
3113
 
                 * it down.
3114
 
                 *
3115
 
                 * Otherwise, we mark the buffer WANT_SYNC, and bump
3116
 
                 * up the refcnt so we can release the log (which drops
3117
 
                 * the ref count).  The state switch keeps new transaction
3118
 
                 * commits from using this buffer.  When the current commits
3119
 
                 * finish writing into the buffer, the refcount will drop to
3120
 
                 * zero and the buffer will go out then.
3121
 
                 */
3122
 
                if (!already_slept &&
3123
 
                    (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
3124
 
                                                 XLOG_STATE_SYNCING))) {
3125
 
                        ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3126
 
                        XFS_STATS_INC(xs_log_force_sleep);
3127
 
                        sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
3128
 
                                &log->l_icloglock, s);
3129
 
                        *log_flushed = 1;
3130
 
                        already_slept = 1;
3131
 
                        goto try_again;
3132
 
                } else {
 
3105
        do {
 
3106
                if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
 
3107
                        iclog = iclog->ic_next;
 
3108
                        continue;
 
3109
                }
 
3110
 
 
3111
                if (iclog->ic_state == XLOG_STATE_DIRTY) {
 
3112
                        spin_unlock(&log->l_icloglock);
 
3113
                        return 0;
 
3114
                }
 
3115
 
 
3116
                if (iclog->ic_state == XLOG_STATE_ACTIVE) {
 
3117
                        /*
 
3118
                         * We sleep here if we haven't already slept (e.g.
 
3119
                         * this is the first time we've looked at the correct
 
3120
                         * iclog buf) and the buffer before us is going to
 
3121
                         * be sync'ed. The reason for this is that if we
 
3122
                         * are doing sync transactions here, by waiting for
 
3123
                         * the previous I/O to complete, we can allow a few
 
3124
                         * more transactions into this iclog before we close
 
3125
                         * it down.
 
3126
                         *
 
3127
                         * Otherwise, we mark the buffer WANT_SYNC, and bump
 
3128
                         * up the refcnt so we can release the log (which
 
3129
                         * drops the ref count).  The state switch keeps new
 
3130
                         * transaction commits from using this buffer.  When
 
3131
                         * the current commits finish writing into the buffer,
 
3132
                         * the refcount will drop to zero and the buffer will
 
3133
                         * go out then.
 
3134
                         */
 
3135
                        if (!already_slept &&
 
3136
                            (iclog->ic_prev->ic_state &
 
3137
                             (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
 
3138
                                ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
 
3139
 
 
3140
                                XFS_STATS_INC(xs_log_force_sleep);
 
3141
 
 
3142
                                xlog_wait(&iclog->ic_prev->ic_write_wait,
 
3143
                                                        &log->l_icloglock);
 
3144
                                if (log_flushed)
 
3145
                                        *log_flushed = 1;
 
3146
                                already_slept = 1;
 
3147
                                goto try_again;
 
3148
                        }
3133
3149
                        atomic_inc(&iclog->ic_refcnt);
3134
3150
                        xlog_state_switch_iclogs(log, iclog, 0);
3135
3151
                        spin_unlock(&log->l_icloglock);
3136
3152
                        if (xlog_state_release_iclog(log, iclog))
3137
3153
                                return XFS_ERROR(EIO);
3138
 
                        *log_flushed = 1;
 
3154
                        if (log_flushed)
 
3155
                                *log_flushed = 1;
3139
3156
                        spin_lock(&log->l_icloglock);
3140
3157
                }
3141
 
        }
3142
 
 
3143
 
        if ((flags & XFS_LOG_SYNC) && /* sleep */
3144
 
            !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3145
 
 
3146
 
                /*
3147
 
                 * Don't wait on completion if we know that we've
3148
 
                 * gotten a log write error.
3149
 
                 */
3150
 
                if (iclog->ic_state & XLOG_STATE_IOERROR) {
 
3158
 
 
3159
                if ((flags & XFS_LOG_SYNC) && /* sleep */
 
3160
                    !(iclog->ic_state &
 
3161
                      (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
 
3162
                        /*
 
3163
                         * Don't wait on completion if we know that we've
 
3164
                         * gotten a log write error.
 
3165
                         */
 
3166
                        if (iclog->ic_state & XLOG_STATE_IOERROR) {
 
3167
                                spin_unlock(&log->l_icloglock);
 
3168
                                return XFS_ERROR(EIO);
 
3169
                        }
 
3170
                        XFS_STATS_INC(xs_log_force_sleep);
 
3171
                        xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
 
3172
                        /*
 
3173
                         * No need to grab the log lock here since we're
 
3174
                         * only deciding whether or not to return EIO
 
3175
                         * and the memory read should be atomic.
 
3176
                         */
 
3177
                        if (iclog->ic_state & XLOG_STATE_IOERROR)
 
3178
                                return XFS_ERROR(EIO);
 
3179
 
 
3180
                        if (log_flushed)
 
3181
                                *log_flushed = 1;
 
3182
                } else {                /* just return */
3151
3183
                        spin_unlock(&log->l_icloglock);
3152
 
                        return XFS_ERROR(EIO);
3153
3184
                }
3154
 
                XFS_STATS_INC(xs_log_force_sleep);
3155
 
                sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3156
 
                /*
3157
 
                 * No need to grab the log lock here since we're
3158
 
                 * only deciding whether or not to return EIO
3159
 
                 * and the memory read should be atomic.
3160
 
                 */
3161
 
                if (iclog->ic_state & XLOG_STATE_IOERROR)
3162
 
                        return XFS_ERROR(EIO);
3163
 
                *log_flushed = 1;
3164
 
        } else {                /* just return */
3165
 
                spin_unlock(&log->l_icloglock);
3166
 
        }
 
3185
 
 
3186
                return 0;
 
3187
        } while (iclog != log->l_iclog);
 
3188
 
 
3189
        spin_unlock(&log->l_icloglock);
3167
3190
        return 0;
3168
 
 
3169
 
    } while (iclog != log->l_iclog);
3170
 
 
3171
 
    spin_unlock(&log->l_icloglock);
3172
 
    return 0;
3173
 
}       /* xlog_state_sync */
3174
 
 
 
3191
}
 
3192
 
 
3193
/*
 
3194
 * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
 
3195
 * about errors or whether the log was flushed or not. This is the normal
 
3196
 * interface to use when trying to unpin items or move the log forward.
 
3197
 */
 
3198
void
 
3199
xfs_log_force_lsn(
 
3200
        xfs_mount_t     *mp,
 
3201
        xfs_lsn_t       lsn,
 
3202
        uint            flags)
 
3203
{
 
3204
        int     error;
 
3205
 
 
3206
        error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
 
3207
        if (error) {
 
3208
                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
 
3209
                        "error %d returned.", error);
 
3210
        }
 
3211
}
3175
3212
 
3176
3213
/*
3177
3214
 * Called when we want to mark the current iclog as being ready to sync to
3206
3243
        xlog_ticket_t   *ticket)
3207
3244
{
3208
3245
        ASSERT(atomic_read(&ticket->t_ref) > 0);
3209
 
        if (atomic_dec_and_test(&ticket->t_ref)) {
3210
 
                sv_destroy(&ticket->t_wait);
 
3246
        if (atomic_dec_and_test(&ticket->t_ref))
3211
3247
                kmem_zone_free(xfs_log_ticket_zone, ticket);
3212
 
        }
3213
3248
}
3214
3249
 
3215
3250
xlog_ticket_t *
3221
3256
        return ticket;
3222
3257
}
3223
3258
 
 
3259
xlog_tid_t
 
3260
xfs_log_get_trans_ident(
 
3261
        struct xfs_trans        *tp)
 
3262
{
 
3263
        return tp->t_ticket->t_tid;
 
3264
}
 
3265
 
3224
3266
/*
3225
3267
 * Allocate and initialise a new log ticket.
3226
3268
 */
3227
 
STATIC xlog_ticket_t *
3228
 
xlog_ticket_alloc(xlog_t                *log,
3229
 
                int             unit_bytes,
3230
 
                int             cnt,
3231
 
                char            client,
3232
 
                uint            xflags)
 
3269
xlog_ticket_t *
 
3270
xlog_ticket_alloc(
 
3271
        struct log      *log,
 
3272
        int             unit_bytes,
 
3273
        int             cnt,
 
3274
        char            client,
 
3275
        uint            xflags,
 
3276
        int             alloc_flags)
3233
3277
{
3234
 
        xlog_ticket_t   *tic;
 
3278
        struct xlog_ticket *tic;
3235
3279
        uint            num_headers;
 
3280
        int             iclog_space;
3236
3281
 
3237
 
        tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
 
3282
        tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
3238
3283
        if (!tic)
3239
3284
                return NULL;
3240
3285
 
3276
3321
        /* for start-rec */
3277
3322
        unit_bytes += sizeof(xlog_op_header_t);
3278
3323
 
3279
 
        /* for LR headers */
3280
 
        num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
 
3324
        /*
 
3325
         * for LR headers - the space for data in an iclog is the size minus
 
3326
         * the space used for the headers. If we use the iclog size, then we
 
3327
         * undercalculate the number of headers required.
 
3328
         *
 
3329
         * Furthermore - the addition of op headers for split-recs might
 
3330
         * increase the space required enough to require more log and op
 
3331
         * headers, so take that into account too.
 
3332
         *
 
3333
         * IMPORTANT: This reservation makes the assumption that if this
 
3334
         * transaction is the first in an iclog and hence has the LR headers
 
3335
         * accounted to it, then the remaining space in the iclog is
 
3336
         * exclusively for this transaction.  i.e. if the transaction is larger
 
3337
         * than the iclog, it will be the only thing in that iclog.
 
3338
         * Fundamentally, this means we must pass the entire log vector to
 
3339
         * xlog_write to guarantee this.
 
3340
         */
 
3341
        iclog_space = log->l_iclog_size - log->l_iclog_hsize;
 
3342
        num_headers = howmany(unit_bytes, iclog_space);
 
3343
 
 
3344
        /* for split-recs - ophdrs added when data split over LRs */
 
3345
        unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 
3346
 
 
3347
        /* add extra header reservations if we overrun */
 
3348
        while (!num_headers ||
 
3349
               howmany(unit_bytes, iclog_space) > num_headers) {
 
3350
                unit_bytes += sizeof(xlog_op_header_t);
 
3351
                num_headers++;
 
3352
        }
3281
3353
        unit_bytes += log->l_iclog_hsize * num_headers;
3282
3354
 
3283
3355
        /* for commit-rec LR header - note: padding will subsume the ophdr */
3284
3356
        unit_bytes += log->l_iclog_hsize;
3285
3357
 
3286
 
        /* for split-recs - ophdrs added when data split over LRs */
3287
 
        unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3288
 
 
3289
3358
        /* for roundoff padding for transaction data and one for commit record */
3290
3359
        if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
3291
3360
            log->l_mp->m_sb.sb_logsunit > 1) {
3297
3366
        }
3298
3367
 
3299
3368
        atomic_set(&tic->t_ref, 1);
 
3369
        INIT_LIST_HEAD(&tic->t_queue);
3300
3370
        tic->t_unit_res         = unit_bytes;
3301
3371
        tic->t_curr_res         = unit_bytes;
3302
3372
        tic->t_cnt              = cnt;
3303
3373
        tic->t_ocnt             = cnt;
3304
 
        tic->t_tid              = (xlog_tid_t)((__psint_t)tic & 0xffffffff);
 
3374
        tic->t_tid              = random32();
3305
3375
        tic->t_clientid         = client;
3306
3376
        tic->t_flags            = XLOG_TIC_INITED;
3307
3377
        tic->t_trans_type       = 0;
3308
3378
        if (xflags & XFS_LOG_PERM_RESERV)
3309
3379
                tic->t_flags |= XLOG_TIC_PERM_RESERV;
3310
 
        sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
 
3380
        init_waitqueue_head(&tic->t_wait);
3311
3381
 
3312
3382
        xlog_tic_reset_res(tic);
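The reservation comment added above describes the header estimate in words; the standalone calculation below (illustrative sizes only, not taken from the tree) runs the same howmany() pass over the usable iclog space and the fix-up loop for the case where the extra op headers themselves spill into another iclog:

#include <stdio.h>

#define howmany(x, y)  (((x) + ((y) - 1)) / (y))

int main(void)
{
        int op_hdr_size = 12;           /* sizeof(xlog_op_header_t), illustrative */
        int iclog_size  = 32 * 1024;    /* in-core log buffer size */
        int iclog_hsize = 512;          /* iclog header size, illustrative */
        int unit_bytes  = 70 * 1024;    /* transaction reservation so far */

        int iclog_space = iclog_size - iclog_hsize;     /* usable data space */
        int num_headers = howmany(unit_bytes, iclog_space);

        /* op headers added when the data is split over log records */
        unit_bytes += op_hdr_size * num_headers;

        /* the added op headers may themselves overflow into another iclog */
        while (!num_headers || howmany(unit_bytes, iclog_space) > num_headers) {
                unit_bytes += op_hdr_size;
                num_headers++;
        }
        unit_bytes += iclog_hsize * num_headers;

        printf("num_headers=%d unit_bytes=%d\n", num_headers, unit_bytes);
        return 0;
}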
3313
3383
 
3328
3398
 * part of the log in case we trash the log structure.
3329
3399
 */
3330
3400
void
3331
 
xlog_verify_dest_ptr(xlog_t     *log,
3332
 
                     __psint_t  ptr)
 
3401
xlog_verify_dest_ptr(
 
3402
        struct log      *log,
 
3403
        char            *ptr)
3333
3404
{
3334
3405
        int i;
3335
3406
        int good_ptr = 0;
3336
3407
 
3337
 
        for (i=0; i < log->l_iclog_bufs; i++) {
3338
 
                if (ptr >= (__psint_t)log->l_iclog_bak[i] &&
3339
 
                    ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size)
 
3408
        for (i = 0; i < log->l_iclog_bufs; i++) {
 
3409
                if (ptr >= log->l_iclog_bak[i] &&
 
3410
                    ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
3340
3411
                        good_ptr++;
3341
3412
        }
3342
 
        if (! good_ptr)
 
3413
 
 
3414
        if (!good_ptr)
3343
3415
                xlog_panic("xlog_verify_dest_ptr: invalid ptr");
3344
 
}       /* xlog_verify_dest_ptr */
 
3416
}
3345
3417
 
3346
3418
STATIC void
3347
 
xlog_verify_grant_head(xlog_t *log, int equals)
 
3419
xlog_verify_grant_tail(
 
3420
        struct log      *log)
3348
3421
{
3349
 
    if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) {
3350
 
        if (equals)
3351
 
            ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes);
3352
 
        else
3353
 
            ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes);
3354
 
    } else {
3355
 
        ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle);
3356
 
        ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes);
3357
 
    }
3358
 
}       /* xlog_verify_grant_head */
 
3422
        int             tail_cycle, tail_blocks;
 
3423
        int             cycle, space;
 
3424
 
 
3425
        /*
 
3426
         * Check to make sure the grant write head didn't just overlap the
 
3427
         * tail.  If the cycles are the same, we can't be overlapping.
 
3428
         * Otherwise, make sure that the cycles differ by exactly one and
 
3429
         * check the byte count.
 
3430
         */
 
3431
        xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
 
3432
        xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
 
3433
        if (tail_cycle != cycle) {
 
3434
                ASSERT(cycle - 1 == tail_cycle);
 
3435
                ASSERT(space <= BBTOB(tail_blocks));
 
3436
        }
 
3437
}
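The new xlog_verify_grant_tail() checks that a write head which has wrapped one cycle past the tail has not consumed more bytes than the tail's offset allows. A self-contained sketch of the same check, using hand-rolled crack helpers instead of the kernel's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BBSHIFT 9
#define BBTOB(bbs)  ((bbs) << BBSHIFT)          /* 512-byte basic blocks to bytes */

static void verify_grant_tail(uint64_t grant_head, uint64_t tail_lsn)
{
        int cycle       = (int)(grant_head >> 32);
        int space       = (int)(grant_head & 0xffffffff);      /* bytes */
        int tail_cycle  = (int)(tail_lsn >> 32);
        int tail_blocks = (int)(tail_lsn & 0xffffffff);        /* basic blocks */

        if (tail_cycle != cycle) {
                assert(cycle - 1 == tail_cycle);        /* at most one cycle ahead */
                assert(space <= BBTOB(tail_blocks));    /* and not past the tail */
        }
}

int main(void)
{
        /* head: cycle 3, 8192 bytes in; tail: cycle 2, block 100 (51200 bytes) */
        verify_grant_tail(((uint64_t)3 << 32) | 8192, ((uint64_t)2 << 32) | 100);
        printf("grant tail check passed\n");
        return 0;
}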
3359
3438
 
3360
3439
/* check if it will fit */
3361
3440
STATIC void
3527
3606
 *      c. nothing new gets queued up after (a) and (b) are done.
3528
3607
 *      d. if !logerror, flush the iclogs to disk, then seal them off
3529
3608
 *         for business.
 
3609
 *
 
3610
 * Note: for delayed logging the !logerror case needs to flush the regions
 
3611
 * held in memory out to the iclogs before flushing them to disk. This needs
 
3612
 * to be done before the log is marked as shutdown, otherwise the flush to the
 
3613
 * iclogs will fail.
3530
3614
 */
3531
3615
int
3532
3616
xfs_log_force_umount(
3536
3620
        xlog_ticket_t   *tic;
3537
3621
        xlog_t          *log;
3538
3622
        int             retval;
3539
 
        int             dummy;
3540
3623
 
3541
3624
        log = mp->m_log;
3542
3625
 
3561
3644
                return 1;
3562
3645
        }
3563
3646
        retval = 0;
3564
 
        /*
3565
 
         * We must hold both the GRANT lock and the LOG lock,
3566
 
         * before we mark the filesystem SHUTDOWN and wake
3567
 
         * everybody up to tell the bad news.
 
3647
 
 
3648
        /*
 
3649
         * Flush the in memory commit item list before marking the log as
 
3650
         * being shut down. We need to do it in this order to ensure all the
 
3651
         * completed transactions are flushed to disk with the xfs_log_force()
 
3652
         * call below.
 
3653
         */
 
3654
        if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
 
3655
                xlog_cil_force(log);
 
3656
 
 
3657
        /*
 
3658
         * mark the filesystem and the log as in a shutdown state and wake
 
3659
         * everybody up to tell them the bad news.
3568
3660
         */
3569
3661
        spin_lock(&log->l_icloglock);
3570
 
        spin_lock(&log->l_grant_lock);
3571
3662
        mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3572
3663
        if (mp->m_sb_bp)
3573
3664
                XFS_BUF_DONE(mp->m_sb_bp);
3588
3679
        spin_unlock(&log->l_icloglock);
3589
3680
 
3590
3681
        /*
3591
 
         * We don't want anybody waiting for log reservations
3592
 
         * after this. That means we have to wake up everybody
3593
 
         * queued up on reserve_headq as well as write_headq.
3594
 
         * In addition, we make sure in xlog_{re}grant_log_space
3595
 
         * that we don't enqueue anything once the SHUTDOWN flag
3596
 
         * is set, and this action is protected by the GRANTLOCK.
 
3682
         * We don't want anybody waiting for log reservations after this. That
 
3683
         * means we have to wake up everybody queued up on reserveq as well as
 
3684
         * writeq.  In addition, we make sure in xlog_{re}grant_log_space that
 
3685
         * we don't enqueue anything once the SHUTDOWN flag is set, and this
 
3686
         * action is protected by the grant locks.
3597
3687
         */
3598
 
        if ((tic = log->l_reserve_headq)) {
3599
 
                do {
3600
 
                        sv_signal(&tic->t_wait);
3601
 
                        tic = tic->t_next;
3602
 
                } while (tic != log->l_reserve_headq);
3603
 
        }
3604
 
 
3605
 
        if ((tic = log->l_write_headq)) {
3606
 
                do {
3607
 
                        sv_signal(&tic->t_wait);
3608
 
                        tic = tic->t_next;
3609
 
                } while (tic != log->l_write_headq);
3610
 
        }
3611
 
        spin_unlock(&log->l_grant_lock);
3612
 
 
3613
 
        if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
 
3688
        spin_lock(&log->l_grant_reserve_lock);
 
3689
        list_for_each_entry(tic, &log->l_reserveq, t_queue)
 
3690
                wake_up(&tic->t_wait);
 
3691
        spin_unlock(&log->l_grant_reserve_lock);
 
3692
 
 
3693
        spin_lock(&log->l_grant_write_lock);
 
3694
        list_for_each_entry(tic, &log->l_writeq, t_queue)
 
3695
                wake_up(&tic->t_wait);
 
3696
        spin_unlock(&log->l_grant_write_lock);
 
3697
 
 
3698
        if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3614
3699
                ASSERT(!logerror);
3615
3700
                /*
3616
3701
                 * Force the incore logs to disk before shutting the
3617
3702
                 * log down completely.
3618
3703
                 */
3619
 
                xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
 
3704
                _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
 
3705
 
3620
3706
                spin_lock(&log->l_icloglock);
3621
3707
                retval = xlog_state_ioerror(log);
3622
3708
                spin_unlock(&log->l_icloglock);