2
* Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
4
* This program is free software; you can redistribute it and/or modify it
5
* under the terms of version 2 of the GNU General Public License as
6
* published by the Free Software Foundation.
8
* This program is distributed in the hope that it would be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranty of
10
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
* Further, this software is distributed without any warranty that it is
13
* free of the rightful claim of any third person regarding infringement
14
* or the like. Any license provided herein, whether implied or
15
* otherwise, applies only to this software file. Patent licenses, if
16
* any, provided herein do not apply to combinations of this program with
17
* other software, or any other product whatsoever.
19
* You should have received a copy of the GNU General Public License along
20
* with this program; if not, write the Free Software Foundation, Inc., 59
21
* Temple Place - Suite 330, Boston MA 02111-1307, USA.
23
* Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24
* Mountain View, CA 94043, or:
28
* For further information regarding this notice, see:
30
* http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
36
* This routine finds (to an approximation) the first block in the physical
37
* log which contains the given cycle. It uses a binary search algorithm.
38
* Note that the algorithm can not be perfect because the disk will not
39
* necessarily be perfect.
42
xlog_find_cycle_start(xlog_t *log,
44
xfs_daddr_t first_blk,
45
xfs_daddr_t *last_blk,
52
mid_blk = BLK_AVG(first_blk, *last_blk);
53
while (mid_blk != first_blk && mid_blk != *last_blk) {
54
if ((error = xlog_bread(log, mid_blk, 1, bp)))
56
mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
57
if (mid_cycle == cycle) {
59
/* last_half_cycle == mid_cycle */
62
/* first_half_cycle == mid_cycle */
64
mid_blk = BLK_AVG(first_blk, *last_blk);
66
ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
67
(mid_blk == *last_blk && mid_blk-1 == first_blk));
70
} /* xlog_find_cycle_start */
74
* Check that the range of blocks does not contain the cycle number
75
* given. The scan needs to occur from front to back and the ptr into the
76
* region must be updated since a later routine will need to perform another
77
* test. If the region is completely good, we end up returning the same
80
* Return -1 if we encounter no errors. This is an invalid block number
81
* since we don't ever expect logs to get this large.
85
xlog_find_verify_cycle( xlog_t *log,
86
xfs_daddr_t start_blk,
88
uint stop_on_cycle_no)
95
xfs_daddr_t bufblks = nbblks;
97
while (!(bp = xlog_get_bp(bufblks, log->l_mp))) {
98
/* can't get enough memory to do everything in one big buffer */
105
for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
106
int bcount = min(bufblks, (start_blk + nbblks - i));
108
if ((error = xlog_bread(log, i, bcount, bp)))
111
buf = XFS_BUF_PTR(bp);
112
for (j = 0; j < bcount; j++) {
113
cycle = GET_CYCLE(buf, ARCH_CONVERT);
114
if (cycle == stop_on_cycle_no) {
129
} /* xlog_find_verify_cycle */
133
* Potentially backup over partial log record write.
135
* In the typical case, last_blk is the number of the block directly after
136
* a good log record. Therefore, we subtract one to get the block number
137
* of the last block in the given buffer. extra_bblks contains the number
138
* of blocks we would have read on a previous read. This happens when the
139
* last log record is split over the end of the physical log.
141
* extra_bblks is the number of blocks potentially verified on a previous
142
* call to this routine.
146
xlog_find_verify_log_record(xlog_t *log,
147
xfs_daddr_t start_blk,
148
xfs_daddr_t *last_blk,
154
xlog_rec_header_t *head = NULL;
157
int num_blks = *last_blk - start_blk;
159
ASSERT(start_blk != 0 || *last_blk != start_blk);
161
if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
162
if (!(bp = xlog_get_bp(1, log->l_mp)))
165
buf = XFS_BUF_PTR(bp);
167
if ((error = xlog_bread(log, start_blk, num_blks, bp)))
169
buf = XFS_BUF_PTR(bp) + (num_blks - 1) * BBSIZE;
173
for (i=(*last_blk)-1; i>=0; i--) {
175
/* legal log record not found */
176
xlog_warn("XFS: Log inconsistent (didn't find previous header)");
180
error = XFS_ERROR(EIO);
184
if (smallmem && (error = xlog_bread(log, i, 1, bp)))
186
head = (xlog_rec_header_t*)buf;
188
if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
196
* We hit the beginning of the physical log & still no header. Return
197
* to caller. If caller can handle a return of -1, then this routine
198
* will be called again for the end of the physical log.
205
/* we have the final block of the good log (the first block
206
* of the log record _before_ the head. So we check the uuid.
209
if ((error = xlog_header_check_mount(log->l_mp, head)))
213
* We may have found a log record header before we expected one.
214
* last_blk will be the 1st block # with a given cycle #. We may end
215
* up reading an entire log record. In this case, we don't want to
216
* reset last_blk. Only when last_blk points in the middle of a log
217
* record do we update last_blk.
219
if (*last_blk - i + extra_bblks
220
!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+1)
227
} /* xlog_find_verify_log_record */
230
* Head is defined to be the point of the log where the next log write
231
* write could go. This means that incomplete LR writes at the end are
232
* eliminated when calculating the head. We aren't guaranteed that previous
233
* LR have complete transactions. We only know that a cycle number of
234
* current cycle number -1 won't be present in the log if we start writing
235
* from our current block number.
237
* last_blk contains the block number of the first block with a given
240
* Also called from xfs_log_print.c
242
* Return: zero if normal, non-zero if error.
245
xlog_find_head(xlog_t *log,
246
xfs_daddr_t *return_head_blk)
249
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
251
uint first_half_cycle, last_half_cycle;
253
int error, log_bbnum = log->l_logBBsize;
255
/* Is the end of the log device zeroed? */
256
if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
257
*return_head_blk = first_blk;
259
/* is the whole lot zeroed? */
261
/* Linux XFS shouldn't generate totally zeroed logs -
262
* mkfs etc write a dummy unmount record to a fresh
263
* log so we can store the uuid in there
265
xlog_warn("XFS: totally zeroed log\n");
270
xlog_warn("XFS: empty log check failed");
274
first_blk = 0; /* get cycle # of 1st block */
275
bp = xlog_get_bp(1,log->l_mp);
278
if ((error = xlog_bread(log, 0, 1, bp)))
280
first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
282
last_blk = head_blk = log_bbnum-1; /* get cycle # of last block */
283
if ((error = xlog_bread(log, last_blk, 1, bp)))
285
last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
286
ASSERT(last_half_cycle != 0);
289
* If the 1st half cycle number is equal to the last half cycle number,
290
* then the entire log is stamped with the same cycle number. In this
291
* case, head_blk can't be set to zero (which makes sense). The below
292
* math doesn't work out properly with head_blk equal to zero. Instead,
293
* we set it to log_bbnum which is an illegal block number, but this
294
* value makes the math correct. If head_blk doesn't changed through
295
* all the tests below, *head_blk is set to zero at the very end rather
296
* than log_bbnum. In a sense, log_bbnum and zero are the same block
297
* in a circular file.
299
if (first_half_cycle == last_half_cycle) {
301
* In this case we believe that the entire log should have cycle
302
* number last_half_cycle. We need to scan backwards from the
303
* end verifying that there are no holes still containing
304
* last_half_cycle - 1. If we find such a hole, then the start
305
* of that hole will be the new head. The simple case looks like
306
* x | x ... | x - 1 | x
307
* Another case that fits this picture would be
308
* x | x + 1 | x ... | x
309
* In this case the head really is somwhere at the end of the
310
* log, as one of the latest writes at the beginning was incomplete.
312
* x | x + 1 | x ... | x - 1 | x
313
* This is really the combination of the above two cases, and the
314
* head has to end up at the start of the x-1 hole at the end of
317
* In the 256k log case, we will read from the beginning to the
318
* end of the log and search for cycle numbers equal to x-1. We
319
* don't worry about the x+1 blocks that we encounter, because
320
* we know that they cannot be the head since the log started with
323
head_blk = log_bbnum;
324
stop_on_cycle = last_half_cycle - 1;
327
* In this case we want to find the first block with cycle number
328
* matching last_half_cycle. We expect the log to be some
331
* The first block with cycle number x (last_half_cycle) will be
332
* where the new head belongs. First we do a binary search for
333
* the first occurrence of last_half_cycle. The binary search
334
* may not be totally accurate, so then we scan back from there
335
* looking for occurrences of last_half_cycle before us. If
336
* that backwards scan wraps around the beginning of the log,
337
* then we look for occurrences of last_half_cycle - 1 at the
338
* end of the log. The cases we're looking for look like
339
* x + 1 ... | x | x + 1 | x ...
340
* ^ binary search stopped here
342
* x + 1 ... | x ... | x - 1 | x
343
* <---------> less than scan distance
345
stop_on_cycle = last_half_cycle;
346
if ((error = xlog_find_cycle_start(log, bp, first_blk,
347
&head_blk, last_half_cycle)))
352
* Now validate the answer. Scan back some number of maximum possible
353
* blocks and make sure each one has the expected cycle number. The
354
* maximum is determined by the total possible amount of buffering
355
* in the in-core log. The following number can be made tighter if
356
* we actually look at the block size of the filesystem.
358
num_scan_bblks = BTOBB(XLOG_MAX_ICLOGS<<XLOG_MAX_RECORD_BSHIFT);
359
if (head_blk >= num_scan_bblks) {
361
* We are guaranteed that the entire check can be performed
364
start_blk = head_blk - num_scan_bblks;
365
new_blk = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
369
} else { /* need to read 2 parts of log */
371
* We are going to scan backwards in the log in two parts. First
372
* we scan the physical end of the log. In this part of the log,
373
* we are looking for blocks with cycle number last_half_cycle - 1.
374
* If we find one, then we know that the log starts there, as we've
375
* found a hole that didn't get written in going around the end
376
* of the physical log. The simple case for this is
377
* x + 1 ... | x ... | x - 1 | x
378
* <---------> less than scan distance
379
* If all of the blocks at the end of the log have cycle number
380
* last_half_cycle, then we check the blocks at the start of the
381
* log looking for occurrences of last_half_cycle. If we find one,
382
* then our current estimate for the location of the first
383
* occurrence of last_half_cycle is wrong and we move back to the
384
* hole we've found. This case looks like
385
* x + 1 ... | x | x + 1 | x ...
386
* ^ binary search stopped here
387
* Another case we need to handle that only occurs in 256k logs is
388
* x + 1 ... | x ... | x+1 | x ...
389
* ^ binary search stops here
390
* In a 256k log, the scan at the end of the log will see the x+1
391
* blocks. We need to skip past those since that is certainly not
392
* the head of the log. By searching for last_half_cycle-1 we
395
start_blk = log_bbnum - num_scan_bblks + head_blk;
396
ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
397
new_blk= xlog_find_verify_cycle(log, start_blk,
398
num_scan_bblks-(int)head_blk, (stop_on_cycle - 1));
405
* Scan beginning of log now. The last part of the physical log
406
* is good. This scan needs to verify that it doesn't find the
410
ASSERT(head_blk <= INT_MAX);
411
new_blk = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
419
* Now we need to make sure head_blk is not pointing to a block in
420
* the middle of a log record.
422
num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
423
if (head_blk >= num_scan_bblks) {
424
start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
426
/* start ptr at last block ptr before head_blk */
427
if ((error = xlog_find_verify_log_record(log,
431
error = XFS_ERROR(EIO);
437
ASSERT(head_blk <= INT_MAX);
438
if ((error = xlog_find_verify_log_record(log,
442
/* We hit the beginning of the log during our search */
443
start_blk = log_bbnum - num_scan_bblks + head_blk;
445
ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
446
ASSERT(head_blk <= INT_MAX);
447
if ((error = xlog_find_verify_log_record(log,
450
(int)head_blk)) == -1) {
451
error = XFS_ERROR(EIO);
455
if (new_blk != log_bbnum)
462
if (head_blk == log_bbnum)
463
*return_head_blk = 0;
465
*return_head_blk = head_blk;
467
* When returning here, we have a good block number. Bad block
468
* means that during a previous crash, we didn't have a clean break
469
* from cycle number N to cycle number N-1. In this case, we need
470
* to find the first block with cycle number N-1.
478
xlog_warn("XFS: failed to find log head");
481
} /* xlog_find_head */
484
* Find the sync block number or the tail of the log.
486
* This will be the block number of the last record to have its
487
* associated buffers synced to disk. Every log record header has
488
* a sync lsn embedded in it. LSNs hold block numbers, so it is easy
489
* to get a sync block number. The only concern is to figure out which
490
* log record header to believe.
492
* The following algorithm uses the log record header with the largest
493
* lsn. The entire log record does not need to be valid. We only care
494
* that the header is valid.
496
* We could speed up search by using current head_blk buffer, but it is not
500
xlog_find_tail(xlog_t *log,
501
xfs_daddr_t *head_blk,
502
xfs_daddr_t *tail_blk,
505
xlog_rec_header_t *rhead;
506
xlog_op_header_t *op_head;
509
xfs_daddr_t umount_data_blk;
510
xfs_daddr_t after_umount_blk;
516
* Find previous log record
518
if ((error = xlog_find_head(log, head_blk)))
521
bp = xlog_get_bp(1,log->l_mp);
524
if (*head_blk == 0) { /* special case */
525
if ((error = xlog_bread(log, 0, 1, bp)))
527
if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) {
529
/* leave all other log inited values alone */
535
* Search backwards looking for log record header block
537
ASSERT(*head_blk < INT_MAX);
538
for (i=(int)(*head_blk)-1; i>=0; i--) {
539
if ((error = xlog_bread(log, i, 1, bp)))
541
if (INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
547
* If we haven't found the log record header block, start looking
548
* again from the end of the physical log. XXXmiken: There should be
549
* a check here to make sure we didn't search more than N blocks in
553
for (i=log->l_logBBsize-1; i>=(int)(*head_blk); i--) {
554
if ((error = xlog_bread(log, i, 1, bp)))
556
if (INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
563
xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
565
return XFS_ERROR(EIO);
568
/* find blk_no of tail of log */
569
rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
570
*tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT);
573
* Reset log values according to the state of the log when we
574
* crashed. In the case where head_blk == 0, we bump curr_cycle
575
* one because the next write starts a new cycle rather than
576
* continuing the cycle of the last good log record. At this
577
* point we have guaranteed that all partial log records have been
578
* accounted for. Therefore, we know that the last good log record
579
* written was complete and ended exactly on the end boundary
580
* of the physical log.
582
log->l_prev_block = i;
583
log->l_curr_block = (int)*head_blk;
584
log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
587
log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
588
log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
589
log->l_grant_reserve_cycle = log->l_curr_cycle;
590
log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
591
log->l_grant_write_cycle = log->l_curr_cycle;
592
log->l_grant_write_bytes = BBTOB(log->l_curr_block);
595
* Look for unmount record. If we find it, then we know there
596
* was a clean unmount. Since 'i' could be the last block in
597
* the physical log, we convert to a log block before comparing
600
* Save the current tail lsn to use to pass to
601
* xlog_clear_stale_blocks() below. We won't want to clear the
602
* unmount record if there is one, so we pass the lsn of the
603
* unmount record rather than the block after it.
605
after_umount_blk = (i + 2) % log->l_logBBsize;
606
tail_lsn = log->l_tail_lsn;
607
if (*head_blk == after_umount_blk && INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
608
umount_data_blk = (i + 1) % log->l_logBBsize;
609
if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
612
op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp);
613
if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
615
* Set tail and last sync so that newly written
616
* log records will point recovery to after the
617
* current unmount record.
619
ASSIGN_ANY_LSN(log->l_tail_lsn, log->l_curr_cycle,
620
after_umount_blk, ARCH_NOCONVERT);
621
ASSIGN_ANY_LSN(log->l_last_sync_lsn, log->l_curr_cycle,
622
after_umount_blk, ARCH_NOCONVERT);
623
*tail_blk = after_umount_blk;
629
* Make sure that there are no blocks in front of the head
630
* with the same cycle number as the head. This can happen
631
* because we allow multiple outstanding log writes concurrently,
632
* and the later writes might make it out before earlier ones.
634
* We use the lsn from before modifying it so that we'll never
635
* overwrite the unmount record after a clean unmount.
637
* Do this only if we are going to recover the filesystem
640
error = xlog_clear_stale_blocks(log, tail_lsn);
648
xlog_warn("XFS: failed to locate log tail");
651
} /* xlog_find_tail */
655
* Is the log zeroed at all?
657
* The last binary search should be changed to perform an X block read
658
* once X becomes small enough. You can then search linearly through
659
* the X blocks. This will cut down on the number of reads we need to do.
661
* If the log is partially zeroed, this routine will pass back the blkno
662
* of the first block with cycle number 0. It won't have a complete LR
666
* 0 => the log is completely written to
667
* -1 => use *blk_no as the first block of the log
668
* >0 => error has occurred
671
xlog_find_zeroed(struct log *log,
675
uint first_cycle, last_cycle;
676
xfs_daddr_t new_blk, last_blk, start_blk;
677
xfs_daddr_t num_scan_bblks;
678
int error, log_bbnum = log->l_logBBsize;
681
/* check totally zeroed log */
682
bp = xlog_get_bp(1,log->l_mp);
685
if ((error = xlog_bread(log, 0, 1, bp)))
687
first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
688
if (first_cycle == 0) { /* completely zeroed log */
694
/* check partially zeroed log */
695
if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
697
last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
698
if (last_cycle != 0) { /* log completely written to */
701
} else if (first_cycle != 1) {
703
* If the cycle of the last block is zero, the cycle of
704
* the first block must be 1. If it's not, maybe we're
705
* not looking at a log... Bail out.
707
xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
708
return XFS_ERROR(EINVAL);
711
/* we have a partially zeroed log */
712
last_blk = log_bbnum-1;
713
if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
717
* Validate the answer. Because there is no way to guarantee that
718
* the entire log is made up of log records which are the same size,
719
* we scan over the defined maximum blocks. At this point, the maximum
720
* is not chosen to mean anything special. XXXmiken
722
num_scan_bblks = BTOBB(XLOG_MAX_ICLOGS<<XLOG_MAX_RECORD_BSHIFT);
723
ASSERT(num_scan_bblks <= INT_MAX);
725
if (last_blk < num_scan_bblks)
726
num_scan_bblks = last_blk;
727
start_blk = last_blk - num_scan_bblks;
730
* We search for any instances of cycle number 0 that occur before
731
* our current estimate of the head. What we're trying to detect is
732
* 1 ... | 0 | 1 | 0...
733
* ^ binary search ends here
735
new_blk = xlog_find_verify_cycle(log, start_blk,
736
(int)num_scan_bblks, 0);
741
* Potentially backup over partial log record write. We don't need
742
* to search the end of the log because we know it is zero.
744
if ((error = xlog_find_verify_log_record(log, start_blk,
754
} /* xlog_find_zeroed */
756
/* stuff for transactional view */
758
xlog_unpack_data(xlog_rec_header_t *rhead,
763
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
764
uint *up = (uint *)dp;
768
for (i=0; i<BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
769
INT_SET(*(uint *)dp, ARCH_CONVERT, INT_GET(*(uint *)&rhead->h_cycle_data[i], ARCH_CONVERT));
772
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
773
/* divide length by 4 to get # words */
774
for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
775
chksum ^= INT_GET(*up, ARCH_CONVERT);
778
if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
779
if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) ||
780
((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
782
"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
783
INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
785
"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
786
log->l_flags |= XLOG_CHKSUM_MISMATCH;
789
#endif /* DEBUG && XFS_LOUD_RECOVERY */
790
} /* xlog_unpack_data */
793
STATIC xlog_recover_t *
794
xlog_recover_find_tid(xlog_recover_t *q,
797
xlog_recover_t *p = q;
800
if (p->r_log_tid == tid)
805
} /* xlog_recover_find_tid */
808
xlog_recover_put_hashq(xlog_recover_t **q,
809
xlog_recover_t *trans)
813
} /* xlog_recover_put_hashq */
816
xlog_recover_new_tid(xlog_recover_t **q,
820
xlog_recover_t *trans;
822
trans = kmem_zalloc(sizeof(xlog_recover_t), 0);
823
trans->r_log_tid = tid;
825
xlog_recover_put_hashq(q, trans);
826
} /* xlog_recover_new_tid */
830
xlog_recover_unlink_tid(xlog_recover_t **q,
831
xlog_recover_t *trans)
842
if (tp->r_next == trans) {
850
"XFS: xlog_recover_unlink_tid: trans not found");
852
return XFS_ERROR(EIO);
854
tp->r_next = tp->r_next->r_next;
857
} /* xlog_recover_unlink_tid */
860
* Free up any resources allocated by the transaction
862
* Remember that EFIs, EFDs, and IUNLINKs are handled later.
865
xlog_recover_free_trans(xlog_recover_t *trans)
867
xlog_recover_item_t *first_item, *item, *free_item;
870
item = first_item = trans->r_itemq;
873
item = item->ri_next;
874
/* Free the regions in the item. */
875
for (i = 0; i < free_item->ri_cnt; i++) {
876
kmem_free(free_item->ri_buf[i].i_addr,
877
free_item->ri_buf[i].i_len);
879
/* Free the item itself */
880
kmem_free(free_item->ri_buf,
881
(free_item->ri_total * sizeof(xfs_log_iovec_t)));
882
kmem_free(free_item, sizeof(xlog_recover_item_t));
883
} while (first_item != item);
884
/* Free the transaction recover structure */
885
kmem_free(trans, sizeof(xlog_recover_t));
886
} /* xlog_recover_free_trans */
890
xlog_recover_commit_trans(xlog_t *log,
892
xlog_recover_t *trans,
897
if ((error = xlog_recover_unlink_tid(q, trans)))
899
if ((error = xlog_recover_do_trans(log, trans, pass)))
901
xlog_recover_free_trans(trans); /* no error */
903
} /* xlog_recover_commit_trans */
906
xlog_recover_insert_item_backq(xlog_recover_item_t **q,
907
xlog_recover_item_t *item)
910
item->ri_prev = item->ri_next = item;
914
item->ri_prev = (*q)->ri_prev;
915
(*q)->ri_prev = item;
916
item->ri_prev->ri_next = item;
918
} /* xlog_recover_insert_item_backq */
921
xlog_recover_add_item(xlog_recover_item_t **itemq)
923
xlog_recover_item_t *item;
925
item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
926
xlog_recover_insert_item_backq(itemq, item);
927
} /* xlog_recover_add_item */
929
/* The next region to add is the start of a new region. It could be
930
* a whole region or it could be the first part of a new region. Because
931
* of this, the assumption here is that the type and size fields of all
932
* format structures fit into the first 32 bits of the structure.
934
* This works because all regions must be 32 bit aligned. Therefore, we
935
* either have both fields or we have neither field. In the case we have
936
* neither field, the data part of the region is zero length. We only have
937
* a log_op_header and can throw away the header since a new one will appear
938
* later. If we have at least 4 bytes, then we can determine how many regions
939
* will appear in the current log item.
942
xlog_recover_add_to_trans(xlog_recover_t *trans,
946
xfs_inode_log_format_t *in_f; /* any will do */
947
xlog_recover_item_t *item;
952
ptr = kmem_zalloc(len, 0);
955
in_f = (xfs_inode_log_format_t *)ptr;
956
item = trans->r_itemq;
958
ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
959
if (len == sizeof(xfs_trans_header_t))
960
xlog_recover_add_item(&trans->r_itemq);
961
bcopy(dp, &trans->r_theader, len); /* s, d, l */
964
if (item->ri_prev->ri_total != 0 &&
965
item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
966
xlog_recover_add_item(&trans->r_itemq);
968
item = trans->r_itemq;
969
item = item->ri_prev;
971
if (item->ri_total == 0) { /* first region to be added */
972
item->ri_total = in_f->ilf_size;
973
ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
974
item->ri_buf = kmem_zalloc((item->ri_total *
975
sizeof(xfs_log_iovec_t)), 0);
977
ASSERT(item->ri_total > item->ri_cnt);
978
/* Description region is ri_buf[0] */
979
item->ri_buf[item->ri_cnt].i_addr = ptr;
980
item->ri_buf[item->ri_cnt].i_len = len;
983
} /* xlog_recover_add_to_trans */
986
xlog_recover_add_to_cont_trans(xlog_recover_t *trans,
990
xlog_recover_item_t *item;
991
xfs_caddr_t ptr, old_ptr;
994
item = trans->r_itemq;
996
/* finish copying rest of trans header */
997
xlog_recover_add_item(&trans->r_itemq);
998
ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
999
bcopy(dp, ptr, len); /* s, d, l */
1002
item = item->ri_prev;
1004
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1005
old_len = item->ri_buf[item->ri_cnt-1].i_len;
1007
ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
1008
bcopy(dp , &ptr[old_len], len); /* s, d, l */
1009
item->ri_buf[item->ri_cnt-1].i_len += len;
1010
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1012
} /* xlog_recover_add_to_cont_trans */
1015
xlog_recover_unmount_trans(xlog_recover_t *trans)
1017
/* Do nothing now */
1018
xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
1020
} /* xlog_recover_unmount_trans */
1024
xlog_recover_process_data(xlog_t *log,
1025
xlog_recover_t *rhash[],
1026
xlog_rec_header_t *rhead,
1030
xfs_caddr_t lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT);
1031
int num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
1032
xlog_op_header_t *ohead;
1033
xlog_recover_t *trans;
1039
/* check the log format matches our own - else we can't recover */
1040
if (xlog_header_check_recover(log->l_mp, rhead))
1041
return (XFS_ERROR(EIO));
1044
ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
1045
ohead = (xlog_op_header_t *)dp;
1046
dp += sizeof(xlog_op_header_t);
1047
if (ohead->oh_clientid != XFS_TRANSACTION &&
1048
ohead->oh_clientid != XFS_LOG) {
1049
xlog_warn("XFS: xlog_recover_process_data: bad clientid");
1051
return (XFS_ERROR(EIO));
1053
tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
1054
hash = XLOG_RHASH(tid);
1055
trans = xlog_recover_find_tid(rhash[hash], tid);
1056
if (trans == NULL) { /* not found; add new tid */
1057
if (ohead->oh_flags & XLOG_START_TRANS)
1058
xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT));
1060
ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
1061
flags = ohead->oh_flags & ~XLOG_END_TRANS;
1062
if (flags & XLOG_WAS_CONT_TRANS)
1063
flags &= ~XLOG_CONTINUE_TRANS;
1065
case XLOG_COMMIT_TRANS: {
1066
error = xlog_recover_commit_trans(log, &rhash[hash],
1070
case XLOG_UNMOUNT_TRANS: {
1071
error = xlog_recover_unmount_trans(trans);
1074
case XLOG_WAS_CONT_TRANS: {
1075
error = xlog_recover_add_to_cont_trans(trans, dp,
1076
INT_GET(ohead->oh_len, ARCH_CONVERT));
1079
case XLOG_START_TRANS : {
1080
xlog_warn("XFS: xlog_recover_process_data: bad transaction");
1082
error = XFS_ERROR(EIO);
1086
case XLOG_CONTINUE_TRANS: {
1087
error = xlog_recover_add_to_trans(trans, dp,
1088
INT_GET(ohead->oh_len, ARCH_CONVERT));
1092
xlog_warn("XFS: xlog_recover_process_data: bad flag");
1094
error = XFS_ERROR(EIO);
1101
dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
1105
} /* xlog_recover_process_data */
1108
* Read the log from tail to head and process the log records found.
1109
* Handle the two cases where the tail and head are in the same cycle
1110
* and where the active portion of the log wraps around the end of
1111
* the physical log separately. The pass parameter is passed through
1112
* to the routines called to process the data and is not looked at
1116
xlog_do_recovery_pass(xlog_t *log,
1117
xfs_daddr_t head_blk,
1118
xfs_daddr_t tail_blk,
1121
xlog_rec_header_t *rhead;
1123
xfs_caddr_t bufaddr;
1124
xfs_buf_t *hbp, *dbp;
1126
int bblks, split_bblks;
1127
xlog_recover_t *rhash[XLOG_RHASH_SIZE];
1130
hbp = xlog_get_bp(1,log->l_mp);
1133
dbp = xlog_get_bp(BTOBB(XLOG_MAX_RECORD_BSIZE),log->l_mp);
1138
bzero(rhash, sizeof(rhash));
1139
if (tail_blk <= head_blk) {
1140
for (blk_no = tail_blk; blk_no < head_blk; ) {
1141
if ((error = xlog_bread(log, blk_no, 1, hbp)))
1143
rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1144
ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1145
ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1146
bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */
1148
if ((error = xlog_bread(log, blk_no+1, bblks, dbp)))
1150
xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1151
if ((error = xlog_recover_process_data(log, rhash,
1152
rhead, XFS_BUF_PTR(dbp),
1156
blk_no += (bblks+1);
1160
* Perform recovery around the end of the physical log. When the head
1161
* is not on the same cycle number as the tail, we can't do a sequential
1162
* recovery as above.
1165
while (blk_no < log->l_logBBsize) {
1167
/* Read header of one block */
1168
if ((error = xlog_bread(log, blk_no, 1, hbp)))
1170
rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1171
ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1172
ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1173
bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1175
/* LR body must have data or it wouldn't have been written */
1177
blk_no++; /* successfully read header */
1178
ASSERT(blk_no <= log->l_logBBsize);
1180
if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
1181
(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
1183
(blk_no > log->l_logBBsize)) {
1184
error = EFSCORRUPTED;
1188
/* Read in data for log record */
1189
if (blk_no+bblks <= log->l_logBBsize) {
1190
if ((error = xlog_bread(log, blk_no, bblks, dbp)))
1193
/* This log record is split across physical end of log */
1195
if (blk_no != log->l_logBBsize) {
1197
/* some data is before physical end of log */
1198
ASSERT(blk_no <= INT_MAX);
1199
split_bblks = log->l_logBBsize - (int)blk_no;
1200
ASSERT(split_bblks > 0);
1201
if ((error = xlog_bread(log, blk_no, split_bblks, dbp)))
1204
bufaddr = XFS_BUF_PTR(dbp);
1205
XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks),
1206
BBTOB(bblks - split_bblks));
1207
if ((error = xlog_bread(log, 0, bblks - split_bblks, dbp)))
1209
XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_MAX_RECORD_BSIZE);
1211
xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1212
if ((error = xlog_recover_process_data(log, rhash,
1213
rhead, XFS_BUF_PTR(dbp),
1219
ASSERT(blk_no >= log->l_logBBsize);
1220
blk_no -= log->l_logBBsize;
1222
/* read first part of physical log */
1223
while (blk_no < head_blk) {
1224
if ((error = xlog_bread(log, blk_no, 1, hbp)))
1226
rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1227
ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1228
ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1229
bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1231
if ((error = xlog_bread(log, blk_no+1, bblks, dbp)))
1233
xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1234
if ((error = xlog_recover_process_data(log, rhash,
1235
rhead, XFS_BUF_PTR(dbp),
1238
blk_no += (bblks+1);