302
319
memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
322
* We had to recover the master node, which means there was an
323
* unclean reboot. However, it is possible that the master node
324
* is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
325
* E.g., consider the following chain of events:
327
* 1. UBIFS was cleanly unmounted, so the master node is clean
328
* 2. UBIFS is being mounted R/W and starts changing the master
329
* node in the first LEB (%UBIFS_MST_LNUM). A power cut happens,
330
* so this LEB ends up with some amount of garbage at the
332
* 3. UBIFS is being mounted R/O. We reach this place and
333
* recover the master node from the second LEB
334
* (%UBIFS_MST_LNUM + 1). But we cannot update the media
335
* because we are being mounted R/O. We have to defer the
337
* 4. However, this master node (@c->mst_node) is marked as
338
* clean (since the step 1). And if we just return, the
339
* mount code will be confused and won't recover the master
340
* node when it is re-mounted R/W later.
342
* Thus, we force the recovery by marking the master node as dirty.
345
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
304
347
/* Write the recovered master node */
305
348
c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
563
632
* found, and a negative error code in case of failure.
565
634
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
566
int offs, void *sbuf, int grouped)
635
int offs, void *sbuf, int jhead)
568
int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
569
int empty_chkd = 0, start = offs;
637
int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
638
int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
570
639
struct ubifs_scan_leb *sleb;
571
640
void *buf = sbuf + offs;
573
dbg_rcvry("%d:%d", lnum, offs);
642
dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
575
644
sleb = ubifs_start_scan(c, lnum, offs, sbuf);
576
645
if (IS_ERR(sleb))
648
ubifs_assert(len >= 8);
582
649
while (len >= 8) {
585
650
dbg_scan("look at LEB %d:%d (%d bytes left)",
586
651
lnum, offs, len);
605
669
offs += node_len;
672
} else if (ret > 0) {
612
673
/* Padding bytes or a valid padding node */
619
if (ret == SCANNED_EMPTY_SPACE) {
620
if (!is_empty(buf, len)) {
621
if (!is_last_write(c, buf, offs))
623
clean_buf(c, &buf, lnum, &offs, &len);
677
} else if (ret == SCANNED_EMPTY_SPACE ||
678
ret == SCANNED_GARBAGE ||
679
ret == SCANNED_A_BAD_PAD_NODE ||
680
ret == SCANNED_A_CORRUPT_NODE) {
681
dbg_rcvry("found corruption - %d", ret);
630
if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
631
if (is_last_write(c, buf, offs)) {
632
clean_buf(c, &buf, lnum, &offs, &len);
638
if (ret == SCANNED_A_CORRUPT_NODE)
639
if (no_more_nodes(c, buf, len, lnum, offs)) {
640
clean_buf(c, &buf, lnum, &offs, &len);
647
/* Redo the last scan but noisily */
653
case SCANNED_GARBAGE:
656
case SCANNED_A_CORRUPT_NODE:
657
case SCANNED_A_BAD_PAD_NODE:
684
dbg_err("unexpected return value %d", ret);
667
if (!empty_chkd && !is_empty(buf, len)) {
668
if (is_last_write(c, buf, offs)) {
669
clean_buf(c, &buf, lnum, &offs, &len);
690
if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
691
if (!is_last_write(c, buf, offs))
692
goto corrupted_rescan;
693
} else if (ret == SCANNED_A_CORRUPT_NODE) {
694
if (!no_more_nodes(c, buf, len, lnum, offs))
695
goto corrupted_rescan;
696
} else if (!is_empty(buf, len)) {
697
if (!is_last_write(c, buf, offs)) {
672
698
int corruption = first_non_ff(buf, len);
701
* See header comment for this file for more
702
* explanations about the reasons we have this check.
674
704
ubifs_err("corrupt empty space LEB %d:%d, corruption "
675
705
"starts at %d", lnum, offs, corruption);
676
706
/* Make sure we dump interesting non-0xFF data */
678
708
buf += corruption;
683
/* Drop nodes from incomplete group */
684
if (grouped && drop_incomplete_group(sleb, &offs)) {
686
len = c->leb_size - offs;
687
clean_buf(c, &buf, lnum, &offs, &len);
691
if (offs % c->min_io_size) {
692
clean_buf(c, &buf, lnum, &offs, &len);
713
min_io_unit = round_down(offs, c->min_io_size);
716
* If nodes are grouped, always drop the incomplete group at
719
drop_last_group(sleb, &offs);
723
* If this LEB belongs to the GC head then while we are in the
724
* middle of the same min. I/O unit keep dropping nodes. So
725
* basically, what we want is to make sure that the last min.
726
* I/O unit where we saw the corruption is dropped completely
727
* with all the uncorrupted nodes which may possibly sit there.
729
* In other words, let's name the min. I/O unit where the
730
* corruption starts B, and the previous min. I/O unit A. The
731
* below code tries to deal with a situation when half of B
732
* contains valid nodes or the end of a valid node, and the
733
* second half of B contains corrupted data or garbage. This
734
* means that UBIFS had been writing to B just before the power
735
* cut happened. I do not know how realistic is this scenario
736
* that half of the min. I/O unit had been written successfully
737
* and the other half not, but this is possible in our 'failure
738
* mode emulation' infrastructure at least.
740
* So what is the problem, why do we need to drop those nodes? Why
741
* can't we just clean-up the second half of B by putting a
742
* padding node there? We can, and this works fine with one
743
* exception which was reproduced with power cut emulation
744
* testing and happens extremely rarely.
746
* Imagine the file-system is full, we run GC which starts
747
* moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
748
* the current GC head LEB). The @c->gc_lnum is -1, which means
749
* that GC will retain LEB X and will try to continue. Imagine
750
* that LEB X is currently the dirtiest LEB, and the amount of
751
* used space in LEB Y is exactly the same as the amount of free
754
* And a power cut happens when nodes are moved from LEB X to
755
* LEB Y. We are here trying to recover LEB Y which is the GC
756
* head LEB. We find the min. I/O unit B as described above.
757
* Then we clean-up LEB Y by padding min. I/O unit. And later
758
* 'ubifs_rcvry_gc_commit()' function fails, because it cannot
759
* find a dirty LEB which could be GC'd into LEB Y! Even LEB X
760
* does not match because the amount of valid nodes there does
761
* not fit the free space in LEB Y any more! And this is
762
* because of the padding node which we added to LEB Y. The
763
* user-visible effect of this which I once observed and
764
* analysed is that we cannot mount the file-system with
767
* So obviously, to make sure that situation does not happen we
768
* should free min. I/O unit B in LEB Y completely and the last
769
* used min. I/O unit in LEB Y should be A. This is basically
770
* what the below code tries to do.
772
while (offs > min_io_unit)
773
drop_last_node(sleb, &offs);
777
len = c->leb_size - offs;
779
clean_buf(c, &buf, lnum, &offs, &len);
696
780
ubifs_end_scan(c, sleb, lnum, offs);
699
err = fix_unclean_leb(c, sleb, start);
782
err = fix_unclean_leb(c, sleb, start);
789
/* Re-scan the corrupted data with verbose messages */
790
dbg_err("corruptio %d", ret);
791
ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
707
793
ubifs_scanned_corruption(c, lnum, offs, buf);
1111
* grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1112
* @c: UBIFS file-system description object
1114
* This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1115
* LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1116
* zero in case of success and a negative error code in case of failure.
1118
static int grab_empty_leb(struct ubifs_info *c)
1123
* Note, it is very important to first search for an empty LEB and then
1124
* run the commit, not vice-versa. The reason is that there might be
1125
* only one empty LEB at the moment, the one which has been the
1126
* @c->gc_lnum just before the power cut happened. During the regular
1127
* UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1128
* one but GC can grab it. But at this moment this single empty LEB is
1129
* not marked as taken, so if we run commit - what happens? Right, the
1130
* commit will grab it and write the index there. Remember that the
1131
* index always expands as long as there is free space, and it only
1132
* starts consolidating when we run out of space.
1134
* IOW, if we run commit now, we might not be able to find a free LEB
1137
lnum = ubifs_find_free_leb_for_idx(c);
1139
dbg_err("could not find an empty LEB");
1141
dbg_dump_budg(c, &c->bi);
1145
/* Reset the index flag */
1146
err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1152
dbg_rcvry("found empty LEB %d, run commit", lnum);
1154
return ubifs_run_commit(c);
1029
1158
* ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1030
1159
* @c: UBIFS file-system description object
1048
1177
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1049
1178
struct ubifs_lprops lp;
1181
dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1052
1183
c->gc_lnum = -1;
1053
if (wbuf->lnum == -1) {
1054
dbg_rcvry("no GC head LEB");
1058
* See whether the used space in the dirtiest LEB fits in the GC head
1061
if (wbuf->offs == c->leb_size) {
1062
dbg_rcvry("no room in GC head LEB");
1184
if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1185
return grab_empty_leb(c);
1065
1187
err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1068
* There are no dirty or empty LEBs subject to there being
1069
* enough for the index. Try to use
1070
* 'ubifs_find_free_leb_for_idx()', which will return any empty
1071
* LEBs (ignoring index requirements). If the index then
1072
* doesn't have enough LEBs the recovery commit will fail -
1073
* which is the same result anyway i.e. recovery fails. So
1074
* there is no problem ignoring index requirements and just
1075
* grabbing a free LEB since we have already established there
1076
* is not a dirty LEB we could have used instead.
1078
if (err == -ENOSPC) {
1079
dbg_rcvry("could not find a dirty LEB");
1192
dbg_rcvry("could not find a dirty LEB");
1193
return grab_empty_leb(c);
1084
1196
ubifs_assert(!(lp.flags & LPROPS_INDEX));
1086
if (lp.free + lp.dirty == c->leb_size) {
1087
/* An empty LEB was returned */
1088
if (lp.free != c->leb_size) {
1089
err = ubifs_change_one_lp(c, lnum, c->leb_size,
1094
err = ubifs_leb_unmap(c, lnum);
1098
dbg_rcvry("allocated LEB %d for GC", lnum);
1099
/* Run the commit */
1100
dbg_rcvry("committing");
1101
return ubifs_run_commit(c);
1104
* There was no empty LEB so the used space in the dirtiest LEB must fit
1105
* in the GC head LEB.
1107
if (lp.free + lp.dirty < wbuf->offs) {
1108
dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1109
lnum, wbuf->lnum, wbuf->offs);
1110
err = ubifs_return_leb(c, lnum);
1197
ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1116
1200
* We run the commit before garbage collection otherwise subsequent
1117
1201
* mounts will see the GC and orphan deletion in a different order.