		if (ch->group_type != UBIFS_IN_NODE_GROUP)
			break;

		dbg_rcvry("dropping grouped node at %d:%d",
			  sleb->lnum, snod->offs);
		*offs = snod->offs;
		list_del(&snod->list);
		kfree(snod);
		sleb->nodes_cnt -= 1;
	}
}
/**
 * drop_last_node - drop the last node.
 * @sleb: scanned LEB information
 * @offs: offset of the dropped node is returned here
 *
 * This is a helper function for 'ubifs_recover_leb()' which drops the last
 * node of the scanned LEB.
 */
static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
{
	struct ubifs_scan_node *snod;

	if (!list_empty(&sleb->nodes)) {
		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
				  list);

		dbg_rcvry("dropping last node at %d:%d",
			  sleb->lnum, snod->offs);
		*offs = snod->offs;
		list_del(&snod->list);
		kfree(snod);
		sleb->nodes_cnt -= 1;
	}
}
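/*
 * Note: 'drop_last_group()' above removes nodes from the tail of the scanned
 * node list only while they are members of a node group
 * (%UBIFS_IN_NODE_GROUP), whereas 'drop_last_node()' unconditionally removes
 * a single tail node. Both report the offset of the dropped node through
 * @offs so the caller can re-base its buffer and length calculations.
 */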
/**
 * ubifs_recover_leb - scan and recover a LEB.
 * @c: UBIFS file-system description object
 * @lnum: LEB number
 * @offs: offset
 * @sbuf: LEB-sized buffer to use
 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
 *         belong to any journal head)
 *
 * This function does a scan of a LEB, but caters for errors that might have
 * been caused by the unclean unmount from which we are attempting to recover.
 * Returns the scanned information on success, %-EUCLEAN if an unrecoverable
 * corruption is found, and a negative error code in case of failure.
 */
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
					 int offs, void *sbuf, int jhead)
{
	int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
	int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
	struct ubifs_scan_leb *sleb;
	void *buf = sbuf + offs;
	dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);

	sleb = ubifs_start_scan(c, lnum, offs, sbuf);
	if (IS_ERR(sleb))
		return sleb;

	ubifs_assert(len >= 8);
	while (len >= 8) {
		dbg_scan("look at LEB %d:%d (%d bytes left)",
			 lnum, offs, len);

		cond_resched();

		/*
		 * Scan quietly until there is an error from which we cannot
		 * recover
		 */
		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
		if (ret == SCANNED_A_NODE) {
			/* A valid node, and not a padding node */
			struct ubifs_ch *ch = buf;
			int node_len;

			err = ubifs_add_snod(c, sleb, buf, offs);
			if (err)
				goto error;
			node_len = ALIGN(le32_to_cpu(ch->len), 8);
			offs += node_len;
			buf += node_len;
			len -= node_len;
		} else if (ret > 0) {
			/* Padding bytes or a valid padding node */
			offs += ret;
			buf += ret;
			len -= ret;
if (ret == SCANNED_EMPTY_SPACE) {
664
if (!is_empty(buf, len)) {
665
if (!is_last_write(c, buf, offs))
667
clean_buf(c, &buf, lnum, &offs, &len);
677
} else if (ret == SCANNED_EMPTY_SPACE ||
678
ret == SCANNED_GARBAGE ||
679
ret == SCANNED_A_BAD_PAD_NODE ||
680
ret == SCANNED_A_CORRUPT_NODE) {
681
dbg_rcvry("found corruption - %d", ret);
674
if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
675
if (is_last_write(c, buf, offs)) {
676
clean_buf(c, &buf, lnum, &offs, &len);
682
if (ret == SCANNED_A_CORRUPT_NODE)
683
if (no_more_nodes(c, buf, len, lnum, offs)) {
684
clean_buf(c, &buf, lnum, &offs, &len);
691
/* Redo the last scan but noisily */
697
case SCANNED_GARBAGE:
700
case SCANNED_A_CORRUPT_NODE:
701
case SCANNED_A_BAD_PAD_NODE:
684
dbg_err("unexpected return value %d", ret);

	if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
		if (!is_last_write(c, buf, offs))
			goto corrupted_rescan;
	} else if (ret == SCANNED_A_CORRUPT_NODE) {
		if (!no_more_nodes(c, buf, len, lnum, offs))
			goto corrupted_rescan;
	} else if (!is_empty(buf, len)) {
		if (!is_last_write(c, buf, offs)) {
			int corruption = first_non_ff(buf, len);

			/*
			 * See header comment for this file for more
			 * explanations about the reasons we have this check.
			 */
			ubifs_err("corrupt empty space LEB %d:%d, corruption "
				  "starts at %d", lnum, offs, corruption);
			/* Make sure we dump interesting non-0xFF data */
			offs += corruption;
			buf += corruption;
			goto corrupted;
		}
	}

	min_io_unit = round_down(offs, c->min_io_size);
	if (grouped)
		/*
		 * If nodes are grouped, always drop the incomplete group at
		 * the end.
		 */
		drop_last_group(sleb, &offs);

	if (jhead == GCHD) {
		/*
		 * If this LEB belongs to the GC head then while we are in the
		 * middle of the same min. I/O unit keep dropping nodes. So
		 * basically, what we want is to make sure that the last min.
		 * I/O unit where we saw the corruption is dropped completely
		 * with all the uncorrupted nodes which may possibly sit there.
		 *
		 * In other words, let's name the min. I/O unit where the
		 * corruption starts B, and the previous min. I/O unit A. The
		 * below code tries to deal with a situation when half of B
		 * contains valid nodes or the end of a valid node, and the
		 * second half of B contains corrupted data or garbage. This
		 * means that UBIFS had been writing to B just before the
		 * power cut happened. I do not know how realistic this
		 * scenario is - that half of the min. I/O unit had been
		 * written successfully and the other half not - but it is
		 * possible in our 'failure mode emulation' infrastructure at
		 * least.
		 *
		 * So what is the problem, and why do we need to drop those
		 * nodes? Why can't we just clean up the second half of B by
		 * putting a padding node there? We can, and this works fine
		 * with one exception which was reproduced with power cut
		 * emulation testing and happens extremely rarely.
		 *
		 * Imagine the file-system is full, we run GC which starts
		 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
		 * the current GC head LEB). The @c->gc_lnum is -1, which
		 * means that GC will retain LEB X and will try to continue.
		 * Imagine that LEB X is currently the dirtiest LEB, and the
		 * amount of used space in LEB Y is exactly the same as the
		 * amount of free space in LEB X.
		 *
		 * And a power cut happens when nodes are moved from LEB X to
		 * LEB Y. We are here trying to recover LEB Y which is the GC
		 * head LEB. We find the min. I/O unit B as described above.
		 * Then we clean up LEB Y by padding min. I/O unit B. And
		 * later the 'ubifs_rcvry_gc_commit()' function fails, because
		 * it cannot find a dirty LEB which could be GC'd into LEB Y!
		 * Even LEB X does not match, because the amount of valid
		 * nodes there does not fit the free space in LEB Y any more!
		 * And this is because of the padding node which we added to
		 * LEB Y. The user-visible effect of this, which I once
		 * observed and analysed, is that we cannot mount the
		 * file-system with an -ENOSPC error.
		 *
		 * So obviously, to make sure that situation does not happen
		 * we should free min. I/O unit B in LEB Y completely and the
		 * last used min. I/O unit in LEB Y should be A. This is
		 * basically what the below code tries to do.
		 */
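		/*
		 * Worked example (hypothetical numbers): assume
		 * @c->min_io_size is 2048 (the min. I/O size is a power of 2,
		 * so round_down() is just masking off the low bits) and the
		 * scan stopped at offs == 5120. Then min_io_unit ==
		 * round_down(5120, 2048) == 4096, and the loop below keeps
		 * calling 'drop_last_node()' until offs <= 4096, i.e. until
		 * min. I/O unit B ([4096, 6144)) contains no nodes at all.
		 */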
		while (offs > min_io_unit)
			drop_last_node(sleb, &offs);
	}

	buf = sbuf + offs;
	len = c->leb_size - offs;

	clean_buf(c, &buf, lnum, &offs, &len);
	ubifs_end_scan(c, sleb, lnum, offs);

	err = fix_unclean_leb(c, sleb, start);
	if (err)
		goto error;

	return sleb;

corrupted_rescan:
	/* Re-scan the corrupted data with verbose messages */
	dbg_err("corruption %d", ret);
	ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
corrupted:
	ubifs_scanned_corruption(c, lnum, offs, buf);
	err = -EUCLEAN;
error:
	ubifs_err("LEB %d scanning failed", lnum);
	ubifs_scan_destroy(sleb);
	return ERR_PTR(err);
}
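/*
 * A note on callers (illustrative, inferred from the 'grouped' computation
 * above): journal replay passes the bud's journal head number in @jhead, so
 * grouped-node handling follows that head's 'grouped' setting, while callers
 * recovering LEBs which belong to no journal head pass %-1, which makes the
 * function treat all nodes as ungrouped.
 */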
/**
 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
 * @c: UBIFS file-system description object
 *
 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
 * zero in case of success and a negative error code in case of failure.
 */
static int grab_empty_leb(struct ubifs_info *c)
{
	int lnum, err;

	/*
	 * Note, it is very important to first search for an empty LEB and
	 * then run the commit, not vice-versa. The reason is that there might
	 * be only one empty LEB at the moment, the one which has been the
	 * @c->gc_lnum just before the power cut happened. During the regular
	 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
	 * one but GC can grab it. But at this moment this single empty LEB is
	 * not marked as taken, so if we run commit - what happens? Right, the
	 * commit will grab it and write the index there. Remember that the
	 * index always expands as long as there is free space, and it only
	 * starts consolidating when we run out of space.
	 *
	 * IOW, if we run commit now, we might not be able to find a free LEB
	 * after this.
	 */
	lnum = ubifs_find_free_leb_for_idx(c);
	if (lnum < 0) {
		dbg_err("could not find an empty LEB");
		dbg_dump_budg(c, &c->bi);
		return lnum;
	}

	/* Reset the index flag */
	err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
				  LPROPS_INDEX, 0);
	if (err)
		return err;

	c->gc_lnum = lnum;
	dbg_rcvry("found empty LEB %d, run commit", lnum);

	return ubifs_run_commit(c);
}
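/*
 * Design note (informal): 'ubifs_find_free_leb_for_idx()' hands out the LEB
 * with the LPROPS_INDEX flag set, because its normal job is to provide LEBs
 * for the index. Here the LEB is grabbed for GC instead, which is why the
 * 'ubifs_change_one_lp()' call above clears LPROPS_INDEX again before the
 * commit runs.
 */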
/**
 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
 * @c: UBIFS file-system description object
 */
int ubifs_rcvry_gc_commit(struct ubifs_info *c)
{
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
	struct ubifs_lprops lp;
	int err;

	dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);

	c->gc_lnum = -1;
	if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
		return grab_empty_leb(c);

	err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
	if (err) {
		if (err != -ENOSPC)
			return err;

		/*
		 * There are no dirty or empty LEBs, subject to there being
		 * enough LEBs for the index. Try 'grab_empty_leb()', which
		 * uses 'ubifs_find_free_leb_for_idx()' and thus will return
		 * any empty LEB (ignoring index requirements). If the index
		 * then does not have enough LEBs, the recovery commit will
		 * fail - which is the same result anyway, i.e. recovery
		 * fails. So there is no problem ignoring index requirements
		 * and just grabbing a free LEB, since we have already
		 * established that there is no dirty LEB we could have used
		 * instead.
		 */
		dbg_rcvry("could not find a dirty LEB");
		return grab_empty_leb(c);
	}

	ubifs_assert(!(lp.flags & LPROPS_INDEX));
	ubifs_assert(lp.free + lp.dirty >= wbuf->offs);

	/*
	 * We run the commit before garbage collection otherwise subsequent
	 * mounts will see the GC and orphan deletion in a different order.
	 */
	dbg_rcvry("committing");
	err = ubifs_run_commit(c);
	if (err)
		return err;

	dbg_rcvry("GC'ing LEB %d", lp.lnum);
	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
	err = ubifs_garbage_collect_leb(c, &lp);
	if (err >= 0) {
		int err2 = ubifs_wbuf_sync_nolock(wbuf);

		if (err2)
			err = err2;
	}
	mutex_unlock(&wbuf->io_mutex);
	if (err < 0) {
		dbg_err("GC failed, error %d", err);
		if (err == -EAGAIN)
			err = -EINVAL;
		return err;
	}

	ubifs_assert(err == LEB_RETAINED);
	if (err != LEB_RETAINED)
		return -EINVAL;

	err = ubifs_leb_unmap(c, c->gc_lnum);
	if (err)
		return err;

	dbg_rcvry("allocated LEB %d for GC", lp.lnum);
	return 0;
}
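/*
 * Flow summary (informal): either there is no usable GC head LEB or no
 * suitable dirty LEB, and an empty LEB is grabbed via 'grab_empty_leb()', or
 * the dirtiest LEB is garbage-collected into the GC head LEB after the
 * commit. Since @c->gc_lnum was set to -1 above, a successful GC is expected
 * to adopt the collected LEB as the new @c->gc_lnum and return
 * %LEB_RETAINED, which is what the assertion above checks.
 */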
				e->i_size = le64_to_cpu(ino->size);
			}
		}

		if (e->exists && e->i_size < e->d_size) {
			if (c->ro_mount) {
				/* Fix the inode size and pin it in memory */
				struct inode *inode;
				struct ubifs_inode *ui;

				ubifs_assert(!e->inode);

				inode = ubifs_iget(c->vfs_sb, e->inum);
				if (IS_ERR(inode))
					return PTR_ERR(inode);

				ui = ubifs_inode(inode);
				if (inode->i_size < e->d_size) {
					dbg_rcvry("ino %lu size %lld -> %lld",
						  (unsigned long)e->inum,
						  inode->i_size, e->d_size);
					inode->i_size = e->d_size;
					ui->ui_size = e->d_size;
					ui->synced_i_size = e->d_size;
				}
				e->inode = inode;
				this = rb_next(this);
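				/*
				 * Informal note: on a read-only mount the
				 * on-flash inode cannot be re-written here,
				 * so the corrected size lives only in the
				 * pinned in-memory inode (e->inode) until
				 * the file-system is mounted read-write and
				 * the size can be fixed in place on the
				 * media.
				 */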