bcachefs: Fix overlapping extent repair
authorKent Overstreet <kent.overstreet@linux.dev>
Fri, 21 Jul 2023 02:42:26 +0000 (22:42 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:10:09 +0000 (17:10 -0400)
A number of smallish fixes for overlapping extent repair, and (part of)
a new unit test. This fixes all the issues turned up by bhzhu203, in his
filesystem image from running mongodb + snapshots.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fsck.c
fs/bcachefs/tests.c

index 31eb917..28dc8b4 100644 (file)
@@ -408,6 +408,28 @@ static inline void snapshots_seen_init(struct snapshots_seen *s)
        memset(s, 0, sizeof(*s));
 }
 
+static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
+{
+       struct snapshots_seen_entry *i, n = {
+               .id     = id,
+               .equiv  = bch2_snapshot_equiv(c, id),
+       };
+       int ret = 0;
+
+       darray_for_each(s->ids, i) {
+               if (i->id == id)
+                       return 0;
+               if (i->id > id)
+                       break;
+       }
+
+       ret = darray_insert_item(&s->ids, i - s->ids.data, n);
+       if (ret)
+               bch_err(c, "error reallocating snapshots_seen table (size %zu)",
+                       s->ids.size);
+       return ret;
+}
+
 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
                                 enum btree_id btree_id, struct bpos pos)
 {
@@ -1122,74 +1144,116 @@ static int extent_ends_at(struct bch_fs *c,
 
 static int overlapping_extents_found(struct btree_trans *trans,
                                     enum btree_id btree,
-                                    struct bpos pos1, struct bkey pos2,
-                                    bool *fixed)
+                                    struct bpos pos1, struct snapshots_seen *pos1_seen,
+                                    struct bkey pos2,
+                                    bool *fixed,
+                                    struct extent_end *extent_end)
 {
        struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       u32 snapshot = min(pos1.snapshot, pos2.p.snapshot);
+       struct btree_iter iter1, iter2 = { NULL };
+       struct bkey_s_c k1, k2;
        int ret;
 
        BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
 
-       bch2_trans_iter_init(trans, &iter, btree, SPOS(pos1.inode, pos1.offset - 1, snapshot), 0);
-       k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
-       ret = bkey_err(k);
+       bch2_trans_iter_init(trans, &iter1, btree, pos1,
+                            BTREE_ITER_ALL_SNAPSHOTS|
+                            BTREE_ITER_NOT_EXTENTS);
+       k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX));
+       ret = bkey_err(k1);
        if (ret)
                goto err;
 
        prt_str(&buf, "\n  ");
-       bch2_bkey_val_to_text(&buf, c, k);
+       bch2_bkey_val_to_text(&buf, c, k1);
+
+       if (!bpos_eq(pos1, k1.k->p)) {
+               prt_str(&buf, "\n  wanted\n  ");
+               bch2_bpos_to_text(&buf, pos1);
+               prt_str(&buf, "\n  ");
+               bch2_bkey_to_text(&buf, &pos2);
 
-       if (!bpos_eq(pos1, k.k->p)) {
-               bch_err(c, "%s: error finding first overlapping extent when repairing%s",
+               bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
                        __func__, buf.buf);
                ret = -BCH_ERR_internal_fsck_err;
                goto err;
        }
 
+       bch2_trans_copy_iter(&iter2, &iter1);
+
        while (1) {
-               bch2_btree_iter_advance(&iter);
+               bch2_btree_iter_advance(&iter2);
 
-               k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
-               ret = bkey_err(k);
+               k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX));
+               ret = bkey_err(k2);
                if (ret)
                        goto err;
 
-               if (bkey_ge(k.k->p, pos2.p))
+               if (bpos_ge(k2.k->p, pos2.p))
                        break;
-
        }
 
        prt_str(&buf, "\n  ");
-       bch2_bkey_val_to_text(&buf, c, k);
+       bch2_bkey_val_to_text(&buf, c, k2);
 
-       if (bkey_gt(k.k->p, pos2.p) ||
-           pos2.size != k.k->size) {
+       if (bpos_gt(k2.k->p, pos2.p) ||
+           pos2.size != k2.k->size) {
                bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
                        __func__, buf.buf);
                ret = -BCH_ERR_internal_fsck_err;
                goto err;
        }
 
+       prt_printf(&buf, "\n  overwriting %s extent",
+                  pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
+
        if (fsck_err(c, "overlapping extents%s", buf.buf)) {
-               struct bpos update_pos = pos1.snapshot < pos2.p.snapshot ? pos1 : pos2.p;
-               struct btree_iter update_iter;
+               struct btree_iter *old_iter = &iter1;
+               struct disk_reservation res = { 0 };
 
-               struct bkey_i *update = bch2_bkey_get_mut(trans, &update_iter,
-                                               btree, update_pos,
-                                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-               bch2_trans_iter_exit(trans, &update_iter);
-               if ((ret = PTR_ERR_OR_ZERO(update)))
+               if (pos1.snapshot < pos2.p.snapshot) {
+                       old_iter = &iter2;
+                       swap(k1, k2);
+               }
+
+               trans->extra_journal_res += bch2_bkey_sectors_compressed(k2);
+
+               ret =   bch2_trans_update_extent_overwrite(trans, old_iter,
+                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
+                               k1, k2) ?:
+                       bch2_trans_commit(trans, &res, NULL,
+                               BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL);
+               bch2_disk_reservation_put(c, &res);
+
+               if (ret)
                        goto err;
 
                *fixed = true;
+
+               if (pos1.snapshot == pos2.p.snapshot) {
+                       /*
+                        * We overwrote the first extent, and did the overwrite
+                        * in the same snapshot:
+                        */
+                       extent_end->offset = bkey_start_offset(&pos2);
+               } else if (pos1.snapshot > pos2.p.snapshot) {
+                       /*
+                        * We overwrote the first extent in pos2's snapshot:
+                        */
+                       ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
+               } else {
+                       /*
+                        * We overwrote the second extent - restart
+                        * check_extent() from the top:
+                        */
+                       ret = -BCH_ERR_transaction_restart_nested;
+               }
        }
 fsck_err:
 err:
-       bch2_trans_iter_exit(trans, &iter);
+       bch2_trans_iter_exit(trans, &iter2);
+       bch2_trans_iter_exit(trans, &iter1);
        printbuf_exit(&buf);
        return ret;
 }
@@ -1199,11 +1263,11 @@ static int check_overlapping_extents(struct btree_trans *trans,
                              struct extent_ends *extent_ends,
                              struct bkey_s_c k,
                              u32 equiv,
-                             struct btree_iter *iter)
+                             struct btree_iter *iter,
+                             bool *fixed)
 {
        struct bch_fs *c = trans->c;
        struct extent_end *i;
-       bool fixed = false;
        int ret = 0;
 
        /* transaction restart, running again */
@@ -1226,7 +1290,8 @@ static int check_overlapping_extents(struct btree_trans *trans,
                                                SPOS(iter->pos.inode,
                                                     i->offset,
                                                     i->snapshot),
-                                               *k.k, &fixed);
+                                               &i->seen,
+                                               *k.k, fixed, i);
                if (ret)
                        goto err;
        }
@@ -1237,7 +1302,7 @@ static int check_overlapping_extents(struct btree_trans *trans,
 
        extent_ends->last_pos = k.k->p;
 err:
-       return ret ?: fixed;
+       return ret;
 }
 
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
@@ -1292,13 +1357,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        goto delete;
 
                ret = check_overlapping_extents(trans, s, extent_ends, k,
-                                               equiv.snapshot, iter);
-               if (ret < 0)
-                       goto err;
-
+                                               equiv.snapshot, iter,
+                                               &inode->recalculate_sums);
                if (ret)
-                       inode->recalculate_sums = true;
-               ret = 0;
+                       goto err;
        }
 
        /*
@@ -1373,7 +1435,7 @@ int bch2_check_extents(struct bch_fs *c)
 
        snapshots_seen_init(&s);
        extent_ends_init(&extent_ends);
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
 
        ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
                        POS(BCACHEFS_ROOT_INO, 0),
index cef23d2..1d4b0a5 100644 (file)
@@ -503,6 +503,36 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
                __test_extent_overwrite(c, 32, 64, 32, 128);
 }
 
+static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
+{
+       struct bkey_i_cookie k;
+       int ret;
+
+       bkey_cookie_init(&k.k_i);
+       k.k_i.k.p.inode = inum;
+       k.k_i.k.p.offset = start + len;
+       k.k_i.k.p.snapshot = snapid;
+       k.k_i.k.size = len;
+
+       ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_btree_insert_nonextent(&trans, BTREE_ID_extents, &k.k_i,
+                                           BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
+       if (ret)
+               bch_err_fn(c, ret);
+       return ret;
+}
+
+static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
+{
+       return  insert_test_overlapping_extent(c, inum,  0, 16, U32_MAX - 2) ?: /* overwrite entire */
+               insert_test_overlapping_extent(c, inum,  2,  8, U32_MAX - 2) ?:
+               insert_test_overlapping_extent(c, inum,  4,  4, U32_MAX) ?:
+               insert_test_overlapping_extent(c, inum, 32,  8, U32_MAX - 2) ?: /* overwrite front/back */
+               insert_test_overlapping_extent(c, inum, 36,  8, U32_MAX) ?:
+               insert_test_overlapping_extent(c, inum, 60,  8, U32_MAX - 2) ?:
+               insert_test_overlapping_extent(c, inum, 64,  8, U32_MAX);
+}
+
 /* snapshot unit tests */
 
 /* Test skipping over keys in unrelated snapshots: */
@@ -901,6 +931,7 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
        perf_test(test_extent_overwrite_back);
        perf_test(test_extent_overwrite_middle);
        perf_test(test_extent_overwrite_all);
+       perf_test(test_extent_create_overlapping);
 
        perf_test(test_snapshots);