bcachefs: Improvements to fsck check_dirents()
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 15 Jul 2021 00:28:27 +0000 (20:28 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:08 +0000 (17:09 -0400)
The fsck code handles transaction restarts in a very ad hoc way, and not
always correctly. This patch makes some improvements to check_dirents(),
but more work needs to be done to figure out how this kind of code
should be structured.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/fsck.c

index 7ea1a41..bedfd34 100644 (file)
@@ -267,11 +267,11 @@ static struct inode_walker inode_walker_init(void)
        };
 }
 
-static int walk_inode(struct btree_trans *trans,
-                     struct inode_walker *w, u64 inum)
+static int __walk_inode(struct btree_trans *trans,
+                       struct inode_walker *w, u64 inum)
 {
        if (inum != w->cur_inum) {
-               int ret = lookup_inode(trans, inum, &w->inode, &w->snapshot);
+               int ret = __lookup_inode(trans, inum, &w->inode, &w->snapshot);
 
                if (ret && ret != -ENOENT)
                        return ret;
@@ -286,6 +286,12 @@ static int walk_inode(struct btree_trans *trans,
        return 0;
 }
 
+static int walk_inode(struct btree_trans *trans,
+                     struct inode_walker *w, u64 inum)
+{
+       return lockrestart_do(trans, __walk_inode(trans, w, inum));
+}
+
 static int hash_redo_key(struct btree_trans *trans,
                         const struct bch_hash_desc desc,
                         struct bch_hash_info *hash_info,
@@ -704,210 +710,215 @@ fsck_err:
        return bch2_trans_exit(&trans) ?: ret;
 }
 
-/*
- * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
- * validate d_type
- */
-noinline_for_stack
-static int check_dirents(struct bch_fs *c)
+static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
+                       struct bch_hash_info *hash_info,
+                       struct inode_walker *w, unsigned *nr_subdirs)
 {
-       struct inode_walker w = inode_walker_init();
-       struct bch_hash_info hash_info;
-       struct btree_trans trans;
-       struct btree_iter *iter;
+       struct bch_fs *c = trans->c;
        struct bkey_s_c k;
+       struct bkey_s_c_dirent d;
+       struct bch_inode_unpacked target;
+       u32 target_snapshot;
+       bool have_target;
+       bool backpointer_exists = true;
+       u64 d_inum;
        char buf[200];
-       unsigned nr_subdirs = 0;
-       int ret = 0;
+       int ret;
 
-       bch_verbose(c, "checking dirents");
+       k = bch2_btree_iter_peek(iter);
+       if (!k.k)
+               return 1;
 
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
-                                  POS(BCACHEFS_ROOT_INO, 0),
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_PREFETCH);
-retry:
-       while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = bkey_err(k))) {
-               struct bkey_s_c_dirent d;
-               struct bch_inode_unpacked target;
-               u32 target_snapshot;
-               bool have_target;
-               bool backpointer_exists = true;
-               u64 d_inum;
+       if (w->have_inode &&
+           w->cur_inum != k.k->p.inode &&
+           fsck_err_on(w->inode.bi_nlink != *nr_subdirs, c,
+                       "directory %llu with wrong i_nlink: got %u, should be %u",
+                       w->inode.bi_inum, w->inode.bi_nlink, *nr_subdirs)) {
+               w->inode.bi_nlink = *nr_subdirs;
+               ret = write_inode(trans, &w->inode, w->snapshot);
+               return ret ?: -EINTR;
+       }
 
-               if (w.have_inode &&
-                   w.cur_inum != k.k->p.inode &&
-                   fsck_err_on(w.inode.bi_nlink != nr_subdirs, c,
-                               "directory %llu with wrong i_nlink: got %u, should be %u",
-                               w.inode.bi_inum, w.inode.bi_nlink, nr_subdirs)) {
-                       w.inode.bi_nlink = nr_subdirs;
-                       ret = write_inode(&trans, &w.inode, w.snapshot);
-                       if (ret)
-                               break;
-               }
+       ret = __walk_inode(trans, w, k.k->p.inode);
+       if (ret)
+               return ret;
 
-               ret = walk_inode(&trans, &w, k.k->p.inode);
-               if (ret)
-                       break;
+       if (w->first_this_inode)
+               *nr_subdirs = 0;
 
-               if (w.first_this_inode)
-                       nr_subdirs = 0;
+       if (fsck_err_on(!w->have_inode, c,
+                       "dirent in nonexisting directory:\n%s",
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)) ||
+           fsck_err_on(!S_ISDIR(w->inode.bi_mode), c,
+                       "dirent in non directory inode type %u:\n%s",
+                       mode_to_type(w->inode.bi_mode),
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
+               return __bch2_trans_do(trans, NULL, NULL, 0,
+                               bch2_btree_delete_at(trans, iter, 0));
 
-               if (fsck_err_on(!w.have_inode, c,
-                               "dirent in nonexisting directory:\n%s",
-                               (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      k), buf)) ||
-                   fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
-                               "dirent in non directory inode type %u:\n%s",
-                               mode_to_type(w.inode.bi_mode),
-                               (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      k), buf))) {
-                       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-                                       bch2_btree_delete_at(&trans, iter, 0));
-                       if (ret)
-                               goto err;
-                       goto next;
-               }
+       if (!w->have_inode)
+               return 0;
 
-               if (!w.have_inode)
-                       goto next;
+       if (w->first_this_inode)
+               *hash_info = bch2_hash_info_init(c, &w->inode);
 
-               if (w.first_this_inode)
-                       hash_info = bch2_hash_info_init(c, &w.inode);
+       ret = hash_check_key(trans, bch2_dirent_hash_desc,
+                            hash_info, iter, k);
+       if (ret < 0)
+               return ret;
+       if (ret) /* dirent has been deleted */
+               return 0;
 
-               ret = hash_check_key(&trans, bch2_dirent_hash_desc,
-                                    &hash_info, iter, k);
-               if (ret > 0) {
-                       ret = 0;
-                       goto next;
-               }
-               if (ret)
-                       goto fsck_err;
+       if (k.k->type != KEY_TYPE_dirent)
+               return 0;
+
+       d = bkey_s_c_to_dirent(k);
+       d_inum = le64_to_cpu(d.v->d_inum);
 
-               if (k.k->type != KEY_TYPE_dirent)
-                       goto next;
+       ret = __lookup_inode(trans, d_inum, &target, &target_snapshot);
+       if (ret && ret != -ENOENT)
+               return ret;
 
-               d = bkey_s_c_to_dirent(k);
-               d_inum = le64_to_cpu(d.v->d_inum);
+       have_target = !ret;
+       ret = 0;
 
-               ret = lookup_inode(&trans, d_inum, &target, &target_snapshot);
-               if (ret && ret != -ENOENT)
-                       break;
+       if (fsck_err_on(!have_target, c,
+                       "dirent points to missing inode:\n%s",
+                       (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                              k), buf)))
+               return remove_dirent(trans, d.k->p);
 
-               have_target = !ret;
+       if (!have_target)
+               return 0;
+
+       if (!target.bi_dir &&
+           !target.bi_dir_offset) {
+               target.bi_dir           = k.k->p.inode;
+               target.bi_dir_offset    = k.k->p.offset;
+
+               ret = __write_inode(trans, &target, target_snapshot) ?:
+                       bch2_trans_commit(trans, NULL, NULL,
+                                         BTREE_INSERT_NOFAIL|
+                                         BTREE_INSERT_LAZY_RW|
+                                         BTREE_INSERT_NOUNLOCK);
+               if (ret)
+                       return ret;
+               return -EINTR;
+       }
+
+       if (!inode_backpointer_matches(d, &target)) {
+               ret = inode_backpointer_exists(trans, &target);
+               if (ret < 0)
+                       return ret;
+
+               backpointer_exists = ret;
                ret = 0;
 
-               if (fsck_err_on(!have_target, c,
-                               "dirent points to missing inode:\n%s",
-                               (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      k), buf))) {
-                       ret = remove_dirent(&trans, d.k->p);
-                       if (ret)
-                               goto err;
-                       goto next;
+               if (fsck_err_on(S_ISDIR(target.bi_mode) &&
+                               backpointer_exists, c,
+                               "directory %llu with multiple links",
+                               target.bi_inum))
+                       return remove_dirent(trans, d.k->p);
+
+               if (fsck_err_on(backpointer_exists &&
+                               !target.bi_nlink, c,
+                               "inode %llu has multiple links but i_nlink 0",
+                               d_inum)) {
+                       target.bi_nlink++;
+                       target.bi_flags &= ~BCH_INODE_UNLINKED;
+
+                       ret = write_inode(trans, &target, target_snapshot);
+                       return ret ?: -EINTR;
                }
 
-               if (!have_target)
-                       goto next;
-
-               if (!target.bi_dir &&
-                   !target.bi_dir_offset) {
+               if (fsck_err_on(!backpointer_exists, c,
+                               "inode %llu has wrong backpointer:\n"
+                               "got       %llu:%llu\n"
+                               "should be %llu:%llu",
+                               d_inum,
+                               target.bi_dir,
+                               target.bi_dir_offset,
+                               k.k->p.inode,
+                               k.k->p.offset)) {
                        target.bi_dir           = k.k->p.inode;
                        target.bi_dir_offset    = k.k->p.offset;
 
-                       ret = write_inode(&trans, &target, target_snapshot);
-                       if (ret)
-                               goto err;
+                       ret = write_inode(trans, &target, target_snapshot);
+                       return ret ?: -EINTR;
                }
+       }
 
-               if (!inode_backpointer_matches(d, &target)) {
-                       ret = inode_backpointer_exists(&trans, &target);
-                       if (ret < 0)
-                               goto err;
-
-                       backpointer_exists = ret;
-                       ret = 0;
+       if (fsck_err_on(d.v->d_type != mode_to_type(target.bi_mode), c,
+                       "incorrect d_type: should be %u:\n%s",
+                       mode_to_type(target.bi_mode),
+                       (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                              k), buf))) {
+               struct bkey_i_dirent *n;
 
-                       if (fsck_err_on(S_ISDIR(target.bi_mode) &&
-                                       backpointer_exists, c,
-                                       "directory %llu with multiple links",
-                                       target.bi_inum)) {
-                               ret = remove_dirent(&trans, d.k->p);
-                               if (ret)
-                                       goto err;
-                               continue;
-                       }
+               n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
+               if (!n)
+                       return -ENOMEM;
 
-                       if (fsck_err_on(backpointer_exists &&
-                                       !target.bi_nlink, c,
-                                       "inode %llu has multiple links but i_nlink 0",
-                                       d_inum)) {
-                               target.bi_nlink++;
-                               target.bi_flags &= ~BCH_INODE_UNLINKED;
+               bkey_reassemble(&n->k_i, d.s_c);
+               n->v.d_type = mode_to_type(target.bi_mode);
 
-                               ret = write_inode(&trans, &target, target_snapshot);
-                               if (ret)
-                                       goto err;
-                       }
+               ret = __bch2_trans_do(trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|
+                                     BTREE_INSERT_LAZY_RW,
+                       bch2_btree_iter_traverse(iter) ?:
+                       bch2_trans_update(trans, iter, &n->k_i, 0));
+               kfree(n);
+               return ret ?: -EINTR;
+       }
 
-                       if (fsck_err_on(!backpointer_exists, c,
-                                       "inode %llu has wrong backpointer:\n"
-                                       "got       %llu:%llu\n"
-                                       "should be %llu:%llu",
-                                       d_inum,
-                                       target.bi_dir,
-                                       target.bi_dir_offset,
-                                       k.k->p.inode,
-                                       k.k->p.offset)) {
-                               target.bi_dir           = k.k->p.inode;
-                               target.bi_dir_offset    = k.k->p.offset;
-
-                               ret = write_inode(&trans, &target, target_snapshot);
-                               if (ret)
-                                       goto err;
-                       }
-               }
+       *nr_subdirs += d.v->d_type == DT_DIR;
+       return 0;
+fsck_err:
+       return ret;
+}
 
-               if (fsck_err_on(d.v->d_type != mode_to_type(target.bi_mode), c,
-                               "incorrect d_type: should be %u:\n%s",
-                               mode_to_type(target.bi_mode),
-                               (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      k), buf))) {
-                       struct bkey_i_dirent *n;
+/*
+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
+ * validate d_type
+ */
+noinline_for_stack
+static int check_dirents(struct bch_fs *c)
+{
+       struct inode_walker w = inode_walker_init();
+       struct bch_hash_info hash_info;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       unsigned nr_subdirs = 0;
+       int ret = 0;
 
-                       n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
-                       if (!n) {
-                               ret = -ENOMEM;
-                               goto err;
-                       }
+       bch_verbose(c, "checking dirents");
 
-                       bkey_reassemble(&n->k_i, d.s_c);
-                       n->v.d_type = mode_to_type(target.bi_mode);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_NOFAIL|
-                                             BTREE_INSERT_LAZY_RW,
-                               bch2_btree_iter_traverse(iter) ?:
-                               bch2_trans_update(&trans, iter, &n->k_i, 0));
-                       kfree(n);
-                       if (ret)
-                               goto err;
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
+                                  POS(BCACHEFS_ROOT_INO, 0),
+                                  BTREE_ITER_INTENT|
+                                  BTREE_ITER_PREFETCH);
 
+       while (1) {
+               ret = lockrestart_do(&trans,
+                               check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
+               if (ret == 1) {
+                       /* at end */
+                       ret = 0;
+                       break;
                }
+               if (ret)
+                       break;
 
-               nr_subdirs += d.v->d_type == DT_DIR;
-next:
                bch2_btree_iter_advance(iter);
        }
-err:
-fsck_err:
-       if (ret == -EINTR)
-               goto retry;
-
        bch2_trans_iter_put(&trans, iter);
+
        return bch2_trans_exit(&trans) ?: ret;
 }