btrfs: clean up our handling of refs == 0 in snapshot delete
author: Josef Bacik <josef@toxicpanda.com>
Tue, 7 May 2024 18:12:13 +0000 (14:12 -0400)
committer: David Sterba <dsterba@suse.com>
Thu, 11 Jul 2024 13:33:25 +0000 (15:33 +0200)
In reada we BUG_ON(refs == 0), which could be unkind since we aren't
holding a lock on the extent leaf and thus could get a transient
incorrect answer.  In walk_down_proc we also BUG_ON(refs == 0), which
could happen if we have extent tree corruption.  Change that to return
-EUCLEAN.  In do_walk_down() we catch this case and handle it correctly,
however we return -EIO, where -EUCLEAN is a more appropriate error code.
Finally in walk_up_proc we have the same BUG_ON(refs == 0), so convert
that to proper error handling.  Also adjust the error message so we can
actually do something with the information.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/extent-tree.c

index f399a0b..bcfb438 100644 (file)
@@ -5350,7 +5350,15 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
                /* We don't care about errors in readahead. */
                if (ret < 0)
                        continue;
-               BUG_ON(refs == 0);
+
+               /*
+                * This could be racey, it's conceivable that we raced and end
+                * up with a bogus refs count, if that's the case just skip, if
+                * we are actually corrupt we will notice when we look up
+                * everything again with our locks.
+                */
+               if (refs == 0)
+                       continue;
 
                /* If we don't need to visit this node don't reada. */
                if (!visit_node_for_delete(root, wc, eb, refs, flags, slot))
@@ -5399,7 +5407,11 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                                               NULL);
                if (ret)
                        return ret;
-               BUG_ON(wc->refs[level] == 0);
+               if (unlikely(wc->refs[level] == 0)) {
+                       btrfs_err(fs_info, "bytenr %llu has 0 references, expect > 0",
+                                 eb->start);
+                       return -EUCLEAN;
+               }
        }
 
        if (wc->stage == DROP_REFERENCE) {
@@ -5654,8 +5666,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                goto out_unlock;
 
        if (unlikely(wc->refs[level - 1] == 0)) {
-               btrfs_err(fs_info, "Missing references.");
-               ret = -EIO;
+               btrfs_err(fs_info, "bytenr %llu has 0 references, expect > 0",
+                         bytenr);
+               ret = -EUCLEAN;
                goto out_unlock;
        }
        wc->lookup_info = 0;
@@ -5766,7 +5779,12 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                                path->locks[level] = 0;
                                return ret;
                        }
-                       BUG_ON(wc->refs[level] == 0);
+                       if (unlikely(wc->refs[level] == 0)) {
+                               btrfs_tree_unlock_rw(eb, path->locks[level]);
+                               btrfs_err(fs_info, "bytenr %llu has 0 references, expect > 0",
+                                         eb->start);
+                               return -EUCLEAN;
+                       }
                        if (wc->refs[level] == 1) {
                                btrfs_tree_unlock_rw(eb, path->locks[level]);
                                path->locks[level] = 0;