btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted
authorOmar Sandoval <osandov@fb.com>
Thu, 4 Jan 2024 19:48:47 +0000 (11:48 -0800)
committerDavid Sterba <dsterba@suse.com>
Fri, 12 Jan 2024 01:00:21 +0000 (02:00 +0100)
Sweet Tea spotted a race between subvolume deletion and snapshotting
that can result in the root item for the snapshot having the
BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:

Thread 1                                      | Thread 2
----------------------------------------------|----------
btrfs_delete_subvolume                        |
  btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
                                              |btrfs_mksubvol
                                              |  down_read(subvol_sem)
                                              |  create_snapshot
                                              |    ...
                                              |    create_pending_snapshot
                                              |      copy root item from source
  down_write(subvol_sem)                      |

This flag is only checked in send and swap activate, which this would
cause to fail mysteriously.

create_snapshot() now checks the root refs to reject a deleted
subvolume, so we can fix this by locking subvol_sem earlier so that the
BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.

CC: stable@vger.kernel.org # 4.14+
Reported-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/inode.c

index b3e3961..7bcc1c0 100644 (file)
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
        u64 root_flags;
        int ret;
 
+       down_write(&fs_info->subvol_sem);
+
        /*
         * Don't allow to delete a subvolume with send in progress. This is
         * inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
                btrfs_warn(fs_info,
                           "attempt to delete subvolume %llu during send",
                           dest->root_key.objectid);
-               return -EPERM;
+               ret = -EPERM;
+               goto out_up_write;
        }
        if (atomic_read(&dest->nr_swapfiles)) {
                spin_unlock(&dest->root_item_lock);
                btrfs_warn(fs_info,
                           "attempt to delete subvolume %llu with active swapfile",
                           root->root_key.objectid);
-               return -EPERM;
+               ret = -EPERM;
+               goto out_up_write;
        }
        root_flags = btrfs_root_flags(&dest->root_item);
        btrfs_set_root_flags(&dest->root_item,
                             root_flags | BTRFS_ROOT_SUBVOL_DEAD);
        spin_unlock(&dest->root_item_lock);
 
-       down_write(&fs_info->subvol_sem);
-
        ret = may_destroy_subvol(dest);
        if (ret)
-               goto out_up_write;
+               goto out_undead;
 
        btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
        /*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
         */
        ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
        if (ret)
-               goto out_up_write;
+               goto out_undead;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ out_end_trans:
        inode->i_flags |= S_DEAD;
 out_release:
        btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
-       up_write(&fs_info->subvol_sem);
+out_undead:
        if (ret) {
                spin_lock(&dest->root_item_lock);
                root_flags = btrfs_root_flags(&dest->root_item);
                btrfs_set_root_flags(&dest->root_item,
                                root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
                spin_unlock(&dest->root_item_lock);
-       } else {
+       }
+out_up_write:
+       up_write(&fs_info->subvol_sem);
+       if (!ret) {
                d_invalidate(dentry);
                btrfs_prune_dentries(dest);
                ASSERT(dest->send_in_progress == 0);