Merge tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 4 Feb 2022 20:14:58 +0000 (12:14 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 4 Feb 2022 20:14:58 +0000 (12:14 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Feb 2022 20:14:58 +0000 (12:14 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Feb 2022 20:14:58 +0000 (12:14 -0800)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c

index 1db24e6..8202ad6 100644 (file)
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -124,7 +124,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
  {
         if (refcount_dec_and_test(&cache->refs)) {
                 WARN_ON(cache->pinned > 0);
-               WARN_ON(cache->reserved > 0);
+               /*
+                * If there was a failure to cleanup a log tree, very likely due
+                * to an IO failure on a writeback attempt of one or more of its
+                * extent buffers, we could not do proper (and cheap) unaccounting
+                * of their reserved space, so don't warn on reserved > 0 in that
+                * case.
+                */
+               if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+                   !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
+                       WARN_ON(cache->reserved > 0);
  
                 /*
                  * A block_group shouldn't be on the discard_list anymore.
@@ -2544,6 +2553,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
         int ret;
         bool dirty_bg_running;
  
+       /*
+        * This can only happen when we are doing read-only scrub on read-only
+        * mount.
+        * In that case we should not start a new transaction on read-only fs.
+        * Thus here we skip all chunk allocations.
+        */
+       if (sb_rdonly(fs_info->sb)) {
+               mutex_lock(&fs_info->ro_block_group_mutex);
+               ret = inc_block_group_ro(cache, 0);
+               mutex_unlock(&fs_info->ro_block_group_mutex);
+               return ret;
+       }
+
         do {
                 trans = btrfs_join_transaction(root);
                 if (IS_ERR(trans))
@@ -3974,9 +3996,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                  * important and indicates a real bug if this happens.
                  */
                 if (WARN_ON(space_info->bytes_pinned > 0 ||
-                           space_info->bytes_reserved > 0 ||
                             space_info->bytes_may_use > 0))
                         btrfs_dump_space_info(info, space_info, 0, 0);
+
+               /*
+                * If there was a failure to cleanup a log tree, very likely due
+                * to an IO failure on a writeback attempt of one or more of its
+                * extent buffers, we could not do proper (and cheap) unaccounting
+                * of their reserved space, so don't warn on bytes_reserved > 0 in
+                * that case.
+                */
+               if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+                   !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+                       if (WARN_ON(space_info->bytes_reserved > 0))
+                               btrfs_dump_space_info(info, space_info, 0, 0);
+               }
+
                 WARN_ON(space_info->reclaim_size > 0);
                 list_del(&space_info->list);
                 btrfs_sysfs_remove_space_info(space_info);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index b4a9b1c..8992e00 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -145,6 +145,9 @@ enum {
         BTRFS_FS_STATE_DUMMY_FS_INFO,
  
         BTRFS_FS_STATE_NO_CSUMS,
+
+       /* Indicates there was an error cleaning up a log tree. */
+       BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
  };
  
  #define BTRFS_BACKREF_REV_MAX          256
@@ -3593,6 +3596,9 @@ do {                                                              \
  
  #define BTRFS_FS_ERROR(fs_info)        (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
                                                    &(fs_info)->fs_state)))
+#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info)                            \
+       (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,            \
+                          &(fs_info)->fs_state)))
  
  __printf(5, 6)
  __cold
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index d8af662..33eda39 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -805,10 +805,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                 goto fail;
         }
  
-       spin_lock(&fs_info->trans_lock);
-       list_add(&pending_snapshot->list,
-                &trans->transaction->pending_snapshots);
-       spin_unlock(&fs_info->trans_lock);
+       trans->pending_snapshot = pending_snapshot;
  
         ret = btrfs_commit_transaction(trans);
         if (ret)
@@ -3354,7 +3351,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
         struct block_device *bdev = NULL;
         fmode_t mode;
         int ret;
-       bool cancel;
+       bool cancel = false;
  
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

index 8928275..f12dc68 100644 (file)
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1185,9 +1185,24 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
         struct btrfs_trans_handle *trans = NULL;
         int ret = 0;
  
+       /*
+        * We need to have subvol_sem write locked, to prevent races between
+        * concurrent tasks trying to disable quotas, because we will unlock
+        * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+        */
+       lockdep_assert_held_write(&fs_info->subvol_sem);
+
         mutex_lock(&fs_info->qgroup_ioctl_lock);
         if (!fs_info->quota_root)
                 goto out;
+
+       /*
+        * Request qgroup rescan worker to complete and wait for it. This wait
+        * must be done before transaction start for quota disable since it may
+        * deadlock with transaction by the qgroup rescan worker.
+        */
+       clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
         mutex_unlock(&fs_info->qgroup_ioctl_lock);
  
         /*
@@ -1205,14 +1220,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
         if (IS_ERR(trans)) {
                 ret = PTR_ERR(trans);
                 trans = NULL;
+               set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
                 goto out;
         }
  
         if (!fs_info->quota_root)
                 goto out;
  
-       clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-       btrfs_qgroup_wait_for_completion(fs_info, false);
         spin_lock(&fs_info->qgroup_lock);
         quota_root = fs_info->quota_root;
         fs_info->quota_root = NULL;
@@ -3383,6 +3397,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                         btrfs_warn(fs_info,
                         "qgroup rescan init failed, qgroup is not enabled");
                         ret = -EINVAL;
+               } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+                       /* Quota disable is in progress */
+                       ret = -EBUSY;
                 }
  
                 if (ret) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 03de89b..c43bbc7 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2000,6 +2000,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
  }
  
+/*
+ * Add the pending snapshot set on the given transaction handle, if any, to the
+ * pending_snapshots list of the handle's transaction. This must be called after
+ * the transaction commit started and while holding fs_info->trans_lock.
+ * This serves to guarantee a caller of btrfs_commit_transaction() that it can
+ * safely free the pending snapshot pointer in case btrfs_commit_transaction()
+ * returns an error.
+ */
+static void add_pending_snapshot(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_transaction *cur_trans = trans->transaction;
+
+       if (!trans->pending_snapshot)
+               return;
+
+       lockdep_assert_held(&trans->fs_info->trans_lock);
+       ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+
+       list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
+}
+
  int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
  {
         struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -2073,6 +2094,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
  
+               add_pending_snapshot(trans);
+
                 spin_unlock(&fs_info->trans_lock);
                 refcount_inc(&cur_trans->use_count);
  
@@ -2163,6 +2186,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
          * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
          */
         spin_lock(&fs_info->trans_lock);
+       add_pending_snapshot(trans);
         cur_trans->state = TRANS_STATE_COMMIT_DOING;
         spin_unlock(&fs_info->trans_lock);
         wait_event(cur_trans->writer_wait,
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h

index 1852ed9..9402d8d 100644 (file)
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -123,6 +123,8 @@ struct btrfs_trans_handle {
         struct btrfs_transaction *transaction;
         struct btrfs_block_rsv *block_rsv;
         struct btrfs_block_rsv *orig_rsv;
+       /* Set by a task that wants to create a snapshot. */
+       struct btrfs_pending_snapshot *pending_snapshot;
         refcount_t use_count;
         unsigned int type;
         /*
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c

index 72e1c94..9fd145f 100644 (file)
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -965,6 +965,7 @@ static int check_dev_item(struct extent_buffer *leaf,
                           struct btrfs_key *key, int slot)
  {
         struct btrfs_dev_item *ditem;
+       const u32 item_size = btrfs_item_size(leaf, slot);
  
         if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
                 dev_item_err(leaf, slot,
@@ -972,6 +973,13 @@ static int check_dev_item(struct extent_buffer *leaf,
                              key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
                 return -EUCLEAN;
         }
+
+       if (unlikely(item_size != sizeof(*ditem))) {
+               dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
+                            item_size, sizeof(*ditem));
+               return -EUCLEAN;
+       }
+
         ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
         if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
                 dev_item_err(leaf, slot,
@@ -1007,6 +1015,7 @@ static int check_inode_item(struct extent_buffer *leaf,
         struct btrfs_inode_item *iitem;
         u64 super_gen = btrfs_super_generation(fs_info->super_copy);
         u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
+       const u32 item_size = btrfs_item_size(leaf, slot);
         u32 mode;
         int ret;
         u32 flags;
@@ -1016,6 +1025,12 @@ static int check_inode_item(struct extent_buffer *leaf,
         if (unlikely(ret < 0))
                 return ret;
  
+       if (unlikely(item_size != sizeof(*iitem))) {
+               generic_err(leaf, slot, "invalid item size: has %u expect %zu",
+                           item_size, sizeof(*iitem));
+               return -EUCLEAN;
+       }
+
         iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
  
         /* Here we use super block generation + 1 to handle log tree */
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index c1ddbe8..3ee014c 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3414,6 +3414,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
         if (log->node) {
                 ret = walk_log_tree(trans, log, &wc);
                 if (ret) {
+                       /*
+                        * We weren't able to traverse the entire log tree, the
+                        * typical scenario is getting an -EIO when reading an
+                        * extent buffer of the tree, due to a previous writeback
+                        * failure of it.
+                        */
+                       set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+                               &log->fs_info->fs_state);
+
+                       /*
+                        * Some extent buffers of the log tree may still be dirty
+                        * and not yet written back to storage, because we may
+                        * have updates to a log tree without syncing a log tree,
+                        * such as during rename and link operations. So flush
+                        * them out and wait for their writeback to complete, so
+                        * that we properly cleanup their state and pages.
+                        */
+                       btrfs_write_marked_extents(log->fs_info,
+                                                  &log->dirty_log_pages,
+                                                  EXTENT_DIRTY | EXTENT_NEW);
+                       btrfs_wait_tree_log_extents(log,
+                                                   EXTENT_DIRTY | EXTENT_NEW);
+
                         if (trans)
                                 btrfs_abort_transaction(trans, ret);
                         else
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 4 Feb 2022 20:14:58 +0000 (12:14 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 4 Feb 2022 20:14:58 +0000 (12:14 -0800)
fs/btrfs/block-group.c		patch \| blob \| history
fs/btrfs/ctree.h		patch \| blob \| history
fs/btrfs/ioctl.c		patch \| blob \| history
fs/btrfs/qgroup.c		patch \| blob \| history
fs/btrfs/transaction.c		patch \| blob \| history
fs/btrfs/transaction.h		patch \| blob \| history
fs/btrfs/tree-checker.c		patch \| blob \| history
fs/btrfs/tree-log.c		patch \| blob \| history