Merge tag 'for-6.0-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 20 Sep 2022 17:23:24 +0000 (10:23 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 20 Sep 2022 17:23:24 +0000 (10:23 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 20 Sep 2022 17:23:24 +0000 (10:23 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 20 Sep 2022 17:23:24 +0000 (10:23 -0700)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 1af28b0..2633137 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4474,6 +4474,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
  
         set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
  
+       /*
+        * If we had UNFINISHED_DROPS we could still be processing them, so
+        * clear that bit and wake up relocation so it can stop.
+        * We must do this before stopping the block group reclaim task, because
+        * at btrfs_relocate_block_group() we wait for this bit, and after the
+        * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
+        * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
+        * return 1.
+        */
+       btrfs_wake_unfinished_drop(fs_info);
+
         /*
          * We may have the reclaim task running and relocating a data block group,
          * in which case it may create delayed iputs. So stop it before we park
@@ -4492,12 +4503,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
          */
         kthread_park(fs_info->cleaner_kthread);
  
-       /*
-        * If we had UNFINISHED_DROPS we could still be processing them, so
-        * clear that bit and wake up relocation so it can stop.
-        */
-       btrfs_wake_unfinished_drop(fs_info);
-
         /* wait for the qgroup rescan worker to stop */
         btrfs_qgroup_wait_for_completion(fs_info, false);
  
@@ -4520,6 +4525,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
         /* clear out the rbtree of defraggable inodes */
         btrfs_cleanup_defrag_inodes(fs_info);
  
+       /*
+        * After we parked the cleaner kthread, ordered extents may have
+        * completed and created new delayed iputs. If one of the async reclaim
+        * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
+        * can hang forever trying to stop it, because if a delayed iput is
+        * added after it ran btrfs_run_delayed_iputs() and before it called
+        * btrfs_wait_on_delayed_iputs(), it will hang forever since there is
+        * no one else to run iputs.
+        *
+        * So wait for all ongoing ordered extents to complete and then run
+        * delayed iputs. This works because once we reach this point no one
+        * can create new ordered extents or create delayed iputs through
+        * some other means.
+        *
+        * Also note that btrfs_wait_ordered_roots() is not safe here, because
+        * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
+        * but the delayed iput for the respective inode is made only when doing
+        * the final btrfs_put_ordered_extent() (which must happen at
+        * btrfs_finish_ordered_io() when we are unmounting).
+        */
+       btrfs_flush_workqueue(fs_info->endio_write_workers);
+       /* Ordered extents for free space inodes. */
+       btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+       btrfs_run_delayed_iputs(fs_info);
+
         cancel_work_sync(&fs_info->async_reclaim_work);
         cancel_work_sync(&fs_info->async_data_reclaim_work);
         cancel_work_sync(&fs_info->preempt_reclaim_work);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c

index 62e7007..73c6929 100644 (file)
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1918,10 +1918,44 @@ out_unlock:
         return ret;
  }
  
+static void wait_eb_writebacks(struct btrfs_block_group *block_group)
+{
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       const u64 end = block_group->start + block_group->length;
+       struct radix_tree_iter iter;
+       struct extent_buffer *eb;
+       void __rcu **slot;
+
+       rcu_read_lock();
+       radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
+                                block_group->start >> fs_info->sectorsize_bits) {
+               eb = radix_tree_deref_slot(slot);
+               if (!eb)
+                       continue;
+               if (radix_tree_deref_retry(eb)) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
+
+               if (eb->start < block_group->start)
+                       continue;
+               if (eb->start >= end)
+                       break;
+
+               slot = radix_tree_iter_resume(slot, &iter);
+               rcu_read_unlock();
+               wait_on_extent_buffer_writeback(eb);
+               rcu_read_lock();
+       }
+       rcu_read_unlock();
+}
+
  static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
  {
         struct btrfs_fs_info *fs_info = block_group->fs_info;
         struct map_lookup *map;
+       const bool is_metadata = (block_group->flags &
+                       (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
         int ret = 0;
         int i;
  
@@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
         }
  
         /* Check if we have unwritten allocated space */
-       if ((block_group->flags &
-            (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+       if (is_metadata &&
             block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
                 spin_unlock(&block_group->lock);
                 return -EAGAIN;
@@ -1958,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
                 /* No need to wait for NOCOW writers. Zoned mode does not allow that */
                 btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
                                          block_group->length);
+               /* Wait for extent buffers to be written. */
+               if (is_metadata)
+                       wait_eb_writebacks(block_group);
  
                 spin_lock(&block_group->lock);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 20 Sep 2022 17:23:24 +0000 (10:23 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 20 Sep 2022 17:23:24 +0000 (10:23 -0700)
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/zoned.c		patch \| blob \| history