btrfs: remove btrfs_end_io_wq
author Christoph Hellwig <hch@lst.de>
Thu, 26 May 2022 07:36:40 +0000 (09:36 +0200)
committer David Sterba <dsterba@suse.com>
Mon, 25 Jul 2022 15:45:33 +0000 (17:45 +0200)
All read bios that go through btrfs_map_bio need to be completed in
user context.  And read I/Os are the most common and timing-critical
I/Os in almost any file system workload.

Embed a work_struct into struct btrfs_bio and use it to complete all
read bios submitted through btrfs_map_bio, using the REQ_META flag to
decide which workqueue they are placed on.

This removes the need for a separate 128 byte allocation (typically
rounded up to 192 bytes by slab) for all reads with a size increase
of 24 bytes for struct btrfs_bio.  Future patches will reorganize
struct btrfs_bio to make use of this extra space for writes as well.

(All sizes are based on a typical 64-bit non-debug build)

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 2ea5cf5..63d5429 100644 (file)
@@ -931,10 +931,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        sums += fs_info->csum_size * nr_sectors;
 
                        ASSERT(comp_bio->bi_iter.bi_size);
-                       ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
-                                                 BTRFS_WQ_ENDIO_DATA);
-                       if (ret)
-                               goto finish_cb;
                        ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
                        if (ret)
                                goto finish_cb;
index e689dba..22a287c 100644 (file)
@@ -850,8 +850,8 @@ struct btrfs_fs_info {
        struct btrfs_workqueue *hipri_workers;
        struct btrfs_workqueue *delalloc_workers;
        struct btrfs_workqueue *flush_workers;
-       struct btrfs_workqueue *endio_workers;
-       struct btrfs_workqueue *endio_meta_workers;
+       struct workqueue_struct *endio_workers;
+       struct workqueue_struct *endio_meta_workers;
        struct workqueue_struct *endio_raid56_workers;
        struct workqueue_struct *rmw_workers;
        struct workqueue_struct *compressed_write_workers;
index a085cd9..ed1d92b 100644 (file)
@@ -51,7 +51,6 @@
                                 BTRFS_SUPER_FLAG_METADUMP |\
                                 BTRFS_SUPER_FLAG_METADUMP_V2)
 
-static void end_workqueue_fn(struct btrfs_work *work);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                      struct btrfs_fs_info *fs_info);
@@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
 static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
 
-/*
- * btrfs_end_io_wq structs are used to do processing in task context when an IO
- * is complete.  This is used during reads to verify checksums, and it is used
- * by writes to insert metadata for new file extents after IO is complete.
- */
-struct btrfs_end_io_wq {
-       struct bio *bio;
-       bio_end_io_t *end_io;
-       void *private;
-       struct btrfs_fs_info *info;
-       blk_status_t status;
-       enum btrfs_wq_endio_type metadata;
-       struct btrfs_work work;
-};
-
-static struct kmem_cache *btrfs_end_io_wq_cache;
-
-int __init btrfs_end_io_wq_init(void)
-{
-       btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
-                                       sizeof(struct btrfs_end_io_wq),
-                                       0,
-                                       SLAB_MEM_SPREAD,
-                                       NULL);
-       if (!btrfs_end_io_wq_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void __cold btrfs_end_io_wq_exit(void)
-{
-       kmem_cache_destroy(btrfs_end_io_wq_cache);
-}
-
 static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
 {
        if (fs_info->csum_shash)
@@ -740,48 +705,6 @@ err:
        return ret;
 }
 
-static void end_workqueue_bio(struct bio *bio)
-{
-       struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
-       struct btrfs_fs_info *fs_info;
-       struct btrfs_workqueue *wq;
-
-       fs_info = end_io_wq->info;
-       end_io_wq->status = bio->bi_status;
-
-       if (end_io_wq->metadata)
-               wq = fs_info->endio_meta_workers;
-       else
-               wq = fs_info->endio_workers;
-
-       btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
-       btrfs_queue_work(wq, &end_io_wq->work);
-}
-
-blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-                       enum btrfs_wq_endio_type metadata)
-{
-       struct btrfs_end_io_wq *end_io_wq;
-
-       if (WARN_ON_ONCE(btrfs_op(bio) == BTRFS_MAP_WRITE))
-               return BLK_STS_IOERR;
-
-       end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
-       if (!end_io_wq)
-               return BLK_STS_RESOURCE;
-
-       end_io_wq->private = bio->bi_private;
-       end_io_wq->end_io = bio->bi_end_io;
-       end_io_wq->info = info;
-       end_io_wq->status = 0;
-       end_io_wq->bio = bio;
-       end_io_wq->metadata = metadata;
-
-       bio->bi_private = end_io_wq;
-       bio->bi_end_io = end_workqueue_bio;
-       return 0;
-}
-
 static void run_one_async_start(struct btrfs_work *work)
 {
        struct async_submit_bio *async;
@@ -917,14 +840,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
        bio->bi_opf |= REQ_META;
 
        if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-               /*
-                * called for a read, do the setup so that checksum validation
-                * can happen in the async kernel threads
-                */
-               ret = btrfs_bio_wq_end_io(fs_info, bio,
-                                         BTRFS_WQ_ENDIO_METADATA);
-               if (!ret)
-                       ret = btrfs_map_bio(fs_info, bio, mirror_num);
+               ret = btrfs_map_bio(fs_info, bio, mirror_num);
        } else if (!should_async_write(fs_info, BTRFS_I(inode))) {
                ret = btree_csum_one_bio(bio);
                if (!ret)
@@ -1947,25 +1863,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
        return root;
 }
 
-/*
- * called by the kthread helper functions to finally call the bio end_io
- * functions.  This is where read checksum verification actually happens
- */
-static void end_workqueue_fn(struct btrfs_work *work)
-{
-       struct bio *bio;
-       struct btrfs_end_io_wq *end_io_wq;
-
-       end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
-       bio = end_io_wq->bio;
-
-       bio->bi_status = end_io_wq->status;
-       bio->bi_private = end_io_wq->private;
-       bio->bi_end_io = end_io_wq->end_io;
-       bio_endio(bio);
-       kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-}
-
 static int cleaner_kthread(void *arg)
 {
        struct btrfs_fs_info *fs_info = arg;
@@ -2272,7 +2169,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
        btrfs_destroy_workqueue(fs_info->delalloc_workers);
        btrfs_destroy_workqueue(fs_info->hipri_workers);
        btrfs_destroy_workqueue(fs_info->workers);
-       btrfs_destroy_workqueue(fs_info->endio_workers);
+       if (fs_info->endio_workers)
+               destroy_workqueue(fs_info->endio_workers);
        if (fs_info->endio_raid56_workers)
                destroy_workqueue(fs_info->endio_raid56_workers);
        if (fs_info->rmw_workers)
@@ -2292,7 +2190,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
         * the queues used for metadata I/O, since tasks from those other work
         * queues can do metadata I/O operations.
         */
-       btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+       if (fs_info->endio_meta_workers)
+               destroy_workqueue(fs_info->endio_meta_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2471,15 +2370,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
        fs_info->fixup_workers =
                btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
 
-       /*
-        * endios are largely parallel and should have a very
-        * low idle thresh
-        */
        fs_info->endio_workers =
-               btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
+               alloc_workqueue("btrfs-endio", flags, max_active);
        fs_info->endio_meta_workers =
-               btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
-                                     max_active, 4);
+               alloc_workqueue("btrfs-endio-meta", flags, max_active);
        fs_info->endio_raid56_workers =
                alloc_workqueue("btrfs-endio-raid56", flags, max_active);
        fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
index 809ef06..05e779a 100644 (file)
  */
 #define BTRFS_BDEV_BLOCKSIZE   (4096)
 
-enum btrfs_wq_endio_type {
-       BTRFS_WQ_ENDIO_DATA,
-       BTRFS_WQ_ENDIO_METADATA,
-       BTRFS_WQ_ENDIO_FREE_SPACE,
-};
-
 static inline u64 btrfs_sb_offset(int mirror)
 {
        u64 start = SZ_16K;
@@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
                          int atomic);
 int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
                             int level, struct btrfs_key *first_key);
-blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-                       enum btrfs_wq_endio_type metadata);
 blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
                                 int mirror_num, u64 dio_file_offset,
                                 extent_submit_bio_start_t *submit_bio_start);
@@ -144,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
 int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
 int btrfs_init_root_free_objectid(struct btrfs_root *root);
-int __init btrfs_end_io_wq_init(void);
-void __cold btrfs_end_io_wq_exit(void);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_set_buffer_lockdep_class(u64 objectid,
index 3067c96..9cce0a3 100644 (file)
@@ -2640,12 +2640,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
                return;
        }
 
-       ret = btrfs_bio_wq_end_io(fs_info, bio,
-                       btrfs_is_free_space_inode(BTRFS_I(inode)) ?
-                       BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
-       if (ret)
-               goto out;
-
        /*
         * Lookup bio sums does extra checks around whether we need to csum or
         * not, which is why we ignore skip_sum here.
@@ -7879,9 +7873,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 
        BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
-       if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
-               return;
-
        refcount_inc(&dip->refs);
        if (btrfs_map_bio(fs_info, bio, mirror_num))
                refcount_dec(&dip->refs);
@@ -7970,19 +7961,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_dio_private *dip = bio->bi_private;
-       bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
        blk_status_t ret;
 
-       if (!write) {
-               ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-               if (ret)
-                       return ret;
-       }
-
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
                goto map;
 
-       if (write) {
+       if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
                /* Check btrfs_submit_data_write_bio() for async submit rules */
                if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
                        return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
@@ -10314,12 +10298,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
                        return ret;
        }
 
-       ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-       if (ret) {
-               btrfs_bio_free_csum(bbio);
-               return ret;
-       }
-
        atomic_inc(&priv->pending);
        ret = btrfs_map_bio(fs_info, bio, mirror_num);
        if (ret) {
index e3800f0..719dda5 100644 (file)
@@ -1932,8 +1932,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
        btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
        btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
        btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
-       btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
-       btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
        btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
        btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
        btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
@@ -2702,13 +2700,9 @@ static int __init init_btrfs_fs(void)
        if (err)
                goto free_delayed_ref;
 
-       err = btrfs_end_io_wq_init();
-       if (err)
-               goto free_prelim_ref;
-
        err = btrfs_interface_init();
        if (err)
-               goto free_end_io_wq;
+               goto free_prelim_ref;
 
        btrfs_print_mod_info();
 
@@ -2724,8 +2718,6 @@ static int __init init_btrfs_fs(void)
 
 unregister_ioctl:
        btrfs_interface_exit();
-free_end_io_wq:
-       btrfs_end_io_wq_exit();
 free_prelim_ref:
        btrfs_prelim_ref_exit();
 free_delayed_ref:
@@ -2763,7 +2755,6 @@ static void __exit exit_btrfs_fs(void)
        extent_state_cache_exit();
        extent_io_exit();
        btrfs_interface_exit();
-       btrfs_end_io_wq_exit();
        unregister_filesystem(&btrfs_fs_type);
        btrfs_exit_sysfs();
        btrfs_cleanup_fs_uuids();
index 9935b5d..04e7e79 100644 (file)
@@ -6616,11 +6616,27 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
 }
 
-static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
+static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
+{
+       if (bioc->orig_bio->bi_opf & REQ_META)
+               return bioc->fs_info->endio_meta_workers;
+       return bioc->fs_info->endio_workers;
+}
+
+static void btrfs_end_bio_work(struct work_struct *work)
+{
+       struct btrfs_bio *bbio =
+               container_of(work, struct btrfs_bio, end_io_work);
+
+       bio_endio(&bbio->bio);
+}
+
+static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
 {
        struct bio *orig_bio = bioc->orig_bio;
+       struct btrfs_bio *bbio = btrfs_bio(orig_bio);
 
-       btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
+       bbio->mirror_num = bioc->mirror_num;
        orig_bio->bi_private = bioc->private;
        orig_bio->bi_end_io = bioc->end_io;
 
@@ -6632,7 +6648,14 @@ static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
                orig_bio->bi_status = BLK_STS_IOERR;
        else
                orig_bio->bi_status = BLK_STS_OK;
-       bio_endio(orig_bio);
+
+       if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
+               INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
+               queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
+       } else {
+               bio_endio(orig_bio);
+       }
+
        btrfs_put_bioc(bioc);
 }
 
@@ -6664,7 +6687,7 @@ static void btrfs_end_bio(struct bio *bio)
 
        btrfs_bio_counter_dec(bioc->fs_info);
        if (atomic_dec_and_test(&bioc->stripes_pending))
-               btrfs_end_bioc(bioc);
+               btrfs_end_bioc(bioc, true);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
@@ -6762,7 +6785,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
                    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
                        atomic_inc(&bioc->error);
                        if (atomic_dec_and_test(&bioc->stripes_pending))
-                               btrfs_end_bioc(bioc);
+                               btrfs_end_bioc(bioc, false);
                        continue;
                }
 
index 1e86c48..7973d11 100644 (file)
@@ -371,6 +371,9 @@ struct btrfs_bio {
        u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
        struct bvec_iter iter;
 
+       /* For read end I/O handling */
+       struct work_struct end_io_work;
+
        /*
         * This member must come last, bio_alloc_bioset will allocate enough
         * bytes for entire btrfs_bio but relies on bio being last.