block: count 'ios' and 'sectors' when io is done for bio-based device
author Yu Kuai <yukuai3@huawei.com>
Thu, 23 Feb 2023 09:12:26 +0000 (17:12 +0800)
committer Jens Axboe <axboe@kernel.dk>
Wed, 15 Mar 2023 15:25:04 +0000 (09:25 -0600)
While using iostat for raid, I occasionally observed very strange 'await'
values, and it turns out this is because 'ios' and 'sectors' are
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are counted like that,
but I think this behaviour is obviously wrong because the user will get
wrong disk stats.

Fix the problem by counting 'ios' and 'sectors' when the io is done, as is
already the case for rq-based devices.

Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20230223091226.1135678-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-core.c
drivers/md/dm.c
drivers/nvme/host/multipath.c
include/linux/blkdev.h

index 9e5e027..42926e6 100644 (file)
@@ -959,16 +959,11 @@ again:
        }
 }
 
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-                                unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
                                 unsigned long start_time)
 {
-       const int sgrp = op_stat_group(op);
-
        part_stat_lock();
        update_io_ticks(bdev, start_time, false);
-       part_stat_inc(bdev, ios[sgrp]);
-       part_stat_add(bdev, sectors[sgrp], sectors);
        part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
        part_stat_unlock();
 
@@ -984,13 +979,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
  */
 unsigned long bio_start_io_acct(struct bio *bio)
 {
-       return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-                                 bio_op(bio), jiffies);
+       return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct);
 
 void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-                     unsigned long start_time)
+                     unsigned int sectors, unsigned long start_time)
 {
        const int sgrp = op_stat_group(op);
        unsigned long now = READ_ONCE(jiffies);
@@ -998,6 +992,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
 
        part_stat_lock();
        update_io_ticks(bdev, now, true);
+       part_stat_inc(bdev, ios[sgrp]);
+       part_stat_add(bdev, sectors[sgrp], sectors);
        part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
        part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
        part_stat_unlock();
@@ -1007,7 +1003,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
                              struct block_device *orig_bdev)
 {
-       bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+       bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
 
index eace45a..f5cc330 100644 (file)
@@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
                sectors = io->sectors;
 
        if (!end)
-               bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
-                                  start_time);
+               bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
        else
-               bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+               bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+                                start_time);
 
        if (static_branch_unlikely(&stats_enabled) &&
            unlikely(dm_stats_used(&md->stats))) {
index fc39d01..9171452 100644 (file)
@@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
                return;
 
        nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
-       nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
-                                       blk_rq_bytes(rq) >> SECTOR_SHIFT,
-                                       req_op(rq), jiffies);
+       nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+                                                     jiffies);
 }
 EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
 
@@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
        if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
                return;
        bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
-               nvme_req(rq)->start_time);
+                        blk_rq_bytes(rq) >> SECTOR_SHIFT,
+                        nvme_req(rq)->start_time);
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
index d1aee08..941304f 100644 (file)
@@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
                wake_up_process(waiter);
 }
 
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-                                unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
                                 unsigned long start_time);
 void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-               unsigned long start_time);
+                     unsigned int sectors, unsigned long start_time);
 
 unsigned long bio_start_io_acct(struct bio *bio);
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,