blk-mq: make sure elevator callbacks aren't called for passthrough requests
author		Christoph Hellwig <hch@lst.de>
		Thu, 18 May 2023 05:31:01 +0000 (07:31 +0200)
committer	Jens Axboe <axboe@kernel.dk>
		Fri, 19 May 2023 01:42:54 +0000 (19:42 -0600)
When q->elevator is set, passthrough requests can still be marked
RQF_ELV, so some elevator callbacks are called for them.
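
For illustration, the pre-patch guards only checked RQF_ELV, as in
blk_mq_sched_completed_request(), so the elevator was also invoked for
passthrough requests (condensed from the code removed in the hunks
below):

	static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
	{
		/*
		 * RQF_ELV is set whenever the queue has an elevator, even
		 * for passthrough requests, so this misfires for them.
		 */
		if (rq->rq_flags & RQF_ELV) {
			struct elevator_queue *e = rq->q->elevator;

			if (e->type->ops.completed_request)
				e->type->ops.completed_request(rq, now);
		}
	}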

Fix this by splitting RQF_ELV into two flags: RQF_SCHED_TAGS, which is
set for all requests issued on a queue that uses an I/O scheduler, and
RQF_USE_SCHED, which is set only for non-flush, non-passthrough requests
on such a queue.
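
After the split, the allocation path sets the two flags independently;
condensed from the __blk_mq_alloc_requests() hunk below:

	if (q->elevator) {
		/* all requests on a queue with a scheduler use hctx->sched_tags */
		data->rq_flags |= RQF_SCHED_TAGS;

		/* only non-flush, non-passthrough requests go through the elevator */
		if (!op_is_flush(data->cmd_flags) &&
		    !blk_op_is_passthrough(data->cmd_flags))
			data->rq_flags |= RQF_USE_SCHED;
	}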

Roughly based on two different patches from
Ming Lei <ming.lei@redhat.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230518053101.760632-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq-debugfs.c
block/blk-mq-sched.h
block/blk-mq.c
block/blk-mq.h
include/linux/blk-mq.h

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index ae1b308..22e39b9 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -249,6 +249,8 @@ static const char *const rqf_name[] = {
        RQF_NAME(MIXED_MERGE),
        RQF_NAME(MQ_INFLIGHT),
        RQF_NAME(DONTPREP),
+       RQF_NAME(SCHED_TAGS),
+       RQF_NAME(USE_SCHED),
        RQF_NAME(FAILED),
        RQF_NAME(QUIET),
        RQF_NAME(IO_STAT),
@@ -258,7 +260,6 @@ static const char *const rqf_name[] = {
        RQF_NAME(SPECIAL_PAYLOAD),
        RQF_NAME(ZONE_WRITE_LOCKED),
        RQF_NAME(TIMED_OUT),
-       RQF_NAME(ELV),
        RQF_NAME(RESV),
 };
 #undef RQF_NAME
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 4d8d2cd..1326526 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -37,7 +37,7 @@ static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
                         struct bio *bio)
 {
-       if (rq->rq_flags & RQF_ELV) {
+       if (rq->rq_flags & RQF_USE_SCHED) {
                struct elevator_queue *e = q->elevator;
 
                if (e->type->ops.allow_merge)
@@ -48,7 +48,7 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 
 static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 {
-       if (rq->rq_flags & RQF_ELV) {
+       if (rq->rq_flags & RQF_USE_SCHED) {
                struct elevator_queue *e = rq->q->elevator;
 
                if (e->type->ops.completed_request)
@@ -58,7 +58,7 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 
 static inline void blk_mq_sched_requeue_request(struct request *rq)
 {
-       if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags)) {
+       if (rq->rq_flags & RQF_USE_SCHED) {
                struct request_queue *q = rq->q;
                struct elevator_queue *e = q->elevator;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7470c66..e021740 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -354,12 +354,12 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                data->rq_flags |= RQF_IO_STAT;
        rq->rq_flags = data->rq_flags;
 
-       if (!(data->rq_flags & RQF_ELV)) {
-               rq->tag = tag;
-               rq->internal_tag = BLK_MQ_NO_TAG;
-       } else {
+       if (data->rq_flags & RQF_SCHED_TAGS) {
                rq->tag = BLK_MQ_NO_TAG;
                rq->internal_tag = tag;
+       } else {
+               rq->tag = tag;
+               rq->internal_tag = BLK_MQ_NO_TAG;
        }
        rq->timeout = 0;
 
@@ -386,14 +386,13 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
        WRITE_ONCE(rq->deadline, 0);
        req_ref_set(rq, 1);
 
-       if (rq->rq_flags & RQF_ELV) {
+       if (rq->rq_flags & RQF_USE_SCHED) {
                struct elevator_queue *e = data->q->elevator;
 
                INIT_HLIST_NODE(&rq->hash);
                RB_CLEAR_NODE(&rq->rb_node);
 
-               if (!op_is_flush(data->cmd_flags) &&
-                   e->type->ops.prepare_request)
+               if (e->type->ops.prepare_request)
                        e->type->ops.prepare_request(rq);
        }
 
@@ -447,26 +446,32 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
                data->flags |= BLK_MQ_REQ_NOWAIT;
 
        if (q->elevator) {
-               struct elevator_queue *e = q->elevator;
-
-               data->rq_flags |= RQF_ELV;
+               /*
+                * All requests use scheduler tags when an I/O scheduler is
+                * enabled for the queue.
+                */
+               data->rq_flags |= RQF_SCHED_TAGS;
 
                /*
                 * Flush/passthrough requests are special and go directly to the
-                * dispatch list. Don't include reserved tags in the
-                * limiting, as it isn't useful.
+                * dispatch list.
                 */
                if (!op_is_flush(data->cmd_flags) &&
-                   !blk_op_is_passthrough(data->cmd_flags) &&
-                   e->type->ops.limit_depth &&
-                   !(data->flags & BLK_MQ_REQ_RESERVED))
-                       e->type->ops.limit_depth(data->cmd_flags, data);
+                   !blk_op_is_passthrough(data->cmd_flags)) {
+                       struct elevator_mq_ops *ops = &q->elevator->type->ops;
+
+                       WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED);
+
+                       data->rq_flags |= RQF_USE_SCHED;
+                       if (ops->limit_depth)
+                               ops->limit_depth(data->cmd_flags, data);
+               }
        }
 
 retry:
        data->ctx = blk_mq_get_ctx(q);
        data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
-       if (!(data->rq_flags & RQF_ELV))
+       if (!(data->rq_flags & RQF_SCHED_TAGS))
                blk_mq_tag_busy(data->hctx);
 
        if (data->flags & BLK_MQ_REQ_RESERVED)
@@ -646,10 +651,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
                goto out_queue_exit;
        data.ctx = __blk_mq_get_ctx(q, cpu);
 
-       if (!q->elevator)
-               blk_mq_tag_busy(data.hctx);
+       if (q->elevator)
+               data.rq_flags |= RQF_SCHED_TAGS;
        else
-               data.rq_flags |= RQF_ELV;
+               blk_mq_tag_busy(data.hctx);
 
        if (flags & BLK_MQ_REQ_RESERVED)
                data.rq_flags |= RQF_RESV;
@@ -694,7 +699,7 @@ void blk_mq_free_request(struct request *rq)
        struct request_queue *q = rq->q;
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
-       if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags) &&
+       if ((rq->rq_flags & RQF_USE_SCHED) &&
            q->elevator->type->ops.finish_request)
                q->elevator->type->ops.finish_request(rq);
 
@@ -1268,7 +1273,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 
        if (!plug->multiple_queues && last && last->q != rq->q)
                plug->multiple_queues = true;
-       if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+       if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
                plug->has_elevator = true;
        rq->rq_next = NULL;
        rq_list_add(&plug->mq_list, rq);
@@ -2620,7 +2625,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
                return;
        }
 
-       if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
+       if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
                blk_mq_insert_request(rq, 0);
                blk_mq_run_hw_queue(hctx, false);
                return;
@@ -2983,7 +2988,7 @@ void blk_mq_submit_bio(struct bio *bio)
        }
 
        hctx = rq->mq_hctx;
-       if ((rq->rq_flags & RQF_ELV) ||
+       if ((rq->rq_flags & RQF_USE_SCHED) ||
            (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
                blk_mq_insert_request(rq, 0);
                blk_mq_run_hw_queue(hctx, true);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e876584..d15981d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -226,9 +226,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags)
 
 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
 {
-       if (!(data->rq_flags & RQF_ELV))
-               return data->hctx->tags;
-       return data->hctx->sched_tags;
+       if (data->rq_flags & RQF_SCHED_TAGS)
+               return data->hctx->sched_tags;
+       return data->hctx->tags;
 }
 
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5529e7d..e4a2119 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -38,6 +38,10 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_MQ_INFLIGHT                ((__force req_flags_t)(1 << 6))
 /* don't call prep for this one */
 #define RQF_DONTPREP           ((__force req_flags_t)(1 << 7))
+/* use hctx->sched_tags */
+#define RQF_SCHED_TAGS         ((__force req_flags_t)(1 << 8))
+/* use an I/O scheduler for this request */
+#define RQF_USE_SCHED          ((__force req_flags_t)(1 << 9))
 /* vaguely specified driver internal error.  Ignored by the block layer */
 #define RQF_FAILED             ((__force req_flags_t)(1 << 10))
 /* don't warn about errors */
@@ -57,9 +61,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ZONE_WRITE_LOCKED  ((__force req_flags_t)(1 << 19))
 /* ->timeout has been called, don't expire again */
 #define RQF_TIMED_OUT          ((__force req_flags_t)(1 << 21))
-/* queue has elevator attached */
-#define RQF_ELV                        ((__force req_flags_t)(1 << 22))
-#define RQF_RESV                       ((__force req_flags_t)(1 << 23))
+#define RQF_RESV               ((__force req_flags_t)(1 << 23))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -842,7 +844,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
  */
 static inline bool blk_mq_need_time_stamp(struct request *rq)
 {
-       return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
+       return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
 }
 
 static inline bool blk_mq_is_reserved_rq(struct request *rq)
@@ -858,7 +860,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
                                       struct io_comp_batch *iob, int ioerror,
                                       void (*complete)(struct io_comp_batch *))
 {
-       if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
+       if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror ||
                        (req->end_io && !blk_rq_is_passthrough(req)))
                return false;