scsi: core: avoid host-wide host_busy counter for scsi_mq
Author: Ming Lei <ming.lei@redhat.com>
Fri, 25 Oct 2019 06:58:55 +0000 (14:58 +0800)
Committer: Martin K. Petersen <martin.petersen@oracle.com>
Sat, 2 Nov 2019 00:12:50 +0000 (20:12 -0400)
It isn't necessary to check the host depth in scsi_queue_rq() any more
since it has been respected by blk-mq before calling scsi_queue_rq() via
getting driver tag.

Lots of LUNs may attach to the same host, and per-host IOPS may reach
millions, so we should avoid expensive atomic operations on the host-wide
counter in the IO path.

This patch implements scsi_host_busy() via blk_mq_tagset_busy_iter() with
one scsi command state for reading the count of busy IOs for scsi_mq.

It is observed that IOPS is increased by 15% in IO test on scsi_debug (32
LUNs, 32 submit queues, 1024 can_queue, libaio/dio) in a dual-socket
system.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ewan D. Milne <emilne@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: James Bottomley <james.bottomley@hansenpartnership.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20191025065855.6309-1-ming.lei@redhat.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/hosts.c
drivers/scsi/scsi.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_priv.h
include/scsi/scsi_cmnd.h
include/scsi/scsi_host.h

index 55522b7..1d669e4 100644 (file)
@@ -38,6 +38,7 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
+#include <scsi/scsi_cmnd.h>
 
 #include "scsi_priv.h"
 #include "scsi_logging.h"
@@ -554,13 +555,29 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_host_get);
 
+static bool scsi_host_check_in_flight(struct request *rq, void *data,
+                                     bool reserved)
+{
+       int *count = data;
+       struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+       if (test_bit(SCMD_STATE_INFLIGHT, &cmd->state))
+               (*count)++;
+
+       return true;
+}
+
 /**
  * scsi_host_busy - Return the host busy counter
  * @shost:     Pointer to Scsi_Host to inc.
  **/
 int scsi_host_busy(struct Scsi_Host *shost)
 {
-       return atomic_read(&shost->host_busy);
+       int cnt = 0;
+
+       blk_mq_tagset_busy_iter(&shost->tag_set,
+                               scsi_host_check_in_flight, &cnt);
+       return cnt;
 }
 EXPORT_SYMBOL(scsi_host_busy);
 
index 4f76841..adfe8b3 100644 (file)
@@ -186,7 +186,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
        struct scsi_driver *drv;
        unsigned int good_bytes;
 
-       scsi_device_unbusy(sdev);
+       scsi_device_unbusy(sdev, cmd);
 
        /*
         * Clear the flags that say that the device/target/host is no longer
index dc210b9..2563b06 100644 (file)
@@ -189,7 +189,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
         * active on the host/device.
         */
        if (unbusy)
-               scsi_device_unbusy(device);
+               scsi_device_unbusy(device, cmd);
 
        /*
         * Requeue this command.  It will go before all other commands
@@ -321,20 +321,20 @@ static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
 }
 
 /*
- * Decrement the host_busy counter and wake up the error handler if necessary.
- * Avoid as follows that the error handler is not woken up if shost->host_busy
- * == shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination
- * with an RCU read lock in this function to ensure that this function in its
- * entirety either finishes before scsi_eh_scmd_add() increases the
+ * Wake up the error handler if necessary. Avoid as follows that the error
+ * handler is not woken up if host in-flight requests number ==
+ * shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination
+ * with an RCU read lock in this function to ensure that this function in
+ * its entirety either finishes before scsi_eh_scmd_add() increases the
  * host_failed counter or that it notices the shost state change made by
  * scsi_eh_scmd_add().
  */
-static void scsi_dec_host_busy(struct Scsi_Host *shost)
+static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 {
        unsigned long flags;
 
        rcu_read_lock();
-       atomic_dec(&shost->host_busy);
+       __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
        if (unlikely(scsi_host_in_recovery(shost))) {
                spin_lock_irqsave(shost->host_lock, flags);
                if (shost->host_failed || shost->host_eh_scheduled)
@@ -344,12 +344,12 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost)
        rcu_read_unlock();
 }
 
-void scsi_device_unbusy(struct scsi_device *sdev)
+void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
 {
        struct Scsi_Host *shost = sdev->host;
        struct scsi_target *starget = scsi_target(sdev);
 
-       scsi_dec_host_busy(shost);
+       scsi_dec_host_busy(shost, cmd);
 
        if (starget->can_queue > 0)
                atomic_dec(&starget->target_busy);
@@ -430,9 +430,6 @@ static inline bool scsi_target_is_busy(struct scsi_target *starget)
 
 static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
 {
-       if (shost->can_queue > 0 &&
-           atomic_read(&shost->host_busy) >= shost->can_queue)
-               return true;
        if (atomic_read(&shost->host_blocked) > 0)
                return true;
        if (shost->host_self_blocked)
@@ -1139,6 +1136,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
        unsigned int flags = cmd->flags & SCMD_PRESERVED_FLAGS;
        unsigned long jiffies_at_alloc;
        int retries;
+       bool in_flight;
 
        if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) {
                flags |= SCMD_INITIALIZED;
@@ -1147,6 +1145,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 
        jiffies_at_alloc = cmd->jiffies_at_alloc;
        retries = cmd->retries;
+       in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state);
        /* zero out the cmd, except for the embedded scsi_request */
        memset((char *)cmd + sizeof(cmd->req), 0,
                sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
@@ -1158,6 +1157,8 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
        INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
        cmd->jiffies_at_alloc = jiffies_at_alloc;
        cmd->retries = retries;
+       if (in_flight)
+               __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
 
        scsi_add_cmd_to_list(cmd);
 }
@@ -1367,16 +1368,14 @@ out_dec:
  */
 static inline int scsi_host_queue_ready(struct request_queue *q,
                                   struct Scsi_Host *shost,
-                                  struct scsi_device *sdev)
+                                  struct scsi_device *sdev,
+                                  struct scsi_cmnd *cmd)
 {
-       unsigned int busy;
-
        if (scsi_host_in_recovery(shost))
                return 0;
 
-       busy = atomic_inc_return(&shost->host_busy) - 1;
        if (atomic_read(&shost->host_blocked) > 0) {
-               if (busy)
+               if (scsi_host_busy(shost) > 0)
                        goto starved;
 
                /*
@@ -1390,8 +1389,6 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
                                     "unblocking host at zero depth\n"));
        }
 
-       if (shost->can_queue > 0 && busy >= shost->can_queue)
-               goto starved;
        if (shost->host_self_blocked)
                goto starved;
 
@@ -1403,6 +1400,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
                spin_unlock_irq(shost->host_lock);
        }
 
+       __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+
        return 1;
 
 starved:
@@ -1411,7 +1410,7 @@ starved:
                list_add_tail(&sdev->starved_entry, &shost->starved_list);
        spin_unlock_irq(shost->host_lock);
 out_dec:
-       scsi_dec_host_busy(shost);
+       scsi_dec_host_busy(shost, cmd);
        return 0;
 }
 
@@ -1665,7 +1664,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
        ret = BLK_STS_RESOURCE;
        if (!scsi_target_queue_ready(shost, sdev))
                goto out_put_budget;
-       if (!scsi_host_queue_ready(q, shost, sdev))
+       if (!scsi_host_queue_ready(q, shost, sdev, cmd))
                goto out_dec_target_busy;
 
        if (!(req->rq_flags & RQF_DONTPREP)) {
@@ -1697,7 +1696,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
        return BLK_STS_OK;
 
 out_dec_host_busy:
-       scsi_dec_host_busy(shost);
+       scsi_dec_host_busy(shost, cmd);
 out_dec_target_busy:
        if (scsi_target(sdev)->can_queue > 0)
                atomic_dec(&scsi_target(sdev)->target_busy);
index cc2859d..3bff9f7 100644 (file)
@@ -87,7 +87,7 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd);
 extern void scsi_add_cmd_to_list(struct scsi_cmnd *cmd);
 extern void scsi_del_cmd_from_list(struct scsi_cmnd *cmd);
 extern int scsi_maybe_unblock_host(struct scsi_device *sdev);
-extern void scsi_device_unbusy(struct scsi_device *sdev);
+extern void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd);
 extern void scsi_queue_insert(struct scsi_cmnd *cmd, int reason);
 extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
 extern void scsi_run_host_queues(struct Scsi_Host *shost);
index 91bd749..9c22e85 100644 (file)
@@ -63,6 +63,7 @@ struct scsi_pointer {
 
 /* for scmd->state */
 #define SCMD_STATE_COMPLETE    0
+#define SCMD_STATE_INFLIGHT    1
 
 struct scsi_cmnd {
        struct scsi_request req;
index 2c3f0c5..fccdf84 100644 (file)
@@ -345,7 +345,7 @@ struct scsi_host_template {
        /*
         * This determines if we will use a non-interrupt driven
         * or an interrupt driven scheme.  It is set to the maximum number
-        * of simultaneous commands a given host adapter will accept.
+        * of simultaneous commands a single hw queue in HBA will accept.
         */
        int can_queue;
 
@@ -554,7 +554,6 @@ struct Scsi_Host {
        /* Area to keep a shared tag map */
        struct blk_mq_tag_set   tag_set;
 
-       atomic_t host_busy;                /* commands actually active on low-level */
        atomic_t host_blocked;
 
        unsigned int host_failed;          /* commands that failed.