nvme-fabrics: reject I/O to offline device

author Victor Gladkov <Victor.Gladkov@kioxia.com>
Tue, 24 Nov 2020 18:34:59 +0000 (18:34 +0000)
committer Christoph Hellwig <hch@lst.de>
Tue, 1 Dec 2020 19:36:37 +0000 (20:36 +0100)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c

index fff9020..9c1645f 100644 (file)
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -148,6 +148,38 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
  }
  EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
  
+/*
+ * Delayed-work handler for ctrl->failfast_work.
+ *
+ * If the controller is still in NVME_CTRL_CONNECTING when the work runs,
+ * set NVME_CTRL_FAILFAST_EXPIRED in ctrl->flags, log the event and kick
+ * the requeue lists.  In any other state the handler does nothing.
+ */
+static void nvme_failfast_work(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct nvme_ctrl *ctrl =
+               container_of(dwork, struct nvme_ctrl, failfast_work);
+
+       if (ctrl->state == NVME_CTRL_CONNECTING) {
+               set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+               dev_info(ctrl->device, "failfast expired\n");
+               nvme_kick_requeue_lists(ctrl);
+       }
+}
+
+/*
+ * Arm the fast I/O fail timer for a controller.
+ *
+ * Does nothing when the controller has no fabrics options or when
+ * fast_io_fail_tmo is negative (the default, -1, disables the mechanism).
+ * Otherwise schedules ctrl->failfast_work to run after fast_io_fail_tmo
+ * seconds.
+ */
+static inline void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+       /*
+        * The option parser stores whatever integer the user supplied, so
+        * treat every negative value as "disabled" rather than only -1;
+        * a negative delay would otherwise turn into a huge unsigned one.
+        */
+       if (!ctrl->opts || ctrl->opts->fast_io_fail_tmo < 0)
+               return;
+
+       /* Widen before scaling so large timeouts do not overflow int. */
+       schedule_delayed_work(&ctrl->failfast_work,
+                       (unsigned long)ctrl->opts->fast_io_fail_tmo * HZ);
+}
+
+/*
+ * Disarm the fast I/O fail timer and forget any previous expiry.
+ *
+ * Controllers without fabrics options are left untouched.  Otherwise the
+ * pending failfast_work is cancelled synchronously and then the
+ * NVME_CTRL_FAILFAST_EXPIRED bit is cleared.
+ */
+static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+       if (ctrl->opts) {
+               cancel_delayed_work_sync(&ctrl->failfast_work);
+               clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+       }
+}
+
+
  int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
  {
         if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -433,8 +465,17 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
         }
  
         spin_unlock_irqrestore(&ctrl->lock, flags);
-       if (changed && ctrl->state == NVME_CTRL_LIVE)
+       if (!changed)
+               return false;
+
+       if (ctrl->state == NVME_CTRL_LIVE) {
+               if (old_state == NVME_CTRL_CONNECTING)
+                       nvme_stop_failfast_work(ctrl);
                 nvme_kick_requeue_lists(ctrl);
+       } else if (ctrl->state == NVME_CTRL_CONNECTING &&
+               old_state == NVME_CTRL_RESETTING) {
+               nvme_start_failfast_work(ctrl);
+       }
         return changed;
  }
  EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -4372,6 +4413,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
  {
         nvme_mpath_stop(ctrl);
         nvme_stop_keep_alive(ctrl);
+       nvme_stop_failfast_work(ctrl);
         flush_work(&ctrl->async_event_work);
         cancel_work_sync(&ctrl->fw_act_work);
  }
@@ -4437,6 +4479,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
         int ret;
  
         ctrl->state = NVME_CTRL_NEW;
+       clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
         spin_lock_init(&ctrl->lock);
         mutex_init(&ctrl->scan_lock);
         INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4453,6 +4496,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
         init_waitqueue_head(&ctrl->state_wq);
  
         INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+       INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
         memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
         ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
  
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c

index 8575724..72ac001 100644 (file)
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
  {
         if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
             ctrl->state != NVME_CTRL_DEAD &&
+           !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
             !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
                 return BLK_STS_RESOURCE;
  
@@ -615,6 +616,7 @@ static const match_table_t opt_tokens = {
         { NVMF_OPT_NR_WRITE_QUEUES,     "nr_write_queues=%d"    },
         { NVMF_OPT_NR_POLL_QUEUES,      "nr_poll_queues=%d"     },
         { NVMF_OPT_TOS,                 "tos=%d"                },
+       { NVMF_OPT_FAIL_FAST_TMO,       "fast_io_fail_tmo=%d"   },
         { NVMF_OPT_ERR,                 NULL                    }
  };
  
@@ -634,6 +636,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
         opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
         opts->kato = NVME_DEFAULT_KATO;
         opts->duplicate_connect = false;
+       opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
         opts->hdr_digest = false;
         opts->data_digest = false;
         opts->tos = -1; /* < 0 == use transport default */
@@ -754,6 +757,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                 pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
                         ctrl_loss_tmo = token;
                         break;
+               case NVMF_OPT_FAIL_FAST_TMO:
+                       if (match_int(args, &token)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+
+                       if (token >= 0)
+                               pr_warn("I/O fail on reconnect controller after %d sec\n",
+                                       token);
+                       opts->fast_io_fail_tmo = token;
+                       break;
                 case NVMF_OPT_HOSTNQN:
                         if (opts->host) {
                                 pr_err("hostnqn already user-assigned: %s\n",
@@ -884,11 +898,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                 opts->nr_poll_queues = 0;
                 opts->duplicate_connect = true;
         }
-       if (ctrl_loss_tmo < 0)
+       if (ctrl_loss_tmo < 0) {
                 opts->max_reconnects = -1;
-       else
+       } else {
                 opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
                                                 opts->reconnect_delay);
+               if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
+                       pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
+                               opts->fast_io_fail_tmo, ctrl_loss_tmo);
+       }
  
         if (!opts->host) {
                 kref_get(&nvmf_default_host->ref);
@@ -988,7 +1006,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
  #define NVMF_ALLOWED_OPTS      (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
                                  NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
                                  NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-                                NVMF_OPT_DISABLE_SQFLOW)
+                                NVMF_OPT_DISABLE_SQFLOW |\
+                                NVMF_OPT_FAIL_FAST_TMO)
  
  static struct nvme_ctrl *
  nvmf_create_ctrl(struct device *dev, const char *buf)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h

index a9c1e3b..733010d 100644 (file)
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
  #define NVMF_DEF_RECONNECT_DELAY       10
  /* default to 600 seconds of reconnect attempts before giving up */
  #define NVMF_DEF_CTRL_LOSS_TMO         600
+/* default fast_io_fail_tmo is -1: the fail fast mechanism is disabled */
+#define NVMF_DEF_FAIL_FAST_TMO         -1
  
  /*
   * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
         NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
         NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
         NVMF_OPT_TOS            = 1 << 19,
+       NVMF_OPT_FAIL_FAST_TMO  = 1 << 20,
  };
  
  /**
@@ -89,6 +92,7 @@ enum {
   * @nr_write_queues: number of queues for write I/O
   * @nr_poll_queues: number of queues for polling I/O
   * @tos: type of service
+ * @fast_io_fail_tmo: Fast I/O fail timeout in seconds
   */
  struct nvmf_ctrl_options {
         unsigned                mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
         unsigned int            nr_write_queues;
         unsigned int            nr_poll_queues;
         int                     tos;
+       int                     fast_io_fail_tmo;
  };
  
  /*
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c

index 74896be..7169681 100644 (file)
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -279,6 +279,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
         struct nvme_ns *ns;
  
         list_for_each_entry_rcu(ns, &head->list, siblings) {
+               if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
+                       continue;
                 switch (ns->ctrl->state) {
                 case NVME_CTRL_LIVE:
                 case NVME_CTRL_RESETTING:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h

index 83fb30e..ae017f7 100644 (file)
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -305,6 +305,7 @@ struct nvme_ctrl {
         struct work_struct scan_work;
         struct work_struct async_event_work;
         struct delayed_work ka_work;
+       struct delayed_work failfast_work;
         struct nvme_command ka_cmd;
         struct work_struct fw_act_work;
         unsigned long events;
@@ -338,6 +339,8 @@ struct nvme_ctrl {
         u16 icdoff;
         u16 maxcmd;
         int nr_reconnects;
+       unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED     0
         struct nvmf_ctrl_options *opts;
  
         struct page *discard_page;
author	Victor Gladkov <Victor.Gladkov@kioxia.com>
	Tue, 24 Nov 2020 18:34:59 +0000 (18:34 +0000)
committer	Christoph Hellwig <hch@lst.de>
	Tue, 1 Dec 2020 19:36:37 +0000 (20:36 +0100)
drivers/nvme/host/core.c		patch \| blob \| history
drivers/nvme/host/fabrics.c		patch \| blob \| history
drivers/nvme/host/fabrics.h		patch \| blob \| history
drivers/nvme/host/multipath.c		patch \| blob \| history
drivers/nvme/host/nvme.h		patch \| blob \| history