Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 24 Oct 2020 19:46:42 +0000 (12:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 24 Oct 2020 19:46:42 +0000 (12:46 -0700)
Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph
     - rdma error handling fixes (Chao Leng)
     - fc error handling and reconnect fixes (James Smart)
     - fix the qid displayed when tracing ioctl commands (Keith Busch)
     - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni)
     - fix MDTS for passthru (Logan Gunthorpe)
     - disable Write Zeroes on Sandisk Skyhawk (Kai-Heng Feng)
     - fix an uninitialized work struct (zhenwei pi)"

 - lightnvm out-of-bounds fix (Colin)

 - SG allocation leak fix (Doug)

 - rnbd fixes (Gioh, Guoqing, Jack)

 - zone error translation fixes (Keith)

 - kerneldoc markup fix (Mauro)

 - zram lockdep fix (Peter)

 - Kill unused io_context members (Yufen)

 - NUMA memory allocation cleanup (Xianting)

 - NBD config wakeup fix (Xiubo)

* tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block: (27 commits)
  block: blk-mq: fix a kernel-doc markup
  nvme-fc: shorten reconnect delay if possible for FC
  nvme-fc: wait for queues to freeze before calling update_hr_hw_queues
  nvme-fc: fix error loop in create_hw_io_queues
  nvme-fc: fix io timeout to abort I/O
  null_blk: use zone status for max active/open
  nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru
  nvmet: cleanup nvmet_passthru_map_sg()
  nvmet: limit passthru MTDS by BIO_MAX_PAGES
  nvmet: fix uninitialized work for zero kato
  nvme-pci: disable Write Zeroes on Sandisk Skyhawk
  nvme: use queuedata for nvme_req_qid
  nvme-rdma: fix crash due to incorrect cqe
  nvme-rdma: fix crash when connect rejected
  block: remove unused members for io_context
  blk-mq: remove the calling of local_memory_node()
  zram: Fix __zram_bvec_{read,write}() locking order
  skd_main: remove unused including <linux/version.h>
  sgl_alloc_order: fix memory leak
  lightnvm: fix out-of-bounds write to array devices->info[]
  ...

21 files changed:
Documentation/block/queue-sysfs.rst
block/blk-core.c
block/blk-mq-cpumap.c
block/blk-mq.c
drivers/block/nbd.c
drivers/block/null_blk_zoned.c
drivers/block/rnbd/rnbd-clt.c
drivers/block/skd_main.c
drivers/block/zram/zram_drv.c
drivers/lightnvm/core.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/core.c
drivers/nvme/target/passthru.c
drivers/scsi/scsi_lib.c
include/linux/blk_types.h
include/linux/iocontext.h
lib/scatterlist.c

diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index f261a5c..2638d34 100644
@@ -124,6 +124,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
 EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value.
 If this value is 0, there is no limit.
 
+If the host attempts to exceed this limit, the driver should report this error
+with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW
+errno.
+
 max_open_zones (RO)
 -------------------
 For zoned block devices (zoned attribute indicating "host-managed" or
@@ -131,6 +135,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
 EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
 If this value is 0, there is no limit.
 
+If the host attempts to exceed this limit, the driver should report this error
+with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS
+errno.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
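
A user-space illustration of the errnos documented above (not part of the
series; the device path and zone size are assumptions for illustration):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/blkzoned.h>

    int main(void)
    {
            /* Assumed zoned test device and zone size (512-byte sectors). */
            struct blk_zone_range range = { .sector = 0, .nr_sectors = 524288 };
            int fd = open("/dev/nullb0", O_RDWR);

            if (fd < 0)
                    return 1;
            /* Explicit OPEN; fails with the errnos above if the device is
             * already at its open/active zone limits. */
            if (ioctl(fd, BLKOPENZONE, &range) < 0) {
                    if (errno == ETOOMANYREFS)
                            fprintf(stderr, "max_open_zones exceeded\n");
                    else if (errno == EOVERFLOW)
                            fprintf(stderr, "max_active_zones exceeded\n");
                    else
                            fprintf(stderr, "BLKOPENZONE: %s\n", strerror(errno));
            }
            close(fd);
            return 0;
    }
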
diff --git a/block/blk-core.c b/block/blk-core.c
index ac00d2f..2db8bda 100644
@@ -186,6 +186,10 @@ static const struct {
        /* device mapper special case, should not leak out: */
        [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
 
+       /* zone device specific errors */
+       [BLK_STS_ZONE_OPEN_RESOURCE]    = { -ETOOMANYREFS, "open zones exceeded" },
+       [BLK_STS_ZONE_ACTIVE_RESOURCE]  = { -EOVERFLOW, "active zones exceeded" },
+
        /* everything else not covered above: */
        [BLK_STS_IOERR]         = { -EIO,       "I/O" },
 };
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 0157f2b..3db84d3 100644
@@ -89,7 +89,7 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
 
        for_each_possible_cpu(i) {
                if (index == qmap->mq_map[i])
-                       return local_memory_node(cpu_to_node(i));
+                       return cpu_to_node(i);
        }
 
        return NUMA_NO_NODE;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6964502..55bcee5 100644
@@ -1664,7 +1664,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 EXPORT_SYMBOL(blk_mq_run_hw_queue);
 
 /**
- * blk_mq_run_hw_queue - Run all hardware queues in a request queue.
+ * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
  * @q: Pointer to the request queue to run.
  * @async: If we want to run the queue asynchronously.
  */
@@ -2743,7 +2743,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
                for (j = 0; j < set->nr_maps; j++) {
                        hctx = blk_mq_map_queue_type(q, j, i);
                        if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
-                               hctx->numa_node = local_memory_node(cpu_to_node(i));
+                               hctx->numa_node = cpu_to_node(i);
                }
        }
 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3c9485a..0bed21c 100644
@@ -802,9 +802,9 @@ static void recv_work(struct work_struct *work)
                if (likely(!blk_should_fake_timeout(rq->q)))
                        blk_mq_complete_request(rq);
        }
+       nbd_config_put(nbd);
        atomic_dec(&config->recv_threads);
        wake_up(&config->recv_wq);
-       nbd_config_put(nbd);
        kfree(args);
 }
 
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index fa0cc70..7d94f2d 100644
@@ -220,29 +220,34 @@ static void null_close_first_imp_zone(struct nullb_device *dev)
        }
 }
 
-static bool null_can_set_active(struct nullb_device *dev)
+static blk_status_t null_check_active(struct nullb_device *dev)
 {
        if (!dev->zone_max_active)
-               return true;
+               return BLK_STS_OK;
+
+       if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
+                       dev->nr_zones_closed < dev->zone_max_active)
+               return BLK_STS_OK;
 
-       return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
-              dev->nr_zones_closed < dev->zone_max_active;
+       return BLK_STS_ZONE_ACTIVE_RESOURCE;
 }
 
-static bool null_can_open(struct nullb_device *dev)
+static blk_status_t null_check_open(struct nullb_device *dev)
 {
        if (!dev->zone_max_open)
-               return true;
+               return BLK_STS_OK;
 
        if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
-               return true;
+               return BLK_STS_OK;
 
-       if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
-               null_close_first_imp_zone(dev);
-               return true;
+       if (dev->nr_zones_imp_open) {
+               if (null_check_active(dev) == BLK_STS_OK) {
+                       null_close_first_imp_zone(dev);
+                       return BLK_STS_OK;
+               }
        }
 
-       return false;
+       return BLK_STS_ZONE_OPEN_RESOURCE;
 }
 
 /*
@@ -258,19 +263,22 @@ static bool null_can_open(struct nullb_device *dev)
  * it is not certain that closing an implicit open zone will allow a new zone
  * to be opened, since we might already be at the active limit capacity.
  */
-static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
+static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
 {
+       blk_status_t ret;
+
        switch (zone->cond) {
        case BLK_ZONE_COND_EMPTY:
-               if (!null_can_set_active(dev))
-                       return false;
+               ret = null_check_active(dev);
+               if (ret != BLK_STS_OK)
+                       return ret;
                fallthrough;
        case BLK_ZONE_COND_CLOSED:
-               return null_can_open(dev);
+               return null_check_open(dev);
        default:
                /* Should never be called for other states */
                WARN_ON(1);
-               return false;
+               return BLK_STS_IOERR;
        }
 }
 
@@ -293,8 +301,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                return BLK_STS_IOERR;
        case BLK_ZONE_COND_EMPTY:
        case BLK_ZONE_COND_CLOSED:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                break;
        case BLK_ZONE_COND_IMP_OPEN:
        case BLK_ZONE_COND_EXP_OPEN:
@@ -349,6 +358,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 
 static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
 {
+       blk_status_t ret;
+
        if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
                return BLK_STS_IOERR;
 
@@ -357,15 +368,17 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
                /* open operation on exp open is not an error */
                return BLK_STS_OK;
        case BLK_ZONE_COND_EMPTY:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                break;
        case BLK_ZONE_COND_IMP_OPEN:
                dev->nr_zones_imp_open--;
                break;
        case BLK_ZONE_COND_CLOSED:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                dev->nr_zones_closed--;
                break;
        case BLK_ZONE_COND_FULL:
@@ -381,6 +394,8 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
 
 static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
 {
+       blk_status_t ret;
+
        if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
                return BLK_STS_IOERR;
 
@@ -389,8 +404,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
                /* finish operation on full is not an error */
                return BLK_STS_OK;
        case BLK_ZONE_COND_EMPTY:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                break;
        case BLK_ZONE_COND_IMP_OPEN:
                dev->nr_zones_imp_open--;
@@ -399,8 +415,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
                dev->nr_zones_exp_open--;
                break;
        case BLK_ZONE_COND_CLOSED:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                dev->nr_zones_closed--;
                break;
        default:
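
A worked reading of the zone-resource comment above (numbers illustrative):
with zone_max_active = 4 and a device holding 2 explicitly open, 1
implicitly open and 1 closed zone, a write to an EMPTY zone hits
null_check_active() first, since the target would become a fifth active
zone. The command now completes with BLK_STS_ZONE_ACTIVE_RESOURCE even
though closing the implicitly open zone could have satisfied
zone_max_open; previously both shortages collapsed into BLK_STS_IOERR.
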
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index d7a6974..8b2411c 100644
@@ -91,11 +91,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
        dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
        dev->max_segments = BMAX_SEGMENTS;
 
-       dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors,
-                                   le32_to_cpu(rsp->max_hw_sectors));
-       dev->max_segments = min_t(u16, dev->max_segments,
-                                 le16_to_cpu(rsp->max_segments));
-
        return 0;
 }
 
@@ -427,7 +422,7 @@ enum wait_type {
 };
 
 static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
-                       struct rnbd_iu *iu, struct kvec *vec, size_t nr,
+                       struct rnbd_iu *iu, struct kvec *vec,
                        size_t len, struct scatterlist *sg, unsigned int sg_len,
                        void (*conf)(struct work_struct *work),
                        int *errno, enum wait_type wait)
@@ -441,7 +436,7 @@ static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
                .conf_fn = msg_conf,
        };
        err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit,
-                               vec, nr, len, sg, sg_len);
+                               vec, 1, len, sg, sg_len);
        if (!err && wait) {
                wait_event(iu->comp.wait, iu->comp.errno != INT_MAX);
                *errno = iu->comp.errno;
@@ -486,7 +481,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
        msg.device_id   = cpu_to_le32(device_id);
 
        WARN_ON(!rnbd_clt_get_dev(dev));
-       err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0,
+       err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0,
                           msg_close_conf, &errno, wait);
        if (err) {
                rnbd_clt_put_dev(dev);
@@ -575,7 +570,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
 
        WARN_ON(!rnbd_clt_get_dev(dev));
        err = send_usr_msg(sess->rtrs, READ, iu,
-                          &vec, 1, sizeof(*rsp), iu->sglist, 1,
+                          &vec, sizeof(*rsp), iu->sglist, 1,
                           msg_open_conf, &errno, wait);
        if (err) {
                rnbd_clt_put_dev(dev);
@@ -629,7 +624,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
                goto put_iu;
        }
        err = send_usr_msg(sess->rtrs, READ, iu,
-                          &vec, 1, sizeof(*rsp), iu->sglist, 1,
+                          &vec, sizeof(*rsp), iu->sglist, 1,
                           msg_sess_info_conf, &errno, wait);
        if (err) {
                rnbd_clt_put_sess(sess);
@@ -1514,7 +1509,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
                              "map_device: Failed to configure device, err: %d\n",
                              ret);
                mutex_unlock(&dev->lock);
-               goto del_dev;
+               goto send_close;
        }
 
        rnbd_clt_info(dev,
@@ -1533,6 +1528,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 
        return dev;
 
+send_close:
+       send_msg_close(dev, dev->device_id, WAIT);
 del_dev:
        delete_dev(dev);
 put_dev:
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index ae6454c..a962b45 100644
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
-#include <linux/version.h>
 #include <linux/err.h>
 #include <linux/aer.h>
 #include <linux/wait.h>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 029403c..1b69720 100644
@@ -1218,10 +1218,11 @@ out:
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
                                struct bio *bio, bool partial_io)
 {
-       int ret;
+       struct zcomp_strm *zstrm;
        unsigned long handle;
        unsigned int size;
        void *src, *dst;
+       int ret;
 
        zram_slot_lock(zram, index);
        if (zram_test_flag(zram, index, ZRAM_WB)) {
@@ -1252,6 +1253,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 
        size = zram_get_obj_size(zram, index);
 
+       if (size != PAGE_SIZE)
+               zstrm = zcomp_stream_get(zram->comp);
+
        src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
        if (size == PAGE_SIZE) {
                dst = kmap_atomic(page);
@@ -1259,8 +1263,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
                kunmap_atomic(dst);
                ret = 0;
        } else {
-               struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
-
                dst = kmap_atomic(page);
                ret = zcomp_decompress(zstrm, src, size, dst);
                kunmap_atomic(dst);
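
The effect of the reorder, sketched with names from the code above: both
zram bvec paths now acquire their locks in the same order,

    zstrm = zcomp_stream_get(zram->comp);    /* per-CPU stream lock first */
    src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);  /* then zspage */

whereas the read side previously mapped the object before taking the
stream, the inverse of the write side's order, which lockdep reports as a
potential deadlock.
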
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index fe78bf0..c1bcac7 100644
@@ -1311,8 +1311,9 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
                strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
                i++;
 
-               if (i > 31) {
-                       pr_err("max 31 devices can be reported.\n");
+               if (i >= ARRAY_SIZE(devices->info)) {
+                       pr_err("max %zd devices can be reported.\n",
+                              ARRAY_SIZE(devices->info));
                        break;
                }
        }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 56e2a22..95ef494 100644
@@ -248,6 +248,10 @@ static blk_status_t nvme_error_status(u16 status)
                return BLK_STS_NEXUS;
        case NVME_SC_HOST_PATH_ERROR:
                return BLK_STS_TRANSPORT;
+       case NVME_SC_ZONE_TOO_MANY_ACTIVE:
+               return BLK_STS_ZONE_ACTIVE_RESOURCE;
+       case NVME_SC_ZONE_TOO_MANY_OPEN:
+               return BLK_STS_ZONE_OPEN_RESOURCE;
        default:
                return BLK_STS_IOERR;
        }
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e2e09e2..3c002bd 100644
@@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
 };
 
 #define NVME_FC_DEFAULT_DEV_LOSS_TMO   60      /* seconds */
+#define NVME_FC_DEFAULT_RECONNECT_TMO  2       /* delay, in seconds, between
+                                                * reconnect attempts after a
+                                                * connection failure.
+                                                */
 
 struct nvme_fc_queue {
        struct nvme_fc_ctrl     *ctrl;
@@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
        opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
        if (opstate != FCPOP_STATE_ACTIVE)
                atomic_set(&op->state, opstate);
-       else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
+       else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+               op->flags |= FCOP_FLAGS_TERMIO;
                ctrl->iocnt++;
+       }
        spin_unlock_irqrestore(&ctrl->lock, flags);
 
        if (opstate != FCPOP_STATE_ACTIVE)
@@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
        if (opstate == FCPOP_STATE_ABORTED) {
                spin_lock_irqsave(&ctrl->lock, flags);
-               if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+               if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
+                   op->flags & FCOP_FLAGS_TERMIO) {
                        if (!--ctrl->iocnt)
                                wake_up(&ctrl->ioabort_wait);
                }
@@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
        return 0;
 
 delete_queues:
-       for (; i >= 0; i--)
+       for (; i > 0; i--)
                __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
        return ret;
 }
@@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
                return;
 
        dev_warn(ctrl->ctrl.device,
-               "NVME-FC{%d}: transport association error detected: %s\n",
+               "NVME-FC{%d}: transport association event: %s\n",
                ctrl->cnum, errmsg);
        dev_warn(ctrl->ctrl.device,
                "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
@@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 {
        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
        struct nvme_fc_ctrl *ctrl = op->ctrl;
+       struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
+       struct nvme_command *sqe = &cmdiu->sqe;
 
        /*
-        * we can't individually ABTS an io without affecting the queue,
-        * thus killing the queue, and thus the association.
-        * So resolve by performing a controller reset, which will stop
-        * the host/io stack, terminate the association on the link,
-        * and recreate an association on the link.
+        * Attempt to abort the offending command. Command completion
+        * will detect the aborted io and will fail the connection.
         */
-       nvme_fc_error_recovery(ctrl, "io timeout error");
+       dev_info(ctrl->ctrl.device,
+               "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
+               "x%08x/x%08x\n",
+               ctrl->cnum, op->queue->qnum, sqe->common.opcode,
+               sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
+       if (__nvme_fc_abort_op(ctrl, op))
+               nvme_fc_error_recovery(ctrl, "io timeout abort failed");
 
        /*
         * the io abort has been initiated. Have the reset timer
@@ -2726,6 +2738,7 @@ nvme_fc_complete_rq(struct request *rq)
        struct nvme_fc_ctrl *ctrl = op->ctrl;
 
        atomic_set(&op->state, FCPOP_STATE_IDLE);
+       op->flags &= ~FCOP_FLAGS_TERMIO;
 
        nvme_fc_unmap_data(ctrl, rq, op);
        nvme_complete_rq(rq);
@@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
        if (ret)
                goto out_delete_hw_queues;
 
-       if (prior_ioq_cnt != nr_io_queues)
+       if (prior_ioq_cnt != nr_io_queues) {
                dev_info(ctrl->ctrl.device,
                        "reconnect: revising io queue count from %d to %d\n",
                        prior_ioq_cnt, nr_io_queues);
-       blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+               nvme_wait_freeze(&ctrl->ctrl);
+               blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+               nvme_unfreeze(&ctrl->ctrl);
+       }
 
        return 0;
 
@@ -3090,26 +3106,19 @@ out_free_queue:
        return ret;
 }
 
+
 /*
- * This routine stops operation of the controller on the host side.
- * On the host os stack side: Admin and IO queues are stopped,
- *   outstanding ios on them terminated via FC ABTS.
- * On the link side: the association is terminated.
+ * This routine runs through all outstanding commands on the association
+ * and aborts them.  This routine is typically called by the
+ * delete_association routine. It is also called due to an error during
+ * reconnect. In that scenario, it is most likely a command that initializes
+ * the controller, including fabric Connect commands on io queues, that
+ * may have timed out or failed, so the io must be killed for the connect
+ * thread to see the error.
  */
 static void
-nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
+__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 {
-       struct nvmefc_ls_rcv_op *disls = NULL;
-       unsigned long flags;
-
-       if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
-               return;
-
-       spin_lock_irqsave(&ctrl->lock, flags);
-       set_bit(FCCTRL_TERMIO, &ctrl->flags);
-       ctrl->iocnt = 0;
-       spin_unlock_irqrestore(&ctrl->lock, flags);
-
        /*
         * If io queues are present, stop them and terminate all outstanding
         * ios on them. As FC allocates FC exchange for each io, the
@@ -3127,6 +3136,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
                blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
+               if (start_queues)
+                       nvme_start_queues(&ctrl->ctrl);
        }
 
        /*
@@ -3143,13 +3154,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
        /*
         * clean up the admin queue. Same thing as above.
-        * use blk_mq_tagset_busy_itr() and the transport routine to
-        * terminate the exchanges.
         */
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
        blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+}
+
+/*
+ * This routine stops operation of the controller on the host side.
+ * On the host os stack side: Admin and IO queues are stopped,
+ *   outstanding ios on them terminated via FC ABTS.
+ * On the link side: the association is terminated.
+ */
+static void
+nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
+{
+       struct nvmefc_ls_rcv_op *disls = NULL;
+       unsigned long flags;
+
+       if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
+               return;
+
+       spin_lock_irqsave(&ctrl->lock, flags);
+       set_bit(FCCTRL_TERMIO, &ctrl->flags);
+       ctrl->iocnt = 0;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
+
+       __nvme_fc_abort_outstanding_ios(ctrl, false);
 
        /* kill the aens as they are a separate path */
        nvme_fc_abort_aen_ops(ctrl);
@@ -3263,22 +3295,27 @@ static void
 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 {
        /*
-        * if state is connecting - the error occurred as part of a
-        * reconnect attempt. The create_association error paths will
-        * clean up any outstanding io.
-        *
-        * if it's a different state - ensure all pending io is
-        * terminated. Given this can delay while waiting for the
-        * aborted io to return, we recheck adapter state below
-        * before changing state.
+        * if state is CONNECTING - the error occurred as part of a
+        * reconnect attempt. Abort any ios on the association and
+        * let the create_association error paths resolve things.
         */
-       if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-               nvme_stop_keep_alive(&ctrl->ctrl);
-
-               /* will block will waiting for io to terminate */
-               nvme_fc_delete_association(ctrl);
+       if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+               __nvme_fc_abort_outstanding_ios(ctrl, true);
+               return;
        }
 
+       /*
+        * For any other state, kill the association. As this routine
+        * is a common io abort routine for resetting and such, after
+        * the association is terminated, ensure that the state is set
+        * to CONNECTING.
+        */
+
+       nvme_stop_keep_alive(&ctrl->ctrl);
+
+       /* will block while waiting for io to terminate */
+       nvme_fc_delete_association(ctrl);
+
        if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
            !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
                dev_err(ctrl->ctrl.device,
@@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 {
        struct nvme_fc_ctrl *ctrl;
        unsigned long flags;
-       int ret, idx;
+       int ret, idx, ctrl_loss_tmo;
 
        if (!(rport->remoteport.port_role &
            (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
                goto out_free_ctrl;
        }
 
+       /*
+        * if ctrl_loss_tmo is being enforced and the default reconnect delay
+        * is being used, change to a shorter reconnect delay for FC.
+        */
+       if (opts->max_reconnects != -1 &&
+           opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
+           opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
+               ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
+               opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
+               opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+                                               opts->reconnect_delay);
+       }
+
        ctrl->ctrl.opts = opts;
        ctrl->ctrl.nr_reconnects = 0;
        if (lport->dev)
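
A worked example of the reconnect rework (assuming the stock fabrics
defaults of reconnect_delay = 10s and ctrl_loss_tmo = 600s, i.e.
max_reconnects = 60): the new clause computes ctrl_loss_tmo = 60 * 10 =
600, drops reconnect_delay to NVME_FC_DEFAULT_RECONNECT_TMO = 2, and sets
max_reconnects = DIV_ROUND_UP(600, 2) = 300. FC then retries every 2
seconds while the overall connection-loss window stays at 600 seconds.
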
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e7c88b4..cc11113 100644
@@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
 
 static inline u16 nvme_req_qid(struct request *req)
 {
-       if (!req->rq_disk)
+       if (!req->q->queuedata)
                return 0;
        return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
 }
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e5b0224..df8f361 100644
@@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
                                NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE(0x1c5c, 0x1504),   /* SK Hynix PC400 */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+       { PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
+               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
                .driver_data = NVME_QUIRK_SINGLE_VECTOR },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 9e378d0..aad829a 100644
@@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
        req->result = cqe->result;
 
        if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
-               if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+               if (unlikely(!req->mr ||
+                            wc->ex.invalidate_rkey != req->mr->rkey)) {
                        dev_err(queue->ctrl->ctrl.device,
                                "Bogus remote invalidation for rkey %#x\n",
-                               req->mr->rkey);
+                               req->mr ? req->mr->rkey : 0);
                        nvme_rdma_error_recovery(queue->ctrl);
                }
        } else if (req->mr) {
@@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                complete(&queue->cm_done);
                return 0;
        case RDMA_CM_EVENT_REJECTED:
-               nvme_rdma_destroy_queue_ib(queue);
                cm_error = nvme_rdma_conn_rejected(queue, ev);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 25d62d8..aafcbc4 100644
@@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
         * in case a host died before it enabled the controller.  Hence, simply
         * reset the keep alive timer when the controller is enabled.
         */
-       mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
+       if (ctrl->kato)
+               mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 56c5710..8ee94f0 100644
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
        struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ctrl *id;
-       u32 max_hw_sectors;
+       int max_hw_sectors;
        int page_shift;
 
        id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
        max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
                                      pctrl->max_hw_sectors);
 
+       /*
+        * nvmet_passthru_map_sg() is limited to using a single bio, so limit
+        * the mdts based on BIO_MAX_PAGES as well
+        */
+       max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
+                                     max_hw_sectors);
+
        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
 
        id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
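
For scale (on a 4 KiB-page host, where BIO_MAX_PAGES is 256 at this point):
the added clamp caps max_hw_sectors at 256 << (12 - 9) = 2048 sectors, i.e.
1 MiB per command. With MPSMIN = 0, page_shift = 12 and id->mdts =
ilog2(2048) + 9 - 12 = 8, and 2^8 units of 4 KiB is again 1 MiB, exactly
what a single bio can carry.
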
@@ -180,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq,
 
 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 {
-       int sg_cnt = req->sg_cnt;
        struct scatterlist *sg;
        int op_flags = 0;
        struct bio *bio;
        int i, ret;
 
+       if (req->sg_cnt > BIO_MAX_PAGES)
+               return -EINVAL;
+
        if (req->cmd->common.opcode == nvme_cmd_flush)
                op_flags = REQ_FUA;
        else if (nvme_is_write(req->cmd))
                op_flags = REQ_SYNC | REQ_IDLE;
 
-       bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+       bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
        bio->bi_end_io = bio_put;
        bio->bi_opf = req_op(rq) | op_flags;
 
@@ -201,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
                        bio_put(bio);
                        return -EINVAL;
                }
-               sg_cnt--;
        }
 
        ret = blk_rq_append_bio(rq, &bio);
@@ -236,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
                q = ns->queue;
        }
 
-       rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+       rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
        if (IS_ERR(rq)) {
                status = NVME_SC_INTERNAL;
                goto out_put_ns;
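
The dropped flag is the substance of this hunk; a minimal caller-side
sketch of the two behaviours (names as in the code above):

    rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
    /* tag exhaustion: rq == ERR_PTR(-EWOULDBLOCK), escalated above to a
     * hard NVME_SC_INTERNAL error on the wire */

    rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
    /* sleeps until a tag frees; transient tag-pool pressure no longer
     * fails the passthru command */
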
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ab676ce..60c7a7d 100644
@@ -777,6 +777,15 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
                        /* See SSC3rXX or current. */
                        action = ACTION_FAIL;
                        break;
+               case DATA_PROTECT:
+                       action = ACTION_FAIL;
+                       if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) ||
+                           (sshdr.asc == 0x55 &&
+                            (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) {
+                               /* Insufficient zone resources */
+                               blk_stat = BLK_STS_ZONE_OPEN_RESOURCE;
+                       }
+                       break;
                default:
                        action = ACTION_FAIL;
                        break;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 7d7c132..d9b69bb 100644
@@ -104,6 +104,24 @@ typedef u8 __bitwise blk_status_t;
  */
 #define BLK_STS_ZONE_RESOURCE  ((__force blk_status_t)14)
 
+/*
+ * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
+ * path if the device returns a status indicating that too many zone resources
+ * are currently open. The same command should be successful if resubmitted
+ * after the number of open zones decreases below the device's limits, which is
+ * reported in the request_queue's max_open_zones.
+ */
+#define BLK_STS_ZONE_OPEN_RESOURCE     ((__force blk_status_t)15)
+
+/*
+ * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
+ * path if the device returns a status indicating that too many zone resources
+ * are currently active. The same command should be successful if resubmitted
+ * after the number of active zones decreases below the device's limits, which
+ * is reported in the request_queue's max_active_zones.
+ */
+#define BLK_STS_ZONE_ACTIVE_RESOURCE   ((__force blk_status_t)16)
+
 /**
  * blk_path_error - returns true if error may be path related
  * @error: status the request was completed with
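
A hypothetical consumer of the two new codes (the helper below is an
assumption for illustration, not an API added by this series):

    #include <linux/blk_types.h>

    /* Both statuses are transient by definition: a zoned-write submitter
     * can resubmit the same command once it has closed or finished
     * another zone, instead of failing the I/O outright. */
    static inline bool example_zone_resource_error(blk_status_t status)
    {
            return status == BLK_STS_ZONE_OPEN_RESOURCE ||
                   status == BLK_STS_ZONE_ACTIVE_RESOURCE;
    }
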
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 1dcd919..0a9dc40 100644
@@ -106,12 +106,6 @@ struct io_context {
 
        unsigned short ioprio;
 
-       /*
-        * For request batching
-        */
-       int nr_batch_requests;     /* Number of requests left in the batch */
-       unsigned long last_waited; /* Time last woken after wait for request */
-
        struct radix_tree_root  icq_tree;
        struct io_cq __rcu      *icq_hint;
        struct hlist_head       icq_list;
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 9a4992d..0a482ef 100644
@@ -595,7 +595,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
                elem_len = min_t(u64, length, PAGE_SIZE << order);
                page = alloc_pages(gfp, order);
                if (!page) {
-                       sgl_free(sgl);
+                       sgl_free_order(sgl, order);
                        return NULL;
                }
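
A hedged usage sketch for the fixed path (sizes and function names are
illustrative): the error path above now frees with the allocation order,
and callers must do the same, since sgl_free() assumes order 0 and leaks
the tail pages of every higher-order chunk:

    #include <linux/scatterlist.h>
    #include <linux/sizes.h>

    /* Build a 1 MiB scatterlist out of order-2 (16 KiB) chunks. */
    static struct scatterlist *example_alloc(unsigned int *nents)
    {
            return sgl_alloc_order(SZ_1M, 2, false, GFP_KERNEL, nents);
    }

    static void example_free(struct scatterlist *sgl)
    {
            sgl_free_order(sgl, 2);  /* must match the allocation order */
    }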