Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 24 Oct 2020 19:46:42 +0000 (12:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 24 Oct 2020 19:46:42 +0000 (12:46 -0700)
Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph
     - rdma error handling fixes (Chao Leng)
     - fc error handling and reconnect fixes (James Smart)
     - fix the qid displayed when tracing ioctl commands (Keith Busch)
     - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni)
     - fix MDTS for passthru (Logan Gunthorpe)
     - disable Write Zeroes on Sandisk Skyhawk (Kai-Heng Feng)
     - fix an uninitialized work struct (zhenwei pi)"

 - lightnvm out-of-bounds fix (Colin)

 - SG allocation leak fix (Doug)

 - rnbd fixes (Gioh, Guoqing, Jack)

 - zone error translation fixes (Keith)

 - kerneldoc markup fix (Mauro)

 - zram lockdep fix (Peter)

 - Kill unused io_context members (Yufen)

 - NUMA memory allocation cleanup (Xianting)

 - NBD config wakeup fix (Xiubo)

* tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block: (27 commits)
  block: blk-mq: fix a kernel-doc markup
  nvme-fc: shorten reconnect delay if possible for FC
  nvme-fc: wait for queues to freeze before calling update_hr_hw_queues
  nvme-fc: fix error loop in create_hw_io_queues
  nvme-fc: fix io timeout to abort I/O
  null_blk: use zone status for max active/open
  nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru
  nvmet: cleanup nvmet_passthru_map_sg()
  nvmet: limit passthru MTDS by BIO_MAX_PAGES
  nvmet: fix uninitialized work for zero kato
  nvme-pci: disable Write Zeroes on Sandisk Skyhawk
  nvme: use queuedata for nvme_req_qid
  nvme-rdma: fix crash due to incorrect cqe
  nvme-rdma: fix crash when connect rejected
  block: remove unused members for io_context
  blk-mq: remove the calling of local_memory_node()
  zram: Fix __zram_bvec_{read,write}() locking order
  skd_main: remove unused including <linux/version.h>
  sgl_alloc_order: fix memory leak
  lightnvm: fix out-of-bounds write to array devices->info[]
  ...

21 files changed:
Documentation/block/queue-sysfs.rst
block/blk-core.c
block/blk-mq-cpumap.c
block/blk-mq.c
drivers/block/nbd.c
drivers/block/null_blk_zoned.c
drivers/block/rnbd/rnbd-clt.c
drivers/block/skd_main.c
drivers/block/zram/zram_drv.c
drivers/lightnvm/core.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/core.c
drivers/nvme/target/passthru.c
drivers/scsi/scsi_lib.c
include/linux/blk_types.h
include/linux/iocontext.h
lib/scatterlist.c

diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index f261a5c..2638d34 100644
@@ -124,6 +124,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
 EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value.
 If this value is 0, there is no limit.
 
+If the host attempts to exceed this limit, the driver should report this error
+with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW
+errno.
+
 max_open_zones (RO)
 -------------------
 For zoned block devices (zoned attribute indicating "host-managed" or
@@ -131,6 +135,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
 EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
 If this value is 0, there is no limit.
 
+If the host attempts to exceed this limit, the driver should report this error
+with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS
+errno.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
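
A user-space illustration of the errnos documented above (not part of the
series; the device path and zone size are assumptions for illustration):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/blkzoned.h>

    int main(void)
    {
            /* Assumed zoned test device and zone size (512-byte sectors). */
            struct blk_zone_range range = { .sector = 0, .nr_sectors = 524288 };
            int fd = open("/dev/nullb0", O_RDWR);

            if (fd < 0)
                    return 1;
            /* Explicit OPEN; fails with the errnos above if the device is
             * already at its open/active zone limits. */
            if (ioctl(fd, BLKOPENZONE, &range) < 0) {
                    if (errno == ETOOMANYREFS)
                            fprintf(stderr, "max_open_zones exceeded\n");
                    else if (errno == EOVERFLOW)
                            fprintf(stderr, "max_active_zones exceeded\n");
                    else
                            fprintf(stderr, "BLKOPENZONE: %s\n", strerror(errno));
            }
            close(fd);
            return 0;
    }
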
diff --git a/block/blk-core.c b/block/blk-core.c
index ac00d2f..2db8bda 100644
@@ -186,6 +186,10 @@ static const struct {
        /* device mapper special case, should not leak out: */
        [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
 
+       /* zone device specific errors */
+       [BLK_STS_ZONE_OPEN_RESOURCE]    = { -ETOOMANYREFS, "open zones exceeded" },
+       [BLK_STS_ZONE_ACTIVE_RESOURCE]  = { -EOVERFLOW, "active zones exceeded" },
+
        /* everything else not covered above: */
        [BLK_STS_IOERR]         = { -EIO,       "I/O" },
 };
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 0157f2b..3db84d3 100644
@@ -89,7 +89,7 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
 
        for_each_possible_cpu(i) {
                if (index == qmap->mq_map[i])
-                       return local_memory_node(cpu_to_node(i));
+                       return cpu_to_node(i);
        }
 
        return NUMA_NO_NODE;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6964502..55bcee5 100644
@@ -1664,7 +1664,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 EXPORT_SYMBOL(blk_mq_run_hw_queue);
 
 /**
- * blk_mq_run_hw_queue - Run all hardware queues in a request queue.
+ * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
  * @q: Pointer to the request queue to run.
  * @async: If we want to run the queue asynchronously.
  */
@@ -2743,7 +2743,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
                for (j = 0; j < set->nr_maps; j++) {
                        hctx = blk_mq_map_queue_type(q, j, i);
                        if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
-                               hctx->numa_node = local_memory_node(cpu_to_node(i));
+                               hctx->numa_node = cpu_to_node(i);
                }
        }
 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3c9485a..0bed21c 100644
@@ -802,9 +802,9 @@ static void recv_work(struct work_struct *work)
                if (likely(!blk_should_fake_timeout(rq->q)))
                        blk_mq_complete_request(rq);
        }
+       nbd_config_put(nbd);
        atomic_dec(&config->recv_threads);
        wake_up(&config->recv_wq);
-       nbd_config_put(nbd);
        kfree(args);
 }
 
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index fa0cc70..7d94f2d 100644
@@ -220,29 +220,34 @@ static void null_close_first_imp_zone(struct nullb_device *dev)
        }
 }
 
-static bool null_can_set_active(struct nullb_device *dev)
+static blk_status_t null_check_active(struct nullb_device *dev)
 {
        if (!dev->zone_max_active)
-               return true;
+               return BLK_STS_OK;
+
+       if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
+                       dev->nr_zones_closed < dev->zone_max_active)
+               return BLK_STS_OK;
 
-       return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
-              dev->nr_zones_closed < dev->zone_max_active;
+       return BLK_STS_ZONE_ACTIVE_RESOURCE;
 }
 
-static bool null_can_open(struct nullb_device *dev)
+static blk_status_t null_check_open(struct nullb_device *dev)
 {
        if (!dev->zone_max_open)
-               return true;
+               return BLK_STS_OK;
 
        if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
-               return true;
+               return BLK_STS_OK;
 
-       if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
-               null_close_first_imp_zone(dev);
-               return true;
+       if (dev->nr_zones_imp_open) {
+               if (null_check_active(dev) == BLK_STS_OK) {
+                       null_close_first_imp_zone(dev);
+                       return BLK_STS_OK;
+               }
        }
 
-       return false;
+       return BLK_STS_ZONE_OPEN_RESOURCE;
 }
 
 /*
@@ -258,19 +263,22 @@ static bool null_can_open(struct nullb_device *dev)
  * it is not certain that closing an implicit open zone will allow a new zone
  * to be opened, since we might already be at the active limit capacity.
  */
-static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
+static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
 {
+       blk_status_t ret;
+
        switch (zone->cond) {
        case BLK_ZONE_COND_EMPTY:
-               if (!null_can_set_active(dev))
-                       return false;
+               ret = null_check_active(dev);
+               if (ret != BLK_STS_OK)
+                       return ret;
                fallthrough;
        case BLK_ZONE_COND_CLOSED:
-               return null_can_open(dev);
+               return null_check_open(dev);
        default:
                /* Should never be called for other states */
                WARN_ON(1);
-               return false;
+               return BLK_STS_IOERR;
        }
 }
 
@@ -293,8 +301,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                return BLK_STS_IOERR;
        case BLK_ZONE_COND_EMPTY:
        case BLK_ZONE_COND_CLOSED:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                break;
        case BLK_ZONE_COND_IMP_OPEN:
        case BLK_ZONE_COND_EXP_OPEN:
@@ -349,6 +358,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 
 static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
 {
+       blk_status_t ret;
+
        if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
                return BLK_STS_IOERR;
 
@@ -357,15 +368,17 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
                /* open operation on exp open is not an error */
                return BLK_STS_OK;
        case BLK_ZONE_COND_EMPTY:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                break;
        case BLK_ZONE_COND_IMP_OPEN:
                dev->nr_zones_imp_open--;
                break;
        case BLK_ZONE_COND_CLOSED:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                dev->nr_zones_closed--;
                break;
        case BLK_ZONE_COND_FULL:
@@ -381,6 +394,8 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
 
 static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
 {
+       blk_status_t ret;
+
        if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
                return BLK_STS_IOERR;
 
@@ -389,8 +404,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
                /* finish operation on full is not an error */
                return BLK_STS_OK;
        case BLK_ZONE_COND_EMPTY:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                break;
        case BLK_ZONE_COND_IMP_OPEN:
                dev->nr_zones_imp_open--;
@@ -399,8 +415,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
                dev->nr_zones_exp_open--;
                break;
        case BLK_ZONE_COND_CLOSED:
-               if (!null_has_zone_resources(dev, zone))
-                       return BLK_STS_IOERR;
+               ret = null_check_zone_resources(dev, zone);
+               if (ret != BLK_STS_OK)
+                       return ret;
                dev->nr_zones_closed--;
                break;
        default:
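
A worked reading of the zone-resource comment above (numbers illustrative):
with zone_max_active = 4 and a device holding 2 explicitly open, 1
implicitly open and 1 closed zone, a write to an EMPTY zone hits
null_check_active() first, since the target would become a fifth active
zone. The command now completes with BLK_STS_ZONE_ACTIVE_RESOURCE even
though closing the implicitly open zone could have satisfied
zone_max_open; previously both shortages collapsed into BLK_STS_IOERR.
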
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index d7a6974..8b2411c 100644
@@ -91,11 +91,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
        dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
        dev->max_segments = BMAX_SEGMENTS;
 
-       dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors,
-                                   le32_to_cpu(rsp->max_hw_sectors));
-       dev->max_segments = min_t(u16, dev->max_segments,
-                                 le16_to_cpu(rsp->max_segments));
-
        return 0;
 }
 
@@ -427,7 +422,7 @@ enum wait_type {
 };
 
 static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
-                       struct rnbd_iu *iu, struct kvec *vec, size_t nr,
+                       struct rnbd_iu *iu, struct kvec *vec,
                        size_t len, struct scatterlist *sg, unsigned int sg_len,
                        void (*conf)(struct work_struct *work),
                        int *errno, enum wait_type wait)
@@ -441,7 +436,7 @@ static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
                .conf_fn = msg_conf,
        };
        err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit,
-                               vec, nr, len, sg, sg_len);
+                               vec, 1, len, sg, sg_len);
        if (!err && wait) {
                wait_event(iu->comp.wait, iu->comp.errno != INT_MAX);
                *errno = iu->comp.errno;
@@ -486,7 +481,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
        msg.device_id   = cpu_to_le32(device_id);
 
        WARN_ON(!rnbd_clt_get_dev(dev));
-       err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0,
+       err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0,
                           msg_close_conf, &errno, wait);
        if (err) {
                rnbd_clt_put_dev(dev);
@@ -575,7 +570,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
 
        WARN_ON(!rnbd_clt_get_dev(dev));
        err = send_usr_msg(sess->rtrs, READ, iu,
-                          &vec, 1, sizeof(*rsp), iu->sglist, 1,
+                          &vec, sizeof(*rsp), iu->sglist, 1,
                           msg_open_conf, &errno, wait);
        if (err) {
                rnbd_clt_put_dev(dev);
@@ -629,7 +624,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
                goto put_iu;
        }
        err = send_usr_msg(sess->rtrs, READ, iu,
-                          &vec, 1, sizeof(*rsp), iu->sglist, 1,
+                          &vec, sizeof(*rsp), iu->sglist, 1,
                           msg_sess_info_conf, &errno, wait);
        if (err) {
                rnbd_clt_put_sess(sess);
@@ -1514,7 +1509,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
                              "map_device: Failed to configure device, err: %d\n",
                              ret);
                mutex_unlock(&dev->lock);
-               goto del_dev;
+               goto send_close;
        }
 
        rnbd_clt_info(dev,
@@ -1533,6 +1528,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 
        return dev;
 
+send_close:
+       send_msg_close(dev, dev->device_id, WAIT);
 del_dev:
        delete_dev(dev);
 put_dev:
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index ae6454c..a962b45 100644
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
-#include <linux/version.h>
 #include <linux/err.h>
 #include <linux/aer.h>
 #include <linux/wait.h>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 029403c..1b69720 100644
@@ -1218,10 +1218,11 @@ out:
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
                                struct bio *bio, bool partial_io)
 {
-       int ret;
+       struct zcomp_strm *zstrm;
        unsigned long handle;
        unsigned int size;
        void *src, *dst;
+       int ret;
 
        zram_slot_lock(zram, index);
        if (zram_test_flag(zram, index, ZRAM_WB)) {
@@ -1252,6 +1253,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 
        size = zram_get_obj_size(zram, index);
 
+       if (size != PAGE_SIZE)
+               zstrm = zcomp_stream_get(zram->comp);
+
        src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
        if (size == PAGE_SIZE) {
                dst = kmap_atomic(page);
@@ -1259,8 +1263,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
                kunmap_atomic(dst);
                ret = 0;
        } else {
-               struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
-
                dst = kmap_atomic(page);
                ret = zcomp_decompress(zstrm, src, size, dst);
                kunmap_atomic(dst);
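
The effect of the reorder, sketched with names from the code above: both
zram bvec paths now acquire their locks in the same order,

    zstrm = zcomp_stream_get(zram->comp);    /* per-CPU stream lock first */
    src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);  /* then zspage */

whereas the read side previously mapped the object before taking the
stream, the inverse of the write side's order, which lockdep reports as a
potential deadlock.
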
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index fe78bf0..c1bcac7 100644
@@ -1311,8 +1311,9 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
                strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
                i++;
 
-               if (i > 31) {
-                       pr_err("max 31 devices can be reported.\n");
+               if (i >= ARRAY_SIZE(devices->info)) {
+                       pr_err("max %zd devices can be reported.\n",
+                              ARRAY_SIZE(devices->info));
                        break;
                }
        }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 56e2a22..95ef494 100644
@@ -248,6 +248,10 @@ static blk_status_t nvme_error_status(u16 status)
                return BLK_STS_NEXUS;
        case NVME_SC_HOST_PATH_ERROR:
                return BLK_STS_TRANSPORT;
+       case NVME_SC_ZONE_TOO_MANY_ACTIVE:
+               return BLK_STS_ZONE_ACTIVE_RESOURCE;
+       case NVME_SC_ZONE_TOO_MANY_OPEN:
+               return BLK_STS_ZONE_OPEN_RESOURCE;
        default:
                return BLK_STS_IOERR;
        }
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e2e09e2..3c002bd 100644
@@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
 };
 
 #define NVME_FC_DEFAULT_DEV_LOSS_TMO   60      /* seconds */
+#define NVME_FC_DEFAULT_RECONNECT_TMO  2       /* delay, in seconds, between
+                                                * reconnect attempts after a
+                                                * connection failure.
+                                                */
 
 struct nvme_fc_queue {
        struct nvme_fc_ctrl     *ctrl;
@@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
        opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
        if (opstate != FCPOP_STATE_ACTIVE)
                atomic_set(&op->state, opstate);
-       else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
+       else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+               op->flags |= FCOP_FLAGS_TERMIO;
                ctrl->iocnt++;
+       }
        spin_unlock_irqrestore(&ctrl->lock, flags);
 
        if (opstate != FCPOP_STATE_ACTIVE)
@@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
        if (opstate == FCPOP_STATE_ABORTED) {
                spin_lock_irqsave(&ctrl->lock, flags);
-               if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+               if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
+                   op->flags & FCOP_FLAGS_TERMIO) {
                        if (!--ctrl->iocnt)
                                wake_up(&ctrl->ioabort_wait);
                }
@@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
        return 0;
 
 delete_queues:
-       for (; i >= 0; i--)
+       for (; i > 0; i--)
                __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
        return ret;
 }
@@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
                return;
 
        dev_warn(ctrl->ctrl.device,
-               "NVME-FC{%d}: transport association error detected: %s\n",
+               "NVME-FC{%d}: transport association event: %s\n",
                ctrl->cnum, errmsg);
        dev_warn(ctrl->ctrl.device,
                "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
@@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 {
        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
        struct nvme_fc_ctrl *ctrl = op->ctrl;
+       struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
+       struct nvme_command *sqe = &cmdiu->sqe;
 
        /*
-        * we can't individually ABTS an io without affecting the queue,
-        * thus killing the queue, and thus the association.
-        * So resolve by performing a controller reset, which will stop
-        * the host/io stack, terminate the association on the link,
-        * and recreate an association on the link.
+        * Attempt to abort the offending command. Command completion
+        * will detect the aborted io and will fail the connection.
         */
-       nvme_fc_error_recovery(ctrl, "io timeout error");
+       dev_info(ctrl->ctrl.device,
+               "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
+               "x%08x/x%08x\n",
+               ctrl->cnum, op->queue->qnum, sqe->common.opcode,
+               sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
+       if (__nvme_fc_abort_op(ctrl, op))
+               nvme_fc_error_recovery(ctrl, "io timeout abort failed");
 
        /*
         * the io abort has been initiated. Have the reset timer
@@ -2726,6 +2738,7 @@ nvme_fc_complete_rq(struct request *rq)
        struct nvme_fc_ctrl *ctrl = op->ctrl;
 
        atomic_set(&op->state, FCPOP_STATE_IDLE);
+       op->flags &= ~FCOP_FLAGS_TERMIO;
 
        nvme_fc_unmap_data(ctrl, rq, op);
        nvme_complete_rq(rq);
@@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
        if (ret)
                goto out_delete_hw_queues;
 
-       if (prior_ioq_cnt != nr_io_queues)
+       if (prior_ioq_cnt != nr_io_queues) {
                dev_info(ctrl->ctrl.device,
                        "reconnect: revising io queue count from %d to %d\n",
                        prior_ioq_cnt, nr_io_queues);
-       blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+               nvme_wait_freeze(&ctrl->ctrl);
+               blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+               nvme_unfreeze(&ctrl->ctrl);
+       }
 
        return 0;
 
@@ -3090,26 +3106,19 @@ out_free_queue:
        return ret;
 }
 
+
 /*
- * This routine stops operation of the controller on the host side.
- * On the host os stack side: Admin and IO queues are stopped,
- *   outstanding ios on them terminated via FC ABTS.
- * On the link side: the association is terminated.
+ * This routine runs through all outstanding commands on the association
+ * and aborts them.  This routine is typically called by the
+ * delete_association routine. It is also called due to an error during
+ * reconnect. In that scenario, it is most likely a command that initializes
+ * the controller, including fabric Connect commands on io queues, that
+ * may have timed out or failed, so the io must be killed for the connect
+ * thread to see the error.
  */
 static void
-nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
+__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 {
-       struct nvmefc_ls_rcv_op *disls = NULL;
-       unsigned long flags;
-
-       if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
-               return;
-
-       spin_lock_irqsave(&ctrl->lock, flags);
-       set_bit(FCCTRL_TERMIO, &ctrl->flags);
-       ctrl->iocnt = 0;
-       spin_unlock_irqrestore(&ctrl->lock, flags);
-
        /*
         * If io queues are present, stop them and terminate all outstanding
         * ios on them. As FC allocates FC exchange for each io, the
@@ -3127,6 +3136,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
                blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
+               if (start_queues)
+                       nvme_start_queues(&ctrl->ctrl);
        }
 
        /*
@@ -3143,13 +3154,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
        /*
         * clean up the admin queue. Same thing as above.
-        * use blk_mq_tagset_busy_itr() and the transport routine to
-        * terminate the exchanges.
         */
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
        blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+}
+
+/*
+ * This routine stops operation of the controller on the host side.
+ * On the host os stack side: Admin and IO queues are stopped,
+ *   outstanding ios on them terminated via FC ABTS.
+ * On the link side: the association is terminated.
+ */
+static void
+nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
+{
+       struct nvmefc_ls_rcv_op *disls = NULL;
+       unsigned long flags;
+
+       if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
+               return;
+
+       spin_lock_irqsave(&ctrl->lock, flags);
+       set_bit(FCCTRL_TERMIO, &ctrl->flags);
+       ctrl->iocnt = 0;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
+
+       __nvme_fc_abort_outstanding_ios(ctrl, false);
 
        /* kill the aens as they are a separate path */
        nvme_fc_abort_aen_ops(ctrl);
@@ -3263,22 +3295,27 @@ static void
 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 {
        /*
-        * if state is connecting - the error occurred as part of a
-        * reconnect attempt. The create_association error paths will
-        * clean up any outstanding io.
-        *
-        * if it's a different state - ensure all pending io is
-        * terminated. Given this can delay while waiting for the
-        * aborted io to return, we recheck adapter state below
-        * before changing state.
+        * if state is CONNECTING - the error occurred as part of a
+        * reconnect attempt. Abort any ios on the association and
+        * let the create_association error paths resolve things.
         */
-       if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-               nvme_stop_keep_alive(&ctrl->ctrl);
-
-               /* will block will waiting for io to terminate */
-               nvme_fc_delete_association(ctrl);
+       if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+               __nvme_fc_abort_outstanding_ios(ctrl, true);
+               return;
        }
 
+       /*
+        * For any other state, kill the association. As this routine
+        * is a common io abort routine for resetting and such, after
+        * the association is terminated, ensure that the state is set
+        * to CONNECTING.
+        */
+
+       nvme_stop_keep_alive(&ctrl->ctrl);
+
+       /* will block while waiting for io to terminate */
+       nvme_fc_delete_association(ctrl);
+
        if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
            !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
                dev_err(ctrl->ctrl.device,
@@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 {
        struct nvme_fc_ctrl *ctrl;
        unsigned long flags;
-       int ret, idx;
+       int ret, idx, ctrl_loss_tmo;
 
        if (!(rport->remoteport.port_role &
            (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
                goto out_free_ctrl;
        }
 
+       /*
+        * if ctrl_loss_tmo is being enforced and the default reconnect delay
+        * is being used, change to a shorter reconnect delay for FC.
+        */
+       if (opts->max_reconnects != -1 &&
+           opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
+           opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
+               ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
+               opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
+               opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+                                               opts->reconnect_delay);
+       }
+
        ctrl->ctrl.opts = opts;
        ctrl->ctrl.nr_reconnects = 0;
        if (lport->dev)
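
A worked example of the reconnect rework (assuming the stock fabrics
defaults of reconnect_delay = 10s and ctrl_loss_tmo = 600s, i.e.
max_reconnects = 60): the new clause computes ctrl_loss_tmo = 60 * 10 =
600, drops reconnect_delay to NVME_FC_DEFAULT_RECONNECT_TMO = 2, and sets
max_reconnects = DIV_ROUND_UP(600, 2) = 300. FC then retries every 2
seconds while the overall connection-loss window stays at 600 seconds.
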
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e7c88b4..cc11113 100644
@@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
 
 static inline u16 nvme_req_qid(struct request *req)
 {
-       if (!req->rq_disk)
+       if (!req->q->queuedata)
                return 0;
        return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
 }
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e5b0224..df8f361 100644
@@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
                                NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE(0x1c5c, 0x1504),   /* SK Hynix PC400 */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+       { PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
+               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
                .driver_data = NVME_QUIRK_SINGLE_VECTOR },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 9e378d0..aad829a 100644
@@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
        req->result = cqe->result;
 
        if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
-               if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+               if (unlikely(!req->mr ||
+                            wc->ex.invalidate_rkey != req->mr->rkey)) {
                        dev_err(queue->ctrl->ctrl.device,
                                "Bogus remote invalidation for rkey %#x\n",
-                               req->mr->rkey);
+                               req->mr ? req->mr->rkey : 0);
                        nvme_rdma_error_recovery(queue->ctrl);
                }
        } else if (req->mr) {
@@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                complete(&queue->cm_done);
                return 0;
        case RDMA_CM_EVENT_REJECTED:
-               nvme_rdma_destroy_queue_ib(queue);
                cm_error = nvme_rdma_conn_rejected(queue, ev);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 25d62d8..aafcbc4 100644
@@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
         * in case a host died before it enabled the controller.  Hence, simply
         * reset the keep alive timer when the controller is enabled.
         */
-       mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
+       if (ctrl->kato)
+               mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 56c5710..8ee94f0 100644
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
        struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ctrl *id;
-       u32 max_hw_sectors;
+       int max_hw_sectors;
        int page_shift;
 
        id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
        max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
                                      pctrl->max_hw_sectors);
 
+       /*
+        * nvmet_passthru_map_sg() is limited to using a single bio, so limit
+        * the mdts based on BIO_MAX_PAGES as well
+        */
+       max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
+                                     max_hw_sectors);
+
        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
 
        id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
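
For scale (on a 4 KiB-page host, where BIO_MAX_PAGES is 256 at this point):
the added clamp caps max_hw_sectors at 256 << (12 - 9) = 2048 sectors, i.e.
1 MiB per command. With MPSMIN = 0, page_shift = 12 and id->mdts =
ilog2(2048) + 9 - 12 = 8, and 2^8 units of 4 KiB is again 1 MiB, exactly
what a single bio can carry.
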
@@ -180,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq,
 
 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 {
-       int sg_cnt = req->sg_cnt;
        struct scatterlist *sg;
        int op_flags = 0;
        struct bio *bio;
        int i, ret;
 
+       if (req->sg_cnt > BIO_MAX_PAGES)
+               return -EINVAL;
+
        if (req->cmd->common.opcode == nvme_cmd_flush)
                op_flags = REQ_FUA;
        else if (nvme_is_write(req->cmd))
                op_flags = REQ_SYNC | REQ_IDLE;
 
-       bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+       bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
        bio->bi_end_io = bio_put;
        bio->bi_opf = req_op(rq) | op_flags;
 
@@ -201,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
                        bio_put(bio);
                        return -EINVAL;
                }
-               sg_cnt--;
        }
 
        ret = blk_rq_append_bio(rq, &bio);
@@ -236,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
                q = ns->queue;
        }
 
-       rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+       rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
        if (IS_ERR(rq)) {
                status = NVME_SC_INTERNAL;
                goto out_put_ns;
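
The dropped flag is the substance of this hunk; a minimal caller-side
sketch of the two behaviours (names as in the code above):

    rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
    /* tag exhaustion: rq == ERR_PTR(-EWOULDBLOCK), escalated above to a
     * hard NVME_SC_INTERNAL error on the wire */

    rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
    /* sleeps until a tag frees; transient tag-pool pressure no longer
     * fails the passthru command */
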
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ab676ce..60c7a7d 100644
@@ -777,6 +777,15 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
                        /* See SSC3rXX or current. */
                        action = ACTION_FAIL;
                        break;
+               case DATA_PROTECT:
+                       action = ACTION_FAIL;
+                       if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) ||
+                           (sshdr.asc == 0x55 &&
+                            (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) {
+                               /* Insufficient zone resources */
+                               blk_stat = BLK_STS_ZONE_OPEN_RESOURCE;
+                       }
+                       break;
                default:
                        action = ACTION_FAIL;
                        break;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 7d7c132..d9b69bb 100644
@@ -104,6 +104,24 @@ typedef u8 __bitwise blk_status_t;
  */
 #define BLK_STS_ZONE_RESOURCE  ((__force blk_status_t)14)
 
+/*
+ * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
+ * path if the device returns a status indicating that too many zone resources
+ * are currently open. The same command should be successful if resubmitted
+ * after the number of open zones decreases below the device's limits, which is
+ * reported in the request_queue's max_open_zones.
+ */
+#define BLK_STS_ZONE_OPEN_RESOURCE     ((__force blk_status_t)15)
+
+/*
+ * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
+ * path if the device returns a status indicating that too many zone resources
+ * are currently active. The same command should be successful if resubmitted
+ * after the number of active zones decreases below the device's limits, which
+ * is reported in the request_queue's max_active_zones.
+ */
+#define BLK_STS_ZONE_ACTIVE_RESOURCE   ((__force blk_status_t)16)
+
 /**
  * blk_path_error - returns true if error may be path related
  * @error: status the request was completed with
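
A hypothetical consumer of the two new codes (the helper below is an
assumption for illustration, not an API added by this series):

    #include <linux/blk_types.h>

    /* Both statuses are transient by definition: a zoned-write submitter
     * can resubmit the same command once it has closed or finished
     * another zone, instead of failing the I/O outright. */
    static inline bool example_zone_resource_error(blk_status_t status)
    {
            return status == BLK_STS_ZONE_OPEN_RESOURCE ||
                   status == BLK_STS_ZONE_ACTIVE_RESOURCE;
    }
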
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 1dcd919..0a9dc40 100644
@@ -106,12 +106,6 @@ struct io_context {
 
        unsigned short ioprio;
 
-       /*
-        * For request batching
-        */
-       int nr_batch_requests;     /* Number of requests left in the batch */
-       unsigned long last_waited; /* Time last woken after wait for request */
-
        struct radix_tree_root  icq_tree;
        struct io_cq __rcu      *icq_hint;
        struct hlist_head       icq_list;
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 9a4992d..0a482ef 100644
@@ -595,7 +595,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
                elem_len = min_t(u64, length, PAGE_SIZE << order);
                page = alloc_pages(gfp, order);
                if (!page) {
-                       sgl_free(sgl);
+                       sgl_free_order(sgl, order);
                        return NULL;
                }
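
A hedged usage sketch for the fixed path (sizes and function names are
illustrative): the error path above now frees with the allocation order,
and callers must do the same, since sgl_free() assumes order 0 and leaks
the tail pages of every higher-order chunk:

    #include <linux/scatterlist.h>
    #include <linux/sizes.h>

    /* Build a 1 MiB scatterlist out of order-2 (16 KiB) chunks. */
    static struct scatterlist *example_alloc(unsigned int *nents)
    {
            return sgl_alloc_order(SZ_1M, 2, false, GFP_KERNEL, nents);
    }

    static void example_free(struct scatterlist *sgl)
    {
            sgl_free_order(sgl, 2);  /* must match the allocation order */
    }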