From: Linus Torvalds Date: Sat, 24 Oct 2020 19:46:42 +0000 (-0700) Subject: Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block X-Git-Tag: microblaze-v5.11~19 X-Git-Url: http://git.monstr.eu/?p=linux-2.6-microblaze.git;a=commitdiff_plain;h=d76913908102044f14381df865bb74df17a538cb;hp=af0041875ce7f5a05362b884e90cf82c27876096 Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: - NVMe pull request from Christoph - rdma error handling fixes (Chao Leng) - fc error handling and reconnect fixes (James Smart) - fix the qid displace when tracing ioctl command (Keith Busch) - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni) - fix MTDT for passthru (Logan Gunthorpe) - blacklist Write Same on more devices (Kai-Heng Feng) - fix an uninitialized work struct (zhenwei pi)" - lightnvm out-of-bounds fix (Colin) - SG allocation leak fix (Doug) - rnbd fixes (Gioh, Guoqing, Jack) - zone error translation fixes (Keith) - kerneldoc markup fix (Mauro) - zram lockdep fix (Peter) - Kill unused io_context members (Yufen) - NUMA memory allocation cleanup (Xianting) - NBD config wakeup fix (Xiubo) * tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block: (27 commits) block: blk-mq: fix a kernel-doc markup nvme-fc: shorten reconnect delay if possible for FC nvme-fc: wait for queues to freeze before calling update_hr_hw_queues nvme-fc: fix error loop in create_hw_io_queues nvme-fc: fix io timeout to abort I/O null_blk: use zone status for max active/open nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru nvmet: cleanup nvmet_passthru_map_sg() nvmet: limit passthru MTDS by BIO_MAX_PAGES nvmet: fix uninitialized work for zero kato nvme-pci: disable Write Zeroes on Sandisk Skyhawk nvme: use queuedata for nvme_req_qid nvme-rdma: fix crash due to incorrect cqe nvme-rdma: fix crash when connect rejected block: remove unused members for io_context blk-mq: remove the calling of local_memory_node() zram: Fix __zram_bvec_{read,write}() locking order skd_main: remove unused including sgl_alloc_order: fix memory leak lightnvm: fix out-of-bounds write to array devices->info[] ... --- diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst index f261a5c84170..2638d3446b79 100644 --- a/Documentation/block/queue-sysfs.rst +++ b/Documentation/block/queue-sysfs.rst @@ -124,6 +124,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value. If this value is 0, there is no limit. +If the host attempts to exceed this limit, the driver should report this error +with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW +errno. + max_open_zones (RO) ------------------- For zoned block devices (zoned attribute indicating "host-managed" or @@ -131,6 +135,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value. If this value is 0, there is no limit. +If the host attempts to exceed this limit, the driver should report this error +with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS +errno. + max_sectors_kb (RW) ------------------- This is the maximum number of kilobytes that the block layer will allow diff --git a/block/blk-core.c b/block/blk-core.c index ac00d2fa4eb4..2db8bda43b6e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -186,6 +186,10 @@ static const struct { /* device mapper special case, should not leak out: */ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, + /* zone device specific errors */ + [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, + [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, + /* everything else not covered above: */ [BLK_STS_IOERR] = { -EIO, "I/O" }, }; diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 0157f2b3485a..3db84d3197f1 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -89,7 +89,7 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index) for_each_possible_cpu(i) { if (index == qmap->mq_map[i]) - return local_memory_node(cpu_to_node(i)); + return cpu_to_node(i); } return NUMA_NO_NODE; diff --git a/block/blk-mq.c b/block/blk-mq.c index 696450257ac1..55bcee5dc032 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1664,7 +1664,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) EXPORT_SYMBOL(blk_mq_run_hw_queue); /** - * blk_mq_run_hw_queue - Run all hardware queues in a request queue. + * blk_mq_run_hw_queues - Run all hardware queues in a request queue. * @q: Pointer to the request queue to run. * @async: If we want to run the queue asynchronously. */ @@ -2743,7 +2743,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, for (j = 0; j < set->nr_maps; j++) { hctx = blk_mq_map_queue_type(q, j, i); if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) - hctx->numa_node = local_memory_node(cpu_to_node(i)); + hctx->numa_node = cpu_to_node(i); } } } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3c9485acdd81..0bed21c0c81b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -802,9 +802,9 @@ static void recv_work(struct work_struct *work) if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } + nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); - nbd_config_put(nbd); kfree(args); } diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index fa0cc70f05e6..7d94f2d47a6a 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -220,29 +220,34 @@ static void null_close_first_imp_zone(struct nullb_device *dev) } } -static bool null_can_set_active(struct nullb_device *dev) +static blk_status_t null_check_active(struct nullb_device *dev) { if (!dev->zone_max_active) - return true; + return BLK_STS_OK; + + if (dev->nr_zones_exp_open + dev->nr_zones_imp_open + + dev->nr_zones_closed < dev->zone_max_active) + return BLK_STS_OK; - return dev->nr_zones_exp_open + dev->nr_zones_imp_open + - dev->nr_zones_closed < dev->zone_max_active; + return BLK_STS_ZONE_ACTIVE_RESOURCE; } -static bool null_can_open(struct nullb_device *dev) +static blk_status_t null_check_open(struct nullb_device *dev) { if (!dev->zone_max_open) - return true; + return BLK_STS_OK; if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open) - return true; + return BLK_STS_OK; - if (dev->nr_zones_imp_open && null_can_set_active(dev)) { - null_close_first_imp_zone(dev); - return true; + if (dev->nr_zones_imp_open) { + if (null_check_active(dev) == BLK_STS_OK) { + null_close_first_imp_zone(dev); + return BLK_STS_OK; + } } - return false; + return BLK_STS_ZONE_OPEN_RESOURCE; } /* @@ -258,19 +263,22 @@ static bool null_can_open(struct nullb_device *dev) * it is not certain that closing an implicit open zone will allow a new zone * to be opened, since we might already be at the active limit capacity. */ -static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) { + blk_status_t ret; + switch (zone->cond) { case BLK_ZONE_COND_EMPTY: - if (!null_can_set_active(dev)) - return false; + ret = null_check_active(dev); + if (ret != BLK_STS_OK) + return ret; fallthrough; case BLK_ZONE_COND_CLOSED: - return null_can_open(dev); + return null_check_open(dev); default: /* Should never be called for other states */ WARN_ON(1); - return false; + return BLK_STS_IOERR; } } @@ -293,8 +301,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, return BLK_STS_IOERR; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_CLOSED: - if (!null_has_zone_resources(dev, zone)) - return BLK_STS_IOERR; + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) + return ret; break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: @@ -349,6 +358,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) { + blk_status_t ret; + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; @@ -357,15 +368,17 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo /* open operation on exp open is not an error */ return BLK_STS_OK; case BLK_ZONE_COND_EMPTY: - if (!null_has_zone_resources(dev, zone)) - return BLK_STS_IOERR; + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) + return ret; break; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; break; case BLK_ZONE_COND_CLOSED: - if (!null_has_zone_resources(dev, zone)) - return BLK_STS_IOERR; + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) + return ret; dev->nr_zones_closed--; break; case BLK_ZONE_COND_FULL: @@ -381,6 +394,8 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) { + blk_status_t ret; + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; @@ -389,8 +404,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone * /* finish operation on full is not an error */ return BLK_STS_OK; case BLK_ZONE_COND_EMPTY: - if (!null_has_zone_resources(dev, zone)) - return BLK_STS_IOERR; + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) + return ret; break; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -399,8 +415,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone * dev->nr_zones_exp_open--; break; case BLK_ZONE_COND_CLOSED: - if (!null_has_zone_resources(dev, zone)) - return BLK_STS_IOERR; + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) + return ret; dev->nr_zones_closed--; break; default: diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index d7a69741c0f6..8b2411ccbda9 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -91,11 +91,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; dev->max_segments = BMAX_SEGMENTS; - dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors, - le32_to_cpu(rsp->max_hw_sectors)); - dev->max_segments = min_t(u16, dev->max_segments, - le16_to_cpu(rsp->max_segments)); - return 0; } @@ -427,7 +422,7 @@ enum wait_type { }; static int send_usr_msg(struct rtrs_clt *rtrs, int dir, - struct rnbd_iu *iu, struct kvec *vec, size_t nr, + struct rnbd_iu *iu, struct kvec *vec, size_t len, struct scatterlist *sg, unsigned int sg_len, void (*conf)(struct work_struct *work), int *errno, enum wait_type wait) @@ -441,7 +436,7 @@ static int send_usr_msg(struct rtrs_clt *rtrs, int dir, .conf_fn = msg_conf, }; err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit, - vec, nr, len, sg, sg_len); + vec, 1, len, sg, sg_len); if (!err && wait) { wait_event(iu->comp.wait, iu->comp.errno != INT_MAX); *errno = iu->comp.errno; @@ -486,7 +481,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) msg.device_id = cpu_to_le32(device_id); WARN_ON(!rnbd_clt_get_dev(dev)); - err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0, + err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0, msg_close_conf, &errno, wait); if (err) { rnbd_clt_put_dev(dev); @@ -575,7 +570,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) WARN_ON(!rnbd_clt_get_dev(dev)); err = send_usr_msg(sess->rtrs, READ, iu, - &vec, 1, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sglist, 1, msg_open_conf, &errno, wait); if (err) { rnbd_clt_put_dev(dev); @@ -629,7 +624,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) goto put_iu; } err = send_usr_msg(sess->rtrs, READ, iu, - &vec, 1, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sglist, 1, msg_sess_info_conf, &errno, wait); if (err) { rnbd_clt_put_sess(sess); @@ -1514,7 +1509,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, "map_device: Failed to configure device, err: %d\n", ret); mutex_unlock(&dev->lock); - goto del_dev; + goto send_close; } rnbd_clt_info(dev, @@ -1533,6 +1528,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, return dev; +send_close: + send_msg_close(dev, dev->device_id, WAIT); del_dev: delete_dev(dev); put_dev: diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index ae6454c24594..a962b4551bed 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 029403c18ca3..1b697208d661 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1218,10 +1218,11 @@ out: static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, struct bio *bio, bool partial_io) { - int ret; + struct zcomp_strm *zstrm; unsigned long handle; unsigned int size; void *src, *dst; + int ret; zram_slot_lock(zram, index); if (zram_test_flag(zram, index, ZRAM_WB)) { @@ -1252,6 +1253,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, size = zram_get_obj_size(zram, index); + if (size != PAGE_SIZE) + zstrm = zcomp_stream_get(zram->comp); + src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { dst = kmap_atomic(page); @@ -1259,8 +1263,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, kunmap_atomic(dst); ret = 0; } else { - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); - dst = kmap_atomic(page); ret = zcomp_decompress(zstrm, src, size, dst); kunmap_atomic(dst); diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index fe78bf0fdce5..c1bcac71008c 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -1311,8 +1311,9 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) strlcpy(info->bmname, "gennvm", sizeof(info->bmname)); i++; - if (i > 31) { - pr_err("max 31 devices can be reported.\n"); + if (i >= ARRAY_SIZE(devices->info)) { + pr_err("max %zd devices can be reported.\n", + ARRAY_SIZE(devices->info)); break; } } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 56e2a22e8a02..95ef4943d8bd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -248,6 +248,10 @@ static blk_status_t nvme_error_status(u16 status) return BLK_STS_NEXUS; case NVME_SC_HOST_PATH_ERROR: return BLK_STS_TRANSPORT; + case NVME_SC_ZONE_TOO_MANY_ACTIVE: + return BLK_STS_ZONE_ACTIVE_RESOURCE; + case NVME_SC_ZONE_TOO_MANY_OPEN: + return BLK_STS_ZONE_OPEN_RESOURCE; default: return BLK_STS_IOERR; } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e2e09e25c056..3c002bdcace3 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -26,6 +26,10 @@ enum nvme_fc_queue_flags { }; #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ +#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects + * when connected and a + * connection failure. + */ struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; @@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); if (opstate != FCPOP_STATE_ACTIVE) atomic_set(&op->state, opstate); - else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { + op->flags |= FCOP_FLAGS_TERMIO; ctrl->iocnt++; + } spin_unlock_irqrestore(&ctrl->lock, flags); if (opstate != FCPOP_STATE_ACTIVE) @@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, if (opstate == FCPOP_STATE_ABORTED) { spin_lock_irqsave(&ctrl->lock, flags); - if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { + if (test_bit(FCCTRL_TERMIO, &ctrl->flags) && + op->flags & FCOP_FLAGS_TERMIO) { if (!--ctrl->iocnt) wake_up(&ctrl->ioabort_wait); } @@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) return 0; delete_queues: - for (; i >= 0; i--) + for (; i > 0; i--) __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); return ret; } @@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) return; dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: transport association error detected: %s\n", + "NVME-FC{%d}: transport association event: %s\n", ctrl->cnum, errmsg); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); @@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + struct nvme_command *sqe = &cmdiu->sqe; /* - * we can't individually ABTS an io without affecting the queue, - * thus killing the queue, and thus the association. - * So resolve by performing a controller reset, which will stop - * the host/io stack, terminate the association on the link, - * and recreate an association on the link. + * Attempt to abort the offending command. Command completion + * will detect the aborted io and will fail the connection. */ - nvme_fc_error_recovery(ctrl, "io timeout error"); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " + "x%08x/x%08x\n", + ctrl->cnum, op->queue->qnum, sqe->common.opcode, + sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); + if (__nvme_fc_abort_op(ctrl, op)) + nvme_fc_error_recovery(ctrl, "io timeout abort failed"); /* * the io abort has been initiated. Have the reset timer @@ -2726,6 +2738,7 @@ nvme_fc_complete_rq(struct request *rq) struct nvme_fc_ctrl *ctrl = op->ctrl; atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags &= ~FCOP_FLAGS_TERMIO; nvme_fc_unmap_data(ctrl, rq, op); nvme_complete_rq(rq); @@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; - if (prior_ioq_cnt != nr_io_queues) + if (prior_ioq_cnt != nr_io_queues) { dev_info(ctrl->ctrl.device, "reconnect: revising io queue count from %d to %d\n", prior_ioq_cnt, nr_io_queues); - blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + nvme_wait_freeze(&ctrl->ctrl); + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + nvme_unfreeze(&ctrl->ctrl); + } return 0; @@ -3090,26 +3106,19 @@ out_free_queue: return ret; } + /* - * This routine stops operation of the controller on the host side. - * On the host os stack side: Admin and IO queues are stopped, - * outstanding ios on them terminated via FC ABTS. - * On the link side: the association is terminated. + * This routine runs through all outstanding commands on the association + * and aborts them. This routine is typically be called by the + * delete_association routine. It is also called due to an error during + * reconnect. In that scenario, it is most likely a command that initializes + * the controller, including fabric Connect commands on io queues, that + * may have timed out or failed thus the io must be killed for the connect + * thread to see the error. */ static void -nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) +__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) { - struct nvmefc_ls_rcv_op *disls = NULL; - unsigned long flags; - - if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) - return; - - spin_lock_irqsave(&ctrl->lock, flags); - set_bit(FCCTRL_TERMIO, &ctrl->flags); - ctrl->iocnt = 0; - spin_unlock_irqrestore(&ctrl->lock, flags); - /* * If io queues are present, stop them and terminate all outstanding * ios on them. As FC allocates FC exchange for each io, the @@ -3127,6 +3136,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); + if (start_queues) + nvme_start_queues(&ctrl->ctrl); } /* @@ -3143,13 +3154,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) /* * clean up the admin queue. Same thing as above. - * use blk_mq_tagset_busy_itr() and the transport routine to - * terminate the exchanges. */ blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); +} + +/* + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. + */ +static void +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmefc_ls_rcv_op *disls = NULL; + unsigned long flags; + + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) + return; + + spin_lock_irqsave(&ctrl->lock, flags); + set_bit(FCCTRL_TERMIO, &ctrl->flags); + ctrl->iocnt = 0; + spin_unlock_irqrestore(&ctrl->lock, flags); + + __nvme_fc_abort_outstanding_ios(ctrl, false); /* kill the aens as they are a separate path */ nvme_fc_abort_aen_ops(ctrl); @@ -3263,22 +3295,27 @@ static void __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) { /* - * if state is connecting - the error occurred as part of a - * reconnect attempt. The create_association error paths will - * clean up any outstanding io. - * - * if it's a different state - ensure all pending io is - * terminated. Given this can delay while waiting for the - * aborted io to return, we recheck adapter state below - * before changing state. + * if state is CONNECTING - the error occurred as part of a + * reconnect attempt. Abort any ios on the association and + * let the create_association error paths resolve things. */ - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { - nvme_stop_keep_alive(&ctrl->ctrl); - - /* will block will waiting for io to terminate */ - nvme_fc_delete_association(ctrl); + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + __nvme_fc_abort_outstanding_ios(ctrl, true); + return; } + /* + * For any other state, kill the association. As this routine + * is a common io abort routine for resetting and such, after + * the association is terminated, ensure that the state is set + * to CONNECTING. + */ + + nvme_stop_keep_alive(&ctrl->ctrl); + + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) dev_err(ctrl->ctrl.device, @@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + int ret, idx, ctrl_loss_tmo; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_free_ctrl; } + /* + * if ctrl_loss_tmo is being enforced and the default reconnect delay + * is being used, change to a shorter reconnect delay for FC. + */ + if (opts->max_reconnects != -1 && + opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY && + opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) { + ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay; + opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO; + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + } + ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; if (lport->dev) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e7c88b40f5bb..cc111136a981 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req) static inline u16 nvme_req_qid(struct request *req) { - if (!req->rq_disk) + if (!req->q->queuedata) return 0; return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e5b02242f3ca..df8f3612107f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9e378d0a0c01..aad829a2b50d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->result = cqe->result; if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { - if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { + if (unlikely(!req->mr || + wc->ex.invalidate_rkey != req->mr->rkey)) { dev_err(queue->ctrl->ctrl.device, "Bogus remote invalidation for rkey %#x\n", - req->mr->rkey); + req->mr ? req->mr->rkey : 0); nvme_rdma_error_recovery(queue->ctrl); } } else if (req->mr) { @@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, complete(&queue->cm_done); return 0; case RDMA_CM_EVENT_REJECTED: - nvme_rdma_destroy_queue_ib(queue); cm_error = nvme_rdma_conn_rejected(queue, ev); break; case RDMA_CM_EVENT_ROUTE_ERROR: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 25d62d867563..aafcbc424b7a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) * in case a host died before it enabled the controller. Hence, simply * reset the keep alive timer when the controller is enabled. */ - mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); + if (ctrl->kato) + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 56c571052216..8ee94f056898 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl; u16 status = NVME_SC_SUCCESS; struct nvme_id_ctrl *id; - u32 max_hw_sectors; + int max_hw_sectors; int page_shift; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9), pctrl->max_hw_sectors); + /* + * nvmet_passthru_map_sg is limitted to using a single bio so limit + * the mdts based on BIO_MAX_PAGES as well + */ + max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9), + max_hw_sectors); + page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; id->mdts = ilog2(max_hw_sectors) + 9 - page_shift; @@ -180,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq, static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) { - int sg_cnt = req->sg_cnt; struct scatterlist *sg; int op_flags = 0; struct bio *bio; int i, ret; + if (req->sg_cnt > BIO_MAX_PAGES) + return -EINVAL; + if (req->cmd->common.opcode == nvme_cmd_flush) op_flags = REQ_FUA; else if (nvme_is_write(req->cmd)) op_flags = REQ_SYNC | REQ_IDLE; - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, req->sg_cnt); bio->bi_end_io = bio_put; bio->bi_opf = req_op(rq) | op_flags; @@ -201,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) bio_put(bio); return -EINVAL; } - sg_cnt--; } ret = blk_rq_append_bio(rq, &bio); @@ -236,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) q = ns->queue; } - rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); + rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY); if (IS_ERR(rq)) { status = NVME_SC_INTERNAL; goto out_put_ns; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ab676ce2d051..60c7a7d74852 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -777,6 +777,15 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) /* See SSC3rXX or current. */ action = ACTION_FAIL; break; + case DATA_PROTECT: + action = ACTION_FAIL; + if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) || + (sshdr.asc == 0x55 && + (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) { + /* Insufficient zone resources */ + blk_stat = BLK_STS_ZONE_OPEN_RESOURCE; + } + break; default: action = ACTION_FAIL; break; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7d7c13238fdb..d9b69bbde5cc 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -104,6 +104,24 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) +/* + * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion + * path if the device returns a status indicating that too many zone resources + * are currently open. The same command should be successful if resubmitted + * after the number of open zones decreases below the device's limits, which is + * reported in the request_queue's max_open_zones. + */ +#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) + +/* + * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion + * path if the device returns a status indicating that too many zone resources + * are currently active. The same command should be successful if resubmitted + * after the number of active zones decreases below the device's limits, which + * is reported in the request_queue's max_active_zones. + */ +#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 1dcd9198beb7..0a9dc40b7be8 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -106,12 +106,6 @@ struct io_context { unsigned short ioprio; - /* - * For request batching - */ - int nr_batch_requests; /* Number of requests left in the batch */ - unsigned long last_waited; /* Time last woken after wait for request */ - struct radix_tree_root icq_tree; struct io_cq __rcu *icq_hint; struct hlist_head icq_list; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9a4992dc8e8c..0a482ef988e5 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -595,7 +595,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, elem_len = min_t(u64, length, PAGE_SIZE << order); page = alloc_pages(gfp, order); if (!page) { - sgl_free(sgl); + sgl_free_order(sgl, order); return NULL; }