Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5ccc996..95c488e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -384,6 +384,8 @@ static inline void nvme_end_req(struct request *req)
                nvme_log_error(req);
        nvme_end_req_zoned(req);
        nvme_trace_bio_complete(req);
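+       /* complete I/O stats accounting on the multipath device, if used */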
+       if (req->cmd_flags & REQ_NVME_MPATH)
+               nvme_mpath_end_request(req);
        blk_mq_end_request(req, status);
 }
 
@@ -675,6 +677,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
        if (req->mq_hctx->type == HCTX_TYPE_POLL)
                req->cmd_flags |= REQ_POLLED;
        nvme_clear_nvme_request(req);
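+       /* suppress error logging for internally generated and passthrough commands */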
+       req->rq_flags |= RQF_QUIET;
        memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
 }
 EXPORT_SYMBOL_GPL(nvme_init_request);
@@ -850,8 +853,11 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
        cmnd->write_zeroes.length =
                cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
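+       /*
+        * Also deallocate the range if allowed: NVME_NS_DEAC is only set when
+        * deallocated blocks are guaranteed to read back as zeroes.
+        */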
+       if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+               cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
+
        if (nvme_ns_has_pi(ns)) {
-               cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+               cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
 
                switch (ns->pi_type) {
                case NVME_NS_DPS_PI_TYPE1:
@@ -1037,7 +1043,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                        goto out;
        }
 
-       req->rq_flags |= RQF_QUIET;
        ret = nvme_execute_rq(req, at_head);
        if (result && ret >= 0)
                *result = nvme_req(req)->result;
@@ -1118,11 +1123,12 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
                nvme_unfreeze(ctrl);
                nvme_mpath_unfreeze(ctrl->subsys);
                mutex_unlock(&ctrl->subsys->lock);
-               nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
                mutex_unlock(&ctrl->scan_lock);
        }
-       if (effects & NVME_CMD_EFFECTS_CCC)
-               nvme_init_ctrl_finish(ctrl);
+       if (effects & NVME_CMD_EFFECTS_CCC) {
+               dev_info(ctrl->device,
+"controller capabilities changed, reset may be required to take effect.\n");
+       }
        if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
                nvme_queue_scan(ctrl);
                flush_work(&ctrl->scan_work);
@@ -1227,7 +1233,6 @@ static void nvme_keep_alive_work(struct work_struct *work)
        rq->timeout = ctrl->kato * HZ;
        rq->end_io = nvme_keep_alive_end_io;
        rq->end_io_data = ctrl;
-       rq->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(rq, false);
 }
 
@@ -2004,6 +2009,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
                }
        }
 
+       /*
+        * Only set the DEAC bit if the device guarantees that reads from
+        * deallocated data return zeroes.  While the DEAC bit does not
+        * require that, it must be a no-op if reads from deallocated data
+        * do not return zeroes.
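+        * DLFEAT bits 2:0 == 001b report that deallocated blocks read back as
+        * zeroes; bit 3 reports support for the Deallocate bit in Write
+        * Zeroes.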
+        */
+       if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+               ns->features |= NVME_NS_DEAC;
        set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
        set_bit(NVME_NS_READY, &ns->flags);
        blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2209,7 +2222,7 @@ const struct pr_ops nvme_pr_ops = {
 };
 
 #ifdef CONFIG_BLK_SED_OPAL
-int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
+static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
                bool send)
 {
        struct nvme_ctrl *ctrl = data;
@@ -2226,7 +2239,23 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
        return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
                        NVME_QID_ANY, 1, 0);
 }
-EXPORT_SYMBOL_GPL(nvme_sec_submit);
+
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+       if (ctrl->oacs & NVME_CTRL_OACS_SEC_SUPP) {
+               if (!ctrl->opal_dev)
+                       ctrl->opal_dev = init_opal_dev(ctrl, &nvme_sec_submit);
+               else if (was_suspended)
+                       opal_unlock_from_suspend(ctrl->opal_dev);
+       } else {
+               free_opal_dev(ctrl->opal_dev);
+               ctrl->opal_dev = NULL;
+       }
+}
+#else
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+}
 #endif /* CONFIG_BLK_SED_OPAL */
 
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -2251,16 +2280,17 @@ static const struct block_device_operations nvme_bdev_ops = {
        .pr_ops         = &nvme_pr_ops,
 };
 
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
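+/* wait for the CSTS bits in @mask to equal @val; @timeout is in seconds */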
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 mask, u32 val,
+               u32 timeout, const char *op)
 {
-       unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
-       u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+       unsigned long timeout_jiffies = jiffies + timeout * HZ;
+       u32 csts;
        int ret;
 
        while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
                if (csts == ~0)
                        return -ENODEV;
-               if ((csts & NVME_CSTS_RDY) == bit)
+               if ((csts & mask) == val)
                        break;
 
                usleep_range(1000, 2000);
@@ -2269,7 +2299,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
                if (time_after(jiffies, timeout_jiffies)) {
                        dev_err(ctrl->device,
                                "Device not ready; aborting %s, CSTS=0x%x\n",
-                               enabled ? "initialisation" : "reset", csts);
+                               op, csts);
                        return -ENODEV;
                }
        }
@@ -2277,27 +2307,29 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
        return ret;
 }
 
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit.  The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
        int ret;
 
        ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
-       ctrl->ctrl_config &= ~NVME_CC_ENABLE;
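+       /* either request an orderly shutdown, or clear CC.EN to reset */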
+       if (shutdown)
+               ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+       else
+               ctrl->ctrl_config &= ~NVME_CC_ENABLE;
 
        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
 
+       if (shutdown) {
+               return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK,
+                                      NVME_CSTS_SHST_CMPLT,
+                                      ctrl->shutdown_timeout, "shutdown");
+       }
        if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
                msleep(NVME_QUIRK_DELAY_AMOUNT);
-
-       return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
+       return nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0,
+                              (NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2, "reset");
 }
 EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
 
@@ -2362,41 +2394,11 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
-       return nvme_wait_ready(ctrl, timeout, true);
+       return nvme_wait_ready(ctrl, NVME_CSTS_RDY, NVME_CSTS_RDY,
+                              (timeout + 1) / 2, "initialisation");
 }
 EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
-int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
-{
-       unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
-       u32 csts;
-       int ret;
-
-       ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
-       ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
-
-       ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
-       if (ret)
-               return ret;
-
-       while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
-               if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
-                       break;
-
-               msleep(100);
-               if (fatal_signal_pending(current))
-                       return -EINTR;
-               if (time_after(jiffies, timeout)) {
-                       dev_err(ctrl->device,
-                               "Device shutdown incomplete; abort shutdown\n");
-                       return -ENODEV;
-               }
-       }
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
-
 static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
 {
        __le64 ts;
@@ -3079,7 +3081,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 
        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
-               return 0;
+               return -ENOMEM;
 
        c.identify.opcode = nvme_admin_identify;
        c.identify.cns = NVME_ID_CNS_CS_CTRL;
@@ -3125,10 +3127,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
        if (!ctrl->identified) {
                unsigned int i;
 
-               ret = nvme_init_subsystem(ctrl, id);
-               if (ret)
-                       goto out_free;
-
                /*
                 * Check for quirks.  Quirk can depend on firmware version,
                 * so, in principle, the set of quirks present can change
@@ -3141,6 +3139,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
                        if (quirk_matches(id, &core_quirks[i]))
                                ctrl->quirks |= core_quirks[i].quirks;
                }
+
+               ret = nvme_init_subsystem(ctrl, id);
+               if (ret)
+                       goto out_free;
        }
        memcpy(ctrl->subsys->firmware_rev, id->fr,
               sizeof(ctrl->subsys->firmware_rev));
@@ -3259,7 +3261,7 @@ out_free:
  * register in our nvme_ctrl structure.  This should be called as soon as
  * the admin queue is fully up and running.
  */
-int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
 {
        int ret;
 
@@ -3290,9 +3292,15 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
        if (ret < 0)
                return ret;
 
+       nvme_configure_opal(ctrl, was_suspended);
+
        if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
+               /*
+                * Do not return errors unless we are in a controller reset;
+                * the controller works perfectly fine without hwmon.
+                */
                ret = nvme_hwmon_init(ctrl);
-               if (ret < 0)
+               if (ret == -EINTR)
                        return ret;
        }
 
@@ -3771,15 +3779,19 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
        memcpy(dhchap_secret, buf, count);
        nvme_auth_stop(ctrl);
        if (strcmp(dhchap_secret, opts->dhchap_secret)) {
+               struct nvme_dhchap_key *key, *host_key;
                int ret;
 
-               ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key);
+               ret = nvme_auth_generate_key(dhchap_secret, &key);
                if (ret)
                        return ret;
                kfree(opts->dhchap_secret);
                opts->dhchap_secret = dhchap_secret;
-               /* Key has changed; re-authentication with new key */
-               nvme_auth_reset(ctrl);
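+               /* swap in the new key under the mutex, then free the old one */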
+               host_key = ctrl->host_key;
+               mutex_lock(&ctrl->dhchap_auth_mutex);
+               ctrl->host_key = key;
+               mutex_unlock(&ctrl->dhchap_auth_mutex);
+               nvme_auth_free_key(host_key);
        }
        /* Start re-authentication */
        dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3821,15 +3833,19 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
        memcpy(dhchap_secret, buf, count);
        nvme_auth_stop(ctrl);
        if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
+               struct nvme_dhchap_key *key, *ctrl_key;
                int ret;
 
-               ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key);
+               ret = nvme_auth_generate_key(dhchap_secret, &key);
                if (ret)
                        return ret;
                kfree(opts->dhchap_ctrl_secret);
                opts->dhchap_ctrl_secret = dhchap_secret;
-               /* Key has changed; re-authentication with new key */
-               nvme_auth_reset(ctrl);
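+               /* swap in the new controller key under the mutex, free the old one */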
+               ctrl_key = ctrl->ctrl_key;
+               mutex_lock(&ctrl->dhchap_auth_mutex);
+               ctrl->ctrl_key = key;
+               mutex_unlock(&ctrl->dhchap_auth_mutex);
+               nvme_auth_free_key(ctrl_key);
        }
        /* Start re-authentication */
        dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3901,10 +3917,11 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
        return a->mode;
 }
 
-static const struct attribute_group nvme_dev_attrs_group = {
+const struct attribute_group nvme_dev_attrs_group = {
        .attrs          = nvme_dev_attrs,
        .is_visible     = nvme_dev_attrs_are_visible,
 };
+EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
 
 static const struct attribute_group *nvme_dev_attr_groups[] = {
        &nvme_dev_attrs_group,
@@ -4330,7 +4347,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        mutex_unlock(&ns->ctrl->subsys->lock);
 
        /* guarantee not available in head->list */
-       synchronize_rcu();
+       synchronize_srcu(&ns->head->srcu);
 
        if (!nvme_ns_head_multipath(ns->head))
                nvme_cdev_del(&ns->cdev, &ns->cdev_device);
@@ -4359,10 +4376,6 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
 {
        int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
 
-       if (test_bit(NVME_NS_DEAD, &ns->flags))
-               goto out;
-
-       ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
        if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
                dev_err(ns->ctrl->device,
                        "identifiers changed for nsid %d\n", ns->head->ns_id);
@@ -4433,7 +4446,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 
        down_write(&ctrl->namespaces_rwsem);
        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
-               if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
+               if (ns->head->ns_id > nsid)
                        list_move_tail(&ns->list, &rm_list);
        }
        up_write(&ctrl->namespaces_rwsem);
@@ -4450,9 +4463,6 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
        u32 prev = 0;
        int ret = 0, i;
 
-       if (nvme_ctrl_limited_cns(ctrl))
-               return -EOPNOTSUPP;
-
        ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
        if (!ns_list)
                return -ENOMEM;
@@ -4560,8 +4570,18 @@ static void nvme_scan_work(struct work_struct *work)
        }
 
        mutex_lock(&ctrl->scan_lock);
-       if (nvme_scan_ns_list(ctrl) != 0)
+       if (nvme_ctrl_limited_cns(ctrl)) {
                nvme_scan_ns_sequential(ctrl);
+       } else {
+               /*
+                * Fall back to sequential scan if DNR is set to handle broken
+                * devices which should support Identify NS List (as per the VS
+                * they report) but don't actually support it.
+                */
+               ret = nvme_scan_ns_list(ctrl);
+               if (ret > 0 && (ret & NVME_SC_DNR))
+                       nvme_scan_ns_sequential(ctrl);
+       }
        mutex_unlock(&ctrl->scan_lock);
 }
 
@@ -4591,8 +4611,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         * removing the namespaces' disks; fail all the queues now to avoid
         * potentially having to clean up the failed sync later.
         */
-       if (ctrl->state == NVME_CTRL_DEAD)
-               nvme_kill_queues(ctrl);
+       if (ctrl->state == NVME_CTRL_DEAD) {
+               nvme_mark_namespaces_dead(ctrl);
+               nvme_unquiesce_io_queues(ctrl);
+       }
 
        /* this is a no-op when called from the controller reset handler */
        nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
@@ -4606,9 +4628,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
-static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-       struct nvme_ctrl *ctrl =
+       const struct nvme_ctrl *ctrl =
                container_of(dev, struct nvme_ctrl, ctrl_device);
        struct nvmf_ctrl_options *opts = ctrl->opts;
        int ret;
@@ -4718,7 +4740,7 @@ static void nvme_fw_act_work(struct work_struct *work)
                fw_act_timeout = jiffies +
                                msecs_to_jiffies(admin_timeout * 1000);
 
-       nvme_stop_queues(ctrl);
+       nvme_quiesce_io_queues(ctrl);
        while (nvme_ctrl_pp_status(ctrl)) {
                if (time_after(jiffies, fw_act_timeout)) {
                        dev_warn(ctrl->device,
@@ -4732,7 +4754,7 @@ static void nvme_fw_act_work(struct work_struct *work)
        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
                return;
 
-       nvme_start_queues(ctrl);
+       nvme_unquiesce_io_queues(ctrl);
        /* read FW slot information to clear the AER */
        nvme_get_fw_slot_info(ctrl);
 
@@ -4837,8 +4859,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
 int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
-               const struct blk_mq_ops *ops, unsigned int flags,
-               unsigned int cmd_size)
+               const struct blk_mq_ops *ops, unsigned int cmd_size)
 {
        int ret;
 
@@ -4848,7 +4869,9 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
        if (ctrl->ops->flags & NVME_F_FABRICS)
                set->reserved_tags = NVMF_RESERVED_TAGS;
        set->numa_node = ctrl->numa_node;
-       set->flags = flags;
+       set->flags = BLK_MQ_F_NO_SCHED;
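+       /* transports that sleep in queue_rq (e.g. TCP) need a blocking tagset */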
+       if (ctrl->ops->flags & NVME_F_BLOCKING)
+               set->flags |= BLK_MQ_F_BLOCKING;
        set->cmd_size = cmd_size;
        set->driver_data = ctrl;
        set->nr_hw_queues = 1;
@@ -4875,7 +4898,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
        return 0;
 
 out_cleanup_admin_q:
-       blk_mq_destroy_queue(ctrl->fabrics_q);
+       blk_mq_destroy_queue(ctrl->admin_q);
+       blk_put_queue(ctrl->admin_q);
 out_free_tagset:
        blk_mq_free_tag_set(ctrl->admin_tagset);
        return ret;
@@ -4885,14 +4909,17 @@ EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set);
 void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl)
 {
        blk_mq_destroy_queue(ctrl->admin_q);
-       if (ctrl->ops->flags & NVME_F_FABRICS)
+       blk_put_queue(ctrl->admin_q);
+       if (ctrl->ops->flags & NVME_F_FABRICS) {
                blk_mq_destroy_queue(ctrl->fabrics_q);
+               blk_put_queue(ctrl->fabrics_q);
+       }
        blk_mq_free_tag_set(ctrl->admin_tagset);
 }
 EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set);
 
 int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
-               const struct blk_mq_ops *ops, unsigned int flags,
+               const struct blk_mq_ops *ops, unsigned int nr_maps,
                unsigned int cmd_size)
 {
        int ret;
@@ -4900,15 +4927,23 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
        memset(set, 0, sizeof(*set));
        set->ops = ops;
        set->queue_depth = ctrl->sqsize + 1;
-       set->reserved_tags = NVMF_RESERVED_TAGS;
+       /*
+        * Some Apple controllers require tags to be unique across admin and
+        * the (only) I/O queue, so reserve the first 32 tags of the I/O queue.
+        */
+       if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
+               set->reserved_tags = NVME_AQ_DEPTH;
+       else if (ctrl->ops->flags & NVME_F_FABRICS)
+               set->reserved_tags = NVMF_RESERVED_TAGS;
        set->numa_node = ctrl->numa_node;
-       set->flags = flags;
+       set->flags = BLK_MQ_F_SHOULD_MERGE;
+       if (ctrl->ops->flags & NVME_F_BLOCKING)
+               set->flags |= BLK_MQ_F_BLOCKING;
        set->cmd_size = cmd_size;
        set->driver_data = ctrl;
        set->nr_hw_queues = ctrl->queue_count - 1;
        set->timeout = NVME_IO_TIMEOUT;
-       if (ops->map_queues)
-               set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+       set->nr_maps = nr_maps;
        ret = blk_mq_alloc_tag_set(set);
        if (ret)
                return ret;
@@ -4919,6 +4954,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
                        ret = PTR_ERR(ctrl->connect_q);
                        goto out_free_tag_set;
                }
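+               /* keep connect_q usable while the rest of the tagset is quiesced */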
+               blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE,
+                                  ctrl->connect_q);
        }
 
        ctrl->tagset = set;
@@ -4932,8 +4969,10 @@ EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set);
 
 void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl)
 {
-       if (ctrl->ops->flags & NVME_F_FABRICS)
+       if (ctrl->ops->flags & NVME_F_FABRICS) {
                blk_mq_destroy_queue(ctrl->connect_q);
+               blk_put_queue(ctrl->connect_q);
+       }
        blk_mq_free_tag_set(ctrl->tagset);
 }
 EXPORT_SYMBOL_GPL(nvme_remove_io_tag_set);
@@ -4969,7 +5008,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
 
        if (ctrl->queue_count > 1) {
                nvme_queue_scan(ctrl);
-               nvme_start_queues(ctrl);
+               nvme_unquiesce_io_queues(ctrl);
                nvme_mpath_update(ctrl);
        }
 
@@ -5014,6 +5053,7 @@ static void nvme_free_ctrl(struct device *dev)
        nvme_auth_stop(ctrl);
        nvme_auth_free(ctrl);
        __free_page(ctrl->discard_page);
+       free_opal_dev(ctrl->opal_dev);
 
        if (subsys) {
                mutex_lock(&nvme_subsystems_lock);
@@ -5079,7 +5119,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                        ctrl->instance);
        ctrl->device->class = nvme_class;
        ctrl->device->parent = ctrl->dev;
-       ctrl->device->groups = nvme_dev_attr_groups;
+       if (ops->dev_attr_groups)
+               ctrl->device->groups = ops->dev_attr_groups;
+       else
+               ctrl->device->groups = nvme_dev_attr_groups;
        ctrl->device->release = nvme_free_ctrl;
        dev_set_drvdata(ctrl->device, ctrl);
        ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
@@ -5103,9 +5146,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
        nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
        nvme_mpath_init_ctrl(ctrl);
-       nvme_auth_init_ctrl(ctrl);
+       ret = nvme_auth_init_ctrl(ctrl);
+       if (ret)
+               goto out_free_cdev;
 
        return 0;
+out_free_cdev:
+       cdev_device_del(&ctrl->cdev, ctrl->device);
 out_free_name:
        nvme_put_ctrl(ctrl);
        kfree_const(ctrl->device->kobj.name);
@@ -5118,62 +5165,17 @@ out:
 }
 EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 
-static void nvme_start_ns_queue(struct nvme_ns *ns)
-{
-       if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags))
-               blk_mq_unquiesce_queue(ns->queue);
-}
-
-static void nvme_stop_ns_queue(struct nvme_ns *ns)
-{
-       if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
-               blk_mq_quiesce_queue(ns->queue);
-       else
-               blk_mq_wait_quiesce_done(ns->queue);
-}
-
-/*
- * Prepare a queue for teardown.
- *
- * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
- * the capacity to 0 after that to avoid blocking dispatchers that may be
- * holding bd_mutex.  This will end buffered writers dirtying pages that can't
- * be synced.
- */
-static void nvme_set_queue_dying(struct nvme_ns *ns)
-{
-       if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
-               return;
-
-       blk_mark_disk_dead(ns->disk);
-       nvme_start_ns_queue(ns);
-
-       set_capacity_and_notify(ns->disk, 0);
-}
-
-/**
- * nvme_kill_queues(): Ends all namespace queues
- * @ctrl: the dead controller that needs to end
- *
- * Call this function when the driver determines it is unable to get the
- * controller in a state capable of servicing IO.
- */
-void nvme_kill_queues(struct nvme_ctrl *ctrl)
+/* let I/O to all namespaces fail in preparation for surprise removal */
+void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
 
        down_read(&ctrl->namespaces_rwsem);
-
-       /* Forcibly unquiesce queues to avoid blocking dispatch */
-       if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
-               nvme_start_admin_queue(ctrl);
-
        list_for_each_entry(ns, &ctrl->namespaces, list)
-               nvme_set_queue_dying(ns);
-
+               blk_mark_disk_dead(ns->disk);
        up_read(&ctrl->namespaces_rwsem);
 }
-EXPORT_SYMBOL_GPL(nvme_kill_queues);
+EXPORT_SYMBOL_GPL(nvme_mark_namespaces_dead);
 
 void nvme_unfreeze(struct nvme_ctrl *ctrl)
 {
@@ -5223,43 +5225,41 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
 
-void nvme_stop_queues(struct nvme_ctrl *ctrl)
+void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl)
 {
-       struct nvme_ns *ns;
-
-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
-               nvme_stop_ns_queue(ns);
-       up_read(&ctrl->namespaces_rwsem);
+       if (!ctrl->tagset)
+               return;
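+       /* the first caller quiesces; concurrent callers wait for completion */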
+       if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+               blk_mq_quiesce_tagset(ctrl->tagset);
+       else
+               blk_mq_wait_quiesce_done(ctrl->tagset);
 }
-EXPORT_SYMBOL_GPL(nvme_stop_queues);
+EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues);
 
-void nvme_start_queues(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl)
 {
-       struct nvme_ns *ns;
-
-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
-               nvme_start_ns_queue(ns);
-       up_read(&ctrl->namespaces_rwsem);
+       if (!ctrl->tagset)
+               return;
+       if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+               blk_mq_unquiesce_tagset(ctrl->tagset);
 }
-EXPORT_SYMBOL_GPL(nvme_start_queues);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_io_queues);
 
-void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl)
 {
        if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
                blk_mq_quiesce_queue(ctrl->admin_q);
        else
-               blk_mq_wait_quiesce_done(ctrl->admin_q);
+               blk_mq_wait_quiesce_done(ctrl->admin_q->tag_set);
 }
-EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_quiesce_admin_queue);
 
-void nvme_start_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl)
 {
        if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
                blk_mq_unquiesce_queue(ctrl->admin_q);
 }
-EXPORT_SYMBOL_GPL(nvme_start_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
 
 void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
 {
@@ -5370,8 +5370,13 @@ static int __init nvme_core_init(void)
                goto unregister_generic_ns;
        }
 
+       result = nvme_init_auth();
+       if (result)
+               goto destroy_ns_chr;
        return 0;
 
+destroy_ns_chr:
+       class_destroy(nvme_ns_chr_class);
 unregister_generic_ns:
        unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
 destroy_subsys_class:
@@ -5392,6 +5397,7 @@ out:
 
 static void __exit nvme_core_exit(void)
 {
+       nvme_exit_auth();
        class_destroy(nvme_ns_chr_class);
        class_destroy(nvme_subsys_class);
        class_destroy(nvme_class);