/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);
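
/*
 * Data-transfer helpers: move data between the command's scatter-gather list
 * and a linear kernel buffer, or zero a range of the SGL.  A short transfer
 * means the host supplied an SGL that does not cover the requested range, so
 * it is reported back as an SGL Invalid Data error.
 */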
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}
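
/*
 * Pair queued AEN notifications with outstanding Asynchronous Event Request
 * commands.  Both the event list and the AER command array are protected by
 * ctrl->lock; one AER is completed per queued event until either runs out.
 */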
static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
{
	if (!(READ_ONCE(ctrl->aen_enabled) & aen))
		return true;
	return test_and_set_bit(aen, &ctrl->aen_masked);
}
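
/*
 * Record a namespace ID in the per-controller Changed Namespace List log.
 * Duplicate IDs are ignored; once more than NVME_MAX_CHANGED_NAMESPACES
 * entries would be needed, the list collapses to a single 0xffffffff entry,
 * which tells the host to rescan all of its namespaces.
 */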
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
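
/*
 * Port enable/disable.  If no transport of the requested type has registered
 * itself yet, nvmet_config_sem is dropped so that request_module() can load
 * the transport module (which registers its ops under the same semaphore),
 * and the lookup is then retried.
 */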
int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}
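
/*
 * Namespace lookup: the per-subsystem namespaces list is RCU protected and
 * kept sorted by NSID.  nvmet_find_namespace() also grabs a percpu reference
 * on the namespace, which the caller drops via nvmet_put_namespace().
 */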
static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the implementation
	 * of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the percpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;
	ns->buffered_io = false;

	return ns;
}
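
/*
 * Complete a request: advance the submission queue head (lockless via
 * cmpxchg, since completions can race with each other), fill in the
 * completion queue entry, and hand it to the transport's ->queue_response().
 */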
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	u32 old_sqhd, new_sqhd;
	u16 sqhd;

	if (status)
		nvmet_set_status(req, status);

	if (req->sq->size) {
		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	sqhd = req->sq->sqhd & 0x0000FFFF;
	req->rsp->sq_head = cpu_to_le16(sqhd);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns))
		return NVME_SC_INVALID_NS | NVME_SC_DNR;

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}
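
/*
 * Common command parsing and dispatch: a queue without a controller
 * association only accepts connect commands, a non-zero qid means an I/O
 * queue, and on the admin queue fabrics, discovery and regular admin
 * commands are told apart before parsing.
 */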
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;
	req->ns = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * dword aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len))
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}
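
/*
 * CC register handling: the controller only transitions to CSTS.RDY if the
 * host programmed queue entry sizes, memory page size, arbitration mechanism
 * and command set selection that this target supports; any other combination
 * sets CSTS.CFS to signal a controller fatal status instead.
 */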
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}
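
/*
 * Find an existing controller by cntlid, typically when the host connects an
 * additional (I/O) queue to an established association.  The host NQN must
 * match the one that created the controller; on success a reference is taken
 * and the controller is returned through *ret.
 */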
u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_remove_ida;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to clean up stale discovery sessions
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_remove_ida:
	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
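
/*
 * Resolve a subsystem NQN for an incoming connect request.  The well-known
 * discovery subsystem NQN is handled specially; any other subsystem must be
 * linked to the port the request arrived on.  A reference on the returned
 * subsystem is held for the caller.
 */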
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}

	error = nvmet_init_discovery();
	if (error)
		goto out_free_work_queue;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_free_work_queue:
	destroy_workqueue(buffered_io_wq);
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(buffered_io_wq);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");