2 * Common code for the NVMe target.
3 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/module.h>
16 #include <linux/random.h>
17 #include <linux/rculist.h>
21 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
22 static DEFINE_IDA(cntlid_ida);
25 * This read/write semaphore is used to synchronize access to configuration
26 * information on a target system that will result in discovery log page
27 * information change for at least one host.
28 * The full list of resources to be protected by this semaphore is:
31 * - per-subsystem allowed hosts list
32 * - allow_any_host subsystem attribute
34 * - the nvmet_transports array
36 * When updating any of those lists/structures, the write lock should be
37 * held, while readers (populating the discovery log page or checking a
38 * host-subsystem link) take the read lock to allow concurrent reads.
40 DECLARE_RWSEM(nvmet_config_sem);
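/*
 * Typical usage (illustrative sketch): configfs operations that change the
 * port <-> subsystem or host <-> subsystem linkage take the semaphore for
 * writing, e.g.:
 *
 *	down_write(&nvmet_config_sem);
 *	list_add_tail(&link->entry, &port->subsystems);
 *	up_write(&nvmet_config_sem);
 *
 * while readers such as the discovery log page code use
 * down_read()/up_read().
 */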
42 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
43 const char *subsysnqn);
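/*
 * Data transfer helpers: nvmet_copy_to_sgl() copies a kernel buffer into the
 * command's scatterlist (data returned to the host), while
 * nvmet_copy_from_sgl() copies data received from the host out of the
 * scatterlist.  Both return an SGL error status if the copy falls short.
 */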
45 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
48 if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
49 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
53 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
55 if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
56 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
60 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
62 if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
63 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
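/*
 * The subsystem's namespace list is kept sorted by NSID (see
 * nvmet_ns_enable()), so the last entry always holds the highest NSID.
 */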
67 static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
71 if (list_empty(&subsys->namespaces))
74 ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
78 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
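	/*
	 * Build completion dword 0 for an Asynchronous Event Request:
	 * bits 7:0 event type, 15:8 event information, 23:16 log page id.
	 */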
80 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
83 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
85 struct nvmet_req *req;
88 mutex_lock(&ctrl->lock);
89 if (!ctrl->nr_async_event_cmds) {
90 mutex_unlock(&ctrl->lock);
94 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
95 mutex_unlock(&ctrl->lock);
96 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
100 static void nvmet_async_event_work(struct work_struct *work)
102 struct nvmet_ctrl *ctrl =
103 container_of(work, struct nvmet_ctrl, async_event_work);
104 struct nvmet_async_event *aen;
105 struct nvmet_req *req;
108 mutex_lock(&ctrl->lock);
109 aen = list_first_entry_or_null(&ctrl->async_events,
110 struct nvmet_async_event, entry);
111 if (!aen || !ctrl->nr_async_event_cmds) {
112 mutex_unlock(&ctrl->lock);
116 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
117 nvmet_set_result(req, nvmet_async_event_result(aen));
119 list_del(&aen->entry);
122 mutex_unlock(&ctrl->lock);
123 nvmet_req_complete(req, 0);
127 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
128 u8 event_info, u8 log_page)
130 struct nvmet_async_event *aen;
132 aen = kmalloc(sizeof(*aen), GFP_KERNEL);
136 aen->event_type = event_type;
137 aen->event_info = event_info;
138 aen->log_page = log_page;
140 mutex_lock(&ctrl->lock);
141 list_add_tail(&aen->entry, &ctrl->async_events);
142 mutex_unlock(&ctrl->lock);
144 schedule_work(&ctrl->async_event_work);
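/*
 * Returns true if this event type is disabled through the Asynchronous Event
 * Configuration feature, or if an event of this type is already outstanding;
 * in the latter case the mask bit stays set until the host reads the
 * corresponding log page.
 */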
147 static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
149 if (!(READ_ONCE(ctrl->aen_enabled) & aen))
151 return test_and_set_bit(aen, &ctrl->aen_masked);
154 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
158 mutex_lock(&ctrl->lock);
159 if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
162 for (i = 0; i < ctrl->nr_changed_ns; i++) {
163 if (ctrl->changed_ns_list[i] == nsid)
167 if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
168 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
169 ctrl->nr_changed_ns = U32_MAX;
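		/*
		 * Overflow: the Changed Namespace List log then reports a
		 * single entry of 0xFFFFFFFF.  Setting nr_changed_ns to
		 * U32_MAX makes the check at the top of this function drop
		 * further updates until the host reads the log, which resets
		 * the list.
		 */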
173 ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
175 mutex_unlock(&ctrl->lock);
178 static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
180 struct nvmet_ctrl *ctrl;
182 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
183 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
184 if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
186 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
187 NVME_AER_NOTICE_NS_CHANGED,
188 NVME_LOG_CHANGED_NS);
192 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
196 down_write(&nvmet_config_sem);
197 if (nvmet_transports[ops->type])
200 nvmet_transports[ops->type] = ops;
201 up_write(&nvmet_config_sem);
205 EXPORT_SYMBOL_GPL(nvmet_register_transport);
207 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
209 down_write(&nvmet_config_sem);
210 nvmet_transports[ops->type] = NULL;
211 up_write(&nvmet_config_sem);
213 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
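/*
 * Illustrative sketch of how a fabrics transport driver hooks in (the "foo"
 * names below are made up): it fills in a struct nvmet_fabrics_ops and
 * registers it from its module_init routine:
 *
 *	static const struct nvmet_fabrics_ops nvmet_foo_ops = {
 *		.owner		= THIS_MODULE,
 *		.type		= NVMF_TRTYPE_FOO,
 *		.add_port	= nvmet_foo_add_port,
 *		.remove_port	= nvmet_foo_remove_port,
 *		.queue_response	= nvmet_foo_queue_response,
 *		.delete_ctrl	= nvmet_foo_delete_ctrl,
 *	};
 *
 *	return nvmet_register_transport(&nvmet_foo_ops);
 */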
215 int nvmet_enable_port(struct nvmet_port *port)
217 const struct nvmet_fabrics_ops *ops;
220 lockdep_assert_held(&nvmet_config_sem);
222 ops = nvmet_transports[port->disc_addr.trtype];
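	/*
	 * If no ops are registered for this transport type yet, drop the
	 * config semaphore and try to load the transport module; its
	 * module_init path calls nvmet_register_transport(), which takes
	 * nvmet_config_sem for writing, so holding it across request_module()
	 * would deadlock.
	 */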
224 up_write(&nvmet_config_sem);
225 request_module("nvmet-transport-%d", port->disc_addr.trtype);
226 down_write(&nvmet_config_sem);
227 ops = nvmet_transports[port->disc_addr.trtype];
229 pr_err("transport type %d not supported\n",
230 port->disc_addr.trtype);
235 if (!try_module_get(ops->owner))
238 ret = ops->add_port(port);
240 module_put(ops->owner);
244 port->enabled = true;
248 void nvmet_disable_port(struct nvmet_port *port)
250 const struct nvmet_fabrics_ops *ops;
252 lockdep_assert_held(&nvmet_config_sem);
254 port->enabled = false;
256 ops = nvmet_transports[port->disc_addr.trtype];
257 ops->remove_port(port);
258 module_put(ops->owner);
261 static void nvmet_keep_alive_timer(struct work_struct *work)
263 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
264 struct nvmet_ctrl, ka_work);
266 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
267 ctrl->cntlid, ctrl->kato);
269 nvmet_ctrl_fatal_error(ctrl);
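/*
 * ctrl->kato is in seconds; the delayed work armed here is pushed out again
 * each time a Keep Alive command is processed, and tears the controller down
 * with a fatal error if it ever expires.
 */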
272 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
274 pr_debug("ctrl %d start keep-alive timer for %d secs\n",
275 ctrl->cntlid, ctrl->kato);
277 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
278 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
281 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
283 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
285 cancel_delayed_work_sync(&ctrl->ka_work);
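/*
 * Namespace lookup: the subsystem's namespace list is RCU protected;
 * nvmet_find_namespace() walks it under rcu_read_lock() and takes a percpu
 * reference on a match, which the caller must drop with
 * nvmet_put_namespace().
 */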
288 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
293 list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
294 if (ns->nsid == le32_to_cpu(nsid))
301 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
306 ns = __nvmet_find_namespace(ctrl, nsid);
308 percpu_ref_get(&ns->ref);
314 static void nvmet_destroy_namespace(struct percpu_ref *ref)
316 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
318 complete(&ns->disable_done);
321 void nvmet_put_namespace(struct nvmet_ns *ns)
323 percpu_ref_put(&ns->ref);
326 static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
328 nvmet_bdev_ns_disable(ns);
329 nvmet_file_ns_disable(ns);
332 int nvmet_ns_enable(struct nvmet_ns *ns)
334 struct nvmet_subsys *subsys = ns->subsys;
337 mutex_lock(&subsys->lock);
341 ret = nvmet_bdev_ns_enable(ns);
343 ret = nvmet_file_ns_enable(ns);
347 ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
352 if (ns->nsid > subsys->max_nsid)
353 subsys->max_nsid = ns->nsid;
356 * The namespaces list needs to be sorted to simplify the implementation
357 * of the Identify Namespace List subcommand.
359 if (list_empty(&subsys->namespaces)) {
360 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
362 struct nvmet_ns *old;
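		/*
		 * Find the first existing namespace with a larger NSID;
		 * list_add_tail_rcu() with that entry as the head then
		 * inserts the new namespace right in front of it, keeping
		 * the list sorted.
		 */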
364 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
365 BUG_ON(ns->nsid == old->nsid);
366 if (ns->nsid < old->nsid)
370 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
373 nvmet_ns_changed(subsys, ns->nsid);
377 mutex_unlock(&subsys->lock);
380 nvmet_ns_dev_disable(ns);
384 void nvmet_ns_disable(struct nvmet_ns *ns)
386 struct nvmet_subsys *subsys = ns->subsys;
388 mutex_lock(&subsys->lock);
393 list_del_rcu(&ns->dev_link);
394 if (ns->nsid == subsys->max_nsid)
395 subsys->max_nsid = nvmet_max_nsid(subsys);
396 mutex_unlock(&subsys->lock);
399 * Now that we removed the namespaces from the lookup list, we
400 * can kill the per_cpu ref and wait for any remaining references
401 * to be dropped, as well as an RCU grace period for anyone only
402 * using the namespace under rcu_read_lock(). Note that we can't
403 * use call_rcu here as we need to ensure the namespaces have
404 * been fully destroyed before unloading the module.
406 percpu_ref_kill(&ns->ref);
408 wait_for_completion(&ns->disable_done);
409 percpu_ref_exit(&ns->ref);
411 mutex_lock(&subsys->lock);
412 nvmet_ns_changed(subsys, ns->nsid);
413 nvmet_ns_dev_disable(ns);
415 mutex_unlock(&subsys->lock);
418 void nvmet_ns_free(struct nvmet_ns *ns)
420 nvmet_ns_disable(ns);
422 kfree(ns->device_path);
426 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
430 ns = kzalloc(sizeof(*ns), GFP_KERNEL);
434 INIT_LIST_HEAD(&ns->dev_link);
435 init_completion(&ns->disable_done);
444 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
446 u32 old_sqhd, new_sqhd;
450 nvmet_set_status(req, status);
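	/*
	 * Advance the submission queue head with a lockless cmpxchg loop so
	 * that completions finishing in different contexts can update it
	 * concurrently; sqhd wraps around at the queue size.
	 */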
454 old_sqhd = req->sq->sqhd;
455 new_sqhd = (old_sqhd + 1) % req->sq->size;
456 } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
459 sqhd = req->sq->sqhd & 0x0000FFFF;
460 req->rsp->sq_head = cpu_to_le16(sqhd);
461 req->rsp->sq_id = cpu_to_le16(req->sq->qid);
462 req->rsp->command_id = req->cmd->common.command_id;
465 nvmet_put_namespace(req->ns);
466 req->ops->queue_response(req);
469 void nvmet_req_complete(struct nvmet_req *req, u16 status)
471 __nvmet_req_complete(req, status);
472 percpu_ref_put(&req->sq->ref);
474 EXPORT_SYMBOL_GPL(nvmet_req_complete);
476 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
485 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
495 static void nvmet_confirm_sq(struct percpu_ref *ref)
497 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
499 complete(&sq->confirm_done);
502 void nvmet_sq_destroy(struct nvmet_sq *sq)
505 * If this is the admin queue, complete all AERs so that our
506 * queue doesn't have outstanding requests on it.
508 if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
509 nvmet_async_events_free(sq->ctrl);
510 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
511 wait_for_completion(&sq->confirm_done);
512 wait_for_completion(&sq->free_done);
513 percpu_ref_exit(&sq->ref);
516 nvmet_ctrl_put(sq->ctrl);
517 sq->ctrl = NULL; /* allows reusing the queue later */
520 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
522 static void nvmet_sq_free(struct percpu_ref *ref)
524 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
526 complete(&sq->free_done);
529 int nvmet_sq_init(struct nvmet_sq *sq)
533 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
535 pr_err("percpu_ref init failed!\n");
538 init_completion(&sq->free_done);
539 init_completion(&sq->confirm_done);
543 EXPORT_SYMBOL_GPL(nvmet_sq_init);
545 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
547 struct nvme_command *cmd = req->cmd;
550 ret = nvmet_check_ctrl_status(req, cmd);
554 req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
555 if (unlikely(!req->ns))
556 return NVME_SC_INVALID_NS | NVME_SC_DNR;
559 return nvmet_file_parse_io_cmd(req);
561 return nvmet_bdev_parse_io_cmd(req);
564 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
565 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
567 u8 flags = req->cmd->common.flags;
575 req->transfer_len = 0;
576 req->rsp->status = 0;
579 /* no support for fused commands yet */
580 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
581 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
586 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
587 * contains an address of a single contiguous physical buffer that is
590 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
591 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
595 if (unlikely(!req->sq->ctrl))
596 /* will return an error for any Non-connect command: */
597 status = nvmet_parse_connect_cmd(req);
598 else if (likely(req->sq->qid != 0))
599 status = nvmet_parse_io_cmd(req);
600 else if (req->cmd->common.opcode == nvme_fabrics_command)
601 status = nvmet_parse_fabrics_cmd(req);
602 else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
603 status = nvmet_parse_discovery_cmd(req);
605 status = nvmet_parse_admin_cmd(req);
610 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
611 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
618 __nvmet_req_complete(req, status);
621 EXPORT_SYMBOL_GPL(nvmet_req_init);
623 void nvmet_req_uninit(struct nvmet_req *req)
625 percpu_ref_put(&req->sq->ref);
627 nvmet_put_namespace(req->ns);
629 EXPORT_SYMBOL_GPL(nvmet_req_uninit);
631 void nvmet_req_execute(struct nvmet_req *req)
633 if (unlikely(req->data_len != req->transfer_len))
634 nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
638 EXPORT_SYMBOL_GPL(nvmet_req_execute);
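/*
 * Rough request flow for a transport driver (illustrative sketch; the "foo"
 * helper names below are made up):
 *
 *	if (!nvmet_req_init(req, cq, sq, &nvmet_foo_ops))
 *		return;				// error response already queued
 *	nvmet_foo_map_data(req);		// fill req->sg / req->sg_cnt
 *	req->transfer_len = len_from_wire;
 *	nvmet_req_execute(req);
 *
 * The command's ->execute() handler eventually calls nvmet_req_complete(),
 * which in turn invokes ops->queue_response() to send the completion back.
 */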
640 static inline bool nvmet_cc_en(u32 cc)
642 return (cc >> NVME_CC_EN_SHIFT) & 0x1;
645 static inline u8 nvmet_cc_css(u32 cc)
647 return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
650 static inline u8 nvmet_cc_mps(u32 cc)
652 return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
655 static inline u8 nvmet_cc_ams(u32 cc)
657 return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
660 static inline u8 nvmet_cc_shn(u32 cc)
662 return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
665 static inline u8 nvmet_cc_iosqes(u32 cc)
667 return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
670 static inline u8 nvmet_cc_iocqes(u32 cc)
672 return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
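/*
 * Enabling the controller only succeeds with the values this target
 * supports: 64-byte SQEs (IOSQES 6), 16-byte CQEs (IOCQES 4), MPS 0
 * (4KiB pages), round-robin arbitration and the NVM command set.
 * Anything else raises CSTS.CFS instead of CSTS.RDY.
 */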
675 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
677 lockdep_assert_held(&ctrl->lock);
679 if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
680 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
681 nvmet_cc_mps(ctrl->cc) != 0 ||
682 nvmet_cc_ams(ctrl->cc) != 0 ||
683 nvmet_cc_css(ctrl->cc) != 0) {
684 ctrl->csts = NVME_CSTS_CFS;
688 ctrl->csts = NVME_CSTS_RDY;
691 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
693 lockdep_assert_held(&ctrl->lock);
695 /* XXX: tear down queues? */
696 ctrl->csts &= ~NVME_CSTS_RDY;
700 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
704 mutex_lock(&ctrl->lock);
708 if (nvmet_cc_en(new) && !nvmet_cc_en(old))
709 nvmet_start_ctrl(ctrl);
710 if (!nvmet_cc_en(new) && nvmet_cc_en(old))
711 nvmet_clear_ctrl(ctrl);
712 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
713 nvmet_clear_ctrl(ctrl);
714 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
716 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
717 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
718 mutex_unlock(&ctrl->lock);
721 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
723 /* command sets supported: NVMe command set: */
724 ctrl->cap = (1ULL << 37);
725 /* CC.EN timeout in 500msec units: */
726 ctrl->cap |= (15ULL << 24);
727 /* maximum queue entries supported (CAP.MQES is a 0's based value): */
728 ctrl->cap |= NVMET_QUEUE_SIZE - 1;
731 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
732 struct nvmet_req *req, struct nvmet_ctrl **ret)
734 struct nvmet_subsys *subsys;
735 struct nvmet_ctrl *ctrl;
738 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
740 pr_warn("connect request for invalid subsystem %s!\n",
742 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
743 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
746 mutex_lock(&subsys->lock);
747 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
748 if (ctrl->cntlid == cntlid) {
749 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
750 pr_warn("hostnqn mismatch.\n");
753 if (!kref_get_unless_zero(&ctrl->ref))
761 pr_warn("could not find controller %d for subsys %s / host %s\n",
762 cntlid, subsysnqn, hostnqn);
763 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
764 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
767 mutex_unlock(&subsys->lock);
768 nvmet_subsys_put(subsys);
772 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
774 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
775 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
776 cmd->common.opcode, req->sq->qid);
777 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
780 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
781 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
782 cmd->common.opcode, req->sq->qid);
783 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
788 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
791 struct nvmet_host_link *p;
793 if (subsys->allow_any_host)
796 list_for_each_entry(p, &subsys->hosts, entry) {
797 if (!strcmp(nvmet_host_name(p->host), hostnqn))
804 static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
807 struct nvmet_subsys_link *s;
809 list_for_each_entry(s, &req->port->subsystems, entry) {
810 if (__nvmet_host_allowed(s->subsys, hostnqn))
817 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
820 lockdep_assert_held(&nvmet_config_sem);
822 if (subsys->type == NVME_NQN_DISC)
823 return nvmet_host_discovery_allowed(req, hostnqn);
825 return __nvmet_host_allowed(subsys, hostnqn);
828 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
829 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
831 struct nvmet_subsys *subsys;
832 struct nvmet_ctrl *ctrl;
836 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
837 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
839 pr_warn("connect request for invalid subsystem %s!\n",
841 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
845 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
846 down_read(&nvmet_config_sem);
847 if (!nvmet_host_allowed(req, subsys, hostnqn)) {
848 pr_info("connect by host %s for subsystem %s not allowed\n",
850 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
851 up_read(&nvmet_config_sem);
852 status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
853 goto out_put_subsystem;
855 up_read(&nvmet_config_sem);
857 status = NVME_SC_INTERNAL;
858 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
860 goto out_put_subsystem;
861 mutex_init(&ctrl->lock);
863 nvmet_init_cap(ctrl);
865 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
866 INIT_LIST_HEAD(&ctrl->async_events);
868 memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
869 memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
871 kref_init(&ctrl->ref);
872 ctrl->subsys = subsys;
873 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
875 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
876 sizeof(__le32), GFP_KERNEL);
877 if (!ctrl->changed_ns_list)
880 ctrl->cqs = kcalloc(subsys->max_qid + 1,
881 sizeof(struct nvmet_cq *),
884 goto out_free_changed_ns_list;
886 ctrl->sqs = kcalloc(subsys->max_qid + 1,
887 sizeof(struct nvmet_sq *),
892 ret = ida_simple_get(&cntlid_ida,
893 NVME_CNTLID_MIN, NVME_CNTLID_MAX,
896 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
901 ctrl->ops = req->ops;
902 if (ctrl->subsys->type == NVME_NQN_DISC) {
903 /* Don't accept keep-alive timeout for discovery controllers */
905 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
910 * Discovery controllers use some arbitrary high value in order
911 * to clean up stale discovery sessions
913 * From the latest base diff RC:
914 * "The Keep Alive command is not supported by
915 * Discovery controllers. A transport may specify a
916 * fixed Discovery controller activity timeout value
917 * (e.g., 2 minutes). If no commands are received
918 * by a Discovery controller within that time
919 * period, the controller may perform the
920 * actions for Keep Alive Timer expiration".
922 ctrl->kato = NVMET_DISC_KATO;
924 /* keep-alive timeout in seconds (the connect command's KATO is in milliseconds) */
925 ctrl->kato = DIV_ROUND_UP(kato, 1000);
927 nvmet_start_keep_alive_timer(ctrl);
929 mutex_lock(&subsys->lock);
930 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
931 mutex_unlock(&subsys->lock);
937 ida_simple_remove(&cntlid_ida, ctrl->cntlid);
942 out_free_changed_ns_list:
943 kfree(ctrl->changed_ns_list);
947 nvmet_subsys_put(subsys);
952 static void nvmet_ctrl_free(struct kref *ref)
954 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
955 struct nvmet_subsys *subsys = ctrl->subsys;
957 mutex_lock(&subsys->lock);
958 list_del(&ctrl->subsys_entry);
959 mutex_unlock(&subsys->lock);
961 nvmet_stop_keep_alive_timer(ctrl);
963 flush_work(&ctrl->async_event_work);
964 cancel_work_sync(&ctrl->fatal_err_work);
966 ida_simple_remove(&cntlid_ida, ctrl->cntlid);
970 kfree(ctrl->changed_ns_list);
973 nvmet_subsys_put(subsys);
976 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
978 kref_put(&ctrl->ref, nvmet_ctrl_free);
981 static void nvmet_fatal_error_handler(struct work_struct *work)
983 struct nvmet_ctrl *ctrl =
984 container_of(work, struct nvmet_ctrl, fatal_err_work);
986 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
987 ctrl->ops->delete_ctrl(ctrl);
990 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
992 mutex_lock(&ctrl->lock);
993 if (!(ctrl->csts & NVME_CSTS_CFS)) {
994 ctrl->csts |= NVME_CSTS_CFS;
995 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
996 schedule_work(&ctrl->fatal_err_work);
998 mutex_unlock(&ctrl->lock);
1000 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
1002 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
1003 const char *subsysnqn)
1005 struct nvmet_subsys_link *p;
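	/*
	 * Connect requests for the well-known discovery NQN
	 * ("nqn.2014-08.org.nvmexpress.discovery") always resolve to the
	 * single global discovery subsystem, regardless of the port they
	 * arrived on.
	 */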
1010 if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
1012 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
1014 return nvmet_disc_subsys;
1017 down_read(&nvmet_config_sem);
1018 list_for_each_entry(p, &port->subsystems, entry) {
1019 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
1021 if (!kref_get_unless_zero(&p->subsys->ref))
1023 up_read(&nvmet_config_sem);
1027 up_read(&nvmet_config_sem);
1031 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
1032 enum nvme_subsys_type type)
1034 struct nvmet_subsys *subsys;
1036 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
1040 subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
1041 /* generate a random serial number as our controllers are ephemeral: */
1042 get_random_bytes(&subsys->serial, sizeof(subsys->serial));
1046 subsys->max_qid = NVMET_NR_QUEUES;
1049 subsys->max_qid = 0;
1052 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
1056 subsys->type = type;
1057 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
1059 if (!subsys->subsysnqn) {
1064 kref_init(&subsys->ref);
1066 mutex_init(&subsys->lock);
1067 INIT_LIST_HEAD(&subsys->namespaces);
1068 INIT_LIST_HEAD(&subsys->ctrls);
1069 INIT_LIST_HEAD(&subsys->hosts);
1074 static void nvmet_subsys_free(struct kref *ref)
1076 struct nvmet_subsys *subsys =
1077 container_of(ref, struct nvmet_subsys, ref);
1079 WARN_ON_ONCE(!list_empty(&subsys->namespaces));
1081 kfree(subsys->subsysnqn);
1085 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
1087 struct nvmet_ctrl *ctrl;
1089 mutex_lock(&subsys->lock);
1090 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
1091 ctrl->ops->delete_ctrl(ctrl);
1092 mutex_unlock(&subsys->lock);
1095 void nvmet_subsys_put(struct nvmet_subsys *subsys)
1097 kref_put(&subsys->ref, nvmet_subsys_free);
1100 static int __init nvmet_init(void)
1104 error = nvmet_init_discovery();
1108 error = nvmet_init_configfs();
1110 goto out_exit_discovery;
1114 nvmet_exit_discovery();
1119 static void __exit nvmet_exit(void)
1121 nvmet_exit_configfs();
1122 nvmet_exit_discovery();
1123 ida_destroy(&cntlid_ida);
1125 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1126 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1129 module_init(nvmet_init);
1130 module_exit(nvmet_exit);
1132 MODULE_LICENSE("GPL v2");