nvmet: support completion queue sharing
author: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Thu, 24 Apr 2025 05:13:52 +0000 (15:13 +1000)
committer: Christoph Hellwig <hch@lst.de>
Tue, 20 May 2025 03:34:26 +0000 (05:34 +0200)
The NVMe PCI transport specification allows for completion queues to be
shared by different submission queues.

This patch allows a submission queue to keep track of the completion queue
it is using with reference counting. As such, it can be ensured that a
completion queue is not deleted while a submission queue is actively
using it.

This patch enables completion queue sharing in the pci-epf target driver.
For fabrics drivers, completion queue sharing is not enabled, as the
fabrics specification does not allow it. However, this patch
modifies the fabrics drivers to correctly integrate the new API that
supports completion queue sharing.

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/fc.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/pci-epf.c
drivers/nvme/target/rdma.c
drivers/nvme/target/tcp.c

index 5e36999..c731729 100644 (file)
@@ -63,14 +63,9 @@ static void nvmet_execute_create_sq(struct nvmet_req *req)
        if (status != NVME_SC_SUCCESS)
                goto complete;
 
-       /*
-        * Note: The NVMe specification allows multiple SQs to use the same CQ.
-        * However, the target code does not really support that. So for now,
-        * prevent this and fail the command if sqid and cqid are different.
-        */
-       if (!cqid || cqid != sqid) {
-               pr_err("SQ %u: Unsupported CQID %u\n", sqid, cqid);
-               status = NVME_SC_CQ_INVALID | NVME_STATUS_DNR;
+       status = nvmet_check_io_cqid(ctrl, cqid, false);
+       if (status != NVME_SC_SUCCESS) {
+               pr_err("SQ %u: Invalid CQID %u\n", sqid, cqid);
                goto complete;
        }
 
@@ -79,7 +74,7 @@ static void nvmet_execute_create_sq(struct nvmet_req *req)
                goto complete;
        }
 
-       status = ctrl->ops->create_sq(ctrl, sqid, sq_flags, qsize, prp1);
+       status = ctrl->ops->create_sq(ctrl, sqid, cqid, sq_flags, qsize, prp1);
 
 complete:
        nvmet_req_complete(req, status);
@@ -100,6 +95,12 @@ static void nvmet_execute_delete_cq(struct nvmet_req *req)
        if (status != NVME_SC_SUCCESS)
                goto complete;
 
+       if (!ctrl->cqs[cqid] || nvmet_cq_in_use(ctrl->cqs[cqid])) {
+               /* Some SQs are still using this CQ */
+               status = NVME_SC_QID_INVALID | NVME_STATUS_DNR;
+               goto complete;
+       }
+
        status = ctrl->ops->delete_cq(ctrl, cqid);
 
 complete:
index bfaba79..2b02b2f 100644 (file)
@@ -905,6 +905,11 @@ u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
        if (status != NVME_SC_SUCCESS)
                return status;
 
+       if (!kref_get_unless_zero(&ctrl->ref))
+               return NVME_SC_INTERNAL | NVME_STATUS_DNR;
+       cq->ctrl = ctrl;
+
+       nvmet_cq_init(cq);
        nvmet_cq_setup(ctrl, cq, qid, size);
 
        return NVME_SC_SUCCESS;
@@ -928,7 +933,7 @@ u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
 }
 
 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
-                   u16 sqid, u16 size)
+                   struct nvmet_cq *cq, u16 sqid, u16 size)
 {
        u16 status;
        int ret;
@@ -940,7 +945,7 @@ u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
        if (status != NVME_SC_SUCCESS)
                return status;
 
-       ret = nvmet_sq_init(sq);
+       ret = nvmet_sq_init(sq, cq);
        if (ret) {
                status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
                goto ctrl_put;
@@ -972,6 +977,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
        wait_for_completion(&sq->free_done);
        percpu_ref_exit(&sq->ref);
        nvmet_auth_sq_free(sq);
+       nvmet_cq_put(sq->cq);
 
        /*
         * we must reference the ctrl again after waiting for inflight IO
@@ -1004,18 +1010,23 @@ static void nvmet_sq_free(struct percpu_ref *ref)
        complete(&sq->free_done);
 }
 
-int nvmet_sq_init(struct nvmet_sq *sq)
+int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
 {
        int ret;
 
+       if (!nvmet_cq_get(cq))
+               return -EINVAL;
+
        ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
        if (ret) {
                pr_err("percpu_ref init failed!\n");
+               nvmet_cq_put(cq);
                return ret;
        }
        init_completion(&sq->free_done);
        init_completion(&sq->confirm_done);
        nvmet_auth_sq_init(sq);
+       sq->cq = cq;
 
        return 0;
 }
index 7c2a4e2..2e813e5 100644 (file)
@@ -817,7 +817,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
        nvmet_fc_prep_fcp_iodlist(assoc->tgtport, queue);
 
        nvmet_cq_init(&queue->nvme_cq);
-       ret = nvmet_sq_init(&queue->nvme_sq);
+       ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
        if (ret)
                goto out_fail_iodlist;
 
index bbb3699..c9ec13e 100644 (file)
@@ -332,7 +332,8 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
        for (i = 1; i <= nr_io_queues; i++) {
                ctrl->queues[i].ctrl = ctrl;
                nvmet_cq_init(&ctrl->queues[i].nvme_cq);
-               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq,
+                               &ctrl->queues[i].nvme_cq);
                if (ret) {
                        nvmet_cq_put(&ctrl->queues[i].nvme_cq);
                        goto out_destroy_queues;
@@ -368,7 +369,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
        ctrl->queues[0].ctrl = ctrl;
        nvmet_cq_init(&ctrl->queues[0].nvme_cq);
-       error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
+       error = nvmet_sq_init(&ctrl->queues[0].nvme_sq,
+                       &ctrl->queues[0].nvme_cq);
        if (error) {
                nvmet_cq_put(&ctrl->queues[0].nvme_cq);
                return error;
index c87f6fb..d3795b0 100644 (file)
@@ -150,6 +150,7 @@ struct nvmet_cq {
 struct nvmet_sq {
        struct nvmet_ctrl       *ctrl;
        struct percpu_ref       ref;
+       struct nvmet_cq         *cq;
        u16                     qid;
        u16                     size;
        u32                     sqhd;
@@ -427,7 +428,7 @@ struct nvmet_fabrics_ops {
        u16 (*get_max_queue_size)(const struct nvmet_ctrl *ctrl);
 
        /* Operations mandatory for PCI target controllers */
-       u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 flags,
+       u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 cqid, u16 flags,
                         u16 qsize, u64 prp1);
        u16 (*delete_sq)(struct nvmet_ctrl *ctrl, u16 sqid);
        u16 (*create_cq)(struct nvmet_ctrl *ctrl, u16 cqid, u16 flags,
@@ -588,10 +589,10 @@ bool nvmet_cq_in_use(struct nvmet_cq *cq);
 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, bool create);
 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
                u16 size);
-u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
-               u16 size);
+u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
+       struct nvmet_cq *cq, u16 qid, u16 size);
 void nvmet_sq_destroy(struct nvmet_sq *sq);
-int nvmet_sq_init(struct nvmet_sq *sq);
+int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq);
 
 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
 
index 7dda415..ebe2fc3 100644 (file)
@@ -1346,16 +1346,17 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
        nvmet_pci_epf_drain_queue(cq);
        nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
        nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
-       tctrl->cqs[cqid] = NULL;
+       nvmet_cq_put(&cq->nvme_cq);
 
        return NVME_SC_SUCCESS;
 }
 
 static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
-               u16 sqid, u16 flags, u16 qsize, u64 pci_addr)
+               u16 sqid, u16 cqid, u16 flags, u16 qsize, u64 pci_addr)
 {
        struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
        struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid];
+       struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
        u16 status;
 
        if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
@@ -1378,7 +1379,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
                sq->qes = ctrl->io_sqes;
        sq->pci_size = sq->qes * sq->depth;
 
-       status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth);
+       status = nvmet_sq_create(tctrl, &sq->nvme_sq, &cq->nvme_cq, sqid,
+                       sq->depth);
        if (status != NVME_SC_SUCCESS)
                return status;
 
@@ -1873,8 +1875,8 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
 
        qsize = aqa & 0x00000fff;
        pci_addr = asq & GENMASK_ULL(63, 12);
-       status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, NVME_QUEUE_PHYS_CONTIG,
-                                        qsize, pci_addr);
+       status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, 0,
+                       NVME_QUEUE_PHYS_CONTIG, qsize, pci_addr);
        if (status != NVME_SC_SUCCESS) {
                dev_err(ctrl->dev, "Failed to create admin submission queue\n");
                nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
index 3ad9b4d..2e5c322 100644 (file)
@@ -1438,7 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
        }
 
        nvmet_cq_init(&queue->nvme_cq);
-       ret = nvmet_sq_init(&queue->nvme_sq);
+       ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
        if (ret) {
                ret = NVME_RDMA_CM_NO_RSC;
                goto out_free_queue;
index 4dacb6b..5811246 100644 (file)
@@ -1912,7 +1912,7 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
                goto out_ida_remove;
 
        nvmet_cq_init(&queue->nvme_cq);
-       ret = nvmet_sq_init(&queue->nvme_sq);
+       ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
        if (ret)
                goto out_free_connect;