nvme/pci: move cqe check after device shutdown
authorKeith Busch <kbusch@kernel.org>
Wed, 12 Feb 2020 16:41:05 +0000 (01:41 +0900)
committerJens Axboe <axboe@kernel.dk>
Fri, 14 Feb 2020 17:12:04 +0000 (10:12 -0700)
Many users have reported nvme triggered irq_startup() warnings during
shutdown. The driver uses the nvme queue's irq to synchronize scanning
for completions, and enabling an interrupt affined to only offline CPUs
triggers the alarming warning.

Move the final CQE check to after disabling the device and all
registered interrupts have been torn down so that we do not have any
IRQ to synchronize.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206509
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/nvme/host/pci.c

index da392b5..9c80f9f 100644 (file)
@@ -1401,6 +1401,23 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
        nvme_poll_irqdisable(nvmeq, -1);
 }
 
+/*
+ * Called only on a device that has been disabled and after all other threads
+ * that can check this device's completion queues have synced. This is the
+ * last chance for the driver to see a natural completion before
+ * nvme_cancel_request() terminates all incomplete requests.
+ */
+static void nvme_reap_pending_cqes(struct nvme_dev *dev)
+{
+       u16 start, end;
+       int i;
+
+       for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
+               nvme_process_cq(&dev->queues[i], &start, &end, -1);
+               nvme_complete_cqes(&dev->queues[i], start, end);
+       }
+}
+
 static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
                                int entry_size)
 {
@@ -2235,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
                if (timeout == 0)
                        return false;
 
-               /* handle any remaining CQEs */
-               if (opcode == nvme_admin_delete_cq &&
-                   !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags))
-                       nvme_poll_irqdisable(nvmeq, -1);
-
                sent--;
                if (nr_queues)
                        goto retry;
@@ -2428,6 +2440,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        nvme_suspend_io_queues(dev);
        nvme_suspend_queue(&dev->queues[0]);
        nvme_pci_disable(dev);
+       nvme_reap_pending_cqes(dev);
 
        blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
        blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);