vfio: Replace the iommu notifier with a device list
authorJason Gunthorpe <jgg@nvidia.com>
Wed, 20 Jul 2022 00:02:49 +0000 (21:02 -0300)
committerAlex Williamson <alex.williamson@redhat.com>
Wed, 20 Jul 2022 17:57:59 +0000 (11:57 -0600)
Instead of bouncing the function call to the driver op through a blocking
notifier just have the iommu layer call it directly.

Register each device that is being attached to the iommu with the lower
driver which then threads them on a linked list and calls the appropriate
driver op at the right time.

Currently the only use is if dma_unmap() is defined.

Also, fully lock the debugging tests on the pinning path that check
whether a dma_unmap handler is registered.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/2-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
drivers/vfio/vfio.c
drivers/vfio/vfio.h
drivers/vfio/vfio_iommu_type1.c
include/linux/vfio.h

index 83c375f..b3ce807 100644 (file)
@@ -231,7 +231,7 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
 {
        struct vfio_iommu_driver *driver, *tmp;
 
-       if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
+       if (WARN_ON(!ops->register_device != !ops->unregister_device))
                return -EINVAL;
 
        driver = kzalloc(sizeof(*driver), GFP_KERNEL);
@@ -1082,17 +1082,6 @@ static void vfio_device_unassign_container(struct vfio_device *device)
        up_write(&device->group->group_rwsem);
 }
 
-static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
-                              void *data)
-{
-       struct vfio_device *vfio_device =
-               container_of(nb, struct vfio_device, iommu_nb);
-       struct vfio_iommu_type1_dma_unmap *unmap = data;
-
-       vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
-       return NOTIFY_OK;
-}
-
 static struct file *vfio_device_open(struct vfio_device *device)
 {
        struct vfio_iommu_driver *iommu_driver;
@@ -1128,15 +1117,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
                }
 
                iommu_driver = device->group->container->iommu_driver;
-               if (device->ops->dma_unmap && iommu_driver &&
-                   iommu_driver->ops->register_notifier) {
-                       unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
-                       device->iommu_nb.notifier_call = vfio_iommu_notifier;
-                       iommu_driver->ops->register_notifier(
-                               device->group->container->iommu_data, &events,
-                               &device->iommu_nb);
-               }
+               if (iommu_driver && iommu_driver->ops->register_device)
+                       iommu_driver->ops->register_device(
+                               device->group->container->iommu_data, device);
 
                up_read(&device->group->group_rwsem);
        }
@@ -1176,11 +1159,9 @@ err_close_device:
                device->ops->close_device(device);
 
                iommu_driver = device->group->container->iommu_driver;
-               if (device->ops->dma_unmap && iommu_driver &&
-                   iommu_driver->ops->unregister_notifier)
-                       iommu_driver->ops->unregister_notifier(
-                               device->group->container->iommu_data,
-                               &device->iommu_nb);
+               if (iommu_driver && iommu_driver->ops->unregister_device)
+                       iommu_driver->ops->unregister_device(
+                               device->group->container->iommu_data, device);
        }
 err_undo_count:
        up_read(&device->group->group_rwsem);
@@ -1385,11 +1366,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
                device->ops->close_device(device);
 
        iommu_driver = device->group->container->iommu_driver;
-       if (device->ops->dma_unmap && iommu_driver &&
-           iommu_driver->ops->unregister_notifier)
-               iommu_driver->ops->unregister_notifier(
-                       device->group->container->iommu_data,
-                       &device->iommu_nb);
+       if (iommu_driver && iommu_driver->ops->unregister_device)
+               iommu_driver->ops->unregister_device(
+                       device->group->container->iommu_data, device);
        up_read(&device->group->group_rwsem);
        device->open_count--;
        if (device->open_count == 0)
index 25da02c..4a7db1f 100644 (file)
@@ -33,9 +33,6 @@ enum vfio_iommu_notify_type {
        VFIO_IOMMU_CONTAINER_CLOSE = 0,
 };
 
-/* events for register_notifier() */
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
-
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
  */
@@ -58,11 +55,10 @@ struct vfio_iommu_driver_ops {
                                     unsigned long *phys_pfn);
        int             (*unpin_pages)(void *iommu_data,
                                       unsigned long *user_pfn, int npage);
-       int             (*register_notifier)(void *iommu_data,
-                                            unsigned long *events,
-                                            struct notifier_block *nb);
-       int             (*unregister_notifier)(void *iommu_data,
-                                              struct notifier_block *nb);
+       void            (*register_device)(void *iommu_data,
+                                          struct vfio_device *vdev);
+       void            (*unregister_device)(void *iommu_data,
+                                            struct vfio_device *vdev);
        int             (*dma_rw)(void *iommu_data, dma_addr_t user_iova,
                                  void *data, size_t count, bool write);
        struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
index db24062..026a1d2 100644 (file)
@@ -67,7 +67,8 @@ struct vfio_iommu {
        struct list_head        iova_list;
        struct mutex            lock;
        struct rb_root          dma_list;
-       struct blocking_notifier_head notifier;
+       struct list_head        device_list;
+       struct mutex            device_list_lock;
        unsigned int            dma_avail;
        unsigned int            vaddr_invalid_count;
        uint64_t                pgsize_bitmap;
@@ -865,8 +866,8 @@ again:
                }
        }
 
-       /* Fail if notifier list is empty */
-       if (!iommu->notifier.head) {
+       /* Fail if no dma_unmap notifier is registered */
+       if (list_empty(&iommu->device_list)) {
                ret = -EINVAL;
                goto pin_done;
        }
@@ -1287,6 +1288,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
        return 0;
 }
 
+/*
+ * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
+ * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
+ * pages in response to an invalidation.
+ */
+static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
+                                 struct vfio_dma *dma)
+{
+       struct vfio_device *device;
+
+       if (list_empty(&iommu->device_list))
+               return;
+
+       /*
+        * The device is expected to call vfio_unpin_pages() for any IOVA it has
+        * pinned within the range. Since vfio_unpin_pages() will eventually
+        * call back down to this code and try to obtain the iommu->lock we must
+        * drop it.
+        */
+       mutex_lock(&iommu->device_list_lock);
+       mutex_unlock(&iommu->lock);
+
+       list_for_each_entry(device, &iommu->device_list, iommu_entry)
+               device->ops->dma_unmap(device, dma->iova, dma->size);
+
+       mutex_unlock(&iommu->device_list_lock);
+       mutex_lock(&iommu->lock);
+}
+
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
                             struct vfio_iommu_type1_dma_unmap *unmap,
                             struct vfio_bitmap *bitmap)
@@ -1400,8 +1430,6 @@ again:
                }
 
                if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
-                       struct vfio_iommu_type1_dma_unmap nb_unmap;
-
                        if (dma_last == dma) {
                                BUG_ON(++retries > 10);
                        } else {
@@ -1409,20 +1437,7 @@ again:
                                retries = 0;
                        }
 
-                       nb_unmap.iova = dma->iova;
-                       nb_unmap.size = dma->size;
-
-                       /*
-                        * Notify anyone (mdev vendor drivers) to invalidate and
-                        * unmap iovas within the range we're about to unmap.
-                        * Vendor drivers MUST unpin pages in response to an
-                        * invalidation.
-                        */
-                       mutex_unlock(&iommu->lock);
-                       blocking_notifier_call_chain(&iommu->notifier,
-                                                   VFIO_IOMMU_NOTIFY_DMA_UNMAP,
-                                                   &nb_unmap);
-                       mutex_lock(&iommu->lock);
+                       vfio_notify_dma_unmap(iommu, dma);
                        goto again;
                }
 
@@ -2475,7 +2490,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 
                if (list_empty(&iommu->emulated_iommu_groups) &&
                    list_empty(&iommu->domain_list)) {
-                       WARN_ON(iommu->notifier.head);
+                       WARN_ON(!list_empty(&iommu->device_list));
                        vfio_iommu_unmap_unpin_all(iommu);
                }
                goto detach_group_done;
@@ -2507,7 +2522,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                if (list_empty(&domain->group_list)) {
                        if (list_is_singular(&iommu->domain_list)) {
                                if (list_empty(&iommu->emulated_iommu_groups)) {
-                                       WARN_ON(iommu->notifier.head);
+                                       WARN_ON(!list_empty(
+                                               &iommu->device_list));
                                        vfio_iommu_unmap_unpin_all(iommu);
                                } else {
                                        vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2568,7 +2584,8 @@ static void *vfio_iommu_type1_open(unsigned long arg)
        iommu->dma_avail = dma_entry_limit;
        iommu->container_open = true;
        mutex_init(&iommu->lock);
-       BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+       mutex_init(&iommu->device_list_lock);
+       INIT_LIST_HEAD(&iommu->device_list);
        init_waitqueue_head(&iommu->vaddr_wait);
        iommu->pgsize_bitmap = PAGE_MASK;
        INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3005,28 +3022,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
        }
 }
 
-static int vfio_iommu_type1_register_notifier(void *iommu_data,
-                                             unsigned long *events,
-                                             struct notifier_block *nb)
+static void vfio_iommu_type1_register_device(void *iommu_data,
+                                            struct vfio_device *vdev)
 {
        struct vfio_iommu *iommu = iommu_data;
 
-       /* clear known events */
-       *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
-       /* refuse to register if still events remaining */
-       if (*events)
-               return -EINVAL;
+       if (!vdev->ops->dma_unmap)
+               return;
 
-       return blocking_notifier_chain_register(&iommu->notifier, nb);
+       /*
+        * list_empty(&iommu->device_list) is tested under the iommu->lock while
+        * iteration for dma_unmap must be done under the device_list_lock.
+        * Holding both locks here allows avoiding the device_list_lock in
+        * several fast paths. See vfio_notify_dma_unmap()
+        */
+       mutex_lock(&iommu->lock);
+       mutex_lock(&iommu->device_list_lock);
+       list_add(&vdev->iommu_entry, &iommu->device_list);
+       mutex_unlock(&iommu->device_list_lock);
+       mutex_unlock(&iommu->lock);
 }
 
-static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
-                                               struct notifier_block *nb)
+static void vfio_iommu_type1_unregister_device(void *iommu_data,
+                                              struct vfio_device *vdev)
 {
        struct vfio_iommu *iommu = iommu_data;
 
-       return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+       if (!vdev->ops->dma_unmap)
+               return;
+
+       mutex_lock(&iommu->lock);
+       mutex_lock(&iommu->device_list_lock);
+       list_del(&vdev->iommu_entry);
+       mutex_unlock(&iommu->device_list_lock);
+       mutex_unlock(&iommu->lock);
 }
 
 static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3160,8 +3189,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
        .detach_group           = vfio_iommu_type1_detach_group,
        .pin_pages              = vfio_iommu_type1_pin_pages,
        .unpin_pages            = vfio_iommu_type1_unpin_pages,
-       .register_notifier      = vfio_iommu_type1_register_notifier,
-       .unregister_notifier    = vfio_iommu_type1_unregister_notifier,
+       .register_device        = vfio_iommu_type1_register_device,
+       .unregister_device      = vfio_iommu_type1_unregister_device,
        .dma_rw                 = vfio_iommu_type1_dma_rw,
        .group_iommu_domain     = vfio_iommu_type1_group_iommu_domain,
        .notify                 = vfio_iommu_type1_notify,
index 1f9fc7a..19cefba 100644 (file)
@@ -49,7 +49,7 @@ struct vfio_device {
        unsigned int open_count;
        struct completion comp;
        struct list_head group_next;
-       struct notifier_block iommu_nb;
+       struct list_head iommu_entry;
 };
 
 /**