Merge tag 'vfio-v4.8-rc1' of git://github.com/awilliam/linux-vfio
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 01:13:35 +0000 (18:13 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 01:13:35 +0000 (18:13 -0700)
Pull VFIO updates from Alex Williamson:
 - Enable no-iommu mode for platform devices (Peng Fan)
 - Sub-page mmap for exclusive pages (Yongji Xie)
 - Use-after-free fix (Ilya Lesokhin)
 - Support for ACPI-based platform devices (Sinan Kaya)

* tag 'vfio-v4.8-rc1' of git://github.com/awilliam/linux-vfio:
  vfio: platform: check reset call return code during release
  vfio: platform: check reset call return code during open
  vfio, platform: make reset driver a requirement by default
  vfio: platform: call _RST method when using ACPI
  vfio: platform: add extra debug info argument to call reset
  vfio: platform: add support for ACPI probe
  vfio: platform: determine reset capability
  vfio: platform: move reset call to a common function
  vfio: platform: rename reset function
  vfio: fix possible use after free of vfio group
  vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  vfio: platform: support No-IOMMU mode

drivers/vfio/pci/vfio_pci.c
drivers/vfio/pci/vfio_pci_private.h
drivers/vfio/platform/vfio_amba.c
drivers/vfio/platform/vfio_platform.c
drivers/vfio/platform/vfio_platform_common.c
drivers/vfio/platform/vfio_platform_private.h
drivers/vfio/vfio.c

index 188b1ff..d624a52 100644 (file)
@@ -110,6 +110,74 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
        return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
 }
 
+static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
+{
+       struct resource *res;
+       int bar;
+       struct vfio_pci_dummy_resource *dummy_res;
+
+       INIT_LIST_HEAD(&vdev->dummy_resources_list);
+
+       for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
+               res = vdev->pdev->resource + bar;
+
+               if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
+                       goto no_mmap;
+
+               if (!(res->flags & IORESOURCE_MEM))
+                       goto no_mmap;
+
+               /*
+                * The PCI core shouldn't set up a resource with a
+                * type but zero size. But there may be bugs that
+                * cause us to do that.
+                */
+               if (!resource_size(res))
+                       goto no_mmap;
+
+               if (resource_size(res) >= PAGE_SIZE) {
+                       vdev->bar_mmap_supported[bar] = true;
+                       continue;
+               }
+
+               if (!(res->start & ~PAGE_MASK)) {
+                       /*
+                        * Add a dummy resource to reserve the remainder
+                        * of the exclusive page in case that hot-add
+                        * device's bar is assigned into it.
+                        */
+                       dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
+                       if (dummy_res == NULL)
+                               goto no_mmap;
+
+                       dummy_res->resource.name = "vfio sub-page reserved";
+                       dummy_res->resource.start = res->end + 1;
+                       dummy_res->resource.end = res->start + PAGE_SIZE - 1;
+                       dummy_res->resource.flags = res->flags;
+                       if (request_resource(res->parent,
+                                               &dummy_res->resource)) {
+                               kfree(dummy_res);
+                               goto no_mmap;
+                       }
+                       dummy_res->index = bar;
+                       list_add(&dummy_res->res_next,
+                                       &vdev->dummy_resources_list);
+                       vdev->bar_mmap_supported[bar] = true;
+                       continue;
+               }
+               /*
+                * Here we don't handle the case when the BAR is not page
+                * aligned because we can't expect the BAR will be
+                * assigned into the same location in a page in guest
+                * when we passthrough the BAR. And it's hard to access
+                * this BAR in userspace because we have no way to get
+                * the BAR's location in a page.
+                */
+no_mmap:
+               vdev->bar_mmap_supported[bar] = false;
+       }
+}
+
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 static void vfio_pci_disable(struct vfio_pci_device *vdev);
 
@@ -218,12 +286,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
                }
        }
 
+       vfio_pci_probe_mmaps(vdev);
+
        return 0;
 }
 
 static void vfio_pci_disable(struct vfio_pci_device *vdev)
 {
        struct pci_dev *pdev = vdev->pdev;
+       struct vfio_pci_dummy_resource *dummy_res, *tmp;
        int i, bar;
 
        /* Stop the device from further DMA */
@@ -252,6 +323,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
                vdev->barmap[bar] = NULL;
        }
 
+       list_for_each_entry_safe(dummy_res, tmp,
+                                &vdev->dummy_resources_list, res_next) {
+               list_del(&dummy_res->res_next);
+               release_resource(&dummy_res->resource);
+               kfree(dummy_res);
+       }
+
        vdev->needs_reset = true;
 
        /*
@@ -623,9 +701,7 @@ static long vfio_pci_ioctl(void *device_data,
 
                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
-                       if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
-                           pci_resource_flags(pdev, info.index) &
-                           IORESOURCE_MEM && info.size >= PAGE_SIZE) {
+                       if (vdev->bar_mmap_supported[info.index]) {
                                info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
                                if (info.index == vdev->msix_bar) {
                                        ret = msix_sparse_mmap_cap(vdev, &caps);
@@ -1049,16 +1125,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
                return -EINVAL;
        if (index >= VFIO_PCI_ROM_REGION_INDEX)
                return -EINVAL;
-       if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM))
+       if (!vdev->bar_mmap_supported[index])
                return -EINVAL;
 
-       phys_len = pci_resource_len(pdev, index);
+       phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
        req_len = vma->vm_end - vma->vm_start;
        pgoff = vma->vm_pgoff &
                ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
        req_start = pgoff << PAGE_SHIFT;
 
-       if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
+       if (req_start + req_len > phys_len)
                return -EINVAL;
 
        if (index == vdev->msix_bar) {
index 016c14a..2128de8 100644 (file)
@@ -57,9 +57,16 @@ struct vfio_pci_region {
        u32                             flags;
 };
 
+struct vfio_pci_dummy_resource {
+       struct resource         resource;
+       int                     index;
+       struct list_head        res_next;
+};
+
 struct vfio_pci_device {
        struct pci_dev          *pdev;
        void __iomem            *barmap[PCI_STD_RESOURCE_END + 1];
+       bool                    bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
        u8                      *pci_config_map;
        u8                      *vconfig;
        struct perm_bits        *msi_perm;
@@ -88,6 +95,7 @@ struct vfio_pci_device {
        int                     refcnt;
        struct eventfd_ctx      *err_trigger;
        struct eventfd_ctx      *req_trigger;
+       struct list_head        dummy_resources_list;
 };
 
 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
index a66479b..31372fb 100644 (file)
@@ -68,6 +68,7 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
        vdev->get_resource = get_amba_resource;
        vdev->get_irq = get_amba_irq;
        vdev->parent_module = THIS_MODULE;
+       vdev->reset_required = false;
 
        ret = vfio_platform_probe_common(vdev, &adev->dev);
        if (ret) {
index b1cc3a7..6561751 100644 (file)
 #define DRIVER_AUTHOR   "Antonios Motakis <a.motakis@virtualopensystems.com>"
 #define DRIVER_DESC     "VFIO for platform devices - User Level meta-driver"
 
+static bool reset_required = true;
+module_param(reset_required, bool, 0444);
+MODULE_PARM_DESC(reset_required, "override reset requirement (default: 1)");
+
 /* probing devices from the linux platform bus */
 
 static struct resource *get_platform_resource(struct vfio_platform_device *vdev,
@@ -66,6 +70,7 @@ static int vfio_platform_probe(struct platform_device *pdev)
        vdev->get_resource = get_platform_resource;
        vdev->get_irq = get_platform_irq;
        vdev->parent_module = THIS_MODULE;
+       vdev->reset_required = reset_required;
 
        ret = vfio_platform_probe_common(vdev, &pdev->dev);
        if (ret)
index e65b142..1cf2d46 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/device.h>
+#include <linux/acpi.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -27,6 +28,8 @@
 #define DRIVER_AUTHOR   "Antonios Motakis <a.motakis@virtualopensystems.com>"
 #define DRIVER_DESC     "VFIO platform base module"
 
+#define VFIO_PLATFORM_IS_ACPI(vdev) ((vdev)->acpihid != NULL)
+
 static LIST_HEAD(reset_list);
 static DEFINE_MUTEX(driver_lock);
 
@@ -41,7 +44,7 @@ static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
                if (!strcmp(iter->compat, compat) &&
                        try_module_get(iter->owner)) {
                        *module = iter->owner;
-                       reset_fn = iter->reset;
+                       reset_fn = iter->of_reset;
                        break;
                }
        }
@@ -49,20 +52,91 @@ static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
        return reset_fn;
 }
 
-static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
+static int vfio_platform_acpi_probe(struct vfio_platform_device *vdev,
+                                   struct device *dev)
 {
-       vdev->reset = vfio_platform_lookup_reset(vdev->compat,
-                                               &vdev->reset_module);
-       if (!vdev->reset) {
+       struct acpi_device *adev;
+
+       if (acpi_disabled)
+               return -ENOENT;
+
+       adev = ACPI_COMPANION(dev);
+       if (!adev) {
+               pr_err("VFIO: ACPI companion device not found for %s\n",
+                       vdev->name);
+               return -ENODEV;
+       }
+
+#ifdef CONFIG_ACPI
+       vdev->acpihid = acpi_device_hid(adev);
+#endif
+       return WARN_ON(!vdev->acpihid) ? -EINVAL : 0;
+}
+
+int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
+                                 const char **extra_dbg)
+{
+#ifdef CONFIG_ACPI
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct device *dev = vdev->device;
+       acpi_handle handle = ACPI_HANDLE(dev);
+       acpi_status acpi_ret;
+
+       acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer);
+       if (ACPI_FAILURE(acpi_ret)) {
+               if (extra_dbg)
+                       *extra_dbg = acpi_format_exception(acpi_ret);
+               return -EINVAL;
+       }
+
+       return 0;
+#else
+       return -ENOENT;
+#endif
+}
+
+bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev)
+{
+#ifdef CONFIG_ACPI
+       struct device *dev = vdev->device;
+       acpi_handle handle = ACPI_HANDLE(dev);
+
+       return acpi_has_method(handle, "_RST");
+#else
+       return false;
+#endif
+}
+
+static bool vfio_platform_has_reset(struct vfio_platform_device *vdev)
+{
+       if (VFIO_PLATFORM_IS_ACPI(vdev))
+               return vfio_platform_acpi_has_reset(vdev);
+
+       return vdev->of_reset ? true : false;
+}
+
+static int vfio_platform_get_reset(struct vfio_platform_device *vdev)
+{
+       if (VFIO_PLATFORM_IS_ACPI(vdev))
+               return vfio_platform_acpi_has_reset(vdev) ? 0 : -ENOENT;
+
+       vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
+                                                   &vdev->reset_module);
+       if (!vdev->of_reset) {
                request_module("vfio-reset:%s", vdev->compat);
-               vdev->reset = vfio_platform_lookup_reset(vdev->compat,
-                                                        &vdev->reset_module);
+               vdev->of_reset = vfio_platform_lookup_reset(vdev->compat,
+                                                       &vdev->reset_module);
        }
+
+       return vdev->of_reset ? 0 : -ENOENT;
 }
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
 {
-       if (vdev->reset)
+       if (VFIO_PLATFORM_IS_ACPI(vdev))
+               return;
+
+       if (vdev->of_reset)
                module_put(vdev->reset_module);
 }
 
@@ -134,6 +208,21 @@ static void vfio_platform_regions_cleanup(struct vfio_platform_device *vdev)
        kfree(vdev->regions);
 }
 
+static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
+                                   const char **extra_dbg)
+{
+       if (VFIO_PLATFORM_IS_ACPI(vdev)) {
+               dev_info(vdev->device, "reset\n");
+               return vfio_platform_acpi_call_reset(vdev, extra_dbg);
+       } else if (vdev->of_reset) {
+               dev_info(vdev->device, "reset\n");
+               return vdev->of_reset(vdev);
+       }
+
+       dev_warn(vdev->device, "no reset function found!\n");
+       return -EINVAL;
+}
+
 static void vfio_platform_release(void *device_data)
 {
        struct vfio_platform_device *vdev = device_data;
@@ -141,11 +230,14 @@ static void vfio_platform_release(void *device_data)
        mutex_lock(&driver_lock);
 
        if (!(--vdev->refcnt)) {
-               if (vdev->reset) {
-                       dev_info(vdev->device, "reset\n");
-                       vdev->reset(vdev);
-               } else {
-                       dev_warn(vdev->device, "no reset function found!\n");
+               const char *extra_dbg = NULL;
+               int ret;
+
+               ret = vfio_platform_call_reset(vdev, &extra_dbg);
+               if (ret && vdev->reset_required) {
+                       dev_warn(vdev->device, "reset driver is required and reset call failed in release (%d) %s\n",
+                                ret, extra_dbg ? extra_dbg : "");
+                       WARN_ON(1);
                }
                vfio_platform_regions_cleanup(vdev);
                vfio_platform_irq_cleanup(vdev);
@@ -167,6 +259,8 @@ static int vfio_platform_open(void *device_data)
        mutex_lock(&driver_lock);
 
        if (!vdev->refcnt) {
+               const char *extra_dbg = NULL;
+
                ret = vfio_platform_regions_init(vdev);
                if (ret)
                        goto err_reg;
@@ -175,11 +269,11 @@ static int vfio_platform_open(void *device_data)
                if (ret)
                        goto err_irq;
 
-               if (vdev->reset) {
-                       dev_info(vdev->device, "reset\n");
-                       vdev->reset(vdev);
-               } else {
-                       dev_warn(vdev->device, "no reset function found!\n");
+               ret = vfio_platform_call_reset(vdev, &extra_dbg);
+               if (ret && vdev->reset_required) {
+                       dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n",
+                                ret, extra_dbg ? extra_dbg : "");
+                       goto err_rst;
                }
        }
 
@@ -188,6 +282,8 @@ static int vfio_platform_open(void *device_data)
        mutex_unlock(&driver_lock);
        return 0;
 
+err_rst:
+       vfio_platform_irq_cleanup(vdev);
 err_irq:
        vfio_platform_regions_cleanup(vdev);
 err_reg:
@@ -213,7 +309,7 @@ static long vfio_platform_ioctl(void *device_data,
                if (info.argsz < minsz)
                        return -EINVAL;
 
-               if (vdev->reset)
+               if (vfio_platform_has_reset(vdev))
                        vdev->flags |= VFIO_DEVICE_FLAGS_RESET;
                info.flags = vdev->flags;
                info.num_regions = vdev->num_regions;
@@ -312,10 +408,7 @@ static long vfio_platform_ioctl(void *device_data,
                return ret;
 
        } else if (cmd == VFIO_DEVICE_RESET) {
-               if (vdev->reset)
-                       return vdev->reset(vdev);
-               else
-                       return -EINVAL;
+               return vfio_platform_call_reset(vdev, NULL);
        }
 
        return -ENOTTY;
@@ -544,6 +637,37 @@ static const struct vfio_device_ops vfio_platform_ops = {
        .mmap           = vfio_platform_mmap,
 };
 
+int vfio_platform_of_probe(struct vfio_platform_device *vdev,
+                          struct device *dev)
+{
+       int ret;
+
+       ret = device_property_read_string(dev, "compatible",
+                                         &vdev->compat);
+       if (ret)
+               pr_err("VFIO: cannot retrieve compat for %s\n",
+                       vdev->name);
+
+       return ret;
+}
+
+/*
+ * There can be two kernel build combinations. One build where
+ * ACPI is not selected in Kconfig and another one with the ACPI Kconfig.
+ *
+ * In the first case, vfio_platform_acpi_probe will return since
+ * acpi_disabled is 1. DT user will not see any kind of messages from
+ * ACPI.
+ *
+ * In the second case, both DT and ACPI is compiled in but the system is
+ * booting with any of these combinations.
+ *
+ * If the firmware is DT type, then acpi_disabled is 1. The ACPI probe routine
+ * terminates immediately without any messages.
+ *
+ * If the firmware is ACPI type, then acpi_disabled is 0. All other checks are
+ * valid checks. We cannot claim that this system is DT.
+ */
 int vfio_platform_probe_common(struct vfio_platform_device *vdev,
                               struct device *dev)
 {
@@ -553,15 +677,23 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
        if (!vdev)
                return -EINVAL;
 
-       ret = device_property_read_string(dev, "compatible", &vdev->compat);
-       if (ret) {
-               pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name);
-               return -EINVAL;
-       }
+       ret = vfio_platform_acpi_probe(vdev, dev);
+       if (ret)
+               ret = vfio_platform_of_probe(vdev, dev);
+
+       if (ret)
+               return ret;
 
        vdev->device = dev;
 
-       group = iommu_group_get(dev);
+       ret = vfio_platform_get_reset(vdev);
+       if (ret && vdev->reset_required) {
+               pr_err("vfio: no reset function found for device %s\n",
+                      vdev->name);
+               return ret;
+       }
+
+       group = vfio_iommu_group_get(dev);
        if (!group) {
                pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
                return -EINVAL;
@@ -569,12 +701,10 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 
        ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
        if (ret) {
-               iommu_group_put(group);
+               vfio_iommu_group_put(group, dev);
                return ret;
        }
 
-       vfio_platform_get_reset(vdev);
-
        mutex_init(&vdev->igate);
 
        return 0;
@@ -589,7 +719,7 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
 
        if (vdev) {
                vfio_platform_put_reset(vdev);
-               iommu_group_put(dev->iommu_group);
+               vfio_iommu_group_put(dev->iommu_group, dev);
        }
 
        return vdev;
@@ -611,7 +741,7 @@ void vfio_platform_unregister_reset(const char *compat,
 
        mutex_lock(&driver_lock);
        list_for_each_entry_safe(iter, temp, &reset_list, link) {
-               if (!strcmp(iter->compat, compat) && (iter->reset == fn)) {
+               if (!strcmp(iter->compat, compat) && (iter->of_reset == fn)) {
                        list_del(&iter->link);
                        break;
                }
index 42816dd..85ffe5d 100644 (file)
@@ -58,6 +58,7 @@ struct vfio_platform_device {
        struct mutex                    igate;
        struct module                   *parent_module;
        const char                      *compat;
+       const char                      *acpihid;
        struct module                   *reset_module;
        struct device                   *device;
 
@@ -71,7 +72,9 @@ struct vfio_platform_device {
        struct resource*
                (*get_resource)(struct vfio_platform_device *vdev, int i);
        int     (*get_irq)(struct vfio_platform_device *vdev, int i);
-       int     (*reset)(struct vfio_platform_device *vdev);
+       int     (*of_reset)(struct vfio_platform_device *vdev);
+
+       bool                            reset_required;
 };
 
 typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev);
@@ -80,7 +83,7 @@ struct vfio_platform_reset_node {
        struct list_head link;
        char *compat;
        struct module *owner;
-       vfio_platform_reset_fn_t reset;
+       vfio_platform_reset_fn_t of_reset;
 };
 
 extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
@@ -103,7 +106,7 @@ extern void vfio_platform_unregister_reset(const char *compat,
 static struct vfio_platform_reset_node __reset ## _node = {    \
        .owner = THIS_MODULE,                                   \
        .compat = __compat,                                     \
-       .reset = __reset,                                       \
+       .of_reset = __reset,                                    \
 };                                                             \
 __vfio_platform_register_reset(&__reset ## _node)
 
index 6fd6fa5..d1d70e0 100644 (file)
@@ -1711,8 +1711,8 @@ EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
 
 void vfio_group_put_external_user(struct vfio_group *group)
 {
-       vfio_group_put(group);
        vfio_group_try_dissolve_container(group);
+       vfio_group_put(group);
 }
 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);