vfio/pci: Cleanup .probe() exit paths
[linux-2.6-microblaze.git] / drivers / vfio / pci / vfio_pci.c
index 379a02c..6c6b37b 100644 (file)
@@ -9,7 +9,6 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#define dev_fmt pr_fmt
 
 #include <linux/device.h>
 #include <linux/eventfd.h>
@@ -54,6 +53,12 @@ module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(disable_idle_d3,
                 "Disable using the PCI D3 low power state for idle, unused devices");
 
+static bool enable_sriov; /* tested by vfio_pci_sriov_configure() */
+#ifdef CONFIG_PCI_IOV
+module_param(enable_sriov, bool, 0644);
+MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration.  Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF.");
+#endif
+
 static inline bool vfio_vga_disabled(void)
 {
 #ifdef CONFIG_VFIO_PCI_VGA
@@ -466,6 +471,44 @@ out:
                vfio_pci_set_power_state(vdev, PCI_D3hot);
 }
 
+static struct pci_driver vfio_pci_driver;
+/*
+ * Look up the vfio-pci device data of the PF that owns the VF @vdev.
+ *
+ * Returns NULL when @vdev is not a VF, when no vfio device is found for
+ * the PF, or when the PF is not bound to vfio_pci_driver; nothing is left
+ * for the caller to release in those cases.  On success the vfio_device
+ * obtained from vfio_device_get_from_dev() is handed back through @pf_dev
+ * and the caller is expected to release it with vfio_device_put().
+ */
+static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
+                                          struct vfio_device **pf_dev)
+{
+       struct pci_dev *physfn = pci_physfn(vdev->pdev);
+
+       if (!vdev->pdev->is_virtfn)
+               return NULL;
+
+       *pf_dev = vfio_device_get_from_dev(&physfn->dev);
+       if (!*pf_dev)
+               return NULL;
+
+       if (pci_dev_driver(physfn) != &vfio_pci_driver) {
+               vfio_device_put(*pf_dev);
+               return NULL;
+       }
+
+       return vfio_device_data(*pf_dev);
+}
+/*
+ * Adjust the user count of the PF's VF token by @val on behalf of the VF
+ * @vdev.  Does nothing when get_pf_vdev() finds no vfio-pci PF for @vdev.
+ * The count is modified with the token's lock held and a negative result
+ * triggers WARN_ON().
+ */
+static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
+{
+       struct vfio_device *pf_dev;
+       struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
+
+       if (!pf_vdev)
+               return;
+
+       mutex_lock(&pf_vdev->vf_token->lock);
+       pf_vdev->vf_token->users += val;
+       WARN_ON(pf_vdev->vf_token->users < 0);
+       mutex_unlock(&pf_vdev->vf_token->lock);
+
+       vfio_device_put(pf_dev); /* pairs with the get in get_pf_vdev() */
+}
+
 static void vfio_pci_release(void *device_data)
 {
        struct vfio_pci_device *vdev = device_data;
@@ -473,6 +516,7 @@ static void vfio_pci_release(void *device_data)
        mutex_lock(&vdev->reflck->lock);
 
        if (!(--vdev->refcnt)) {
+               vfio_pci_vf_token_user_add(vdev, -1);
                vfio_spapr_pci_eeh_release(vdev->pdev);
                vfio_pci_disable(vdev);
        }
@@ -498,6 +542,7 @@ static int vfio_pci_open(void *device_data)
                        goto error;
 
                vfio_spapr_pci_eeh_open(vdev->pdev);
+               vfio_pci_vf_token_user_add(vdev, 1);
        }
        vdev->refcnt++;
 error:
@@ -1140,6 +1185,65 @@ hot_reset_release:
 
                return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
                                          ioeventfd.data, count, ioeventfd.fd);
+       } else if (cmd == VFIO_DEVICE_FEATURE) {
+               struct vfio_device_feature feature;
+               uuid_t uuid;
+
+               minsz = offsetofend(struct vfio_device_feature, flags);
+
+               if (copy_from_user(&feature, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (feature.argsz < minsz)
+                       return -EINVAL;
+
+               /* Check unknown flags */
+               if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
+                                     VFIO_DEVICE_FEATURE_SET |
+                                     VFIO_DEVICE_FEATURE_GET |
+                                     VFIO_DEVICE_FEATURE_PROBE))
+                       return -EINVAL;
+
+               /* GET & SET are mutually exclusive except with PROBE */
+               if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
+                   (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
+                   (feature.flags & VFIO_DEVICE_FEATURE_GET))
+                       return -EINVAL;
+
+               switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
+               case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
+                       if (!vdev->vf_token)
+                               return -ENOTTY;
+
+                       /*
+                        * We do not support GET of the VF Token UUID as this
+                        * could expose the token of the previous device user.
+                        */
+                       if (feature.flags & VFIO_DEVICE_FEATURE_GET)
+                               return -EINVAL;
+
+                       if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
+                               return 0;
+
+                       /* Don't SET unless told to do so */
+                       if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
+                               return -EINVAL;
+
+                       if (feature.argsz < minsz + sizeof(uuid))
+                               return -EINVAL;
+
+                       if (copy_from_user(&uuid, (void __user *)(arg + minsz),
+                                          sizeof(uuid)))
+                               return -EFAULT;
+
+                       mutex_lock(&vdev->vf_token->lock);
+                       uuid_copy(&vdev->vf_token->uuid, &uuid);
+                       mutex_unlock(&vdev->vf_token->lock);
+
+                       return 0;
+               default:
+                       return -ENOTTY;
+               }
        }
 
        return -ENOTTY;
@@ -1278,6 +1382,150 @@ static void vfio_pci_request(void *device_data, unsigned int count)
        mutex_unlock(&vdev->igate);
 }
 
+static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
+                                     bool vf_token, uuid_t *uuid)
+{
+       /*
+        * There's always some degree of trust or collaboration between SR-IOV
+        * PF and VFs, even if just that the PF hosts the SR-IOV capability and
+        * can disrupt VFs with a reset, but often the PF has more explicit
+        * access to deny service to the VF or access data passed through the
+        * VF.  We therefore require an opt-in via a shared VF token (UUID) to
+        * represent this trust.  This both prevents that a VF driver might
+        * assume the PF driver is a trusted, in-kernel driver, and also that
+        * a PF driver might be replaced with a rogue driver, unknown to in-use
+        * VF drivers.
+        *
+        * Therefore when presented with a VF, if the PF is a vfio device and
+        * it is bound to the vfio-pci driver, the user needs to provide a VF
+        * token to access the device, in the form of appending a vf_token to
+        * the device name, for example:
+        *
+        * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
+        *
+        * When presented with a PF which has VFs in use, the user must also
+        * provide the current VF token to prove collaboration with existing
+        * VF users.  If VFs are not in use, the VF token provided for the PF
+        * device will act to set the VF token.
+        *
+        * If the VF token is provided but unused, an error is generated.
+        *
+        * @vf_token tells whether the caller parsed a token at all; @uuid is
+        * only read when @vf_token is true.
+        *
+        * Returns 0 when access is permitted, -EACCES when a required token
+        * is missing or does not match, and -EINVAL when a token is supplied
+        * for a device that has no use for one.
+        */
+       if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
+               return 0; /* No VF token provided or required */
+
+       if (vdev->pdev->is_virtfn) {
+               struct vfio_device *pf_dev;
+               struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
+               bool match;
+
+               if (!pf_vdev) {
+                       if (!vf_token)
+                               return 0; /* PF is not vfio-pci, no VF token */
+
+                       pci_info_ratelimited(vdev->pdev,
+                               "VF token incorrectly provided, PF not bound to vfio-pci\n");
+                       return -EINVAL;
+               }
+
+               if (!vf_token) {
+                       vfio_device_put(pf_dev);
+                       pci_info_ratelimited(vdev->pdev,
+                               "VF token required to access device\n");
+                       return -EACCES;
+               }
+
+               mutex_lock(&pf_vdev->vf_token->lock);
+               match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
+               mutex_unlock(&pf_vdev->vf_token->lock);
+
+               vfio_device_put(pf_dev);
+
+               if (!match) {
+                       pci_info_ratelimited(vdev->pdev,
+                               "Incorrect VF token provided for device\n");
+                       return -EACCES;
+               }
+       } else if (vdev->vf_token) {
+               mutex_lock(&vdev->vf_token->lock);
+               if (vdev->vf_token->users) {
+                       if (!vf_token) {
+                               mutex_unlock(&vdev->vf_token->lock);
+                               pci_info_ratelimited(vdev->pdev,
+                                       "VF token required to access device\n");
+                               return -EACCES;
+                       }
+
+                       if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
+                               mutex_unlock(&vdev->vf_token->lock);
+                               pci_info_ratelimited(vdev->pdev,
+                                       "Incorrect VF token provided for device\n");
+                               return -EACCES;
+                       }
+               } else if (vf_token) {
+                       uuid_copy(&vdev->vf_token->uuid, uuid);
+               }
+
+               mutex_unlock(&vdev->vf_token->lock);
+       } else if (vf_token) {
+               pci_info_ratelimited(vdev->pdev,
+                       "VF token incorrectly provided, not a PF or VF\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+#define VF_TOKEN_ARG "vf_token="
+/*
+ * Match a user supplied device string @buf against the PCI name of the
+ * device behind @device_data.
+ *
+ * @buf must start with pci_name() of the device.  Anything after the name
+ * must begin with a space and consist of space separated options; the only
+ * option recognized here is VF_TOKEN_ARG followed by a UUID string, and it
+ * is accepted at most once.
+ *
+ * Returns 1 on a match and 0 when the name does not match.  Returns
+ * -EINVAL for an unknown or repeated option or a UUID shorter than
+ * UUID_STRING_LEN, the uuid_parse() error for an unparsable UUID, or the
+ * error reported by vfio_pci_validate_vf_token().
+ */
+static int vfio_pci_match(void *device_data, char *buf)
+{
+       struct vfio_pci_device *vdev = device_data;
+       bool vf_token = false;
+       uuid_t uuid;
+       int ret;
+
+       if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
+               return 0; /* No match */
+
+       if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
+               buf += strlen(pci_name(vdev->pdev));
+
+               if (*buf != ' ')
+                       return 0; /* No match: non-whitespace after name */
+
+               while (*buf) {
+                       if (*buf == ' ') {
+                               buf++;
+                               continue;
+                       }
+
+                       if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
+                                                 strlen(VF_TOKEN_ARG))) {
+                               buf += strlen(VF_TOKEN_ARG);
+
+                               if (strlen(buf) < UUID_STRING_LEN)
+                                       return -EINVAL;
+
+                               ret = uuid_parse(buf, &uuid);
+                               if (ret)
+                                       return ret;
+
+                               vf_token = true;
+                               buf += UUID_STRING_LEN;
+                       } else {
+                               /* Unknown/duplicate option */
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
+       if (ret)
+               return ret;
+
+       return 1; /* Match */
+}
+
 static const struct vfio_device_ops vfio_pci_ops = {
        .name           = "vfio-pci",
        .open           = vfio_pci_open,
@@ -1287,10 +1535,40 @@ static const struct vfio_device_ops vfio_pci_ops = {
        .write          = vfio_pci_write,
        .mmap           = vfio_pci_mmap,
        .request        = vfio_pci_request,
+       .match          = vfio_pci_match,
 };
 
 static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
 static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
+static struct pci_driver vfio_pci_driver;
+
+/*
+ * PCI bus notifier callback embedded in a vfio_pci_device (its ->nb).
+ *
+ * BUS_NOTIFY_ADD_DEVICE: when the added device is a VF whose PF is this
+ * vfio_pci_device, set the VF's driver_override to the vfio-pci ops name.
+ * BUS_NOTIFY_BOUND_DRIVER: when such a VF ends up bound to a driver other
+ * than vfio_pci_driver, log a warning against the PF.
+ *
+ * All other devices and actions are ignored.  Always returns 0.
+ */
+static int vfio_pci_bus_notifier(struct notifier_block *nb,
+                                unsigned long action, void *data)
+{
+       struct vfio_pci_device *vdev = container_of(nb,
+                                                   struct vfio_pci_device, nb);
+       struct device *dev = data;
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct pci_dev *physfn = pci_physfn(pdev);
+
+       if (action == BUS_NOTIFY_ADD_DEVICE &&
+           pdev->is_virtfn && physfn == vdev->pdev) {
+               pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
+                        pci_name(pdev));
+               pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
+                                                 vfio_pci_ops.name);
+               /*
+                * kasprintf() returns NULL on allocation failure, which
+                * would leave the VF without an override despite the
+                * message above.  Make that failure visible.
+                */
+               WARN_ON(!pdev->driver_override);
+       } else if (action == BUS_NOTIFY_BOUND_DRIVER &&
+                  pdev->is_virtfn && physfn == vdev->pdev) {
+               struct pci_driver *drv = pci_dev_driver(pdev);
+
+               if (drv && drv != &vfio_pci_driver)
+                       pci_warn(vdev->pdev,
+                                "VF %s bound to driver %s while PF bound to vfio-pci\n",
+                                pci_name(pdev), drv->name);
+       }
+
+       return 0;
+}
 
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -1302,12 +1580,12 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                return -EINVAL;
 
        /*
-        * Prevent binding to PFs with VFs enabled, this too easily allows
-        * userspace instance with VFs and PFs from the same device, which
-        * cannot work.  Disabling SR-IOV here would initiate removing the
-        * VFs, which would unbind the driver, which is prone to blocking
-        * if that VF is also in use by vfio-pci.  Just reject these PFs
-        * and let the user sort it out.
+        * Prevent binding to PFs with VFs enabled, the VFs might be in use
+        * by the host or other users.  We cannot capture the VFs if they
+        * already exist, nor can we track VF users.  Disabling SR-IOV here
+        * would initiate removing the VFs, which would unbind the driver,
+        * which is prone to blocking if that VF is also in use by vfio-pci.
+        * Just reject these PFs and let the user sort it out.
         */
        if (pci_num_vf(pdev)) {
                pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
@@ -1320,8 +1598,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
        if (!vdev) {
-               vfio_iommu_group_put(group, &pdev->dev);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out_group_put;
        }
 
        vdev->pdev = pdev;
@@ -1332,18 +1610,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        INIT_LIST_HEAD(&vdev->ioeventfds_list);
 
        ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
-       if (ret) {
-               vfio_iommu_group_put(group, &pdev->dev);
-               kfree(vdev);
-               return ret;
-       }
+       if (ret)
+               goto out_free;
 
        ret = vfio_pci_reflck_attach(vdev);
-       if (ret) {
-               vfio_del_group_dev(&pdev->dev);
-               vfio_iommu_group_put(group, &pdev->dev);
-               kfree(vdev);
-               return ret;
+       if (ret)
+               goto out_del_group_dev;
+
+       if (pdev->is_physfn) {
+               vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
+               if (!vdev->vf_token) {
+                       ret = -ENOMEM;
+                       goto out_reflck;
+               }
+
+               mutex_init(&vdev->vf_token->lock);
+               uuid_gen(&vdev->vf_token->uuid);
+
+               vdev->nb.notifier_call = vfio_pci_bus_notifier;
+               ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
+               if (ret)
+                       goto out_vf_token;
        }
 
        if (vfio_pci_is_vga(pdev)) {
@@ -1369,16 +1656,39 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        }
 
        return ret;
+
+out_vf_token:
+       kfree(vdev->vf_token);
+out_reflck:
+       vfio_pci_reflck_put(vdev->reflck);
+out_del_group_dev:
+       vfio_del_group_dev(&pdev->dev);
+out_free:
+       kfree(vdev);
+out_group_put:
+       vfio_iommu_group_put(group, &pdev->dev);
+       return ret;
 }
 
 static void vfio_pci_remove(struct pci_dev *pdev)
 {
        struct vfio_pci_device *vdev;
 
+       pci_disable_sriov(pdev);
+
        vdev = vfio_del_group_dev(&pdev->dev);
        if (!vdev)
                return;
 
+       if (vdev->vf_token) {
+               WARN_ON(vdev->vf_token->users);
+               mutex_destroy(&vdev->vf_token->lock);
+               kfree(vdev->vf_token);
+       }
+
+       if (vdev->nb.notifier_call)
+               bus_unregister_notifier(&pci_bus_type, &vdev->nb);
+
        vfio_pci_reflck_put(vdev->reflck);
 
        vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
@@ -1427,16 +1737,48 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
        return PCI_ERS_RESULT_CAN_RECOVER;
 }
 
+static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
+{
+       struct vfio_pci_device *vdev;
+       struct vfio_device *device;
+       int ret = 0;
+       /*
+        * Handler installed as vfio_pci_driver.sriov_configure: enables
+        * @nr_virtfn VFs on @pdev, or disables SR-IOV when @nr_virtfn is 0.
+        *
+        * Returns -ENOENT unless the enable_sriov module parameter is set,
+        * -ENODEV when @pdev has no vfio device or device data, the
+        * pci_enable_sriov() error if that call fails, and @nr_virtfn
+        * otherwise.
+        */
+       might_sleep();
+
+       if (!enable_sriov)
+               return -ENOENT;
+
+       device = vfio_device_get_from_dev(&pdev->dev);
+       if (!device)
+               return -ENODEV;
+
+       vdev = vfio_device_data(device);
+       if (!vdev) {
+               vfio_device_put(device);
+               return -ENODEV;
+       }
+
+       if (nr_virtfn == 0)
+               pci_disable_sriov(pdev);
+       else
+               ret = pci_enable_sriov(pdev, nr_virtfn);
+
+       vfio_device_put(device);
+
+       return ret < 0 ? ret : nr_virtfn;
+}
+
 static const struct pci_error_handlers vfio_err_handlers = {
        .error_detected = vfio_pci_aer_err_detected,
 };
 
 static struct pci_driver vfio_pci_driver = {
-       .name           = "vfio-pci",
-       .id_table       = NULL, /* only dynamic ids */
-       .probe          = vfio_pci_probe,
-       .remove         = vfio_pci_remove,
-       .err_handler    = &vfio_err_handlers,
+       .name                   = "vfio-pci",
+       .id_table               = NULL, /* only dynamic ids */
+       .probe                  = vfio_pci_probe,
+       .remove                 = vfio_pci_remove,
+       .sriov_configure        = vfio_pci_sriov_configure,
+       .err_handler            = &vfio_err_handlers,
 };
 
 static DEFINE_MUTEX(reflck_lock);