// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 */
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/iommu.h>
#include <linux/miscdevice.h>
#include <linux/vfio.h>
#include <uapi/linux/vfio.h>

#include "vfio.h"

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

static struct vfio {
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
} vfio;

#ifdef CONFIG_VFIO_NOIOMMU
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif
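
/*
 * Usage note (a sketch, not code in this file): no-IOMMU mode is opted
 * into at load time, e.g. with vfio.enable_unsafe_noiommu_mode=1 on the
 * kernel command line; groups for such devices then surface as
 * /dev/vfio/noiommu-$GROUP and using them taints the kernel.
 */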

static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};

/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static bool vfio_iommu_driver_allowed(struct vfio_container *container,
				      const struct vfio_iommu_driver *driver)
{
	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return true;
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
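
/*
 * A backend registers its ops at module init, along these lines (a sketch;
 * the "my_iommu" names are hypothetical, the pattern follows the type1
 * driver):
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 */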

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref. Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

void vfio_device_container_register(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->register_device)
		iommu_driver->ops->register_device(
			device->group->container->iommu_data, device);
}

void vfio_device_container_unregister(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
}

static long
vfio_container_ioctl_check_extension(struct vfio_container *container,
				     unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result. If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users. Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources. There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them. We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_container_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}
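
/*
 * These container ioctls implement the userspace setup flow described in
 * Documentation/driver-api/vfio.rst (a sketch, error handling omitted):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		// unknown API version
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		// no type1 backend available
 *
 *	// only succeeds once a group has been added to the container
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 */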

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};

struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

int vfio_container_attach_group(struct vfio_container *container,
				struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}
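
/*
 * This path is reached from the group side when userspace associates a
 * group with a container, roughly (group 26 is the example used in the
 * VFIO documentation):
 *
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 */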

void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held(&group->group_lock);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

int vfio_group_use_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/*
	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
	 * VFIO_SET_IOMMU hasn't been done yet.
	 */
	if (!group->container->iommu_driver)
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}

int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *iommu_group = device->group->iommu_group;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!driver || !driver->ops->pin_pages))
		return -ENOTTY;
	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
				      npage, prot, pages);
}

void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
						  npage);
}
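
/*
 * Backend device drivers don't call the pin/unpin helpers above directly;
 * they use the exported vfio_pin_pages()/vfio_unpin_pages() wrappers, e.g.
 * (a sketch; "my_vdev" is a hypothetical driver structure embedding a
 * struct vfio_device) to pin one page for emulated DMA:
 *
 *	struct page *page;
 *	int ret;
 *
 *	ret = vfio_pin_pages(&my_vdev->vdev, iova, 1,
 *			     IOMMU_READ | IOMMU_WRITE, &page);
 *	...
 *	vfio_unpin_pages(&my_vdev->vdev, iova, 1);
 */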

int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->dma_rw))
		return -ENOTTY;
	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
				   write);
}
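
/*
 * Likewise, dma_rw is reached through the exported vfio_dma_rw() helper,
 * e.g. (a sketch) to read len bytes of the address space mapped into the
 * container at iova:
 *
 *	ret = vfio_dma_rw(vdev, iova, buffer, len, false);
 */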

int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
		if (ret)
			goto err_misc;
	}
	return 0;

err_misc:
	misc_deregister(&vfio_dev);
	return ret;
}

void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}

MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");