1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
5 * VFIO container (/dev/vfio/vfio)
7 #include <linux/file.h>
8 #include <linux/slab.h>
10 #include <linux/capability.h>
11 #include <linux/iommu.h>
12 #include <linux/miscdevice.h>
13 #include <linux/vfio.h>
14 #include <uapi/linux/vfio.h>
/*
 * Per-container state, one instance per open of the container device.
 * NOTE(review): code in this file also uses container->kref,
 * container->iommu_data and container->noiommu; their declarations are not
 * visible in this listing.
 */
18 struct vfio_container {
/* Groups attached to this container, linked through group->container_next. */
20 struct list_head group_list;
/* Read-held for extension queries; write-held when groups or the backend change. */
21 struct rw_semaphore group_lock;
/* Backend selected by vfio_ioctl_set_iommu(); reset to NULL when the last group detaches. */
22 struct vfio_iommu_driver *iommu_driver;
/*
 * NOTE(review): the declaration enclosing these two fields is not visible
 * in this listing. The rest of the file accesses them as
 * vfio.iommu_drivers_list and vfio.iommu_drivers_lock.
 */
/* Registered IOMMU backends, linked through vfio_iommu_driver.vfio_next. */
28 struct list_head iommu_drivers_list;
/* Held around every walk or update of iommu_drivers_list in this file. */
29 struct mutex iommu_drivers_lock;
/*
 * .open callback of the vfio-noiommu backend.
 * @arg: IOMMU type requested by the caller.
 *
 * Returns ERR_PTR(-EINVAL) unless @arg is VFIO_NOIOMMU_IOMMU, and
 * ERR_PTR(-EPERM) when the caller lacks CAP_SYS_RAWIO. The success return
 * value is not visible in this listing.
 */
32 static void *vfio_noiommu_open(unsigned long arg)
34 if (arg != VFIO_NOIOMMU_IOMMU)
35 return ERR_PTR(-EINVAL);
36 if (!capable(CAP_SYS_RAWIO))
37 return ERR_PTR(-EPERM);
/*
 * .release callback of the vfio-noiommu backend.
 * @iommu_data: backend instance data.
 * NOTE(review): the body is not visible in this listing.
 */
42 static void vfio_noiommu_release(void *iommu_data)
/*
 * .ioctl callback of the vfio-noiommu backend.
 * @iommu_data: backend instance data (not used by the visible code).
 * @cmd: ioctl command.
 * @arg: ioctl argument; for VFIO_CHECK_EXTENSION, the extension queried.
 *
 * For VFIO_CHECK_EXTENSION, returns 1 only when vfio_noiommu is set and
 * @arg is VFIO_NOIOMMU_IOMMU, otherwise 0. The result for any other
 * command is not visible in this listing.
 */
46 static long vfio_noiommu_ioctl(void *iommu_data,
47 unsigned int cmd, unsigned long arg)
49 if (cmd == VFIO_CHECK_EXTENSION)
/* Parsed as (vfio_noiommu && arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0. */
50 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
/*
 * .attach_group callback of the vfio-noiommu backend.
 * @iommu_data: backend instance data.
 * @iommu_group: group being attached.
 * @type: VFIO group type of the group being attached.
 * NOTE(review): the body is not visible in this listing.
 */
55 static int vfio_noiommu_attach_group(void *iommu_data,
56 struct iommu_group *iommu_group, enum vfio_group_type type)
/*
 * .detach_group callback of the vfio-noiommu backend.
 * @iommu_data: backend instance data.
 * @iommu_group: group being detached.
 * NOTE(review): the body is not visible in this listing.
 */
61 static void vfio_noiommu_detach_group(void *iommu_data,
62 struct iommu_group *iommu_group)
/*
 * Operations table of the built-in "vfio-noiommu" backend. Its address is
 * also used as an identity check in vfio_iommu_driver_allowed(), and it is
 * registered from vfio_container_init() when CONFIG_VFIO_NOIOMMU is enabled.
 */
66 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
67 .name = "vfio-noiommu",
69 .open = vfio_noiommu_open,
70 .release = vfio_noiommu_release,
71 .ioctl = vfio_noiommu_ioctl,
72 .attach_group = vfio_noiommu_attach_group,
73 .detach_group = vfio_noiommu_detach_group,
77 * Only noiommu containers can use vfio-noiommu and noiommu containers can only use vfio-noiommu.
/*
 * Decide whether @driver may serve as the backend of @container.
 * @container: container being configured or queried.
 * @driver: candidate backend.
 *
 * On the final return, the result is true exactly when container->noiommu
 * matches whether @driver is the vfio-noiommu backend.
 * NOTE(review): the statement guarded by the !IS_ENABLED() test is not
 * visible in this listing.
 */
80 static bool vfio_iommu_driver_allowed(struct vfio_container *container,
81 const struct vfio_iommu_driver *driver)
83 if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
/* Both sides are booleans: the noiommu flag versus "is this the noiommu ops table". */
85 return container->noiommu == (driver->ops == &vfio_noiommu_ops);
89 * IOMMU driver registration
/*
 * Add an IOMMU backend to the global registry.
 * @ops: operations table of the backend; also its identity in the registry.
 *
 * Allocates a registry entry and inserts it into vfio.iommu_drivers_list
 * under vfio.iommu_drivers_lock, after scanning for an entry that already
 * has the same @ops.
 * NOTE(review): the return values and the cleanup on the duplicate and
 * allocation-failure paths are not visible in this listing.
 */
91 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
93 struct vfio_iommu_driver *driver, *tmp;
/* Warns when exactly one of register_device/unregister_device is provided. */
95 if (WARN_ON(!ops->register_device != !ops->unregister_device))
98 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
104 mutex_lock(&vfio.iommu_drivers_lock);
106 /* Check for duplicates */
107 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
108 if (tmp->ops == ops) {
/* Duplicate found: the lock is dropped before leaving the function. */
109 mutex_unlock(&vfio.iommu_drivers_lock);
115 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
117 mutex_unlock(&vfio.iommu_drivers_lock);
121 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
/*
 * Remove an IOMMU backend from the global registry.
 * @ops: operations table that was passed to vfio_register_iommu_driver().
 *
 * Walks vfio.iommu_drivers_list under vfio.iommu_drivers_lock and unlinks
 * the entry whose ops pointer equals @ops. The lock is released on both the
 * found and the not-found paths.
 * NOTE(review): what happens to the unlinked entry afterwards is not
 * visible in this listing.
 */
123 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
125 struct vfio_iommu_driver *driver;
127 mutex_lock(&vfio.iommu_drivers_lock);
128 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
129 if (driver->ops == ops) {
130 list_del(&driver->vfio_next);
131 mutex_unlock(&vfio.iommu_drivers_lock);
/* Reached when no entry matched @ops. */
136 mutex_unlock(&vfio.iommu_drivers_lock);
138 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
141 * Container objects - containers are created when /dev/vfio/vfio is
142 * opened, but their lifecycle extends until the last user is done, so
143 * it's freed via kref. Must support container/group/device being
144 * closed in any order.
/*
 * kref release callback, passed to kref_put() by vfio_container_put().
 * @kref: the kref embedded in a struct vfio_container.
 * NOTE(review): only the container_of() recovery is visible; what is done
 * with the container afterwards is not shown in this listing.
 */
146 static void vfio_container_release(struct kref *kref)
148 struct vfio_container *container;
149 container = container_of(kref, struct vfio_container, kref);
/*
 * Take an additional reference on @container.
 * Within this file it is called when a group is attached.
 */
154 static void vfio_container_get(struct vfio_container *container)
156 kref_get(&container->kref);
/*
 * Drop a reference on @container. vfio_container_release() is supplied to
 * kref_put() as the release callback.
 */
159 static void vfio_container_put(struct vfio_container *container)
161 kref_put(&container->kref, vfio_container_release);
/*
 * Tell the container's backend about @device.
 * @device: device whose group is attached to a container; the code
 *          dereferences device->group->container without a NULL check.
 *
 * Does nothing when the container has no backend set or the backend has no
 * register_device op.
 */
164 void vfio_device_container_register(struct vfio_device *device)
166 struct vfio_iommu_driver *iommu_driver =
167 device->group->container->iommu_driver;
169 if (iommu_driver && iommu_driver->ops->register_device)
170 iommu_driver->ops->register_device(
171 device->group->container->iommu_data, device);
/*
 * Counterpart of vfio_device_container_register(): tell the container's
 * backend that @device is going away.
 * @device: device whose group is attached to a container; the code
 *          dereferences device->group->container without a NULL check.
 *
 * Does nothing when the container has no backend set or the backend has no
 * unregister_device op.
 */
174 void vfio_device_container_unregister(struct vfio_device *device)
176 struct vfio_iommu_driver *iommu_driver =
177 device->group->container->iommu_driver;
179 if (iommu_driver && iommu_driver->ops->unregister_device)
180 iommu_driver->ops->unregister_device(
181 device->group->container->iommu_data, device);
/*
 * Answer a VFIO_CHECK_EXTENSION query for @container.
 * The whole query runs with container->group_lock held for read.
 *
 * Two query paths are visible: one walks every registered backend under
 * vfio.iommu_drivers_lock and calls its ioctl with NULL instance data, the
 * other calls the ioctl of the backend already set on the container with
 * container->iommu_data.
 * NOTE(review): the return type, remaining parameters, the branch
 * conditions selecting between the paths and the return statement are not
 * visible in this listing.
 */
185 vfio_container_ioctl_check_extension(struct vfio_container *container,
188 struct vfio_iommu_driver *driver;
191 down_read(&container->group_lock);
193 driver = container->iommu_driver;
196 /* No base extensions yet */
199 * If no driver is set, poll all registered drivers for
200 * extensions and return the first positive result. If
201 * a driver is already set, further queries will be passed
202 * only to that driver.
205 mutex_lock(&vfio.iommu_drivers_lock);
206 list_for_each_entry(driver, &vfio.iommu_drivers_list,
/* Filter applies only once groups are attached; the action taken on a disallowed backend is not visible here. */
209 if (!list_empty(&container->group_list) &&
210 !vfio_iommu_driver_allowed(container,
/* A module reference brackets the call into the backend. */
213 if (!try_module_get(driver->ops->owner))
/* No backend instance exists on this path, hence the NULL data argument. */
216 ret = driver->ops->ioctl(NULL,
217 VFIO_CHECK_EXTENSION,
219 module_put(driver->ops->owner);
223 mutex_unlock(&vfio.iommu_drivers_lock);
/* Backend already chosen: query only that backend, with its instance data. */
225 ret = driver->ops->ioctl(container->iommu_data,
226 VFIO_CHECK_EXTENSION, arg);
229 up_read(&container->group_lock);
234 /* hold write lock on container->group_lock */
/*
 * Attach every group on container->group_list to a backend instance.
 * @container: container whose groups are attached.
 * @driver: backend whose attach_group/detach_group ops are used.
 *
 * The reverse walk calls detach_group on groups preceding the current one.
 * NOTE(review): it looks like the unwind for a failed attach, but the
 * guarding condition, the remaining parameter (used below as `data`) and
 * the return statements are not visible in this listing.
 */
235 static int __vfio_container_attach_groups(struct vfio_container *container,
236 struct vfio_iommu_driver *driver,
239 struct vfio_group *group;
242 list_for_each_entry(group, &container->group_list, container_next) {
243 ret = driver->ops->attach_group(data, group->iommu_group,
/* Continues backwards from the current position, so the current group itself is not detached. */
252 list_for_each_entry_continue_reverse(group, &container->group_list,
254 driver->ops->detach_group(data, group->iommu_group);
/*
 * Handle VFIO_SET_IOMMU: select and instantiate a backend for @container.
 * Runs with container->group_lock held for write, and walks the backend
 * registry under vfio.iommu_drivers_lock.
 *
 * For each registered backend that is allowed for this container and whose
 * module reference can be taken, the backend is asked via
 * VFIO_CHECK_EXTENSION whether it supports the requested type. It is then
 * opened and the container's groups are attached to the new instance.
 * Finally the backend and its instance data are stored in the container.
 * NOTE(review): the remaining parameter (used below as `arg`), the error
 * codes, the conditions guarding the failure paths and the loop
 * continue/break statements are not visible in this listing.
 */
260 static long vfio_ioctl_set_iommu(struct vfio_container *container,
263 struct vfio_iommu_driver *driver;
266 down_write(&container->group_lock);
269 * The container is designed to be an unprivileged interface while
270 * the group can be assigned to specific users. Therefore, only by
271 * adding a group to a container does the user get the privilege of
272 * enabling the iommu, which may allocate finite resources. There
273 * is no unset_iommu, but by removing all the groups from a container,
274 * the container is deprivileged and returns to an unset state.
/* Early exit when no group is attached or a backend is already set. */
276 if (list_empty(&container->group_list) || container->iommu_driver) {
277 up_write(&container->group_lock);
281 mutex_lock(&vfio.iommu_drivers_lock);
282 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
285 if (!vfio_iommu_driver_allowed(container, driver))
287 if (!try_module_get(driver->ops->owner))
291 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
292 * so test which iommu driver reported support for this
293 * extension and call open on them. We also pass them the
294 * magic, allowing a single driver to support multiple
295 * interfaces if they'd like.
/* Backend does not report support: give back the module reference. */
297 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
298 module_put(driver->ops->owner);
302 data = driver->ops->open(arg);
/* NOTE(review): presumably the open-failure path; its guarding condition is not visible. */
305 module_put(driver->ops->owner);
309 ret = __vfio_container_attach_groups(container, driver, data);
/* NOTE(review): presumably the attach-failure path (release the instance, drop the module reference); its guarding condition is not visible. */
311 driver->ops->release(data);
312 module_put(driver->ops->owner);
/* Record the chosen backend; vfio_group_detach_container() later calls module_put() when it clears these fields. */
316 container->iommu_driver = driver;
317 container->iommu_data = data;
321 mutex_unlock(&vfio.iommu_drivers_lock);
322 up_write(&container->group_lock);
/*
 * unlocked_ioctl handler of the container file.
 * @filep: container file; filep->private_data holds the vfio_container.
 * @cmd: ioctl command.
 * @arg: ioctl argument.
 *
 * VFIO_GET_API_VERSION yields VFIO_API_VERSION, VFIO_CHECK_EXTENSION is
 * delegated to vfio_container_ioctl_check_extension(), and
 * vfio_ioctl_set_iommu() handles the set-IOMMU request. A call to the
 * configured backend's ioctl is also visible; the existing comment
 * describes it as the passthrough for unrecognized ioctls.
 * NOTE(review): the switch statement, remaining case labels, local
 * declarations and the return are not visible in this listing.
 */
327 static long vfio_fops_unl_ioctl(struct file *filep,
328 unsigned int cmd, unsigned long arg)
330 struct vfio_container *container = filep->private_data;
331 struct vfio_iommu_driver *driver;
339 case VFIO_GET_API_VERSION:
340 ret = VFIO_API_VERSION;
342 case VFIO_CHECK_EXTENSION:
343 ret = vfio_container_ioctl_check_extension(container, arg);
346 ret = vfio_ioctl_set_iommu(container, arg);
/* Read the backend and its instance data, then forward only if a backend is set. */
349 driver = container->iommu_driver;
350 data = container->iommu_data;
352 if (driver) /* passthrough all unrecognized ioctls */
353 ret = driver->ops->ioctl(data, cmd, arg);
/*
 * open handler of the container file: allocate a zeroed vfio_container,
 * initialise its group list, lock and reference count, and store it in
 * filep->private_data.
 * @inode: not used by the visible code.
 * @filep: file being opened.
 * NOTE(review): the allocation-failure handling and the return value are
 * not visible in this listing.
 */
359 static int vfio_fops_open(struct inode *inode, struct file *filep)
361 struct vfio_container *container;
/* kzalloc leaves every field not initialised below at zero/NULL. */
363 container = kzalloc(sizeof(*container), GFP_KERNEL);
367 INIT_LIST_HEAD(&container->group_list);
368 init_rwsem(&container->group_lock);
369 kref_init(&container->kref);
371 filep->private_data = container;
/*
 * release handler of the container file.
 * @inode: not used by the visible code.
 * @filep: container file being closed.
 *
 * If a backend is set and has a notify op, it is told
 * VFIO_IOMMU_CONTAINER_CLOSE. filep->private_data is then cleared and one
 * container reference is dropped (presumably the one set up by kref_init()
 * in vfio_fops_open()).
 * NOTE(review): the return value is not visible in this listing.
 */
376 static int vfio_fops_release(struct inode *inode, struct file *filep)
378 struct vfio_container *container = filep->private_data;
379 struct vfio_iommu_driver *driver = container->iommu_driver;
381 if (driver && driver->ops->notify)
382 driver->ops->notify(container->iommu_data,
383 VFIO_IOMMU_CONTAINER_CLOSE);
385 filep->private_data = NULL;
387 vfio_container_put(container);
/*
 * File operations of the container device. The address of this table is
 * also what vfio_container_from_file() compares file->f_op against to
 * recognise a container fd.
 */
392 static const struct file_operations vfio_fops = {
393 .owner = THIS_MODULE,
394 .open = vfio_fops_open,
395 .release = vfio_fops_release,
396 .unlocked_ioctl = vfio_fops_unl_ioctl,
397 .compat_ioctl = compat_ptr_ioctl,
/*
 * Map a struct file to its vfio_container.
 * @file: candidate container file.
 *
 * The file is recognised as a container fd by comparing file->f_op with
 * vfio_fops; the container is then read from file->private_data, with a
 * warning if it is NULL.
 * NOTE(review): the return statements are not visible in this listing.
 */
400 struct vfio_container *vfio_container_from_file(struct file *file)
402 struct vfio_container *container;
404 /* Sanity check, is this really our fd? */
405 if (file->f_op != &vfio_fops)
408 container = file->private_data;
409 WARN_ON(!container); /* fget ensures we don't race vfio_release */
/*
 * Misc device descriptor registered by vfio_container_init().
 * NOTE(review): further initialisers of this structure are not visible in
 * this listing.
 */
413 static struct miscdevice vfio_dev = {
417 .nodename = "vfio/vfio",
/* Read and write permission bits for user, group and others. */
418 .mode = S_IRUGO | S_IWUGO,
/*
 * Attach @group to @container.
 * @container: target container.
 * @group: group to attach; the caller must hold group->group_lock.
 *
 * A VFIO_NO_IOMMU group is subject to a CAP_SYS_RAWIO check. The rest runs
 * with container->group_lock held for write. A non-empty container must
 * match the group's type (no-IOMMU versus real). For VFIO_IOMMU groups DMA
 * ownership of the IOMMU group is claimed, and released again on the
 * visible backend-attach failure path. On success the group is linked into
 * container->group_list with container_users set to 1, container->noiommu
 * is set from the group's type, and a container reference is taken.
 * NOTE(review): the error codes and several guarding conditions are not
 * visible in this listing.
 */
421 int vfio_container_attach_group(struct vfio_container *container,
422 struct vfio_group *group)
424 struct vfio_iommu_driver *driver;
427 lockdep_assert_held(&group->group_lock);
429 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
432 down_write(&container->group_lock);
434 /* Real groups and fake groups cannot mix */
435 if (!list_empty(&container->group_list) &&
436 container->noiommu != (group->type == VFIO_NO_IOMMU)) {
438 goto out_unlock_container;
441 if (group->type == VFIO_IOMMU) {
/* The group pointer is passed as the owner cookie. */
442 ret = iommu_group_claim_dma_owner(group->iommu_group, group);
444 goto out_unlock_container;
447 driver = container->iommu_driver;
449 ret = driver->ops->attach_group(container->iommu_data,
/* Undo the DMA ownership claim made above before bailing out. */
453 if (group->type == VFIO_IOMMU)
454 iommu_group_release_dma_owner(
456 goto out_unlock_container;
460 group->container = container;
461 group->container_users = 1;
462 container->noiommu = (group->type == VFIO_NO_IOMMU);
463 list_add(&group->container_next, &container->group_list);
465 /* Get a reference on the container and mark a user within the group */
466 vfio_container_get(container);
468 out_unlock_container:
469 up_write(&container->group_lock);
/*
 * Detach @group from its container; inverse of
 * vfio_container_attach_group().
 * @group: attached group; the caller must hold group->group_lock, and a
 *         warning is raised unless container_users is exactly 1.
 *
 * Under the container's write lock: the backend's detach_group is called,
 * DMA ownership is released for VFIO_IOMMU groups, and the group is
 * unlinked and its container fields cleared. If that empties the group
 * list, the backend instance is released, its module reference dropped and
 * the container's backend fields reset. The container reference taken at
 * attach time is dropped last, after the lock is released.
 * NOTE(review): the condition guarding the detach_group call is not
 * visible in this listing.
 */
473 void vfio_group_detach_container(struct vfio_group *group)
475 struct vfio_container *container = group->container;
476 struct vfio_iommu_driver *driver;
478 lockdep_assert_held(&group->group_lock);
479 WARN_ON(group->container_users != 1);
481 down_write(&container->group_lock);
483 driver = container->iommu_driver;
485 driver->ops->detach_group(container->iommu_data,
488 if (group->type == VFIO_IOMMU)
489 iommu_group_release_dma_owner(group->iommu_group);
491 group->container = NULL;
492 group->container_users = 0;
493 list_del(&group->container_next);
495 /* Detaching the last group deprivileges a container, remove iommu */
496 if (driver && list_empty(&container->group_list)) {
497 driver->ops->release(container->iommu_data);
498 module_put(driver->ops->owner);
499 container->iommu_driver = NULL;
500 container->iommu_data = NULL;
503 up_write(&container->group_lock);
/* Dropped after unlocking; the local pointer is used because group->container was cleared above. */
505 vfio_container_put(container);
/*
 * Register one more user of @group's container.
 * @group: group with a container assigned; the caller must hold
 *         group->group_lock.
 *
 * Checks that the container has a backend set, and that the caller has
 * CAP_SYS_RAWIO for VFIO_NO_IOMMU groups. On success a reference is taken
 * on group->opened_file and container_users is incremented.
 * NOTE(review): the error codes and the success return are not visible in
 * this listing.
 */
508 int vfio_group_use_container(struct vfio_group *group)
510 lockdep_assert_held(&group->group_lock);
513 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
514 * VFIO_SET_IOMMU hasn't been done yet.
516 if (!group->container->iommu_driver)
519 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
/* Paired with the fput() in vfio_group_unuse_container(). */
522 get_file(group->opened_file);
523 group->container_users++;
/*
 * Undo vfio_group_use_container(): decrement container_users and drop the
 * reference on group->opened_file.
 * @group: group in use; the caller must hold group->group_lock.
 */
527 void vfio_group_unuse_container(struct vfio_group *group)
529 lockdep_assert_held(&group->group_lock);
/* The attach itself accounts for one user, so a value of 1 or less means no outstanding use. */
531 WARN_ON(group->container_users <= 1);
532 group->container_users--;
533 fput(group->opened_file);
/*
 * Forward a page-pinning request to the backend of @device's container.
 * @device: device whose group is attached to a container.
 * @iova: starting I/O virtual address.
 * @npage: number of pages; checked against VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot: protection flags for the pinning.
 * @pages: array that receives the pinned pages.
 *
 * The request is refused when @npage exceeds the limit, and when no
 * backend is set or it has no pin_pages op. Otherwise the backend's result
 * is returned.
 * NOTE(review): the error codes and the tail of the argument list are not
 * visible in this listing.
 */
536 int vfio_device_container_pin_pages(struct vfio_device *device,
537 dma_addr_t iova, int npage,
538 int prot, struct page **pages)
540 struct vfio_container *container = device->group->container;
541 struct iommu_group *iommu_group = device->group->iommu_group;
542 struct vfio_iommu_driver *driver = container->iommu_driver;
544 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
547 if (unlikely(!driver || !driver->ops->pin_pages))
549 return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
/*
 * Forward an unpin request to the backend of @device's container.
 * @device: device whose group is attached to a container.
 * @iova: starting I/O virtual address.
 * @npage: number of pages; a warning is raised when it is <= 0 or exceeds
 *         VFIO_PIN_PAGES_MAX_ENTRIES.
 *
 * NOTE(review): unlike the pin path, no check that iommu_driver or its
 * unpin_pages op is non-NULL is visible here. Presumably callers only
 * unpin what was successfully pinned; confirm against callers.
 */
553 void vfio_device_container_unpin_pages(struct vfio_device *device,
554 dma_addr_t iova, int npage)
556 struct vfio_container *container = device->group->container;
558 if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
561 container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
/*
 * Forward a DMA read/write request to the backend of @device's container.
 * @device: device whose group is attached to a container.
 * @iova: I/O virtual address to access.
 * @data: buffer passed through to the backend.
 * @len: number of bytes.
 * @write: direction flag; not referenced by the visible lines (the tail of
 *         the backend call's argument list is not shown).
 *
 * The request is refused when no backend is set or it has no dma_rw op.
 * Otherwise the backend's result is returned.
 * NOTE(review): the error code is not visible in this listing.
 */
565 int vfio_device_container_dma_rw(struct vfio_device *device,
566 dma_addr_t iova, void *data,
567 size_t len, bool write)
569 struct vfio_container *container = device->group->container;
570 struct vfio_iommu_driver *driver = container->iommu_driver;
572 if (unlikely(!driver || !driver->ops->dma_rw))
574 return driver->ops->dma_rw(container->iommu_data, iova, data, len,
/*
 * Initialise container support: set up the backend registry (lock and
 * list), register the misc device, and register the built-in vfio-noiommu
 * backend when CONFIG_VFIO_NOIOMMU is enabled.
 * NOTE(review): the guarding conditions, labels and return statements are
 * not visible in this listing.
 */
578 int __init vfio_container_init(void)
582 mutex_init(&vfio.iommu_drivers_lock);
583 INIT_LIST_HEAD(&vfio.iommu_drivers_list);
585 ret = misc_register(&vfio_dev);
587 pr_err("vfio: misc device register failed\n");
591 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
592 ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
/* NOTE(review): looks like the unwind for a failed backend registration; its guard/label is not visible. */
599 misc_deregister(&vfio_dev);
/*
 * Tear down in the reverse order of vfio_container_init(): unregister the
 * vfio-noiommu backend (when CONFIG_VFIO_NOIOMMU is enabled), deregister
 * the misc device, then destroy the registry lock.
 */
603 void vfio_container_cleanup(void)
605 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
606 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
607 misc_deregister(&vfio_dev);
608 mutex_destroy(&vfio.iommu_drivers_lock);
/*
 * Module aliases keyed on the misc-device minor (VFIO_MINOR) and on the
 * device node name "vfio/vfio", presumably so the module can be loaded on
 * demand when the node is opened.
 */
611 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
612 MODULE_ALIAS("devname:vfio/vfio");