/* Merge branch 'for-6.3/hid-bpf' into for-linus
 * [linux-2.6-microblaze.git] / drivers / vfio / container.c */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 */
7 #include <linux/file.h>
8 #include <linux/slab.h>
9 #include <linux/fs.h>
10 #include <linux/capability.h>
11 #include <linux/iommu.h>
12 #include <linux/miscdevice.h>
13 #include <linux/vfio.h>
14 #include <uapi/linux/vfio.h>
15
16 #include "vfio.h"
17
18 struct vfio_container {
19         struct kref                     kref;
20         struct list_head                group_list;
21         struct rw_semaphore             group_lock;
22         struct vfio_iommu_driver        *iommu_driver;
23         void                            *iommu_data;
24         bool                            noiommu;
25 };
26
27 static struct vfio {
28         struct list_head                iommu_drivers_list;
29         struct mutex                    iommu_drivers_lock;
30 } vfio;
31
32 #ifdef CONFIG_VFIO_NOIOMMU
33 bool vfio_noiommu __read_mostly;
34 module_param_named(enable_unsafe_noiommu_mode,
35                    vfio_noiommu, bool, S_IRUGO | S_IWUSR);
36 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
37 #endif
38
39 static void *vfio_noiommu_open(unsigned long arg)
40 {
41         if (arg != VFIO_NOIOMMU_IOMMU)
42                 return ERR_PTR(-EINVAL);
43         if (!capable(CAP_SYS_RAWIO))
44                 return ERR_PTR(-EPERM);
45
46         return NULL;
47 }
48
49 static void vfio_noiommu_release(void *iommu_data)
50 {
51 }
52
53 static long vfio_noiommu_ioctl(void *iommu_data,
54                                unsigned int cmd, unsigned long arg)
55 {
56         if (cmd == VFIO_CHECK_EXTENSION)
57                 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
58
59         return -ENOTTY;
60 }
61
62 static int vfio_noiommu_attach_group(void *iommu_data,
63                 struct iommu_group *iommu_group, enum vfio_group_type type)
64 {
65         return 0;
66 }
67
68 static void vfio_noiommu_detach_group(void *iommu_data,
69                                       struct iommu_group *iommu_group)
70 {
71 }
72
73 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
74         .name = "vfio-noiommu",
75         .owner = THIS_MODULE,
76         .open = vfio_noiommu_open,
77         .release = vfio_noiommu_release,
78         .ioctl = vfio_noiommu_ioctl,
79         .attach_group = vfio_noiommu_attach_group,
80         .detach_group = vfio_noiommu_detach_group,
81 };
82
83 /*
84  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
85  * use vfio-noiommu.
86  */
87 static bool vfio_iommu_driver_allowed(struct vfio_container *container,
88                                       const struct vfio_iommu_driver *driver)
89 {
90         if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
91                 return true;
92         return container->noiommu == (driver->ops == &vfio_noiommu_ops);
93 }
94
95 /*
96  * IOMMU driver registration
97  */
98 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
99 {
100         struct vfio_iommu_driver *driver, *tmp;
101
102         if (WARN_ON(!ops->register_device != !ops->unregister_device))
103                 return -EINVAL;
104
105         driver = kzalloc(sizeof(*driver), GFP_KERNEL);
106         if (!driver)
107                 return -ENOMEM;
108
109         driver->ops = ops;
110
111         mutex_lock(&vfio.iommu_drivers_lock);
112
113         /* Check for duplicates */
114         list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
115                 if (tmp->ops == ops) {
116                         mutex_unlock(&vfio.iommu_drivers_lock);
117                         kfree(driver);
118                         return -EINVAL;
119                 }
120         }
121
122         list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
123
124         mutex_unlock(&vfio.iommu_drivers_lock);
125
126         return 0;
127 }
128 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
129
130 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
131 {
132         struct vfio_iommu_driver *driver;
133
134         mutex_lock(&vfio.iommu_drivers_lock);
135         list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
136                 if (driver->ops == ops) {
137                         list_del(&driver->vfio_next);
138                         mutex_unlock(&vfio.iommu_drivers_lock);
139                         kfree(driver);
140                         return;
141                 }
142         }
143         mutex_unlock(&vfio.iommu_drivers_lock);
144 }
145 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
146
147 /*
148  * Container objects - containers are created when /dev/vfio/vfio is
149  * opened, but their lifecycle extends until the last user is done, so
150  * it's freed via kref.  Must support container/group/device being
151  * closed in any order.
152  */
153 static void vfio_container_release(struct kref *kref)
154 {
155         struct vfio_container *container;
156         container = container_of(kref, struct vfio_container, kref);
157
158         kfree(container);
159 }
160
161 static void vfio_container_get(struct vfio_container *container)
162 {
163         kref_get(&container->kref);
164 }
165
166 static void vfio_container_put(struct vfio_container *container)
167 {
168         kref_put(&container->kref, vfio_container_release);
169 }
170
171 void vfio_device_container_register(struct vfio_device *device)
172 {
173         struct vfio_iommu_driver *iommu_driver =
174                 device->group->container->iommu_driver;
175
176         if (iommu_driver && iommu_driver->ops->register_device)
177                 iommu_driver->ops->register_device(
178                         device->group->container->iommu_data, device);
179 }
180
181 void vfio_device_container_unregister(struct vfio_device *device)
182 {
183         struct vfio_iommu_driver *iommu_driver =
184                 device->group->container->iommu_driver;
185
186         if (iommu_driver && iommu_driver->ops->unregister_device)
187                 iommu_driver->ops->unregister_device(
188                         device->group->container->iommu_data, device);
189 }
190
191 static long
192 vfio_container_ioctl_check_extension(struct vfio_container *container,
193                                      unsigned long arg)
194 {
195         struct vfio_iommu_driver *driver;
196         long ret = 0;
197
198         down_read(&container->group_lock);
199
200         driver = container->iommu_driver;
201
202         switch (arg) {
203                 /* No base extensions yet */
204         default:
205                 /*
206                  * If no driver is set, poll all registered drivers for
207                  * extensions and return the first positive result.  If
208                  * a driver is already set, further queries will be passed
209                  * only to that driver.
210                  */
211                 if (!driver) {
212                         mutex_lock(&vfio.iommu_drivers_lock);
213                         list_for_each_entry(driver, &vfio.iommu_drivers_list,
214                                             vfio_next) {
215
216                                 if (!list_empty(&container->group_list) &&
217                                     !vfio_iommu_driver_allowed(container,
218                                                                driver))
219                                         continue;
220                                 if (!try_module_get(driver->ops->owner))
221                                         continue;
222
223                                 ret = driver->ops->ioctl(NULL,
224                                                          VFIO_CHECK_EXTENSION,
225                                                          arg);
226                                 module_put(driver->ops->owner);
227                                 if (ret > 0)
228                                         break;
229                         }
230                         mutex_unlock(&vfio.iommu_drivers_lock);
231                 } else
232                         ret = driver->ops->ioctl(container->iommu_data,
233                                                  VFIO_CHECK_EXTENSION, arg);
234         }
235
236         up_read(&container->group_lock);
237
238         return ret;
239 }
240
241 /* hold write lock on container->group_lock */
242 static int __vfio_container_attach_groups(struct vfio_container *container,
243                                           struct vfio_iommu_driver *driver,
244                                           void *data)
245 {
246         struct vfio_group *group;
247         int ret = -ENODEV;
248
249         list_for_each_entry(group, &container->group_list, container_next) {
250                 ret = driver->ops->attach_group(data, group->iommu_group,
251                                                 group->type);
252                 if (ret)
253                         goto unwind;
254         }
255
256         return ret;
257
258 unwind:
259         list_for_each_entry_continue_reverse(group, &container->group_list,
260                                              container_next) {
261                 driver->ops->detach_group(data, group->iommu_group);
262         }
263
264         return ret;
265 }
266
267 static long vfio_ioctl_set_iommu(struct vfio_container *container,
268                                  unsigned long arg)
269 {
270         struct vfio_iommu_driver *driver;
271         long ret = -ENODEV;
272
273         down_write(&container->group_lock);
274
275         /*
276          * The container is designed to be an unprivileged interface while
277          * the group can be assigned to specific users.  Therefore, only by
278          * adding a group to a container does the user get the privilege of
279          * enabling the iommu, which may allocate finite resources.  There
280          * is no unset_iommu, but by removing all the groups from a container,
281          * the container is deprivileged and returns to an unset state.
282          */
283         if (list_empty(&container->group_list) || container->iommu_driver) {
284                 up_write(&container->group_lock);
285                 return -EINVAL;
286         }
287
288         mutex_lock(&vfio.iommu_drivers_lock);
289         list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
290                 void *data;
291
292                 if (!vfio_iommu_driver_allowed(container, driver))
293                         continue;
294                 if (!try_module_get(driver->ops->owner))
295                         continue;
296
297                 /*
298                  * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
299                  * so test which iommu driver reported support for this
300                  * extension and call open on them.  We also pass them the
301                  * magic, allowing a single driver to support multiple
302                  * interfaces if they'd like.
303                  */
304                 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
305                         module_put(driver->ops->owner);
306                         continue;
307                 }
308
309                 data = driver->ops->open(arg);
310                 if (IS_ERR(data)) {
311                         ret = PTR_ERR(data);
312                         module_put(driver->ops->owner);
313                         continue;
314                 }
315
316                 ret = __vfio_container_attach_groups(container, driver, data);
317                 if (ret) {
318                         driver->ops->release(data);
319                         module_put(driver->ops->owner);
320                         continue;
321                 }
322
323                 container->iommu_driver = driver;
324                 container->iommu_data = data;
325                 break;
326         }
327
328         mutex_unlock(&vfio.iommu_drivers_lock);
329         up_write(&container->group_lock);
330
331         return ret;
332 }
333
334 static long vfio_fops_unl_ioctl(struct file *filep,
335                                 unsigned int cmd, unsigned long arg)
336 {
337         struct vfio_container *container = filep->private_data;
338         struct vfio_iommu_driver *driver;
339         void *data;
340         long ret = -EINVAL;
341
342         if (!container)
343                 return ret;
344
345         switch (cmd) {
346         case VFIO_GET_API_VERSION:
347                 ret = VFIO_API_VERSION;
348                 break;
349         case VFIO_CHECK_EXTENSION:
350                 ret = vfio_container_ioctl_check_extension(container, arg);
351                 break;
352         case VFIO_SET_IOMMU:
353                 ret = vfio_ioctl_set_iommu(container, arg);
354                 break;
355         default:
356                 driver = container->iommu_driver;
357                 data = container->iommu_data;
358
359                 if (driver) /* passthrough all unrecognized ioctls */
360                         ret = driver->ops->ioctl(data, cmd, arg);
361         }
362
363         return ret;
364 }
365
366 static int vfio_fops_open(struct inode *inode, struct file *filep)
367 {
368         struct vfio_container *container;
369
370         container = kzalloc(sizeof(*container), GFP_KERNEL);
371         if (!container)
372                 return -ENOMEM;
373
374         INIT_LIST_HEAD(&container->group_list);
375         init_rwsem(&container->group_lock);
376         kref_init(&container->kref);
377
378         filep->private_data = container;
379
380         return 0;
381 }
382
383 static int vfio_fops_release(struct inode *inode, struct file *filep)
384 {
385         struct vfio_container *container = filep->private_data;
386         struct vfio_iommu_driver *driver = container->iommu_driver;
387
388         if (driver && driver->ops->notify)
389                 driver->ops->notify(container->iommu_data,
390                                     VFIO_IOMMU_CONTAINER_CLOSE);
391
392         filep->private_data = NULL;
393
394         vfio_container_put(container);
395
396         return 0;
397 }
398
399 static const struct file_operations vfio_fops = {
400         .owner          = THIS_MODULE,
401         .open           = vfio_fops_open,
402         .release        = vfio_fops_release,
403         .unlocked_ioctl = vfio_fops_unl_ioctl,
404         .compat_ioctl   = compat_ptr_ioctl,
405 };
406
407 struct vfio_container *vfio_container_from_file(struct file *file)
408 {
409         struct vfio_container *container;
410
411         /* Sanity check, is this really our fd? */
412         if (file->f_op != &vfio_fops)
413                 return NULL;
414
415         container = file->private_data;
416         WARN_ON(!container); /* fget ensures we don't race vfio_release */
417         return container;
418 }
419
420 static struct miscdevice vfio_dev = {
421         .minor = VFIO_MINOR,
422         .name = "vfio",
423         .fops = &vfio_fops,
424         .nodename = "vfio/vfio",
425         .mode = S_IRUGO | S_IWUGO,
426 };
427
428 int vfio_container_attach_group(struct vfio_container *container,
429                                 struct vfio_group *group)
430 {
431         struct vfio_iommu_driver *driver;
432         int ret = 0;
433
434         lockdep_assert_held(&group->group_lock);
435
436         if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
437                 return -EPERM;
438
439         down_write(&container->group_lock);
440
441         /* Real groups and fake groups cannot mix */
442         if (!list_empty(&container->group_list) &&
443             container->noiommu != (group->type == VFIO_NO_IOMMU)) {
444                 ret = -EPERM;
445                 goto out_unlock_container;
446         }
447
448         if (group->type == VFIO_IOMMU) {
449                 ret = iommu_group_claim_dma_owner(group->iommu_group, group);
450                 if (ret)
451                         goto out_unlock_container;
452         }
453
454         driver = container->iommu_driver;
455         if (driver) {
456                 ret = driver->ops->attach_group(container->iommu_data,
457                                                 group->iommu_group,
458                                                 group->type);
459                 if (ret) {
460                         if (group->type == VFIO_IOMMU)
461                                 iommu_group_release_dma_owner(
462                                         group->iommu_group);
463                         goto out_unlock_container;
464                 }
465         }
466
467         group->container = container;
468         group->container_users = 1;
469         container->noiommu = (group->type == VFIO_NO_IOMMU);
470         list_add(&group->container_next, &container->group_list);
471
472         /* Get a reference on the container and mark a user within the group */
473         vfio_container_get(container);
474
475 out_unlock_container:
476         up_write(&container->group_lock);
477         return ret;
478 }
479
480 void vfio_group_detach_container(struct vfio_group *group)
481 {
482         struct vfio_container *container = group->container;
483         struct vfio_iommu_driver *driver;
484
485         lockdep_assert_held(&group->group_lock);
486         WARN_ON(group->container_users != 1);
487
488         down_write(&container->group_lock);
489
490         driver = container->iommu_driver;
491         if (driver)
492                 driver->ops->detach_group(container->iommu_data,
493                                           group->iommu_group);
494
495         if (group->type == VFIO_IOMMU)
496                 iommu_group_release_dma_owner(group->iommu_group);
497
498         group->container = NULL;
499         group->container_users = 0;
500         list_del(&group->container_next);
501
502         /* Detaching the last group deprivileges a container, remove iommu */
503         if (driver && list_empty(&container->group_list)) {
504                 driver->ops->release(container->iommu_data);
505                 module_put(driver->ops->owner);
506                 container->iommu_driver = NULL;
507                 container->iommu_data = NULL;
508         }
509
510         up_write(&container->group_lock);
511
512         vfio_container_put(container);
513 }
514
515 int vfio_group_use_container(struct vfio_group *group)
516 {
517         lockdep_assert_held(&group->group_lock);
518
519         /*
520          * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
521          * VFIO_SET_IOMMU hasn't been done yet.
522          */
523         if (!group->container->iommu_driver)
524                 return -EINVAL;
525
526         if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
527                 return -EPERM;
528
529         get_file(group->opened_file);
530         group->container_users++;
531         return 0;
532 }
533
534 void vfio_group_unuse_container(struct vfio_group *group)
535 {
536         lockdep_assert_held(&group->group_lock);
537
538         WARN_ON(group->container_users <= 1);
539         group->container_users--;
540         fput(group->opened_file);
541 }
542
543 int vfio_device_container_pin_pages(struct vfio_device *device,
544                                     dma_addr_t iova, int npage,
545                                     int prot, struct page **pages)
546 {
547         struct vfio_container *container = device->group->container;
548         struct iommu_group *iommu_group = device->group->iommu_group;
549         struct vfio_iommu_driver *driver = container->iommu_driver;
550
551         if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
552                 return -E2BIG;
553
554         if (unlikely(!driver || !driver->ops->pin_pages))
555                 return -ENOTTY;
556         return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
557                                       npage, prot, pages);
558 }
559
560 void vfio_device_container_unpin_pages(struct vfio_device *device,
561                                        dma_addr_t iova, int npage)
562 {
563         struct vfio_container *container = device->group->container;
564
565         if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
566                 return;
567
568         container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
569                                                   npage);
570 }
571
572 int vfio_device_container_dma_rw(struct vfio_device *device,
573                                  dma_addr_t iova, void *data,
574                                  size_t len, bool write)
575 {
576         struct vfio_container *container = device->group->container;
577         struct vfio_iommu_driver *driver = container->iommu_driver;
578
579         if (unlikely(!driver || !driver->ops->dma_rw))
580                 return -ENOTTY;
581         return driver->ops->dma_rw(container->iommu_data, iova, data, len,
582                                    write);
583 }
584
585 int __init vfio_container_init(void)
586 {
587         int ret;
588
589         mutex_init(&vfio.iommu_drivers_lock);
590         INIT_LIST_HEAD(&vfio.iommu_drivers_list);
591
592         ret = misc_register(&vfio_dev);
593         if (ret) {
594                 pr_err("vfio: misc device register failed\n");
595                 return ret;
596         }
597
598         if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
599                 ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
600                 if (ret)
601                         goto err_misc;
602         }
603         return 0;
604
605 err_misc:
606         misc_deregister(&vfio_dev);
607         return ret;
608 }
609
610 void vfio_container_cleanup(void)
611 {
612         if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
613                 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
614         misc_deregister(&vfio_dev);
615         mutex_destroy(&vfio.iommu_drivers_lock);
616 }
617
618 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
619 MODULE_ALIAS("devname:vfio/vfio");