vfio: revert "iommu driver notify callback"
[linux-2.6-microblaze.git] / drivers / vfio / container.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
4  *
5  * VFIO container (/dev/vfio/vfio)
6  */
7 #include <linux/file.h>
8 #include <linux/slab.h>
9 #include <linux/fs.h>
10 #include <linux/capability.h>
11 #include <linux/iommu.h>
12 #include <linux/miscdevice.h>
13 #include <linux/vfio.h>
14 #include <uapi/linux/vfio.h>
15
16 #include "vfio.h"
17
/*
 * A container is created per open of /dev/vfio/vfio and collects one or
 * more groups sharing a single IOMMU context.  Freed by kref when the
 * last user (fd or group) drops its reference.
 */
struct vfio_container {
	struct kref			kref;		/* lifetime; see vfio_container_release() */
	struct list_head		group_list;	/* vfio_group via container_next */
	struct rw_semaphore		group_lock;	/* protects group_list/iommu_* */
	struct vfio_iommu_driver	*iommu_driver;	/* set by VFIO_SET_IOMMU */
	void				*iommu_data;	/* cookie from driver->ops->open() */
	bool				noiommu;	/* container holds no-iommu groups */
};
26
/* Module-global state: the registry of IOMMU backend drivers. */
static struct vfio {
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;	/* protects the list */
} vfio;
31
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Opt-in switch for the unsafe no-IOMMU mode; use is additionally gated
 * at runtime by CAP_SYS_RAWIO checks (see vfio_noiommu_open() and
 * vfio_container_attach_group()).
 */
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif
38
39 static void *vfio_noiommu_open(unsigned long arg)
40 {
41         if (arg != VFIO_NOIOMMU_IOMMU)
42                 return ERR_PTR(-EINVAL);
43         if (!capable(CAP_SYS_RAWIO))
44                 return ERR_PTR(-EPERM);
45
46         return NULL;
47 }
48
/* Nothing to free: noiommu open() handed out a NULL iommu_data. */
static void vfio_noiommu_release(void *iommu_data)
{
}
52
53 static long vfio_noiommu_ioctl(void *iommu_data,
54                                unsigned int cmd, unsigned long arg)
55 {
56         if (cmd == VFIO_CHECK_EXTENSION)
57                 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
58
59         return -ENOTTY;
60 }
61
/* No IOMMU context exists, so attaching a group is a successful no-op. */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
67
/* Counterpart to the no-op attach: nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
72
/*
 * Minimal backend for no-iommu mode: no DMA translation and no page
 * tracking; every callback is a stub.
 */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
82
83 /*
84  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
85  * use vfio-noiommu.
86  */
87 static bool vfio_iommu_driver_allowed(struct vfio_container *container,
88                                       const struct vfio_iommu_driver *driver)
89 {
90         if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
91                 return true;
92         return container->noiommu == (driver->ops == &vfio_noiommu_ops);
93 }
94
95 /*
96  * IOMMU driver registration
97  */
98 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
99 {
100         struct vfio_iommu_driver *driver, *tmp;
101
102         if (WARN_ON(!ops->register_device != !ops->unregister_device))
103                 return -EINVAL;
104
105         driver = kzalloc(sizeof(*driver), GFP_KERNEL);
106         if (!driver)
107                 return -ENOMEM;
108
109         driver->ops = ops;
110
111         mutex_lock(&vfio.iommu_drivers_lock);
112
113         /* Check for duplicates */
114         list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
115                 if (tmp->ops == ops) {
116                         mutex_unlock(&vfio.iommu_drivers_lock);
117                         kfree(driver);
118                         return -EINVAL;
119                 }
120         }
121
122         list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
123
124         mutex_unlock(&vfio.iommu_drivers_lock);
125
126         return 0;
127 }
128 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
129
130 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
131 {
132         struct vfio_iommu_driver *driver;
133
134         mutex_lock(&vfio.iommu_drivers_lock);
135         list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
136                 if (driver->ops == ops) {
137                         list_del(&driver->vfio_next);
138                         mutex_unlock(&vfio.iommu_drivers_lock);
139                         kfree(driver);
140                         return;
141                 }
142         }
143         mutex_unlock(&vfio.iommu_drivers_lock);
144 }
145 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
146
147 /*
148  * Container objects - containers are created when /dev/vfio/vfio is
149  * opened, but their lifecycle extends until the last user is done, so
150  * it's freed via kref.  Must support container/group/device being
151  * closed in any order.
152  */
153 static void vfio_container_release(struct kref *kref)
154 {
155         struct vfio_container *container;
156         container = container_of(kref, struct vfio_container, kref);
157
158         kfree(container);
159 }
160
/* Take an additional reference on the container. */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
165
/* Drop a reference; frees the container when it was the last one. */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
170
171 void vfio_device_container_register(struct vfio_device *device)
172 {
173         struct vfio_iommu_driver *iommu_driver =
174                 device->group->container->iommu_driver;
175
176         if (iommu_driver && iommu_driver->ops->register_device)
177                 iommu_driver->ops->register_device(
178                         device->group->container->iommu_data, device);
179 }
180
181 void vfio_device_container_unregister(struct vfio_device *device)
182 {
183         struct vfio_iommu_driver *iommu_driver =
184                 device->group->container->iommu_driver;
185
186         if (iommu_driver && iommu_driver->ops->unregister_device)
187                 iommu_driver->ops->unregister_device(
188                         device->group->container->iommu_data, device);
189 }
190
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.
 *
 * group_lock is taken shared so the group membership — and hence the
 * noiommu restriction on which drivers may answer — stays stable while
 * the registered drivers are polled.
 */
static long
vfio_container_ioctl_check_extension(struct vfio_container *container,
				     unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/*
				 * Once groups are attached, only drivers
				 * compatible with the container's noiommu
				 * setting may answer.
				 */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
240
/*
 * Attach every group in the container to @driver.  All-or-nothing: if
 * any attach fails, the groups already attached are detached again in
 * reverse order and the error is returned.  Returns -ENODEV when the
 * group list is empty.
 *
 * hold write lock on container->group_lock
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* walk back over the groups attached before the failure */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
266
/*
 * VFIO_SET_IOMMU: bind an IOMMU backend to the container and attach all
 * of its groups.  Registered drivers are tried in turn, skipping any
 * that don't match the container's noiommu mode, don't claim the
 * requested extension magic, or fail open/attach; the first driver to
 * fully succeed is installed (holding its module reference until the
 * last group detaches).  Returns -ENODEV when no driver qualifies.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
333
334 static long vfio_fops_unl_ioctl(struct file *filep,
335                                 unsigned int cmd, unsigned long arg)
336 {
337         struct vfio_container *container = filep->private_data;
338         struct vfio_iommu_driver *driver;
339         void *data;
340         long ret = -EINVAL;
341
342         if (!container)
343                 return ret;
344
345         switch (cmd) {
346         case VFIO_GET_API_VERSION:
347                 ret = VFIO_API_VERSION;
348                 break;
349         case VFIO_CHECK_EXTENSION:
350                 ret = vfio_container_ioctl_check_extension(container, arg);
351                 break;
352         case VFIO_SET_IOMMU:
353                 ret = vfio_ioctl_set_iommu(container, arg);
354                 break;
355         default:
356                 driver = container->iommu_driver;
357                 data = container->iommu_data;
358
359                 if (driver) /* passthrough all unrecognized ioctls */
360                         ret = driver->ops->ioctl(data, cmd, arg);
361         }
362
363         return ret;
364 }
365
366 static int vfio_fops_open(struct inode *inode, struct file *filep)
367 {
368         struct vfio_container *container;
369
370         container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
371         if (!container)
372                 return -ENOMEM;
373
374         INIT_LIST_HEAD(&container->group_list);
375         init_rwsem(&container->group_lock);
376         kref_init(&container->kref);
377
378         filep->private_data = container;
379
380         return 0;
381 }
382
383 static int vfio_fops_release(struct inode *inode, struct file *filep)
384 {
385         struct vfio_container *container = filep->private_data;
386
387         filep->private_data = NULL;
388
389         vfio_container_put(container);
390
391         return 0;
392 }
393
/* File operations for the /dev/vfio/vfio container chardev. */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
401
402 struct vfio_container *vfio_container_from_file(struct file *file)
403 {
404         struct vfio_container *container;
405
406         /* Sanity check, is this really our fd? */
407         if (file->f_op != &vfio_fops)
408                 return NULL;
409
410         container = file->private_data;
411         WARN_ON(!container); /* fget ensures we don't race vfio_release */
412         return container;
413 }
414
/*
 * /dev/vfio/vfio misc device.  World read/write is intentional: the
 * container fd itself is unprivileged; privilege is conferred by
 * attaching groups (see comment in vfio_ioctl_set_iommu()).
 */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
422
/*
 * Add a group to a container (VFIO_GROUP_SET_CONTAINER path).  Claims
 * IOMMU DMA ownership for real groups and, when the container already
 * has an IOMMU driver bound, attaches the group to it.  On success the
 * group records one container user and holds a container reference.
 * Caller holds group->group_lock.
 */
int vfio_container_attach_group(struct vfio_container *container,
				struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* undo the ownership claim taken above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}
474
/*
 * Undo vfio_container_attach_group(): detach the group from the IOMMU
 * driver, release DMA ownership, unlink it, and drop the container
 * reference.  When the last group leaves, the bound IOMMU driver is
 * released so the container reverts to the unset, unprivileged state.
 * Caller holds group->group_lock with exactly one container user left.
 */
void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held(&group->group_lock);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}
509
510 int vfio_group_use_container(struct vfio_group *group)
511 {
512         lockdep_assert_held(&group->group_lock);
513
514         /*
515          * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
516          * VFIO_SET_IOMMU hasn't been done yet.
517          */
518         if (!group->container->iommu_driver)
519                 return -EINVAL;
520
521         if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
522                 return -EPERM;
523
524         get_file(group->opened_file);
525         group->container_users++;
526         return 0;
527 }
528
/*
 * Drop a user added by vfio_group_use_container() and unpin the opened
 * file.  The count must stay above the baseline of 1 held by the
 * group/container attachment itself (warn, but proceed, otherwise).
 */
void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}
537
538 int vfio_device_container_pin_pages(struct vfio_device *device,
539                                     dma_addr_t iova, int npage,
540                                     int prot, struct page **pages)
541 {
542         struct vfio_container *container = device->group->container;
543         struct iommu_group *iommu_group = device->group->iommu_group;
544         struct vfio_iommu_driver *driver = container->iommu_driver;
545
546         if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
547                 return -E2BIG;
548
549         if (unlikely(!driver || !driver->ops->pin_pages))
550                 return -ENOTTY;
551         return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
552                                       npage, prot, pages);
553 }
554
/*
 * Unpin pages previously pinned via vfio_device_container_pin_pages().
 * NOTE(review): unlike pin, this dereferences iommu_driver without a
 * NULL check — callers are expected to unpin only what they pinned
 * while the driver was bound; confirm against callers.
 */
void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
						  npage);
}
566
567 int vfio_device_container_dma_rw(struct vfio_device *device,
568                                  dma_addr_t iova, void *data,
569                                  size_t len, bool write)
570 {
571         struct vfio_container *container = device->group->container;
572         struct vfio_iommu_driver *driver = container->iommu_driver;
573
574         if (unlikely(!driver || !driver->ops->dma_rw))
575                 return -ENOTTY;
576         return driver->ops->dma_rw(container->iommu_data, iova, data, len,
577                                    write);
578 }
579
/*
 * Module init for the container core: set up the driver registry,
 * register the /dev/vfio/vfio misc device, and optionally register the
 * built-in noiommu backend.  On failure everything already registered
 * is rolled back.
 */
int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
		if (ret)
			goto err_misc;
	}
	return 0;

err_misc:
	misc_deregister(&vfio_dev);
	return ret;
}
604
/* Tear down in reverse order of vfio_container_init(). */
void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}
612
613 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
614 MODULE_ALIAS("devname:vfio/vfio");