Merge tag 'i2c-for-6.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa...
[linux-2.6-microblaze.git] / drivers / vfio / container.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
4  *
5  * VFIO container (/dev/vfio/vfio)
6  */
7 #include <linux/file.h>
8 #include <linux/slab.h>
9 #include <linux/fs.h>
10 #include <linux/capability.h>
11 #include <linux/iommu.h>
12 #include <linux/miscdevice.h>
13 #include <linux/vfio.h>
14 #include <uapi/linux/vfio.h>
15
16 #include "vfio.h"
17
18 struct vfio_container {
19         struct kref                     kref;
20         struct list_head                group_list;
21         struct rw_semaphore             group_lock;
22         struct vfio_iommu_driver        *iommu_driver;
23         void                            *iommu_data;
24         bool                            noiommu;
25 };
26
27 static struct vfio {
28         struct list_head                iommu_drivers_list;
29         struct mutex                    iommu_drivers_lock;
30 } vfio;
31
32 static void *vfio_noiommu_open(unsigned long arg)
33 {
34         if (arg != VFIO_NOIOMMU_IOMMU)
35                 return ERR_PTR(-EINVAL);
36         if (!capable(CAP_SYS_RAWIO))
37                 return ERR_PTR(-EPERM);
38
39         return NULL;
40 }
41
/* ->release() for vfio-noiommu: ->open() hands back NULL, nothing to free. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* intentionally empty */
}
45
46 static long vfio_noiommu_ioctl(void *iommu_data,
47                                unsigned int cmd, unsigned long arg)
48 {
49         if (cmd == VFIO_CHECK_EXTENSION)
50                 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
51
52         return -ENOTTY;
53 }
54
55 static int vfio_noiommu_attach_group(void *iommu_data,
56                 struct iommu_group *iommu_group, enum vfio_group_type type)
57 {
58         return 0;
59 }
60
/* ->detach_group() for vfio-noiommu: attach did nothing, so neither does this. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* intentionally empty */
}
65
66 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
67         .name = "vfio-noiommu",
68         .owner = THIS_MODULE,
69         .open = vfio_noiommu_open,
70         .release = vfio_noiommu_release,
71         .ioctl = vfio_noiommu_ioctl,
72         .attach_group = vfio_noiommu_attach_group,
73         .detach_group = vfio_noiommu_detach_group,
74 };
75
76 /*
77  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
78  * use vfio-noiommu.
79  */
80 static bool vfio_iommu_driver_allowed(struct vfio_container *container,
81                                       const struct vfio_iommu_driver *driver)
82 {
83         if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
84                 return true;
85         return container->noiommu == (driver->ops == &vfio_noiommu_ops);
86 }
87
88 /*
89  * IOMMU driver registration
90  */
91 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
92 {
93         struct vfio_iommu_driver *driver, *tmp;
94
95         if (WARN_ON(!ops->register_device != !ops->unregister_device))
96                 return -EINVAL;
97
98         driver = kzalloc(sizeof(*driver), GFP_KERNEL);
99         if (!driver)
100                 return -ENOMEM;
101
102         driver->ops = ops;
103
104         mutex_lock(&vfio.iommu_drivers_lock);
105
106         /* Check for duplicates */
107         list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
108                 if (tmp->ops == ops) {
109                         mutex_unlock(&vfio.iommu_drivers_lock);
110                         kfree(driver);
111                         return -EINVAL;
112                 }
113         }
114
115         list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
116
117         mutex_unlock(&vfio.iommu_drivers_lock);
118
119         return 0;
120 }
121 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
122
123 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
124 {
125         struct vfio_iommu_driver *driver;
126
127         mutex_lock(&vfio.iommu_drivers_lock);
128         list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
129                 if (driver->ops == ops) {
130                         list_del(&driver->vfio_next);
131                         mutex_unlock(&vfio.iommu_drivers_lock);
132                         kfree(driver);
133                         return;
134                 }
135         }
136         mutex_unlock(&vfio.iommu_drivers_lock);
137 }
138 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
139
140 /*
141  * Container objects - containers are created when /dev/vfio/vfio is
142  * opened, but their lifecycle extends until the last user is done, so
143  * it's freed via kref.  Must support container/group/device being
144  * closed in any order.
145  */
146 static void vfio_container_release(struct kref *kref)
147 {
148         struct vfio_container *container;
149         container = container_of(kref, struct vfio_container, kref);
150
151         kfree(container);
152 }
153
154 static void vfio_container_get(struct vfio_container *container)
155 {
156         kref_get(&container->kref);
157 }
158
159 static void vfio_container_put(struct vfio_container *container)
160 {
161         kref_put(&container->kref, vfio_container_release);
162 }
163
164 void vfio_device_container_register(struct vfio_device *device)
165 {
166         struct vfio_iommu_driver *iommu_driver =
167                 device->group->container->iommu_driver;
168
169         if (iommu_driver && iommu_driver->ops->register_device)
170                 iommu_driver->ops->register_device(
171                         device->group->container->iommu_data, device);
172 }
173
174 void vfio_device_container_unregister(struct vfio_device *device)
175 {
176         struct vfio_iommu_driver *iommu_driver =
177                 device->group->container->iommu_driver;
178
179         if (iommu_driver && iommu_driver->ops->unregister_device)
180                 iommu_driver->ops->unregister_device(
181                         device->group->container->iommu_data, device);
182 }
183
184 static long
185 vfio_container_ioctl_check_extension(struct vfio_container *container,
186                                      unsigned long arg)
187 {
188         struct vfio_iommu_driver *driver;
189         long ret = 0;
190
191         down_read(&container->group_lock);
192
193         driver = container->iommu_driver;
194
195         switch (arg) {
196                 /* No base extensions yet */
197         default:
198                 /*
199                  * If no driver is set, poll all registered drivers for
200                  * extensions and return the first positive result.  If
201                  * a driver is already set, further queries will be passed
202                  * only to that driver.
203                  */
204                 if (!driver) {
205                         mutex_lock(&vfio.iommu_drivers_lock);
206                         list_for_each_entry(driver, &vfio.iommu_drivers_list,
207                                             vfio_next) {
208
209                                 if (!list_empty(&container->group_list) &&
210                                     !vfio_iommu_driver_allowed(container,
211                                                                driver))
212                                         continue;
213                                 if (!try_module_get(driver->ops->owner))
214                                         continue;
215
216                                 ret = driver->ops->ioctl(NULL,
217                                                          VFIO_CHECK_EXTENSION,
218                                                          arg);
219                                 module_put(driver->ops->owner);
220                                 if (ret > 0)
221                                         break;
222                         }
223                         mutex_unlock(&vfio.iommu_drivers_lock);
224                 } else
225                         ret = driver->ops->ioctl(container->iommu_data,
226                                                  VFIO_CHECK_EXTENSION, arg);
227         }
228
229         up_read(&container->group_lock);
230
231         return ret;
232 }
233
234 /* hold write lock on container->group_lock */
235 static int __vfio_container_attach_groups(struct vfio_container *container,
236                                           struct vfio_iommu_driver *driver,
237                                           void *data)
238 {
239         struct vfio_group *group;
240         int ret = -ENODEV;
241
242         list_for_each_entry(group, &container->group_list, container_next) {
243                 ret = driver->ops->attach_group(data, group->iommu_group,
244                                                 group->type);
245                 if (ret)
246                         goto unwind;
247         }
248
249         return ret;
250
251 unwind:
252         list_for_each_entry_continue_reverse(group, &container->group_list,
253                                              container_next) {
254                 driver->ops->detach_group(data, group->iommu_group);
255         }
256
257         return ret;
258 }
259
260 static long vfio_ioctl_set_iommu(struct vfio_container *container,
261                                  unsigned long arg)
262 {
263         struct vfio_iommu_driver *driver;
264         long ret = -ENODEV;
265
266         down_write(&container->group_lock);
267
268         /*
269          * The container is designed to be an unprivileged interface while
270          * the group can be assigned to specific users.  Therefore, only by
271          * adding a group to a container does the user get the privilege of
272          * enabling the iommu, which may allocate finite resources.  There
273          * is no unset_iommu, but by removing all the groups from a container,
274          * the container is deprivileged and returns to an unset state.
275          */
276         if (list_empty(&container->group_list) || container->iommu_driver) {
277                 up_write(&container->group_lock);
278                 return -EINVAL;
279         }
280
281         mutex_lock(&vfio.iommu_drivers_lock);
282         list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
283                 void *data;
284
285                 if (!vfio_iommu_driver_allowed(container, driver))
286                         continue;
287                 if (!try_module_get(driver->ops->owner))
288                         continue;
289
290                 /*
291                  * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
292                  * so test which iommu driver reported support for this
293                  * extension and call open on them.  We also pass them the
294                  * magic, allowing a single driver to support multiple
295                  * interfaces if they'd like.
296                  */
297                 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
298                         module_put(driver->ops->owner);
299                         continue;
300                 }
301
302                 data = driver->ops->open(arg);
303                 if (IS_ERR(data)) {
304                         ret = PTR_ERR(data);
305                         module_put(driver->ops->owner);
306                         continue;
307                 }
308
309                 ret = __vfio_container_attach_groups(container, driver, data);
310                 if (ret) {
311                         driver->ops->release(data);
312                         module_put(driver->ops->owner);
313                         continue;
314                 }
315
316                 container->iommu_driver = driver;
317                 container->iommu_data = data;
318                 break;
319         }
320
321         mutex_unlock(&vfio.iommu_drivers_lock);
322         up_write(&container->group_lock);
323
324         return ret;
325 }
326
327 static long vfio_fops_unl_ioctl(struct file *filep,
328                                 unsigned int cmd, unsigned long arg)
329 {
330         struct vfio_container *container = filep->private_data;
331         struct vfio_iommu_driver *driver;
332         void *data;
333         long ret = -EINVAL;
334
335         if (!container)
336                 return ret;
337
338         switch (cmd) {
339         case VFIO_GET_API_VERSION:
340                 ret = VFIO_API_VERSION;
341                 break;
342         case VFIO_CHECK_EXTENSION:
343                 ret = vfio_container_ioctl_check_extension(container, arg);
344                 break;
345         case VFIO_SET_IOMMU:
346                 ret = vfio_ioctl_set_iommu(container, arg);
347                 break;
348         default:
349                 driver = container->iommu_driver;
350                 data = container->iommu_data;
351
352                 if (driver) /* passthrough all unrecognized ioctls */
353                         ret = driver->ops->ioctl(data, cmd, arg);
354         }
355
356         return ret;
357 }
358
359 static int vfio_fops_open(struct inode *inode, struct file *filep)
360 {
361         struct vfio_container *container;
362
363         container = kzalloc(sizeof(*container), GFP_KERNEL);
364         if (!container)
365                 return -ENOMEM;
366
367         INIT_LIST_HEAD(&container->group_list);
368         init_rwsem(&container->group_lock);
369         kref_init(&container->kref);
370
371         filep->private_data = container;
372
373         return 0;
374 }
375
376 static int vfio_fops_release(struct inode *inode, struct file *filep)
377 {
378         struct vfio_container *container = filep->private_data;
379         struct vfio_iommu_driver *driver = container->iommu_driver;
380
381         if (driver && driver->ops->notify)
382                 driver->ops->notify(container->iommu_data,
383                                     VFIO_IOMMU_CONTAINER_CLOSE);
384
385         filep->private_data = NULL;
386
387         vfio_container_put(container);
388
389         return 0;
390 }
391
392 static const struct file_operations vfio_fops = {
393         .owner          = THIS_MODULE,
394         .open           = vfio_fops_open,
395         .release        = vfio_fops_release,
396         .unlocked_ioctl = vfio_fops_unl_ioctl,
397         .compat_ioctl   = compat_ptr_ioctl,
398 };
399
400 struct vfio_container *vfio_container_from_file(struct file *file)
401 {
402         struct vfio_container *container;
403
404         /* Sanity check, is this really our fd? */
405         if (file->f_op != &vfio_fops)
406                 return NULL;
407
408         container = file->private_data;
409         WARN_ON(!container); /* fget ensures we don't race vfio_release */
410         return container;
411 }
412
413 static struct miscdevice vfio_dev = {
414         .minor = VFIO_MINOR,
415         .name = "vfio",
416         .fops = &vfio_fops,
417         .nodename = "vfio/vfio",
418         .mode = S_IRUGO | S_IWUGO,
419 };
420
421 int vfio_container_attach_group(struct vfio_container *container,
422                                 struct vfio_group *group)
423 {
424         struct vfio_iommu_driver *driver;
425         int ret = 0;
426
427         lockdep_assert_held(&group->group_lock);
428
429         if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
430                 return -EPERM;
431
432         down_write(&container->group_lock);
433
434         /* Real groups and fake groups cannot mix */
435         if (!list_empty(&container->group_list) &&
436             container->noiommu != (group->type == VFIO_NO_IOMMU)) {
437                 ret = -EPERM;
438                 goto out_unlock_container;
439         }
440
441         if (group->type == VFIO_IOMMU) {
442                 ret = iommu_group_claim_dma_owner(group->iommu_group, group);
443                 if (ret)
444                         goto out_unlock_container;
445         }
446
447         driver = container->iommu_driver;
448         if (driver) {
449                 ret = driver->ops->attach_group(container->iommu_data,
450                                                 group->iommu_group,
451                                                 group->type);
452                 if (ret) {
453                         if (group->type == VFIO_IOMMU)
454                                 iommu_group_release_dma_owner(
455                                         group->iommu_group);
456                         goto out_unlock_container;
457                 }
458         }
459
460         group->container = container;
461         group->container_users = 1;
462         container->noiommu = (group->type == VFIO_NO_IOMMU);
463         list_add(&group->container_next, &container->group_list);
464
465         /* Get a reference on the container and mark a user within the group */
466         vfio_container_get(container);
467
468 out_unlock_container:
469         up_write(&container->group_lock);
470         return ret;
471 }
472
473 void vfio_group_detach_container(struct vfio_group *group)
474 {
475         struct vfio_container *container = group->container;
476         struct vfio_iommu_driver *driver;
477
478         lockdep_assert_held(&group->group_lock);
479         WARN_ON(group->container_users != 1);
480
481         down_write(&container->group_lock);
482
483         driver = container->iommu_driver;
484         if (driver)
485                 driver->ops->detach_group(container->iommu_data,
486                                           group->iommu_group);
487
488         if (group->type == VFIO_IOMMU)
489                 iommu_group_release_dma_owner(group->iommu_group);
490
491         group->container = NULL;
492         group->container_users = 0;
493         list_del(&group->container_next);
494
495         /* Detaching the last group deprivileges a container, remove iommu */
496         if (driver && list_empty(&container->group_list)) {
497                 driver->ops->release(container->iommu_data);
498                 module_put(driver->ops->owner);
499                 container->iommu_driver = NULL;
500                 container->iommu_data = NULL;
501         }
502
503         up_write(&container->group_lock);
504
505         vfio_container_put(container);
506 }
507
508 int vfio_group_use_container(struct vfio_group *group)
509 {
510         lockdep_assert_held(&group->group_lock);
511
512         /*
513          * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
514          * VFIO_SET_IOMMU hasn't been done yet.
515          */
516         if (!group->container->iommu_driver)
517                 return -EINVAL;
518
519         if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
520                 return -EPERM;
521
522         get_file(group->opened_file);
523         group->container_users++;
524         return 0;
525 }
526
527 void vfio_group_unuse_container(struct vfio_group *group)
528 {
529         lockdep_assert_held(&group->group_lock);
530
531         WARN_ON(group->container_users <= 1);
532         group->container_users--;
533         fput(group->opened_file);
534 }
535
536 int vfio_device_container_pin_pages(struct vfio_device *device,
537                                     dma_addr_t iova, int npage,
538                                     int prot, struct page **pages)
539 {
540         struct vfio_container *container = device->group->container;
541         struct iommu_group *iommu_group = device->group->iommu_group;
542         struct vfio_iommu_driver *driver = container->iommu_driver;
543
544         if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
545                 return -E2BIG;
546
547         if (unlikely(!driver || !driver->ops->pin_pages))
548                 return -ENOTTY;
549         return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
550                                       npage, prot, pages);
551 }
552
553 void vfio_device_container_unpin_pages(struct vfio_device *device,
554                                        dma_addr_t iova, int npage)
555 {
556         struct vfio_container *container = device->group->container;
557
558         if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
559                 return;
560
561         container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
562                                                   npage);
563 }
564
565 int vfio_device_container_dma_rw(struct vfio_device *device,
566                                  dma_addr_t iova, void *data,
567                                  size_t len, bool write)
568 {
569         struct vfio_container *container = device->group->container;
570         struct vfio_iommu_driver *driver = container->iommu_driver;
571
572         if (unlikely(!driver || !driver->ops->dma_rw))
573                 return -ENOTTY;
574         return driver->ops->dma_rw(container->iommu_data, iova, data, len,
575                                    write);
576 }
577
578 int __init vfio_container_init(void)
579 {
580         int ret;
581
582         mutex_init(&vfio.iommu_drivers_lock);
583         INIT_LIST_HEAD(&vfio.iommu_drivers_list);
584
585         ret = misc_register(&vfio_dev);
586         if (ret) {
587                 pr_err("vfio: misc device register failed\n");
588                 return ret;
589         }
590
591         if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
592                 ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
593                 if (ret)
594                         goto err_misc;
595         }
596         return 0;
597
598 err_misc:
599         misc_deregister(&vfio_dev);
600         return ret;
601 }
602
603 void vfio_container_cleanup(void)
604 {
605         if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
606                 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
607         misc_deregister(&vfio_dev);
608         mutex_destroy(&vfio.iommu_drivers_lock);
609 }
610
611 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
612 MODULE_ALIAS("devname:vfio/vfio");