drm/i915/gvt: Move mdev attribute groups into kvmgt module
drivers/gpu/drm/i915/gvt/kvmgt.c
1 /*
2  * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  */
30
31 #include <linux/init.h>
32 #include <linux/device.h>
33 #include <linux/mm.h>
34 #include <linux/kthread.h>
35 #include <linux/sched/mm.h>
36 #include <linux/types.h>
37 #include <linux/list.h>
38 #include <linux/rbtree.h>
39 #include <linux/spinlock.h>
40 #include <linux/eventfd.h>
41 #include <linux/uuid.h>
42 #include <linux/kvm_host.h>
43 #include <linux/vfio.h>
44 #include <linux/mdev.h>
45 #include <linux/debugfs.h>
46
47 #include <linux/nospec.h>
48
49 #include "i915_drv.h"
50 #include "gvt.h"
51
52 static const struct intel_gvt_ops *intel_gvt_ops;
53
54 /* helper macros copied from vfio-pci */
55 #define VFIO_PCI_OFFSET_SHIFT   40
56 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
57 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
58 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
59
60 #define EDID_BLOB_OFFSET (PAGE_SIZE/2)
61
62 #define OPREGION_SIGNATURE "IntelGraphicsMem"
63
64 struct vfio_region;
65 struct intel_vgpu_regops {
66         size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
67                         size_t count, loff_t *ppos, bool iswrite);
68         void (*release)(struct intel_vgpu *vgpu,
69                         struct vfio_region *region);
70 };
71
72 struct vfio_region {
73         u32                             type;
74         u32                             subtype;
75         size_t                          size;
76         u32                             flags;
77         const struct intel_vgpu_regops  *ops;
78         void                            *data;
79 };
80
81 struct vfio_edid_region {
82         struct vfio_region_gfx_edid vfio_edid_regs;
83         void *edid_blob;
84 };
85
86 struct kvmgt_pgfn {
87         gfn_t gfn;
88         struct hlist_node hnode;
89 };
90
91 struct kvmgt_guest_info {
92         struct kvm *kvm;
93         struct intel_vgpu *vgpu;
94         struct kvm_page_track_notifier_node track_node;
95 #define NR_BKT (1 << 18)
96         struct hlist_head ptable[NR_BKT];
97 #undef NR_BKT
98         struct dentry *debugfs_cache_entries;
99 };
100
101 struct gvt_dma {
102         struct intel_vgpu *vgpu;
103         struct rb_node gfn_node;
104         struct rb_node dma_addr_node;
105         gfn_t gfn;
106         dma_addr_t dma_addr;
107         unsigned long size;
108         struct kref ref;
109 };
110
111 struct kvmgt_vdev {
112         struct intel_vgpu *vgpu;
113         struct mdev_device *mdev;
114         struct vfio_region *region;
115         int num_regions;
116         struct eventfd_ctx *intx_trigger;
117         struct eventfd_ctx *msi_trigger;
118
119         /*
120          * Two caches are used to avoid mapping duplicated pages (e.g.
121          * scratch pages). This helps to reduce DMA setup overhead.
122          */
123         struct rb_root gfn_cache;
124         struct rb_root dma_addr_cache;
125         unsigned long nr_cache_entries;
126         struct mutex cache_lock;
127
128         struct notifier_block iommu_notifier;
129         struct notifier_block group_notifier;
130         struct kvm *kvm;
131         struct work_struct release_work;
132         atomic_t released;
133         struct vfio_device *vfio_device;
134         struct vfio_group *vfio_group;
135 };
136
137 static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu)
138 {
139         return intel_vgpu_vdev(vgpu);
140 }
141
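/*
 * vgpu->handle stores a pointer to the kvmgt_guest_info once a guest is
 * initialized; values confined to the low byte are treated as "no guest".
 */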
142 static inline bool handle_valid(unsigned long handle)
143 {
144         return !!(handle & ~0xff);
145 }
146
147 static ssize_t available_instances_show(struct mdev_type *mtype,
148                                         struct mdev_type_attribute *attr,
149                                         char *buf)
150 {
151         struct intel_vgpu_type *type;
152         unsigned int num = 0;
153         struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
154
155         type = &gvt->types[mtype_get_type_group_id(mtype)];
156         if (!type)
157                 num = 0;
158         else
159                 num = type->avail_instance;
160
161         return sprintf(buf, "%u\n", num);
162 }
163
164 static ssize_t device_api_show(struct mdev_type *mtype,
165                                struct mdev_type_attribute *attr, char *buf)
166 {
167         return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
168 }
169
170 static ssize_t description_show(struct mdev_type *mtype,
171                                 struct mdev_type_attribute *attr, char *buf)
172 {
173         struct intel_vgpu_type *type;
174         struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
175
176         type = &gvt->types[mtype_get_type_group_id(mtype)];
177         if (!type)
178                 return 0;
179
180         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
181                        "fence: %d\nresolution: %s\n"
182                        "weight: %d\n",
183                        BYTES_TO_MB(type->low_gm_size),
184                        BYTES_TO_MB(type->high_gm_size),
185                        type->fence, vgpu_edid_str(type->resolution),
186                        type->weight);
187 }
188
189 static MDEV_TYPE_ATTR_RO(available_instances);
190 static MDEV_TYPE_ATTR_RO(device_api);
191 static MDEV_TYPE_ATTR_RO(description);
192
193 static struct attribute *gvt_type_attrs[] = {
194         &mdev_type_attr_available_instances.attr,
195         &mdev_type_attr_device_api.attr,
196         &mdev_type_attr_description.attr,
197         NULL,
198 };
199
200 static struct attribute_group *gvt_vgpu_type_groups[] = {
201         [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
202 };
203
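/*
 * Allocate one sysfs attribute group per supported vGPU type so the mdev
 * core can expose them; undone by intel_gvt_cleanup_vgpu_type_groups().
 */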
204 static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
205 {
206         int i, j;
207         struct intel_vgpu_type *type;
208         struct attribute_group *group;
209
210         for (i = 0; i < gvt->num_types; i++) {
211                 type = &gvt->types[i];
212
213                 group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
214                 if (!group)
215                         goto unwind;
216
217                 group->name = type->name;
218                 group->attrs = gvt_type_attrs;
219                 gvt_vgpu_type_groups[i] = group;
220         }
221
222         return 0;
223
224 unwind:
225         for (j = 0; j < i; j++) {
226                 group = gvt_vgpu_type_groups[j];
227                 kfree(group);
228         }
229
230         return -ENOMEM;
231 }
232
233 static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
234 {
235         int i;
236         struct attribute_group *group;
237
238         for (i = 0; i < gvt->num_types; i++) {
239                 group = gvt_vgpu_type_groups[i];
240                 gvt_vgpu_type_groups[i] = NULL;
241                 kfree(group);
242         }
243 }
244
245 static int kvmgt_guest_init(struct mdev_device *mdev);
246 static void intel_vgpu_release_work(struct work_struct *work);
247 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
248
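/* Unpin the guest pages backing [gfn, gfn + size), one page at a time. */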
249 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
250                 unsigned long size)
251 {
252         struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
253         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
254         int total_pages;
255         int npage;
256         int ret;
257
258         total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
259
260         for (npage = 0; npage < total_pages; npage++) {
261                 unsigned long cur_gfn = gfn + npage;
262
263                 ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1);
264                 drm_WARN_ON(&i915->drm, ret != 1);
265         }
266 }
267
268 /* Pin a normal or compound guest page for dma. */
269 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
270                 unsigned long size, struct page **page)
271 {
272         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
273         unsigned long base_pfn = 0;
274         int total_pages;
275         int npage;
276         int ret;
277
278         total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
279         /*
280          * We pin the pages one-by-one to avoid allocating a big array
281          * on the stack to hold the pfns.
282          */
283         for (npage = 0; npage < total_pages; npage++) {
284                 unsigned long cur_gfn = gfn + npage;
285                 unsigned long pfn;
286
287                 ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1,
288                                            IOMMU_READ | IOMMU_WRITE, &pfn);
289                 if (ret != 1) {
290                         gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
291                                      cur_gfn, ret);
292                         goto err;
293                 }
294
295                 if (!pfn_valid(pfn)) {
296                         gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
297                         npage++;
298                         ret = -EFAULT;
299                         goto err;
300                 }
301
302                 if (npage == 0)
303                         base_pfn = pfn;
304                 else if (base_pfn + npage != pfn) {
305                         gvt_vgpu_err("The pages are not contiguous\n");
306                         ret = -EINVAL;
307                         npage++;
308                         goto err;
309                 }
310         }
311
312         *page = pfn_to_page(base_pfn);
313         return 0;
314 err:
315         gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
316         return ret;
317 }
318
319 static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
320                 dma_addr_t *dma_addr, unsigned long size)
321 {
322         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
323         struct page *page = NULL;
324         int ret;
325
326         ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
327         if (ret)
328                 return ret;
329
330         /* Setup DMA mapping. */
331         *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
332         if (dma_mapping_error(dev, *dma_addr)) {
333                 gvt_vgpu_err("DMA mapping failed for pfn 0x%lx\n",
334                              page_to_pfn(page));
335                 gvt_unpin_guest_page(vgpu, gfn, size);
336                 return -ENOMEM;
337         }
338
339         return 0;
340 }
341
342 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
343                 dma_addr_t dma_addr, unsigned long size)
344 {
345         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
346
347         dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
348         gvt_unpin_guest_page(vgpu, gfn, size);
349 }
350
351 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
352                 dma_addr_t dma_addr)
353 {
354         struct rb_node *node = kvmgt_vdev(vgpu)->dma_addr_cache.rb_node;
355         struct gvt_dma *itr;
356
357         while (node) {
358                 itr = rb_entry(node, struct gvt_dma, dma_addr_node);
359
360                 if (dma_addr < itr->dma_addr)
361                         node = node->rb_left;
362                 else if (dma_addr > itr->dma_addr)
363                         node = node->rb_right;
364                 else
365                         return itr;
366         }
367         return NULL;
368 }
369
370 static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
371 {
372         struct rb_node *node = kvmgt_vdev(vgpu)->gfn_cache.rb_node;
373         struct gvt_dma *itr;
374
375         while (node) {
376                 itr = rb_entry(node, struct gvt_dma, gfn_node);
377
378                 if (gfn < itr->gfn)
379                         node = node->rb_left;
380                 else if (gfn > itr->gfn)
381                         node = node->rb_right;
382                 else
383                         return itr;
384         }
385         return NULL;
386 }
387
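/*
 * Insert a new gfn <-> dma_addr mapping into both per-vGPU rbtree caches.
 * The entry is created with an initial reference count of one.
 */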
388 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
389                 dma_addr_t dma_addr, unsigned long size)
390 {
391         struct gvt_dma *new, *itr;
392         struct rb_node **link, *parent = NULL;
393         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
394
395         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
396         if (!new)
397                 return -ENOMEM;
398
399         new->vgpu = vgpu;
400         new->gfn = gfn;
401         new->dma_addr = dma_addr;
402         new->size = size;
403         kref_init(&new->ref);
404
405         /* gfn_cache maps gfn to struct gvt_dma. */
406         link = &vdev->gfn_cache.rb_node;
407         while (*link) {
408                 parent = *link;
409                 itr = rb_entry(parent, struct gvt_dma, gfn_node);
410
411                 if (gfn < itr->gfn)
412                         link = &parent->rb_left;
413                 else
414                         link = &parent->rb_right;
415         }
416         rb_link_node(&new->gfn_node, parent, link);
417         rb_insert_color(&new->gfn_node, &vdev->gfn_cache);
418
419         /* dma_addr_cache maps dma addr to struct gvt_dma. */
420         parent = NULL;
421         link = &vdev->dma_addr_cache.rb_node;
422         while (*link) {
423                 parent = *link;
424                 itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
425
426                 if (dma_addr < itr->dma_addr)
427                         link = &parent->rb_left;
428                 else
429                         link = &parent->rb_right;
430         }
431         rb_link_node(&new->dma_addr_node, parent, link);
432         rb_insert_color(&new->dma_addr_node, &vdev->dma_addr_cache);
433
434         vdev->nr_cache_entries++;
435         return 0;
436 }
437
438 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
439                                 struct gvt_dma *entry)
440 {
441         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
442
443         rb_erase(&entry->gfn_node, &vdev->gfn_cache);
444         rb_erase(&entry->dma_addr_node, &vdev->dma_addr_cache);
445         kfree(entry);
446         vdev->nr_cache_entries--;
447 }
448
449 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
450 {
451         struct gvt_dma *dma;
452         struct rb_node *node = NULL;
453         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
454
455         for (;;) {
456                 mutex_lock(&vdev->cache_lock);
457                 node = rb_first(&vdev->gfn_cache);
458                 if (!node) {
459                         mutex_unlock(&vdev->cache_lock);
460                         break;
461                 }
462                 dma = rb_entry(node, struct gvt_dma, gfn_node);
463                 gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
464                 __gvt_cache_remove_entry(vgpu, dma);
465                 mutex_unlock(&vdev->cache_lock);
466         }
467 }
468
469 static void gvt_cache_init(struct intel_vgpu *vgpu)
470 {
471         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
472
473         vdev->gfn_cache = RB_ROOT;
474         vdev->dma_addr_cache = RB_ROOT;
475         vdev->nr_cache_entries = 0;
476         mutex_init(&vdev->cache_lock);
477 }
478
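/*
 * The protect table is a gfn-indexed hash recording which guest pages have
 * been write-protected through KVM page tracking.
 */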
479 static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
480 {
481         hash_init(info->ptable);
482 }
483
484 static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
485 {
486         struct kvmgt_pgfn *p;
487         struct hlist_node *tmp;
488         int i;
489
490         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
491                 hash_del(&p->hnode);
492                 kfree(p);
493         }
494 }
495
496 static struct kvmgt_pgfn *
497 __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
498 {
499         struct kvmgt_pgfn *p, *res = NULL;
500
501         hash_for_each_possible(info->ptable, p, hnode, gfn) {
502                 if (gfn == p->gfn) {
503                         res = p;
504                         break;
505                 }
506         }
507
508         return res;
509 }
510
511 static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
512                                 gfn_t gfn)
513 {
514         struct kvmgt_pgfn *p;
515
516         p = __kvmgt_protect_table_find(info, gfn);
517         return !!p;
518 }
519
520 static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
521 {
522         struct kvmgt_pgfn *p;
523
524         if (kvmgt_gfn_is_write_protected(info, gfn))
525                 return;
526
527         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
528         if (WARN(!p, "gfn: 0x%llx\n", gfn))
529                 return;
530
531         p->gfn = gfn;
532         hash_add(info->ptable, &p->hnode, gfn);
533 }
534
535 static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
536                                 gfn_t gfn)
537 {
538         struct kvmgt_pgfn *p;
539
540         p = __kvmgt_protect_table_find(info, gfn);
541         if (p) {
542                 hash_del(&p->hnode);
543                 kfree(p);
544         }
545 }
546
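/* Read-only accessor for the emulated OpRegion region; writes are rejected. */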
547 static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
548                 size_t count, loff_t *ppos, bool iswrite)
549 {
550         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
551         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
552                         VFIO_PCI_NUM_REGIONS;
553         void *base = vdev->region[i].data;
554         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
555
556
557         if (pos >= vdev->region[i].size || iswrite) {
558                 gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
559                 return -EINVAL;
560         }
561         count = min(count, (size_t)(vdev->region[i].size - pos));
562         memcpy(buf, base + pos, count);
563
564         return count;
565 }
566
567 static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
568                 struct vfio_region *region)
569 {
570 }
571
572 static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
573         .rw = intel_vgpu_reg_rw_opregion,
574         .release = intel_vgpu_reg_release_opregion,
575 };
576
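/*
 * Emulate 4-byte accesses to the vfio_region_gfx_edid control registers:
 * a link_state write triggers hot-plug emulation, an edid_size write
 * resizes the exposed blob, and all other registers are read-only.
 */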
577 static int handle_edid_regs(struct intel_vgpu *vgpu,
578                         struct vfio_edid_region *region, char *buf,
579                         size_t count, u16 offset, bool is_write)
580 {
581         struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
582         unsigned int data;
583
584         if (offset + count > sizeof(*regs))
585                 return -EINVAL;
586
587         if (count != 4)
588                 return -EINVAL;
589
590         if (is_write) {
591                 data = *((unsigned int *)buf);
592                 switch (offset) {
593                 case offsetof(struct vfio_region_gfx_edid, link_state):
594                         if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
595                                 if (!drm_edid_block_valid(
596                                         (u8 *)region->edid_blob,
597                                         0,
598                                         true,
599                                         NULL)) {
600                                         gvt_vgpu_err("invalid EDID blob\n");
601                                         return -EINVAL;
602                                 }
603                                 intel_gvt_ops->emulate_hotplug(vgpu, true);
604                         } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
605                                 intel_gvt_ops->emulate_hotplug(vgpu, false);
606                         else {
607                                 gvt_vgpu_err("invalid EDID link state %d\n",
608                                         data);
609                                 return -EINVAL;
610                         }
611                         regs->link_state = data;
612                         break;
613                 case offsetof(struct vfio_region_gfx_edid, edid_size):
614                         if (data > regs->edid_max_size) {
615                                 gvt_vgpu_err("EDID size is bigger than %d!\n",
616                                         regs->edid_max_size);
617                                 return -EINVAL;
618                         }
619                         regs->edid_size = data;
620                         break;
621                 default:
622                         /* read-only regs */
623                         gvt_vgpu_err("write read-only EDID region at offset %d\n",
624                                 offset);
625                         return -EPERM;
626                 }
627         } else {
628                 memcpy(buf, (char *)regs + offset, count);
629         }
630
631         return count;
632 }
633
634 static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
635                         size_t count, u16 offset, bool is_write)
636 {
637         if (offset + count > region->vfio_edid_regs.edid_size)
638                 return -EINVAL;
639
640         if (is_write)
641                 memcpy(region->edid_blob + offset, buf, count);
642         else
643                 memcpy(buf, region->edid_blob + offset, count);
644
645         return count;
646 }
647
648 static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
649                 size_t count, loff_t *ppos, bool iswrite)
650 {
651         int ret;
652         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
653                         VFIO_PCI_NUM_REGIONS;
654         struct vfio_edid_region *region =
655                 (struct vfio_edid_region *)kvmgt_vdev(vgpu)->region[i].data;
656         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
657
658         if (pos < region->vfio_edid_regs.edid_offset) {
659                 ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
660         } else {
661                 pos -= EDID_BLOB_OFFSET;
662                 ret = handle_edid_blob(region, buf, count, pos, iswrite);
663         }
664
665         if (ret < 0)
666                 gvt_vgpu_err("failed to access EDID region\n");
667
668         return ret;
669 }
670
671 static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
672                                         struct vfio_region *region)
673 {
674         kfree(region->data);
675 }
676
677 static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
678         .rw = intel_vgpu_reg_rw_edid,
679         .release = intel_vgpu_reg_release_edid,
680 };
681
682 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
683                 unsigned int type, unsigned int subtype,
684                 const struct intel_vgpu_regops *ops,
685                 size_t size, u32 flags, void *data)
686 {
687         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
688         struct vfio_region *region;
689
690         region = krealloc(vdev->region,
691                         (vdev->num_regions + 1) * sizeof(*region),
692                         GFP_KERNEL);
693         if (!region)
694                 return -ENOMEM;
695
696         vdev->region = region;
697         vdev->region[vdev->num_regions].type = type;
698         vdev->region[vdev->num_regions].subtype = subtype;
699         vdev->region[vdev->num_regions].ops = ops;
700         vdev->region[vdev->num_regions].size = size;
701         vdev->region[vdev->num_regions].flags = flags;
702         vdev->region[vdev->num_regions].data = data;
703         vdev->num_regions++;
704         return 0;
705 }
706
707 static int kvmgt_get_vfio_device(void *p_vgpu)
708 {
709         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
710         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
711
712         vdev->vfio_device = vfio_device_get_from_dev(
713                 mdev_dev(vdev->mdev));
714         if (!vdev->vfio_device) {
715                 gvt_vgpu_err("failed to get vfio device\n");
716                 return -ENODEV;
717         }
718         return 0;
719 }
720
721
722 static int kvmgt_set_opregion(void *p_vgpu)
723 {
724         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
725         void *base;
726         int ret;
727
728         /* Each vgpu has its own opregion, although VFIO would create another
729          * one later. This one is used to expose the opregion to VFIO, while
730          * the one created by VFIO later is the one the guest actually uses.
731          */
732         base = vgpu_opregion(vgpu)->va;
733         if (!base)
734                 return -ENOMEM;
735
736         if (memcmp(base, OPREGION_SIGNATURE, 16)) {
737                 memunmap(base);
738                 return -EINVAL;
739         }
740
741         ret = intel_vgpu_register_reg(vgpu,
742                         PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
743                         VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
744                         &intel_vgpu_regops_opregion, OPREGION_SIZE,
745                         VFIO_REGION_INFO_FLAG_READ, base);
746
747         return ret;
748 }
749
750 static int kvmgt_set_edid(void *p_vgpu, int port_num)
751 {
752         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
753         struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
754         struct vfio_edid_region *base;
755         int ret;
756
757         base = kzalloc(sizeof(*base), GFP_KERNEL);
758         if (!base)
759                 return -ENOMEM;
760
761         /* TODO: Add multi-port and EDID extension block support */
762         base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
763         base->vfio_edid_regs.edid_max_size = EDID_SIZE;
764         base->vfio_edid_regs.edid_size = EDID_SIZE;
765         base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
766         base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
767         base->edid_blob = port->edid->edid_block;
768
769         ret = intel_vgpu_register_reg(vgpu,
770                         VFIO_REGION_TYPE_GFX,
771                         VFIO_REGION_SUBTYPE_GFX_EDID,
772                         &intel_vgpu_regops_edid, EDID_SIZE,
773                         VFIO_REGION_INFO_FLAG_READ |
774                         VFIO_REGION_INFO_FLAG_WRITE |
775                         VFIO_REGION_INFO_FLAG_CAPS, base);
776
777         return ret;
778 }
779
780 static void kvmgt_put_vfio_device(void *vgpu)
781 {
782         struct kvmgt_vdev *vdev = kvmgt_vdev((struct intel_vgpu *)vgpu);
783
784         if (WARN_ON(!vdev->vfio_device))
785                 return;
786
787         vfio_device_put(vdev->vfio_device);
788 }
789
790 static int intel_vgpu_create(struct mdev_device *mdev)
791 {
792         struct intel_vgpu *vgpu = NULL;
793         struct intel_vgpu_type *type;
794         struct device *pdev;
795         struct intel_gvt *gvt;
796         int ret;
797
798         pdev = mdev_parent_dev(mdev);
799         gvt = kdev_to_i915(pdev)->gvt;
800
801         type = &gvt->types[mdev_get_type_group_id(mdev)];
802         if (!type) {
803                 ret = -EINVAL;
804                 goto out;
805         }
806
807         vgpu = intel_gvt_ops->vgpu_create(gvt, type);
808         if (IS_ERR_OR_NULL(vgpu)) {
809                 ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
810                 gvt_err("failed to create intel vgpu: %d\n", ret);
811                 goto out;
812         }
813
814         INIT_WORK(&kvmgt_vdev(vgpu)->release_work, intel_vgpu_release_work);
815
816         kvmgt_vdev(vgpu)->mdev = mdev;
817         mdev_set_drvdata(mdev, vgpu);
818
819         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
820                      dev_name(mdev_dev(mdev)));
821         ret = 0;
822
823 out:
824         return ret;
825 }
826
827 static int intel_vgpu_remove(struct mdev_device *mdev)
828 {
829         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
830
831         if (handle_valid(vgpu->handle))
832                 return -EBUSY;
833
834         intel_gvt_ops->vgpu_destroy(vgpu);
835         return 0;
836 }
837
838 static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
839                                      unsigned long action, void *data)
840 {
841         struct kvmgt_vdev *vdev = container_of(nb,
842                                                struct kvmgt_vdev,
843                                                iommu_notifier);
844         struct intel_vgpu *vgpu = vdev->vgpu;
845
846         if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
847                 struct vfio_iommu_type1_dma_unmap *unmap = data;
848                 struct gvt_dma *entry;
849                 unsigned long iov_pfn, end_iov_pfn;
850
851                 iov_pfn = unmap->iova >> PAGE_SHIFT;
852                 end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
853
854                 mutex_lock(&vdev->cache_lock);
855                 for (; iov_pfn < end_iov_pfn; iov_pfn++) {
856                         entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
857                         if (!entry)
858                                 continue;
859
860                         gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
861                                            entry->size);
862                         __gvt_cache_remove_entry(vgpu, entry);
863                 }
864                 mutex_unlock(&vdev->cache_lock);
865         }
866
867         return NOTIFY_OK;
868 }
869
870 static int intel_vgpu_group_notifier(struct notifier_block *nb,
871                                      unsigned long action, void *data)
872 {
873         struct kvmgt_vdev *vdev = container_of(nb,
874                                                struct kvmgt_vdev,
875                                                group_notifier);
876
877         /* the only action we care about */
878         if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
879                 vdev->kvm = data;
880
881                 if (!data)
882                         schedule_work(&vdev->release_work);
883         }
884
885         return NOTIFY_OK;
886 }
887
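/*
 * Open path: register the IOMMU and group notifiers, take the VFIO group
 * and a module reference, then set up KVM guest state and activate the vGPU.
 */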
888 static int intel_vgpu_open(struct mdev_device *mdev)
889 {
890         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
891         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
892         unsigned long events;
893         int ret;
894         struct vfio_group *vfio_group;
895
896         vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
897         vdev->group_notifier.notifier_call = intel_vgpu_group_notifier;
898
899         events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
900         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
901                                 &vdev->iommu_notifier);
902         if (ret != 0) {
903                 gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
904                         ret);
905                 goto out;
906         }
907
908         events = VFIO_GROUP_NOTIFY_SET_KVM;
909         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
910                                 &vdev->group_notifier);
911         if (ret != 0) {
912                 gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
913                         ret);
914                 goto undo_iommu;
915         }
916
917         vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev));
918         if (IS_ERR_OR_NULL(vfio_group)) {
919                 ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
920                 gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
921                 goto undo_register;
922         }
923         vdev->vfio_group = vfio_group;
924
925         /* Take a module reference, as the mdev core doesn't take
926          * a reference on behalf of the vendor driver.
927          */
928         if (!try_module_get(THIS_MODULE)) {
929                 ret = -ENODEV;
930                 goto undo_group;
931         }
932
933         ret = kvmgt_guest_init(mdev);
934         if (ret)
935                 goto undo_group;
936
937         intel_gvt_ops->vgpu_activate(vgpu);
938
939         atomic_set(&vdev->released, 0);
940         return ret;
941
942 undo_group:
943         vfio_group_put_external_user(vdev->vfio_group);
944         vdev->vfio_group = NULL;
945
946 undo_register:
947         vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
948                                         &vdev->group_notifier);
949
950 undo_iommu:
951         vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
952                                         &vdev->iommu_notifier);
953 out:
954         return ret;
955 }
956
957 static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
958 {
959         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
960         struct eventfd_ctx *trigger;
961
962         trigger = vdev->msi_trigger;
963         if (trigger) {
964                 eventfd_ctx_put(trigger);
965                 vdev->msi_trigger = NULL;
966         }
967 }
968
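/*
 * Common release path, shared by the mdev release callback and the deferred
 * release work scheduled from the group notifier.
 */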
969 static void __intel_vgpu_release(struct intel_vgpu *vgpu)
970 {
971         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
972         struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
973         struct kvmgt_guest_info *info;
974         int ret;
975
976         if (!handle_valid(vgpu->handle))
977                 return;
978
979         if (atomic_cmpxchg(&vdev->released, 0, 1))
980                 return;
981
982         intel_gvt_ops->vgpu_release(vgpu);
983
984         ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_IOMMU_NOTIFY,
985                                         &vdev->iommu_notifier);
986         drm_WARN(&i915->drm, ret,
987                  "vfio_unregister_notifier for iommu failed: %d\n", ret);
988
989         ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_GROUP_NOTIFY,
990                                         &vdev->group_notifier);
991         drm_WARN(&i915->drm, ret,
992                  "vfio_unregister_notifier for group failed: %d\n", ret);
993
994         /* dereference module reference taken at open */
995         module_put(THIS_MODULE);
996
997         info = (struct kvmgt_guest_info *)vgpu->handle;
998         kvmgt_guest_exit(info);
999
1000         intel_vgpu_release_msi_eventfd_ctx(vgpu);
1001         vfio_group_put_external_user(vdev->vfio_group);
1002
1003         vdev->kvm = NULL;
1004         vgpu->handle = 0;
1005 }
1006
1007 static void intel_vgpu_release(struct mdev_device *mdev)
1008 {
1009         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1010
1011         __intel_vgpu_release(vgpu);
1012 }
1013
1014 static void intel_vgpu_release_work(struct work_struct *work)
1015 {
1016         struct kvmgt_vdev *vdev = container_of(work, struct kvmgt_vdev,
1017                                                release_work);
1018
1019         __intel_vgpu_release(vdev->vgpu);
1020 }
1021
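/*
 * Decode the guest-programmed base address of a BAR from the virtual
 * config space, handling both 32-bit and 64-bit memory BAR layouts.
 */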
1022 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
1023 {
1024         u32 start_lo, start_hi;
1025         u32 mem_type;
1026
1027         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
1028                         PCI_BASE_ADDRESS_MEM_MASK;
1029         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
1030                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
1031
1032         switch (mem_type) {
1033         case PCI_BASE_ADDRESS_MEM_TYPE_64:
1034                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
1035                                                 + bar + 4));
1036                 break;
1037         case PCI_BASE_ADDRESS_MEM_TYPE_32:
1038         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
1039                 /* 1M mem BAR treated as 32-bit BAR */
1040         default:
1041                 /* mem unknown type treated as 32-bit BAR */
1042                 start_hi = 0;
1043                 break;
1044         }
1045
1046         return ((u64)start_hi << 32) | start_lo;
1047 }
1048
1049 static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
1050                              void *buf, unsigned int count, bool is_write)
1051 {
1052         u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
1053         int ret;
1054
1055         if (is_write)
1056                 ret = intel_gvt_ops->emulate_mmio_write(vgpu,
1057                                         bar_start + off, buf, count);
1058         else
1059                 ret = intel_gvt_ops->emulate_mmio_read(vgpu,
1060                                         bar_start + off, buf, count);
1061         return ret;
1062 }
1063
1064 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
1065 {
1066         return off >= vgpu_aperture_offset(vgpu) &&
1067                off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
1068 }
1069
1070 static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
1071                 void *buf, unsigned long count, bool is_write)
1072 {
1073         void __iomem *aperture_va;
1074
1075         if (!intel_vgpu_in_aperture(vgpu, off) ||
1076             !intel_vgpu_in_aperture(vgpu, off + count)) {
1077                 gvt_vgpu_err("Invalid aperture offset %llu\n", off);
1078                 return -EINVAL;
1079         }
1080
1081         aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
1082                                         ALIGN_DOWN(off, PAGE_SIZE),
1083                                         count + offset_in_page(off));
1084         if (!aperture_va)
1085                 return -EIO;
1086
1087         if (is_write)
1088                 memcpy_toio(aperture_va + offset_in_page(off), buf, count);
1089         else
1090                 memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
1091
1092         io_mapping_unmap(aperture_va);
1093
1094         return 0;
1095 }
1096
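/*
 * Dispatch a read or write to the proper emulation backend based on the
 * VFIO region index encoded in *ppos: config space, BAR0 MMIO, the
 * aperture (BAR2), or one of the device-specific regions (OpRegion, EDID).
 */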
1097 static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
1098                         size_t count, loff_t *ppos, bool is_write)
1099 {
1100         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1101         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
1102         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
1103         u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
1104         int ret = -EINVAL;
1105
1106
1107         if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) {
1108                 gvt_vgpu_err("invalid index: %u\n", index);
1109                 return -EINVAL;
1110         }
1111
1112         switch (index) {
1113         case VFIO_PCI_CONFIG_REGION_INDEX:
1114                 if (is_write)
1115                         ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
1116                                                 buf, count);
1117                 else
1118                         ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
1119                                                 buf, count);
1120                 break;
1121         case VFIO_PCI_BAR0_REGION_INDEX:
1122                 ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
1123                                         buf, count, is_write);
1124                 break;
1125         case VFIO_PCI_BAR2_REGION_INDEX:
1126                 ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
1127                 break;
1128         case VFIO_PCI_BAR1_REGION_INDEX:
1129         case VFIO_PCI_BAR3_REGION_INDEX:
1130         case VFIO_PCI_BAR4_REGION_INDEX:
1131         case VFIO_PCI_BAR5_REGION_INDEX:
1132         case VFIO_PCI_VGA_REGION_INDEX:
1133         case VFIO_PCI_ROM_REGION_INDEX:
1134                 break;
1135         default:
1136                 if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
1137                         return -EINVAL;
1138
1139                 index -= VFIO_PCI_NUM_REGIONS;
1140                 return vdev->region[index].ops->rw(vgpu, buf, count,
1141                                 ppos, is_write);
1142         }
1143
1144         return ret == 0 ? count : ret;
1145 }
1146
1147 static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
1148 {
1149         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1150         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
1151         struct intel_gvt *gvt = vgpu->gvt;
1152         int offset;
1153
1154         /* Only allow MMIO GGTT entry access */
1155         if (index != PCI_BASE_ADDRESS_0)
1156                 return false;
1157
1158         offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
1159                 intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
1160
1161         return (offset >= gvt->device_info.gtt_start_offset &&
1162                 offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
1163                         true : false;
1164 }
1165
1166 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
1167                         size_t count, loff_t *ppos)
1168 {
1169         unsigned int done = 0;
1170         int ret;
1171
1172         while (count) {
1173                 size_t filled;
1174
1175                 /* Only support 8-byte reads of GGTT entries */
1176                 if (count >= 8 && !(*ppos % 8) &&
1177                         gtt_entry(mdev, ppos)) {
1178                         u64 val;
1179
1180                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1181                                         ppos, false);
1182                         if (ret <= 0)
1183                                 goto read_err;
1184
1185                         if (copy_to_user(buf, &val, sizeof(val)))
1186                                 goto read_err;
1187
1188                         filled = 8;
1189                 } else if (count >= 4 && !(*ppos % 4)) {
1190                         u32 val;
1191
1192                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1193                                         ppos, false);
1194                         if (ret <= 0)
1195                                 goto read_err;
1196
1197                         if (copy_to_user(buf, &val, sizeof(val)))
1198                                 goto read_err;
1199
1200                         filled = 4;
1201                 } else if (count >= 2 && !(*ppos % 2)) {
1202                         u16 val;
1203
1204                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1205                                         ppos, false);
1206                         if (ret <= 0)
1207                                 goto read_err;
1208
1209                         if (copy_to_user(buf, &val, sizeof(val)))
1210                                 goto read_err;
1211
1212                         filled = 2;
1213                 } else {
1214                         u8 val;
1215
1216                         ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
1217                                         false);
1218                         if (ret <= 0)
1219                                 goto read_err;
1220
1221                         if (copy_to_user(buf, &val, sizeof(val)))
1222                                 goto read_err;
1223
1224                         filled = 1;
1225                 }
1226
1227                 count -= filled;
1228                 done += filled;
1229                 *ppos += filled;
1230                 buf += filled;
1231         }
1232
1233         return done;
1234
1235 read_err:
1236         return -EFAULT;
1237 }
1238
1239 static ssize_t intel_vgpu_write(struct mdev_device *mdev,
1240                                 const char __user *buf,
1241                                 size_t count, loff_t *ppos)
1242 {
1243         unsigned int done = 0;
1244         int ret;
1245
1246         while (count) {
1247                 size_t filled;
1248
1249                 /* Only support 8-byte writes of GGTT entries */
1250                 if (count >= 8 && !(*ppos % 8) &&
1251                         gtt_entry(mdev, ppos)) {
1252                         u64 val;
1253
1254                         if (copy_from_user(&val, buf, sizeof(val)))
1255                                 goto write_err;
1256
1257                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1258                                         ppos, true);
1259                         if (ret <= 0)
1260                                 goto write_err;
1261
1262                         filled = 8;
1263                 } else if (count >= 4 && !(*ppos % 4)) {
1264                         u32 val;
1265
1266                         if (copy_from_user(&val, buf, sizeof(val)))
1267                                 goto write_err;
1268
1269                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1270                                         ppos, true);
1271                         if (ret <= 0)
1272                                 goto write_err;
1273
1274                         filled = 4;
1275                 } else if (count >= 2 && !(*ppos % 2)) {
1276                         u16 val;
1277
1278                         if (copy_from_user(&val, buf, sizeof(val)))
1279                                 goto write_err;
1280
1281                         ret = intel_vgpu_rw(mdev, (char *)&val,
1282                                         sizeof(val), ppos, true);
1283                         if (ret <= 0)
1284                                 goto write_err;
1285
1286                         filled = 2;
1287                 } else {
1288                         u8 val;
1289
1290                         if (copy_from_user(&val, buf, sizeof(val)))
1291                                 goto write_err;
1292
1293                         ret = intel_vgpu_rw(mdev, &val, sizeof(val),
1294                                         ppos, true);
1295                         if (ret <= 0)
1296                                 goto write_err;
1297
1298                         filled = 1;
1299                 }
1300
1301                 count -= filled;
1302                 done += filled;
1303                 *ppos += filled;
1304                 buf += filled;
1305         }
1306
1307         return done;
1308 write_err:
1309         return -EFAULT;
1310 }
1311
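/*
 * Only BAR2 (the aperture) may be mmapped; validate that the request stays
 * within this vGPU's aperture slice and remap the matching physical range.
 */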
1312 static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
1313 {
1314         unsigned int index;
1315         u64 virtaddr;
1316         unsigned long req_size, pgoff, req_start;
1317         pgprot_t pg_prot;
1318         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1319
1320         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1321         if (index >= VFIO_PCI_ROM_REGION_INDEX)
1322                 return -EINVAL;
1323
1324         if (vma->vm_end < vma->vm_start)
1325                 return -EINVAL;
1326         if ((vma->vm_flags & VM_SHARED) == 0)
1327                 return -EINVAL;
1328         if (index != VFIO_PCI_BAR2_REGION_INDEX)
1329                 return -EINVAL;
1330
1331         pg_prot = vma->vm_page_prot;
1332         virtaddr = vma->vm_start;
1333         req_size = vma->vm_end - vma->vm_start;
1334         pgoff = vma->vm_pgoff &
1335                 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1336         req_start = pgoff << PAGE_SHIFT;
1337
1338         if (!intel_vgpu_in_aperture(vgpu, req_start))
1339                 return -EINVAL;
1340         if (req_start + req_size >
1341             vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1342                 return -EINVAL;
1343
1344         pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1345
1346         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1347 }
1348
1349 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1350 {
1351         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1352                 return 1;
1353
1354         return 0;
1355 }
1356
1357 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1358                         unsigned int index, unsigned int start,
1359                         unsigned int count, u32 flags,
1360                         void *data)
1361 {
1362         return 0;
1363 }
1364
1365 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1366                         unsigned int index, unsigned int start,
1367                         unsigned int count, u32 flags, void *data)
1368 {
1369         return 0;
1370 }
1371
1372 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1373                 unsigned int index, unsigned int start, unsigned int count,
1374                 u32 flags, void *data)
1375 {
1376         return 0;
1377 }
1378
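/* Install (or tear down) the eventfd used to inject MSIs into the guest. */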
1379 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1380                 unsigned int index, unsigned int start, unsigned int count,
1381                 u32 flags, void *data)
1382 {
1383         struct eventfd_ctx *trigger;
1384
1385         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1386                 int fd = *(int *)data;
1387
1388                 trigger = eventfd_ctx_fdget(fd);
1389                 if (IS_ERR(trigger)) {
1390                         gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1391                         return PTR_ERR(trigger);
1392                 }
1393                 kvmgt_vdev(vgpu)->msi_trigger = trigger;
1394         } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1395                 intel_vgpu_release_msi_eventfd_ctx(vgpu);
1396
1397         return 0;
1398 }
1399
1400 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1401                 unsigned int index, unsigned int start, unsigned int count,
1402                 void *data)
1403 {
1404         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1405                         unsigned int start, unsigned int count, u32 flags,
1406                         void *data) = NULL;
1407
1408         switch (index) {
1409         case VFIO_PCI_INTX_IRQ_INDEX:
1410                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1411                 case VFIO_IRQ_SET_ACTION_MASK:
1412                         func = intel_vgpu_set_intx_mask;
1413                         break;
1414                 case VFIO_IRQ_SET_ACTION_UNMASK:
1415                         func = intel_vgpu_set_intx_unmask;
1416                         break;
1417                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1418                         func = intel_vgpu_set_intx_trigger;
1419                         break;
1420                 }
1421                 break;
1422         case VFIO_PCI_MSI_IRQ_INDEX:
1423                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1424                 case VFIO_IRQ_SET_ACTION_MASK:
1425                 case VFIO_IRQ_SET_ACTION_UNMASK:
1426                         /* XXX Need masking support exported */
1427                         break;
1428                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1429                         func = intel_vgpu_set_msi_trigger;
1430                         break;
1431                 }
1432                 break;
1433         }
1434
1435         if (!func)
1436                 return -ENOTTY;
1437
1438         return func(vgpu, index, start, count, flags, data);
1439 }
1440
1441 static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
1442                              unsigned long arg)
1443 {
1444         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1445         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
1446         unsigned long minsz;
1447
1448         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1449
1450         if (cmd == VFIO_DEVICE_GET_INFO) {
1451                 struct vfio_device_info info;
1452
1453                 minsz = offsetofend(struct vfio_device_info, num_irqs);
1454
1455                 if (copy_from_user(&info, (void __user *)arg, minsz))
1456                         return -EFAULT;
1457
1458                 if (info.argsz < minsz)
1459                         return -EINVAL;
1460
1461                 info.flags = VFIO_DEVICE_FLAGS_PCI;
1462                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
1463                 info.num_regions = VFIO_PCI_NUM_REGIONS +
1464                                 vdev->num_regions;
1465                 info.num_irqs = VFIO_PCI_NUM_IRQS;
1466
1467                 return copy_to_user((void __user *)arg, &info, minsz) ?
1468                         -EFAULT : 0;
1469
1470         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1471                 struct vfio_region_info info;
1472                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1473                 unsigned int i;
1474                 int ret;
1475                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1476                 int nr_areas = 1;
1477                 int cap_type_id;
1478
1479                 minsz = offsetofend(struct vfio_region_info, offset);
1480
1481                 if (copy_from_user(&info, (void __user *)arg, minsz))
1482                         return -EFAULT;
1483
1484                 if (info.argsz < minsz)
1485                         return -EINVAL;
1486
1487                 switch (info.index) {
1488                 case VFIO_PCI_CONFIG_REGION_INDEX:
1489                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1490                         info.size = vgpu->gvt->device_info.cfg_space_size;
1491                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1492                                      VFIO_REGION_INFO_FLAG_WRITE;
1493                         break;
1494                 case VFIO_PCI_BAR0_REGION_INDEX:
1495                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1496                         info.size = vgpu->cfg_space.bar[info.index].size;
1497                         if (!info.size) {
1498                                 info.flags = 0;
1499                                 break;
1500                         }
1501
1502                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1503                                      VFIO_REGION_INFO_FLAG_WRITE;
1504                         break;
1505                 case VFIO_PCI_BAR1_REGION_INDEX:
1506                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1507                         info.size = 0;
1508                         info.flags = 0;
1509                         break;
1510                 case VFIO_PCI_BAR2_REGION_INDEX:
1511                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1512                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1513                                         VFIO_REGION_INFO_FLAG_MMAP |
1514                                         VFIO_REGION_INFO_FLAG_READ |
1515                                         VFIO_REGION_INFO_FLAG_WRITE;
1516                         info.size = gvt_aperture_sz(vgpu->gvt);
1517
1518                         sparse = kzalloc(struct_size(sparse, areas, nr_areas),
1519                                          GFP_KERNEL);
1520                         if (!sparse)
1521                                 return -ENOMEM;
1522
1523                         sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1524                         sparse->header.version = 1;
1525                         sparse->nr_areas = nr_areas;
1526                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1527                         sparse->areas[0].offset =
1528                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1529                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1530                         break;
1531
1532                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1533                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1534                         info.size = 0;
1535                         info.flags = 0;
1536
1537                         gvt_dbg_core("get region info bar:%d\n", info.index);
1538                         break;
1539
1540                 case VFIO_PCI_ROM_REGION_INDEX:
1541                 case VFIO_PCI_VGA_REGION_INDEX:
1542                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1543                         info.size = 0;
1544                         info.flags = 0;
1545
1546                         gvt_dbg_core("get region info index:%d\n", info.index);
1547                         break;
1548                 default:
1549                         {
1550                                 struct vfio_region_info_cap_type cap_type = {
1551                                         .header.id = VFIO_REGION_INFO_CAP_TYPE,
1552                                         .header.version = 1 };
1553
1554                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
1555                                                 vdev->num_regions)
1556                                         return -EINVAL;
1557                                 info.index =
1558                                         array_index_nospec(info.index,
1559                                                         VFIO_PCI_NUM_REGIONS +
1560                                                         vdev->num_regions);
1561
1562                                 i = info.index - VFIO_PCI_NUM_REGIONS;
1563
1564                                 info.offset =
1565                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
1566                                 info.size = vdev->region[i].size;
1567                                 info.flags = vdev->region[i].flags;
1568
1569                                 cap_type.type = vdev->region[i].type;
1570                                 cap_type.subtype = vdev->region[i].subtype;
1571
1572                                 ret = vfio_info_add_capability(&caps,
1573                                                         &cap_type.header,
1574                                                         sizeof(cap_type));
1575                                 if (ret)
1576                                         return ret;
1577                         }
1578                 }
1579
1580                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1581                         switch (cap_type_id) {
1582                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1583                                 ret = vfio_info_add_capability(&caps,
1584                                         &sparse->header,
1585                                         struct_size(sparse, areas,
1586                                                     sparse->nr_areas));
1587                                 if (ret) {
1588                                         kfree(sparse);
1589                                         return ret;
1590                                 }
1591                                 break;
1592                         default:
1593                                 kfree(sparse);
1594                                 return -EINVAL;
1595                         }
1596                 }
1597
1598                 if (caps.size) {
1599                         info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1600                         if (info.argsz < sizeof(info) + caps.size) {
1601                                 info.argsz = sizeof(info) + caps.size;
1602                                 info.cap_offset = 0;
1603                         } else {
1604                                 vfio_info_cap_shift(&caps, sizeof(info));
1605                                 if (copy_to_user((void __user *)arg +
1606                                                   sizeof(info), caps.buf,
1607                                                   caps.size)) {
1608                                         kfree(caps.buf);
1609                                         kfree(sparse);
1610                                         return -EFAULT;
1611                                 }
1612                                 info.cap_offset = sizeof(info);
1613                         }
1614
1615                         kfree(caps.buf);
1616                 }
1617
1618                 kfree(sparse);
1619                 return copy_to_user((void __user *)arg, &info, minsz) ?
1620                         -EFAULT : 0;
1621         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1622                 struct vfio_irq_info info;
1623
1624                 minsz = offsetofend(struct vfio_irq_info, count);
1625
1626                 if (copy_from_user(&info, (void __user *)arg, minsz))
1627                         return -EFAULT;
1628
1629                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1630                         return -EINVAL;
1631
1632                 switch (info.index) {
1633                 case VFIO_PCI_INTX_IRQ_INDEX:
1634                 case VFIO_PCI_MSI_IRQ_INDEX:
1635                         break;
1636                 default:
1637                         return -EINVAL;
1638                 }
1639
1640                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1641
1642                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1643
1644                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1645                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1646                                        VFIO_IRQ_INFO_AUTOMASKED);
1647                 else
1648                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1649
1650                 return copy_to_user((void __user *)arg, &info, minsz) ?
1651                         -EFAULT : 0;
1652         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1653                 struct vfio_irq_set hdr;
1654                 u8 *data = NULL;
1655                 int ret = 0;
1656                 size_t data_size = 0;
1657
1658                 minsz = offsetofend(struct vfio_irq_set, count);
1659
1660                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1661                         return -EFAULT;
1662
1663                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1664                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1665
1666                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1667                                                 VFIO_PCI_NUM_IRQS, &data_size);
1668                         if (ret) {
1669                                 gvt_vgpu_err("vfio_set_irqs_validate_and_prepare failed\n");
1670                                 return -EINVAL;
1671                         }
1672                         if (data_size) {
1673                                 data = memdup_user((void __user *)(arg + minsz),
1674                                                    data_size);
1675                                 if (IS_ERR(data))
1676                                         return PTR_ERR(data);
1677                         }
1678                 }
1679
1680                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1681                                         hdr.start, hdr.count, data);
1682                 kfree(data);
1683
1684                 return ret;
1685         } else if (cmd == VFIO_DEVICE_RESET) {
1686                 intel_gvt_ops->vgpu_reset(vgpu);
1687                 return 0;
1688         } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1689                 struct vfio_device_gfx_plane_info dmabuf;
1690                 int ret = 0;
1691
1692                 minsz = offsetofend(struct vfio_device_gfx_plane_info,
1693                                     dmabuf_id);
1694                 if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1695                         return -EFAULT;
1696                 if (dmabuf.argsz < minsz)
1697                         return -EINVAL;
1698
1699                 ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf);
1700                 if (ret != 0)
1701                         return ret;
1702
1703                 return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1704                                                                 -EFAULT : 0;
1705         } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1706                 __u32 dmabuf_id;
1707                 __s32 dmabuf_fd;
1708
1709                 if (get_user(dmabuf_id, (__u32 __user *)arg))
1710                         return -EFAULT;
1711
1712                 dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id);
1713                 return dmabuf_fd;
1714
1715         }
1716
1717         return -ENOTTY;
1718 }
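
/*
 * A minimal userspace sketch (not part of this module) of how the
 * VFIO_DEVICE_GET_REGION_INFO handling above is typically consumed.  It
 * assumes a VFIO device fd for the vgpu has already been obtained elsewhere,
 * and it maps only the first sparse-mmap area, which is how the BAR2
 * aperture slice is advertised here.
 *
 *  #include <stdlib.h>
 *  #include <sys/ioctl.h>
 *  #include <sys/mman.h>
 *  #include <linux/vfio.h>
 *
 *  static void *map_vgpu_aperture(int device_fd, size_t *len)
 *  {
 *          struct vfio_region_info hdr = {
 *                  .argsz = sizeof(hdr),
 *                  .index = VFIO_PCI_BAR2_REGION_INDEX,
 *          };
 *          struct vfio_region_info *info;
 *          struct vfio_info_cap_header *cap;
 *          struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
 *          void *map = MAP_FAILED;
 *
 *          // First call only reports the argsz needed for the cap chain.
 *          if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &hdr) ||
 *              !(hdr.flags & VFIO_REGION_INFO_FLAG_CAPS))
 *                  return MAP_FAILED;
 *
 *          info = calloc(1, hdr.argsz);
 *          if (!info)
 *                  return MAP_FAILED;
 *          info->argsz = hdr.argsz;
 *          info->index = VFIO_PCI_BAR2_REGION_INDEX;
 *          if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info))
 *                  goto out;
 *
 *          // Walk the capability chain for the sparse-mmap capability.
 *          cap = (void *)((char *)info + info->cap_offset);
 *          while (1) {
 *                  if (cap->id == VFIO_REGION_INFO_CAP_SPARSE_MMAP) {
 *                          sparse = (void *)cap;
 *                          break;
 *                  }
 *                  if (!cap->next)
 *                          break;
 *                  cap = (void *)((char *)info + cap->next);
 *          }
 *          if (!sparse || !sparse->nr_areas)
 *                  goto out;
 *
 *          // mmap offsets are relative to the region's base file offset.
 *          *len = sparse->areas[0].size;
 *          map = mmap(NULL, *len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                     device_fd, info->offset + sparse->areas[0].offset);
 *  out:
 *          free(info);
 *          return map;
 *  }
 */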
1719
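/*
 * vgpu_id is exported through the "intel_vgpu" attribute group below, so it
 * typically shows up as intel_vgpu/vgpu_id in the mdev device's sysfs
 * directory.
 */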
1720 static ssize_t
1721 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1722              char *buf)
1723 {
1724         struct mdev_device *mdev = mdev_from_dev(dev);
1725
1726         if (mdev) {
1727                 struct intel_vgpu *vgpu = (struct intel_vgpu *)
1728                         mdev_get_drvdata(mdev);
1729                 return sprintf(buf, "%d\n", vgpu->id);
1730         }
1731         return sprintf(buf, "\n");
1732 }
1733
1734 static DEVICE_ATTR_RO(vgpu_id);
1735
1736 static struct attribute *intel_vgpu_attrs[] = {
1737         &dev_attr_vgpu_id.attr,
1738         NULL
1739 };
1740
1741 static const struct attribute_group intel_vgpu_group = {
1742         .name = "intel_vgpu",
1743         .attrs = intel_vgpu_attrs,
1744 };
1745
1746 static const struct attribute_group *intel_vgpu_groups[] = {
1747         &intel_vgpu_group,
1748         NULL,
1749 };
1750
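/*
 * mdev parent ops registered in kvmgt_host_init().  create/remove manage the
 * vgpu lifecycle, open/release track the VFIO user, and read/write/mmap/ioctl
 * service device accesses from userspace (e.g. a QEMU vfio-pci backend).
 */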
1751 static struct mdev_parent_ops intel_vgpu_ops = {
1752         .mdev_attr_groups       = intel_vgpu_groups,
1753         .create                 = intel_vgpu_create,
1754         .remove                 = intel_vgpu_remove,
1755
1756         .open                   = intel_vgpu_open,
1757         .release                = intel_vgpu_release,
1758
1759         .read                   = intel_vgpu_read,
1760         .write                  = intel_vgpu_write,
1761         .mmap                   = intel_vgpu_mmap,
1762         .ioctl                  = intel_vgpu_ioctl,
1763 };
1764
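/*
 * Host-level MPT hooks: publish the supported vgpu types and register this
 * GPU as an mdev parent device so that vgpus can be created through sysfs.
 */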
1765 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
1766 {
1767         int ret;
1768
1769         ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt);
1770         if (ret)
1771                 return ret;
1772
1773         intel_gvt_ops = ops;
1774         intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups;
1775
1776         ret = mdev_register_device(dev, &intel_vgpu_ops);
1777         if (ret)
1778                 intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
1779
1780         return ret;
1781 }
1782
1783 static void kvmgt_host_exit(struct device *dev, void *gvt)
1784 {
1785         mdev_unregister_device(dev);
1786         intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
1787 }
1788
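/*
 * Write-protect (or un-protect) a guest page through KVM's page-track
 * framework.  GVT-g uses this to trap guest writes to pages it shadows
 * (e.g. guest GTT page tables): the gfn is added to KVM's write tracking
 * and remembered in the per-guest protect table so that the notifier
 * callbacks below can route the write back to the device model.
 */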
1789 static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
1790 {
1791         struct kvmgt_guest_info *info;
1792         struct kvm *kvm;
1793         struct kvm_memory_slot *slot;
1794         int idx;
1795
1796         if (!handle_valid(handle))
1797                 return -ESRCH;
1798
1799         info = (struct kvmgt_guest_info *)handle;
1800         kvm = info->kvm;
1801
1802         idx = srcu_read_lock(&kvm->srcu);
1803         slot = gfn_to_memslot(kvm, gfn);
1804         if (!slot) {
1805                 srcu_read_unlock(&kvm->srcu, idx);
1806                 return -EINVAL;
1807         }
1808
1809         write_lock(&kvm->mmu_lock);
1810
1811         if (kvmgt_gfn_is_write_protected(info, gfn))
1812                 goto out;
1813
1814         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1815         kvmgt_protect_table_add(info, gfn);
1816
1817 out:
1818         write_unlock(&kvm->mmu_lock);
1819         srcu_read_unlock(&kvm->srcu, idx);
1820         return 0;
1821 }
1822
1823 static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
1824 {
1825         struct kvmgt_guest_info *info;
1826         struct kvm *kvm;
1827         struct kvm_memory_slot *slot;
1828         int idx;
1829
1830         if (!handle_valid(handle))
1831                 return 0;
1832
1833         info = (struct kvmgt_guest_info *)handle;
1834         kvm = info->kvm;
1835
1836         idx = srcu_read_lock(&kvm->srcu);
1837         slot = gfn_to_memslot(kvm, gfn);
1838         if (!slot) {
1839                 srcu_read_unlock(&kvm->srcu, idx);
1840                 return -EINVAL;
1841         }
1842
1843         write_lock(&kvm->mmu_lock);
1844
1845         if (!kvmgt_gfn_is_write_protected(info, gfn))
1846                 goto out;
1847
1848         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1849         kvmgt_protect_table_del(info, gfn);
1850
1851 out:
1852         write_unlock(&kvm->mmu_lock);
1853         srcu_read_unlock(&kvm->srcu, idx);
1854         return 0;
1855 }
1856
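/*
 * KVM page-track notifier callbacks.  track_write is invoked when the guest
 * writes a tracked gpa and forwards the data to the GVT write-protect
 * handler; track_flush_slot drops the tracking for any protected gfns in a
 * memslot that is being removed or moved.
 */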
1857 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1858                 const u8 *val, int len,
1859                 struct kvm_page_track_notifier_node *node)
1860 {
1861         struct kvmgt_guest_info *info = container_of(node,
1862                                         struct kvmgt_guest_info, track_node);
1863
1864         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1865                 intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
1866                                                      (void *)val, len);
1867 }
1868
1869 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1870                 struct kvm_memory_slot *slot,
1871                 struct kvm_page_track_notifier_node *node)
1872 {
1873         int i;
1874         gfn_t gfn;
1875         struct kvmgt_guest_info *info = container_of(node,
1876                                         struct kvmgt_guest_info, track_node);
1877
1878         write_lock(&kvm->mmu_lock);
1879         for (i = 0; i < slot->npages; i++) {
1880                 gfn = slot->base_gfn + i;
1881                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1882                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1883                                                 KVM_PAGE_TRACK_WRITE);
1884                         kvmgt_protect_table_del(info, gfn);
1885                 }
1886         }
1887         write_unlock(&kvm->mmu_lock);
1888 }
1889
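/* Check whether this kvm instance is already bound to an active vgpu. */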
1890 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1891 {
1892         struct intel_vgpu *itr;
1893         struct kvmgt_guest_info *info;
1894         int id;
1895         bool ret = false;
1896
1897         mutex_lock(&vgpu->gvt->lock);
1898         for_each_active_vgpu(vgpu->gvt, itr, id) {
1899                 if (!handle_valid(itr->handle))
1900                         continue;
1901
1902                 info = (struct kvmgt_guest_info *)itr->handle;
1903                 if (kvm && kvm == info->kvm) {
1904                         ret = true;
1905                         goto out;
1906                 }
1907         }
1908 out:
1909         mutex_unlock(&vgpu->gvt->lock);
1910         return ret;
1911 }
1912
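/*
 * Bind a vgpu to the KVM instance previously recorded in the kvmgt_vdev:
 * allocate the kvmgt_guest_info handle, take a reference on the kvm, set up
 * the protect table and DMA cache, register the page-track notifier and
 * expose the DMA-cache size in debugfs.
 */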
1913 static int kvmgt_guest_init(struct mdev_device *mdev)
1914 {
1915         struct kvmgt_guest_info *info;
1916         struct intel_vgpu *vgpu;
1917         struct kvmgt_vdev *vdev;
1918         struct kvm *kvm;
1919
1920         vgpu = mdev_get_drvdata(mdev);
1921         if (handle_valid(vgpu->handle))
1922                 return -EEXIST;
1923
1924         vdev = kvmgt_vdev(vgpu);
1925         kvm = vdev->kvm;
1926         if (!kvm || kvm->mm != current->mm) {
1927                 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
1928                 return -ESRCH;
1929         }
1930
1931         if (__kvmgt_vgpu_exist(vgpu, kvm))
1932                 return -EEXIST;
1933
1934         info = vzalloc(sizeof(struct kvmgt_guest_info));
1935         if (!info)
1936                 return -ENOMEM;
1937
1938         vgpu->handle = (unsigned long)info;
1939         info->vgpu = vgpu;
1940         info->kvm = kvm;
1941         kvm_get_kvm(info->kvm);
1942
1943         kvmgt_protect_table_init(info);
1944         gvt_cache_init(vgpu);
1945
1946         info->track_node.track_write = kvmgt_page_track_write;
1947         info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
1948         kvm_page_track_register_notifier(kvm, &info->track_node);
1949
1950         info->debugfs_cache_entries = debugfs_create_ulong(
1951                                                 "kvmgt_nr_cache_entries",
1952                                                 0444, vgpu->debugfs,
1953                                                 &vdev->nr_cache_entries);
1954         return 0;
1955 }
1956
1957 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
1958 {
1959         debugfs_remove(info->debugfs_cache_entries);
1960
1961         kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
1962         kvm_put_kvm(info->kvm);
1963         kvmgt_protect_table_destroy(info);
1964         gvt_cache_destroy(info->vgpu);
1965         vfree(info);
1966
1967         return true;
1968 }
1969
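/*
 * Allocate (attach) and free (detach) the kvmgt-private per-vgpu state.  On
 * detach, any extra VFIO regions registered by this module (opregion, EDID)
 * release their backing data first.
 */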
1970 static int kvmgt_attach_vgpu(void *p_vgpu, unsigned long *handle)
1971 {
1972         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
1973
1974         vgpu->vdev = kzalloc(sizeof(struct kvmgt_vdev), GFP_KERNEL);
1975
1976         if (!vgpu->vdev)
1977                 return -ENOMEM;
1978
1979         kvmgt_vdev(vgpu)->vgpu = vgpu;
1980
1981         return 0;
1982 }
1983
1984 static void kvmgt_detach_vgpu(void *p_vgpu)
1985 {
1986         int i;
1987         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
1988         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
1989
1990         if (vdev->region) {
1991                 for (i = 0; i < vdev->num_regions; i++)
1992                         if (vdev->region[i].ops->release)
1993                                 vdev->region[i].ops->release(vgpu,
1994                                                 &vdev->region[i]);
1995                 vdev->num_regions = 0;
1996                 kfree(vdev->region);
1997                 vdev->region = NULL;
1998         }
1999
2000         /* vdev itself was allocated in kvmgt_attach_vgpu() */
2001         kfree(vdev);
2002 }
2003
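/*
 * Deliver an MSI to the guest by signalling the eventfd that userspace
 * registered via VFIO_DEVICE_SET_IRQS (kept in vdev->msi_trigger).
 */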
2004 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
2005 {
2006         struct kvmgt_guest_info *info;
2007         struct intel_vgpu *vgpu;
2008         struct kvmgt_vdev *vdev;
2009
2010         if (!handle_valid(handle))
2011                 return -ESRCH;
2012
2013         info = (struct kvmgt_guest_info *)handle;
2014         vgpu = info->vgpu;
2015         vdev = kvmgt_vdev(vgpu);
2016
2017         /*
2018          * When the guest powers off, msi_trigger is set to NULL, but the
2019          * vgpu's config space and MMIO registers are not restored to their
2020          * defaults.  If this vgpu is reused by the next VM, its pipes may
2021          * still be enabled, so it can receive vblank interrupt requests as
2022          * soon as it becomes active.  msi_trigger stays NULL until the new
2023          * guest enables MSI, so in that window return success and simply
2024          * don't inject the interrupt into the guest.
2025          */
2026         if (vdev->msi_trigger == NULL)
2027                 return 0;
2028
2029         if (eventfd_signal(vdev->msi_trigger, 1) == 1)
2030                 return 0;
2031
2032         return -EFAULT;
2033 }
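
/*
 * A minimal userspace sketch of how the msi_trigger eventfd above gets
 * registered, assuming a VFIO device fd for the vgpu is already available
 * and that a single MSI vector is used (the vgpu reports one vector).
 *
 *  #include <stdlib.h>
 *  #include <string.h>
 *  #include <unistd.h>
 *  #include <sys/eventfd.h>
 *  #include <sys/ioctl.h>
 *  #include <linux/vfio.h>
 *
 *  // Returns an eventfd that becomes readable on each injected MSI, or -1.
 *  static int enable_vgpu_msi(int device_fd)
 *  {
 *          size_t sz = sizeof(struct vfio_irq_set) + sizeof(int);
 *          struct vfio_irq_set *set;
 *          int efd, ret;
 *
 *          efd = eventfd(0, EFD_CLOEXEC);
 *          if (efd < 0)
 *                  return -1;
 *
 *          set = calloc(1, sz);
 *          if (!set) {
 *                  close(efd);
 *                  return -1;
 *          }
 *          set->argsz = sz;
 *          set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
 *                       VFIO_IRQ_SET_ACTION_TRIGGER;
 *          set->index = VFIO_PCI_MSI_IRQ_INDEX;
 *          set->start = 0;
 *          set->count = 1;
 *          memcpy(set->data, &efd, sizeof(int));
 *
 *          ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
 *          free(set);
 *          if (ret) {
 *                  close(efd);
 *                  return -1;
 *          }
 *          return efd;
 *  }
 */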
2034
2035 static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
2036 {
2037         struct kvmgt_guest_info *info;
2038         kvm_pfn_t pfn;
2039
2040         if (!handle_valid(handle))
2041                 return INTEL_GVT_INVALID_ADDR;
2042
2043         info = (struct kvmgt_guest_info *)handle;
2044
2045         pfn = gfn_to_pfn(info->kvm, gfn);
2046         if (is_error_noslot_pfn(pfn))
2047                 return INTEL_GVT_INVALID_ADDR;
2048
2049         return pfn;
2050 }
2051
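/*
 * Map a guest page for DMA and cache the gfn <-> dma_addr translation in the
 * per-vgpu cache; a repeated request for the same gfn and size only takes an
 * extra reference, while a size change unmaps and re-maps the page.
 */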
2052 static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
2053                 unsigned long size, dma_addr_t *dma_addr)
2054 {
2055         struct intel_vgpu *vgpu;
2056         struct kvmgt_vdev *vdev;
2057         struct gvt_dma *entry;
2058         int ret;
2059
2060         if (!handle_valid(handle))
2061                 return -EINVAL;
2062
2063         vgpu = ((struct kvmgt_guest_info *)handle)->vgpu;
2064         vdev = kvmgt_vdev(vgpu);
2065
2066         mutex_lock(&vdev->cache_lock);
2067
2068         entry = __gvt_cache_find_gfn(vgpu, gfn);
2069         if (!entry) {
2070                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
2071                 if (ret)
2072                         goto err_unlock;
2073
2074                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
2075                 if (ret)
2076                         goto err_unmap;
2077         } else if (entry->size != size) {
2078                 /* the same gfn with different size: unmap and re-map */
2079                 gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
2080                 __gvt_cache_remove_entry(vgpu, entry);
2081
2082                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
2083                 if (ret)
2084                         goto err_unlock;
2085
2086                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
2087                 if (ret)
2088                         goto err_unmap;
2089         } else {
2090                 kref_get(&entry->ref);
2091                 *dma_addr = entry->dma_addr;
2092         }
2093
2094         mutex_unlock(&vdev->cache_lock);
2095         return 0;
2096
2097 err_unmap:
2098         gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
2099 err_unlock:
2100         mutex_unlock(&vdev->cache_lock);
2101         return ret;
2102 }
2103
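/*
 * Take an extra reference on an existing DMA mapping, looked up by dma_addr,
 * so that a longer-lived user (e.g. an exported dma-buf) can keep the guest
 * page pinned.
 */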
2104 static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
2105 {
2106         struct kvmgt_guest_info *info;
2107         struct kvmgt_vdev *vdev;
2108         struct gvt_dma *entry;
2109         int ret = 0;
2110
2111         if (!handle_valid(handle))
2112                 return -ENODEV;
2113
2114         info = (struct kvmgt_guest_info *)handle;
2115         vdev = kvmgt_vdev(info->vgpu);
2116
2117         mutex_lock(&vdev->cache_lock);
2118         entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
2119         if (entry)
2120                 kref_get(&entry->ref);
2121         else
2122                 ret = -ENOMEM;
2123         mutex_unlock(&vdev->cache_lock);
2124
2125         return ret;
2126 }
2127
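/* kref release callback: unmap the page and drop the cache entry. */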
2128 static void __gvt_dma_release(struct kref *ref)
2129 {
2130         struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
2131
2132         gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
2133                            entry->size);
2134         __gvt_cache_remove_entry(entry->vgpu, entry);
2135 }
2136
2137 static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
2138 {
2139         struct intel_vgpu *vgpu;
2140         struct kvmgt_vdev *vdev;
2141         struct gvt_dma *entry;
2142
2143         if (!handle_valid(handle))
2144                 return;
2145
2146         vgpu = ((struct kvmgt_guest_info *)handle)->vgpu;
2147         vdev = kvmgt_vdev(vgpu);
2148
2149         mutex_lock(&vdev->cache_lock);
2150         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
2151         if (entry)
2152                 kref_put(&entry->ref, __gvt_dma_release);
2153         mutex_unlock(&vdev->cache_lock);
2154 }
2155
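/*
 * Access guest physical memory on behalf of the device model through
 * vfio_dma_rw() on the vgpu's VFIO group, so reads and writes respect the
 * user's IOMMU mappings.
 */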
2156 static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
2157                         void *buf, unsigned long len, bool write)
2158 {
2159         struct kvmgt_guest_info *info;
2160
2161         if (!handle_valid(handle))
2162                 return -ESRCH;
2163
2164         info = (struct kvmgt_guest_info *)handle;
2165
2166         return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group,
2167                            gpa, buf, len, write);
2168 }
2169
2170 static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
2171                         void *buf, unsigned long len)
2172 {
2173         return kvmgt_rw_gpa(handle, gpa, buf, len, false);
2174 }
2175
2176 static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
2177                         void *buf, unsigned long len)
2178 {
2179         return kvmgt_rw_gpa(handle, gpa, buf, len, true);
2180 }
2181
2182 static unsigned long kvmgt_virt_to_pfn(void *addr)
2183 {
2184         return PFN_DOWN(__pa(addr));
2185 }
2186
2187 static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn)
2188 {
2189         struct kvmgt_guest_info *info;
2190         struct kvm *kvm;
2191         int idx;
2192         bool ret;
2193
2194         if (!handle_valid(handle))
2195                 return false;
2196
2197         info = (struct kvmgt_guest_info *)handle;
2198         kvm = info->kvm;
2199
2200         idx = srcu_read_lock(&kvm->srcu);
2201         ret = kvm_is_visible_gfn(kvm, gfn);
2202         srcu_read_unlock(&kvm->srcu, idx);
2203
2204         return ret;
2205 }
2206
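/*
 * The mediated pass-through (MPT) hooks this module provides; they are
 * plugged into the GVT-g core as the KVM hypervisor backend by
 * intel_gvt_register_hypervisor() in kvmgt_init() below.
 */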
2207 static const struct intel_gvt_mpt kvmgt_mpt = {
2208         .type = INTEL_GVT_HYPERVISOR_KVM,
2209         .host_init = kvmgt_host_init,
2210         .host_exit = kvmgt_host_exit,
2211         .attach_vgpu = kvmgt_attach_vgpu,
2212         .detach_vgpu = kvmgt_detach_vgpu,
2213         .inject_msi = kvmgt_inject_msi,
2214         .from_virt_to_mfn = kvmgt_virt_to_pfn,
2215         .enable_page_track = kvmgt_page_track_add,
2216         .disable_page_track = kvmgt_page_track_remove,
2217         .read_gpa = kvmgt_read_gpa,
2218         .write_gpa = kvmgt_write_gpa,
2219         .gfn_to_mfn = kvmgt_gfn_to_pfn,
2220         .dma_map_guest_page = kvmgt_dma_map_guest_page,
2221         .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
2222         .dma_pin_guest_page = kvmgt_dma_pin_guest_page,
2223         .set_opregion = kvmgt_set_opregion,
2224         .set_edid = kvmgt_set_edid,
2225         .get_vfio_device = kvmgt_get_vfio_device,
2226         .put_vfio_device = kvmgt_put_vfio_device,
2227         .is_valid_gfn = kvmgt_is_valid_gfn,
2228 };
2229
2230 static int __init kvmgt_init(void)
2231 {
2232         if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0)
2233                 return -ENODEV;
2234         return 0;
2235 }
2236
2237 static void __exit kvmgt_exit(void)
2238 {
2239         intel_gvt_unregister_hypervisor();
2240 }
2241
2242 module_init(kvmgt_init);
2243 module_exit(kvmgt_exit);
2244
2245 MODULE_LICENSE("GPL and additional rights");
2246 MODULE_AUTHOR("Intel Corporation");