drm/i915/gvt: Wean gvt off using dev_priv
drivers/gpu/drm/i915/gvt/kvmgt.c
1 /*
2  * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  */
30
31 #include <linux/init.h>
32 #include <linux/device.h>
33 #include <linux/mm.h>
34 #include <linux/mmu_context.h>
35 #include <linux/sched/mm.h>
36 #include <linux/types.h>
37 #include <linux/list.h>
38 #include <linux/rbtree.h>
39 #include <linux/spinlock.h>
40 #include <linux/eventfd.h>
41 #include <linux/uuid.h>
42 #include <linux/kvm_host.h>
43 #include <linux/vfio.h>
44 #include <linux/mdev.h>
45 #include <linux/debugfs.h>
46
47 #include <linux/nospec.h>
48
49 #include "i915_drv.h"
50 #include "gvt.h"
51
52 static const struct intel_gvt_ops *intel_gvt_ops;
53
54 /* helper macros copied from vfio-pci */
55 #define VFIO_PCI_OFFSET_SHIFT   40
56 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
57 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
58 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
59
60 #define EDID_BLOB_OFFSET (PAGE_SIZE/2)
61
62 #define OPREGION_SIGNATURE "IntelGraphicsMem"
63
64 struct vfio_region;
65 struct intel_vgpu_regops {
66         ssize_t (*rw)(struct intel_vgpu *vgpu, char *buf,
67                         size_t count, loff_t *ppos, bool iswrite);
68         void (*release)(struct intel_vgpu *vgpu,
69                         struct vfio_region *region);
70 };
71
72 struct vfio_region {
73         u32                             type;
74         u32                             subtype;
75         size_t                          size;
76         u32                             flags;
77         const struct intel_vgpu_regops  *ops;
78         void                            *data;
79 };
80
81 struct vfio_edid_region {
82         struct vfio_region_gfx_edid vfio_edid_regs;
83         void *edid_blob;
84 };
85
86 struct kvmgt_pgfn {
87         gfn_t gfn;
88         struct hlist_node hnode;
89 };
90
91 struct kvmgt_guest_info {
92         struct kvm *kvm;
93         struct intel_vgpu *vgpu;
94         struct kvm_page_track_notifier_node track_node;
95 #define NR_BKT (1 << 18)
96         struct hlist_head ptable[NR_BKT];
97 #undef NR_BKT
98         struct dentry *debugfs_cache_entries;
99 };
100
101 struct gvt_dma {
102         struct intel_vgpu *vgpu;
103         struct rb_node gfn_node;
104         struct rb_node dma_addr_node;
105         gfn_t gfn;
106         dma_addr_t dma_addr;
107         unsigned long size;
108         struct kref ref;
109 };
110
111 struct kvmgt_vdev {
112         struct intel_vgpu *vgpu;
113         struct mdev_device *mdev;
114         struct vfio_region *region;
115         int num_regions;
116         struct eventfd_ctx *intx_trigger;
117         struct eventfd_ctx *msi_trigger;
118
119         /*
120          * Two caches are used to avoid mapping duplicated pages (e.g.
121          * scratch pages). This helps to reduce DMA setup overhead.
122          */
123         struct rb_root gfn_cache;
124         struct rb_root dma_addr_cache;
125         unsigned long nr_cache_entries;
126         struct mutex cache_lock;
127
128         struct notifier_block iommu_notifier;
129         struct notifier_block group_notifier;
130         struct kvm *kvm;
131         struct work_struct release_work;
132         atomic_t released;
133         struct vfio_device *vfio_device;
134 };
135
136 static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu)
137 {
138         return intel_vgpu_vdev(vgpu);
139 }
140
141 static inline bool handle_valid(unsigned long handle)
142 {
143         return !!(handle & ~0xff);
144 }
145
146 static int kvmgt_guest_init(struct mdev_device *mdev);
147 static void intel_vgpu_release_work(struct work_struct *work);
148 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
149
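/* Unpin the guest pages backing the gfn range previously pinned via vfio_pin_pages(). */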
150 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
151                 unsigned long size)
152 {
153         struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
154         int total_pages;
155         int npage;
156         int ret;
157
158         total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
159
160         for (npage = 0; npage < total_pages; npage++) {
161                 unsigned long cur_gfn = gfn + npage;
162
163                 ret = vfio_unpin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1);
164                 drm_WARN_ON(&i915->drm, ret != 1);
165         }
166 }
167
168 /* Pin a normal or compound guest page for dma. */
169 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
170                 unsigned long size, struct page **page)
171 {
172         unsigned long base_pfn = 0;
173         int total_pages;
174         int npage;
175         int ret;
176
177         total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
178         /*
179          * We pin the pages one-by-one to avoid allocating a big array
180          * on the stack to hold pfns.
181          */
182         for (npage = 0; npage < total_pages; npage++) {
183                 unsigned long cur_gfn = gfn + npage;
184                 unsigned long pfn;
185
186                 ret = vfio_pin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1,
187                                      IOMMU_READ | IOMMU_WRITE, &pfn);
188                 if (ret != 1) {
189                         gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
190                                      cur_gfn, ret);
191                         goto err;
192                 }
193
194                 if (!pfn_valid(pfn)) {
195                         gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
196                         npage++;
197                         ret = -EFAULT;
198                         goto err;
199                 }
200
201                 if (npage == 0)
202                         base_pfn = pfn;
203                 else if (base_pfn + npage != pfn) {
204                         gvt_vgpu_err("The pages are not contiguous\n");
205                         ret = -EINVAL;
206                         npage++;
207                         goto err;
208                 }
209         }
210
211         *page = pfn_to_page(base_pfn);
212         return 0;
213 err:
214         gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
215         return ret;
216 }
217
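/*
 * Pin the guest pages backing @gfn and set up a DMA mapping of @size bytes;
 * on success the bus address is returned in *dma_addr.
 */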
218 static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
219                 dma_addr_t *dma_addr, unsigned long size)
220 {
221         struct device *dev = &vgpu->gvt->gt->i915->drm.pdev->dev;
222         struct page *page = NULL;
223         int ret;
224
225         ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
226         if (ret)
227                 return ret;
228
229         /* Setup DMA mapping. */
230         *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
231         if (dma_mapping_error(dev, *dma_addr)) {
232                 gvt_vgpu_err("DMA mapping failed for pfn 0x%lx\n",
233                              page_to_pfn(page));
234                 gvt_unpin_guest_page(vgpu, gfn, size);
235                 return -ENOMEM;
236         }
237
238         return 0;
239 }
240
241 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
242                 dma_addr_t dma_addr, unsigned long size)
243 {
244         struct device *dev = &vgpu->gvt->gt->i915->drm.pdev->dev;
245
246         dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
247         gvt_unpin_guest_page(vgpu, gfn, size);
248 }
249
250 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
251                 dma_addr_t dma_addr)
252 {
253         struct rb_node *node = kvmgt_vdev(vgpu)->dma_addr_cache.rb_node;
254         struct gvt_dma *itr;
255
256         while (node) {
257                 itr = rb_entry(node, struct gvt_dma, dma_addr_node);
258
259                 if (dma_addr < itr->dma_addr)
260                         node = node->rb_left;
261                 else if (dma_addr > itr->dma_addr)
262                         node = node->rb_right;
263                 else
264                         return itr;
265         }
266         return NULL;
267 }
268
269 static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
270 {
271         struct rb_node *node = kvmgt_vdev(vgpu)->gfn_cache.rb_node;
272         struct gvt_dma *itr;
273
274         while (node) {
275                 itr = rb_entry(node, struct gvt_dma, gfn_node);
276
277                 if (gfn < itr->gfn)
278                         node = node->rb_left;
279                 else if (gfn > itr->gfn)
280                         node = node->rb_right;
281                 else
282                         return itr;
283         }
284         return NULL;
285 }
286
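/* Track a pinned mapping in both the gfn and dma_addr rb-tree caches. */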
287 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
288                 dma_addr_t dma_addr, unsigned long size)
289 {
290         struct gvt_dma *new, *itr;
291         struct rb_node **link, *parent = NULL;
292         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
293
294         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
295         if (!new)
296                 return -ENOMEM;
297
298         new->vgpu = vgpu;
299         new->gfn = gfn;
300         new->dma_addr = dma_addr;
301         new->size = size;
302         kref_init(&new->ref);
303
304         /* gfn_cache maps gfn to struct gvt_dma. */
305         link = &vdev->gfn_cache.rb_node;
306         while (*link) {
307                 parent = *link;
308                 itr = rb_entry(parent, struct gvt_dma, gfn_node);
309
310                 if (gfn < itr->gfn)
311                         link = &parent->rb_left;
312                 else
313                         link = &parent->rb_right;
314         }
315         rb_link_node(&new->gfn_node, parent, link);
316         rb_insert_color(&new->gfn_node, &vdev->gfn_cache);
317
318         /* dma_addr_cache maps dma addr to struct gvt_dma. */
319         parent = NULL;
320         link = &vdev->dma_addr_cache.rb_node;
321         while (*link) {
322                 parent = *link;
323                 itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
324
325                 if (dma_addr < itr->dma_addr)
326                         link = &parent->rb_left;
327                 else
328                         link = &parent->rb_right;
329         }
330         rb_link_node(&new->dma_addr_node, parent, link);
331         rb_insert_color(&new->dma_addr_node, &vdev->dma_addr_cache);
332
333         vdev->nr_cache_entries++;
334         return 0;
335 }
336
337 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
338                                 struct gvt_dma *entry)
339 {
340         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
341
342         rb_erase(&entry->gfn_node, &vdev->gfn_cache);
343         rb_erase(&entry->dma_addr_node, &vdev->dma_addr_cache);
344         kfree(entry);
345         vdev->nr_cache_entries--;
346 }
347
348 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
349 {
350         struct gvt_dma *dma;
351         struct rb_node *node = NULL;
352         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
353
354         for (;;) {
355                 mutex_lock(&vdev->cache_lock);
356                 node = rb_first(&vdev->gfn_cache);
357                 if (!node) {
358                         mutex_unlock(&vdev->cache_lock);
359                         break;
360                 }
361                 dma = rb_entry(node, struct gvt_dma, gfn_node);
362                 gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
363                 __gvt_cache_remove_entry(vgpu, dma);
364                 mutex_unlock(&vdev->cache_lock);
365         }
366 }
367
368 static void gvt_cache_init(struct intel_vgpu *vgpu)
369 {
370         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
371
372         vdev->gfn_cache = RB_ROOT;
373         vdev->dma_addr_cache = RB_ROOT;
374         vdev->nr_cache_entries = 0;
375         mutex_init(&vdev->cache_lock);
376 }
377
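/* Hash-table helpers tracking the guest frames (gfns) that are write-protected. */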
378 static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
379 {
380         hash_init(info->ptable);
381 }
382
383 static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
384 {
385         struct kvmgt_pgfn *p;
386         struct hlist_node *tmp;
387         int i;
388
389         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
390                 hash_del(&p->hnode);
391                 kfree(p);
392         }
393 }
394
395 static struct kvmgt_pgfn *
396 __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
397 {
398         struct kvmgt_pgfn *p, *res = NULL;
399
400         hash_for_each_possible(info->ptable, p, hnode, gfn) {
401                 if (gfn == p->gfn) {
402                         res = p;
403                         break;
404                 }
405         }
406
407         return res;
408 }
409
410 static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
411                                 gfn_t gfn)
412 {
413         struct kvmgt_pgfn *p;
414
415         p = __kvmgt_protect_table_find(info, gfn);
416         return !!p;
417 }
418
419 static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
420 {
421         struct kvmgt_pgfn *p;
422
423         if (kvmgt_gfn_is_write_protected(info, gfn))
424                 return;
425
426         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
427         if (WARN(!p, "gfn: 0x%llx\n", gfn))
428                 return;
429
430         p->gfn = gfn;
431         hash_add(info->ptable, &p->hnode, gfn);
432 }
433
434 static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
435                                 gfn_t gfn)
436 {
437         struct kvmgt_pgfn *p;
438
439         p = __kvmgt_protect_table_find(info, gfn);
440         if (p) {
441                 hash_del(&p->hnode);
442                 kfree(p);
443         }
444 }
445
446 static ssize_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
447                 size_t count, loff_t *ppos, bool iswrite)
448 {
449         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
450         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
451                         VFIO_PCI_NUM_REGIONS;
452         void *base = vdev->region[i].data;
453         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
454
455
456         if (pos >= vdev->region[i].size || iswrite) {
457                 gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
458                 return -EINVAL;
459         }
460         count = min(count, (size_t)(vdev->region[i].size - pos));
461         memcpy(buf, base + pos, count);
462
463         return count;
464 }
465
466 static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
467                 struct vfio_region *region)
468 {
469 }
470
471 static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
472         .rw = intel_vgpu_reg_rw_opregion,
473         .release = intel_vgpu_reg_release_opregion,
474 };
475
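/*
 * Handle 4-byte accesses to the vfio_region_gfx_edid control registers;
 * writes to link_state validate the EDID blob and trigger hotplug emulation.
 */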
476 static int handle_edid_regs(struct intel_vgpu *vgpu,
477                         struct vfio_edid_region *region, char *buf,
478                         size_t count, u16 offset, bool is_write)
479 {
480         struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
481         unsigned int data;
482
483         if (offset + count > sizeof(*regs))
484                 return -EINVAL;
485
486         if (count != 4)
487                 return -EINVAL;
488
489         if (is_write) {
490                 data = *((unsigned int *)buf);
491                 switch (offset) {
492                 case offsetof(struct vfio_region_gfx_edid, link_state):
493                         if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
494                                 if (!drm_edid_block_valid(
495                                         (u8 *)region->edid_blob,
496                                         0,
497                                         true,
498                                         NULL)) {
499                                         gvt_vgpu_err("invalid EDID blob\n");
500                                         return -EINVAL;
501                                 }
502                                 intel_gvt_ops->emulate_hotplug(vgpu, true);
503                         } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
504                                 intel_gvt_ops->emulate_hotplug(vgpu, false);
505                         else {
506                                 gvt_vgpu_err("invalid EDID link state %d\n",
507                                         data);
508                                 return -EINVAL;
509                         }
510                         regs->link_state = data;
511                         break;
512                 case offsetof(struct vfio_region_gfx_edid, edid_size):
513                         if (data > regs->edid_max_size) {
514                                 gvt_vgpu_err("EDID size is bigger than %d!\n",
515                                         regs->edid_max_size);
516                                 return -EINVAL;
517                         }
518                         regs->edid_size = data;
519                         break;
520                 default:
521                         /* read-only regs */
522                         gvt_vgpu_err("write read-only EDID region at offset %d\n",
523                                 offset);
524                         return -EPERM;
525                 }
526         } else {
527                 memcpy(buf, (char *)regs + offset, count);
528         }
529
530         return count;
531 }
532
533 static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
534                         size_t count, u16 offset, bool is_write)
535 {
536         if (offset + count > region->vfio_edid_regs.edid_size)
537                 return -EINVAL;
538
539         if (is_write)
540                 memcpy(region->edid_blob + offset, buf, count);
541         else
542                 memcpy(buf, region->edid_blob + offset, count);
543
544         return count;
545 }
546
547 static ssize_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
548                 size_t count, loff_t *ppos, bool iswrite)
549 {
550         int ret;
551         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
552                         VFIO_PCI_NUM_REGIONS;
553         struct vfio_edid_region *region =
554                 (struct vfio_edid_region *)kvmgt_vdev(vgpu)->region[i].data;
555         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
556
557         if (pos < region->vfio_edid_regs.edid_offset) {
558                 ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
559         } else {
560                 pos -= EDID_BLOB_OFFSET;
561                 ret = handle_edid_blob(region, buf, count, pos, iswrite);
562         }
563
564         if (ret < 0)
565                 gvt_vgpu_err("failed to access EDID region\n");
566
567         return ret;
568 }
569
570 static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
571                                         struct vfio_region *region)
572 {
573         kfree(region->data);
574 }
575
576 static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
577         .rw = intel_vgpu_reg_rw_edid,
578         .release = intel_vgpu_reg_release_edid,
579 };
580
581 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
582                 unsigned int type, unsigned int subtype,
583                 const struct intel_vgpu_regops *ops,
584                 size_t size, u32 flags, void *data)
585 {
586         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
587         struct vfio_region *region;
588
589         region = krealloc(vdev->region,
590                         (vdev->num_regions + 1) * sizeof(*region),
591                         GFP_KERNEL);
592         if (!region)
593                 return -ENOMEM;
594
595         vdev->region = region;
596         vdev->region[vdev->num_regions].type = type;
597         vdev->region[vdev->num_regions].subtype = subtype;
598         vdev->region[vdev->num_regions].ops = ops;
599         vdev->region[vdev->num_regions].size = size;
600         vdev->region[vdev->num_regions].flags = flags;
601         vdev->region[vdev->num_regions].data = data;
602         vdev->num_regions++;
603         return 0;
604 }
605
606 static int kvmgt_get_vfio_device(void *p_vgpu)
607 {
608         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
609         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
610
611         vdev->vfio_device = vfio_device_get_from_dev(
612                 mdev_dev(vdev->mdev));
613         if (!vdev->vfio_device) {
614                 gvt_vgpu_err("failed to get vfio device\n");
615                 return -ENODEV;
616         }
617         return 0;
618 }
619
620
621 static int kvmgt_set_opregion(void *p_vgpu)
622 {
623         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
624         void *base;
625         int ret;
626
627         /* Each vgpu has its own opregion, although VFIO would create another
628          * one later. This one is used to expose the opregion to VFIO; the
629          * one that VFIO creates later is what the guest actually uses.
630          */
631         base = vgpu_opregion(vgpu)->va;
632         if (!base)
633                 return -ENOMEM;
634
635         if (memcmp(base, OPREGION_SIGNATURE, 16)) {
636                 memunmap(base);
637                 return -EINVAL;
638         }
639
640         ret = intel_vgpu_register_reg(vgpu,
641                         PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
642                         VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
643                         &intel_vgpu_regops_opregion, OPREGION_SIZE,
644                         VFIO_REGION_INFO_FLAG_READ, base);
645
646         return ret;
647 }
648
649 static int kvmgt_set_edid(void *p_vgpu, int port_num)
650 {
651         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
652         struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
653         struct vfio_edid_region *base;
654         int ret;
655
656         base = kzalloc(sizeof(*base), GFP_KERNEL);
657         if (!base)
658                 return -ENOMEM;
659
660         /* TODO: Add multi-port and EDID extension block support */
661         base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
662         base->vfio_edid_regs.edid_max_size = EDID_SIZE;
663         base->vfio_edid_regs.edid_size = EDID_SIZE;
664         base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
665         base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
666         base->edid_blob = port->edid->edid_block;
667
668         ret = intel_vgpu_register_reg(vgpu,
669                         VFIO_REGION_TYPE_GFX,
670                         VFIO_REGION_SUBTYPE_GFX_EDID,
671                         &intel_vgpu_regops_edid, EDID_SIZE,
672                         VFIO_REGION_INFO_FLAG_READ |
673                         VFIO_REGION_INFO_FLAG_WRITE |
674                         VFIO_REGION_INFO_FLAG_CAPS, base);
675
676         return ret;
677 }
678
679 static void kvmgt_put_vfio_device(void *vgpu)
680 {
681         struct kvmgt_vdev *vdev = kvmgt_vdev((struct intel_vgpu *)vgpu);
682
683         if (WARN_ON(!vdev->vfio_device))
684                 return;
685
686         vfio_device_put(vdev->vfio_device);
687 }
688
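/* mdev 'create' callback: instantiate a vGPU of the requested type for this mdev. */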
689 static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
690 {
691         struct intel_vgpu *vgpu = NULL;
692         struct intel_vgpu_type *type;
693         struct device *pdev;
694         void *gvt;
695         int ret;
696
697         pdev = mdev_parent_dev(mdev);
698         gvt = kdev_to_i915(pdev)->gvt;
699
700         type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj));
701         if (!type) {
702                 gvt_vgpu_err("failed to find type %s to create\n",
703                                                 kobject_name(kobj));
704                 ret = -EINVAL;
705                 goto out;
706         }
707
708         vgpu = intel_gvt_ops->vgpu_create(gvt, type);
709         if (IS_ERR_OR_NULL(vgpu)) {
710                 ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
711                 gvt_err("failed to create intel vgpu: %d\n", ret);
712                 goto out;
713         }
714
715         INIT_WORK(&kvmgt_vdev(vgpu)->release_work, intel_vgpu_release_work);
716
717         kvmgt_vdev(vgpu)->mdev = mdev;
718         mdev_set_drvdata(mdev, vgpu);
719
720         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
721                      dev_name(mdev_dev(mdev)));
722         ret = 0;
723
724 out:
725         return ret;
726 }
727
728 static int intel_vgpu_remove(struct mdev_device *mdev)
729 {
730         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
731
732         if (handle_valid(vgpu->handle))
733                 return -EBUSY;
734
735         intel_gvt_ops->vgpu_destroy(vgpu);
736         return 0;
737 }
738
739 static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
740                                      unsigned long action, void *data)
741 {
742         struct kvmgt_vdev *vdev = container_of(nb,
743                                                struct kvmgt_vdev,
744                                                iommu_notifier);
745         struct intel_vgpu *vgpu = vdev->vgpu;
746
747         if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
748                 struct vfio_iommu_type1_dma_unmap *unmap = data;
749                 struct gvt_dma *entry;
750                 unsigned long iov_pfn, end_iov_pfn;
751
752                 iov_pfn = unmap->iova >> PAGE_SHIFT;
753                 end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
754
755                 mutex_lock(&vdev->cache_lock);
756                 for (; iov_pfn < end_iov_pfn; iov_pfn++) {
757                         entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
758                         if (!entry)
759                                 continue;
760
761                         gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
762                                            entry->size);
763                         __gvt_cache_remove_entry(vgpu, entry);
764                 }
765                 mutex_unlock(&vdev->cache_lock);
766         }
767
768         return NOTIFY_OK;
769 }
770
771 static int intel_vgpu_group_notifier(struct notifier_block *nb,
772                                      unsigned long action, void *data)
773 {
774         struct kvmgt_vdev *vdev = container_of(nb,
775                                                struct kvmgt_vdev,
776                                                group_notifier);
777
778         /* the only action we care about */
779         if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
780                 vdev->kvm = data;
781
782                 if (!data)
783                         schedule_work(&vdev->release_work);
784         }
785
786         return NOTIFY_OK;
787 }
788
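/*
 * mdev 'open' callback: register the VFIO IOMMU and group notifiers, take a
 * module reference and initialise the KVMGT guest state.
 */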
789 static int intel_vgpu_open(struct mdev_device *mdev)
790 {
791         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
792         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
793         unsigned long events;
794         int ret;
795
796         vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
797         vdev->group_notifier.notifier_call = intel_vgpu_group_notifier;
798
799         events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
800         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
801                                 &vdev->iommu_notifier);
802         if (ret != 0) {
803                 gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
804                         ret);
805                 goto out;
806         }
807
808         events = VFIO_GROUP_NOTIFY_SET_KVM;
809         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
810                                 &vdev->group_notifier);
811         if (ret != 0) {
812                 gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
813                         ret);
814                 goto undo_iommu;
815         }
816
817         /* Take a module reference as the mdev core doesn't take
818          * a reference for the vendor driver.
819          */
820         if (!try_module_get(THIS_MODULE))
821                 goto undo_group;
822
823         ret = kvmgt_guest_init(mdev);
824         if (ret)
825                 goto undo_group;
826
827         intel_gvt_ops->vgpu_activate(vgpu);
828
829         atomic_set(&vdev->released, 0);
830         return ret;
831
832 undo_group:
833         vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
834                                         &vdev->group_notifier);
835
836 undo_iommu:
837         vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
838                                         &vdev->iommu_notifier);
839 out:
840         return ret;
841 }
842
843 static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
844 {
845         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
846         struct eventfd_ctx *trigger;
847
848         trigger = vdev->msi_trigger;
849         if (trigger) {
850                 eventfd_ctx_put(trigger);
851                 vdev->msi_trigger = NULL;
852         }
853 }
854
855 static void __intel_vgpu_release(struct intel_vgpu *vgpu)
856 {
857         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
858         struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
859         struct kvmgt_guest_info *info;
860         int ret;
861
862         if (!handle_valid(vgpu->handle))
863                 return;
864
865         if (atomic_cmpxchg(&vdev->released, 0, 1))
866                 return;
867
868         intel_gvt_ops->vgpu_release(vgpu);
869
870         ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_IOMMU_NOTIFY,
871                                         &vdev->iommu_notifier);
872         drm_WARN(&i915->drm, ret,
873                  "vfio_unregister_notifier for iommu failed: %d\n", ret);
874
875         ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_GROUP_NOTIFY,
876                                         &vdev->group_notifier);
877         drm_WARN(&i915->drm, ret,
878                  "vfio_unregister_notifier for group failed: %d\n", ret);
879
880         /* release the module reference taken at open */
881         module_put(THIS_MODULE);
882
883         info = (struct kvmgt_guest_info *)vgpu->handle;
884         kvmgt_guest_exit(info);
885
886         intel_vgpu_release_msi_eventfd_ctx(vgpu);
887
888         vdev->kvm = NULL;
889         vgpu->handle = 0;
890 }
891
892 static void intel_vgpu_release(struct mdev_device *mdev)
893 {
894         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
895
896         __intel_vgpu_release(vgpu);
897 }
898
899 static void intel_vgpu_release_work(struct work_struct *work)
900 {
901         struct kvmgt_vdev *vdev = container_of(work, struct kvmgt_vdev,
902                                                release_work);
903
904         __intel_vgpu_release(vdev->vgpu);
905 }
906
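/* Decode the guest-programmed base address of @bar from the virtual config space. */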
907 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
908 {
909         u32 start_lo, start_hi;
910         u32 mem_type;
911
912         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
913                         PCI_BASE_ADDRESS_MEM_MASK;
914         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
915                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
916
917         switch (mem_type) {
918         case PCI_BASE_ADDRESS_MEM_TYPE_64:
919                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
920                                                 + bar + 4));
921                 break;
922         case PCI_BASE_ADDRESS_MEM_TYPE_32:
923         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
924                 /* 1M mem BAR treated as 32-bit BAR */
925         default:
926                 /* mem unknown type treated as 32-bit BAR */
927                 start_hi = 0;
928                 break;
929         }
930
931         return ((u64)start_hi << 32) | start_lo;
932 }
933
934 static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
935                              void *buf, unsigned int count, bool is_write)
936 {
937         u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
938         int ret;
939
940         if (is_write)
941                 ret = intel_gvt_ops->emulate_mmio_write(vgpu,
942                                         bar_start + off, buf, count);
943         else
944                 ret = intel_gvt_ops->emulate_mmio_read(vgpu,
945                                         bar_start + off, buf, count);
946         return ret;
947 }
948
949 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
950 {
951         return off >= vgpu_aperture_offset(vgpu) &&
952                off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
953 }
954
955 static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
956                 void *buf, unsigned long count, bool is_write)
957 {
958         void __iomem *aperture_va;
959
960         if (!intel_vgpu_in_aperture(vgpu, off) ||
961             !intel_vgpu_in_aperture(vgpu, off + count)) {
962                 gvt_vgpu_err("Invalid aperture offset %llu\n", off);
963                 return -EINVAL;
964         }
965
966         aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
967                                         ALIGN_DOWN(off, PAGE_SIZE),
968                                         count + offset_in_page(off));
969         if (!aperture_va)
970                 return -EIO;
971
972         if (is_write)
973                 memcpy_toio(aperture_va + offset_in_page(off), buf, count);
974         else
975                 memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
976
977         io_mapping_unmap(aperture_va);
978
979         return 0;
980 }
981
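/*
 * Dispatch an access at *ppos to the emulated config space, BAR0 MMIO,
 * the BAR2 aperture, or a device-specific region (OpRegion/EDID).
 */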
982 static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
983                         size_t count, loff_t *ppos, bool is_write)
984 {
985         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
986         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
987         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
988         u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
989         int ret = -EINVAL;
990
991
992         if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) {
993                 gvt_vgpu_err("invalid index: %u\n", index);
994                 return -EINVAL;
995         }
996
997         switch (index) {
998         case VFIO_PCI_CONFIG_REGION_INDEX:
999                 if (is_write)
1000                         ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
1001                                                 buf, count);
1002                 else
1003                         ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
1004                                                 buf, count);
1005                 break;
1006         case VFIO_PCI_BAR0_REGION_INDEX:
1007                 ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
1008                                         buf, count, is_write);
1009                 break;
1010         case VFIO_PCI_BAR2_REGION_INDEX:
1011                 ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
1012                 break;
1013         case VFIO_PCI_BAR1_REGION_INDEX:
1014         case VFIO_PCI_BAR3_REGION_INDEX:
1015         case VFIO_PCI_BAR4_REGION_INDEX:
1016         case VFIO_PCI_BAR5_REGION_INDEX:
1017         case VFIO_PCI_VGA_REGION_INDEX:
1018         case VFIO_PCI_ROM_REGION_INDEX:
1019                 break;
1020         default:
1021                 if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
1022                         return -EINVAL;
1023
1024                 index -= VFIO_PCI_NUM_REGIONS;
1025                 return vdev->region[index].ops->rw(vgpu, buf, count,
1026                                 ppos, is_write);
1027         }
1028
1029         return ret == 0 ? count : ret;
1030 }
1031
1032 static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
1033 {
1034         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1035         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
1036         struct intel_gvt *gvt = vgpu->gvt;
1037         int offset;
1038
1039         /* Only allow MMIO GGTT entry access */
1040         if (index != PCI_BASE_ADDRESS_0)
1041                 return false;
1042
1043         offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
1044                 intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
1045
1046         return (offset >= gvt->device_info.gtt_start_offset &&
1047                 offset < gvt->device_info.gtt_start_offset +
1048                         gvt_ggtt_sz(gvt));
1049 }
1050
1051 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
1052                         size_t count, loff_t *ppos)
1053 {
1054         unsigned int done = 0;
1055         int ret;
1056
1057         while (count) {
1058                 size_t filled;
1059
1060                 /* 8-byte reads are only supported for GGTT entries */
1061                 if (count >= 8 && !(*ppos % 8) &&
1062                         gtt_entry(mdev, ppos)) {
1063                         u64 val;
1064
1065                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1066                                         ppos, false);
1067                         if (ret <= 0)
1068                                 goto read_err;
1069
1070                         if (copy_to_user(buf, &val, sizeof(val)))
1071                                 goto read_err;
1072
1073                         filled = 8;
1074                 } else if (count >= 4 && !(*ppos % 4)) {
1075                         u32 val;
1076
1077                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1078                                         ppos, false);
1079                         if (ret <= 0)
1080                                 goto read_err;
1081
1082                         if (copy_to_user(buf, &val, sizeof(val)))
1083                                 goto read_err;
1084
1085                         filled = 4;
1086                 } else if (count >= 2 && !(*ppos % 2)) {
1087                         u16 val;
1088
1089                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1090                                         ppos, false);
1091                         if (ret <= 0)
1092                                 goto read_err;
1093
1094                         if (copy_to_user(buf, &val, sizeof(val)))
1095                                 goto read_err;
1096
1097                         filled = 2;
1098                 } else {
1099                         u8 val;
1100
1101                         ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
1102                                         false);
1103                         if (ret <= 0)
1104                                 goto read_err;
1105
1106                         if (copy_to_user(buf, &val, sizeof(val)))
1107                                 goto read_err;
1108
1109                         filled = 1;
1110                 }
1111
1112                 count -= filled;
1113                 done += filled;
1114                 *ppos += filled;
1115                 buf += filled;
1116         }
1117
1118         return done;
1119
1120 read_err:
1121         return -EFAULT;
1122 }
1123
1124 static ssize_t intel_vgpu_write(struct mdev_device *mdev,
1125                                 const char __user *buf,
1126                                 size_t count, loff_t *ppos)
1127 {
1128         unsigned int done = 0;
1129         int ret;
1130
1131         while (count) {
1132                 size_t filled;
1133
1134                 /* 8-byte writes are only supported for GGTT entries */
1135                 if (count >= 8 && !(*ppos % 8) &&
1136                         gtt_entry(mdev, ppos)) {
1137                         u64 val;
1138
1139                         if (copy_from_user(&val, buf, sizeof(val)))
1140                                 goto write_err;
1141
1142                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1143                                         ppos, true);
1144                         if (ret <= 0)
1145                                 goto write_err;
1146
1147                         filled = 8;
1148                 } else if (count >= 4 && !(*ppos % 4)) {
1149                         u32 val;
1150
1151                         if (copy_from_user(&val, buf, sizeof(val)))
1152                                 goto write_err;
1153
1154                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1155                                         ppos, true);
1156                         if (ret <= 0)
1157                                 goto write_err;
1158
1159                         filled = 4;
1160                 } else if (count >= 2 && !(*ppos % 2)) {
1161                         u16 val;
1162
1163                         if (copy_from_user(&val, buf, sizeof(val)))
1164                                 goto write_err;
1165
1166                         ret = intel_vgpu_rw(mdev, (char *)&val,
1167                                         sizeof(val), ppos, true);
1168                         if (ret <= 0)
1169                                 goto write_err;
1170
1171                         filled = 2;
1172                 } else {
1173                         u8 val;
1174
1175                         if (copy_from_user(&val, buf, sizeof(val)))
1176                                 goto write_err;
1177
1178                         ret = intel_vgpu_rw(mdev, &val, sizeof(val),
1179                                         ppos, true);
1180                         if (ret <= 0)
1181                                 goto write_err;
1182
1183                         filled = 1;
1184                 }
1185
1186                 count -= filled;
1187                 done += filled;
1188                 *ppos += filled;
1189                 buf += filled;
1190         }
1191
1192         return done;
1193 write_err:
1194         return -EFAULT;
1195 }
1196
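/* mmap handler: only the BAR2 aperture range may be mapped directly into userspace. */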
1197 static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
1198 {
1199         unsigned int index;
1200         u64 virtaddr;
1201         unsigned long req_size, pgoff, req_start;
1202         pgprot_t pg_prot;
1203         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1204
1205         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1206         if (index >= VFIO_PCI_ROM_REGION_INDEX)
1207                 return -EINVAL;
1208
1209         if (vma->vm_end < vma->vm_start)
1210                 return -EINVAL;
1211         if ((vma->vm_flags & VM_SHARED) == 0)
1212                 return -EINVAL;
1213         if (index != VFIO_PCI_BAR2_REGION_INDEX)
1214                 return -EINVAL;
1215
1216         pg_prot = vma->vm_page_prot;
1217         virtaddr = vma->vm_start;
1218         req_size = vma->vm_end - vma->vm_start;
1219         pgoff = vma->vm_pgoff &
1220                 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1221         req_start = pgoff << PAGE_SHIFT;
1222
1223         if (!intel_vgpu_in_aperture(vgpu, req_start))
1224                 return -EINVAL;
1225         if (req_start + req_size >
1226             vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1227                 return -EINVAL;
1228
1229         pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1230
1231         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1232 }
1233
1234 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1235 {
1236         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1237                 return 1;
1238
1239         return 0;
1240 }
1241
1242 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1243                         unsigned int index, unsigned int start,
1244                         unsigned int count, u32 flags,
1245                         void *data)
1246 {
1247         return 0;
1248 }
1249
1250 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1251                         unsigned int index, unsigned int start,
1252                         unsigned int count, u32 flags, void *data)
1253 {
1254         return 0;
1255 }
1256
1257 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1258                 unsigned int index, unsigned int start, unsigned int count,
1259                 u32 flags, void *data)
1260 {
1261         return 0;
1262 }
1263
1264 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1265                 unsigned int index, unsigned int start, unsigned int count,
1266                 u32 flags, void *data)
1267 {
1268         struct eventfd_ctx *trigger;
1269
1270         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1271                 int fd = *(int *)data;
1272
1273                 trigger = eventfd_ctx_fdget(fd);
1274                 if (IS_ERR(trigger)) {
1275                         gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1276                         return PTR_ERR(trigger);
1277                 }
1278                 kvmgt_vdev(vgpu)->msi_trigger = trigger;
1279         } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1280                 intel_vgpu_release_msi_eventfd_ctx(vgpu);
1281
1282         return 0;
1283 }
1284
1285 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1286                 unsigned int index, unsigned int start, unsigned int count,
1287                 void *data)
1288 {
1289         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1290                         unsigned int start, unsigned int count, u32 flags,
1291                         void *data) = NULL;
1292
1293         switch (index) {
1294         case VFIO_PCI_INTX_IRQ_INDEX:
1295                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1296                 case VFIO_IRQ_SET_ACTION_MASK:
1297                         func = intel_vgpu_set_intx_mask;
1298                         break;
1299                 case VFIO_IRQ_SET_ACTION_UNMASK:
1300                         func = intel_vgpu_set_intx_unmask;
1301                         break;
1302                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1303                         func = intel_vgpu_set_intx_trigger;
1304                         break;
1305                 }
1306                 break;
1307         case VFIO_PCI_MSI_IRQ_INDEX:
1308                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1309                 case VFIO_IRQ_SET_ACTION_MASK:
1310                 case VFIO_IRQ_SET_ACTION_UNMASK:
1311                         /* XXX Need masking support exported */
1312                         break;
1313                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1314                         func = intel_vgpu_set_msi_trigger;
1315                         break;
1316                 }
1317                 break;
1318         }
1319
1320         if (!func)
1321                 return -ENOTTY;
1322
1323         return func(vgpu, index, start, count, flags, data);
1324 }
1325
1326 static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
1327                              unsigned long arg)
1328 {
1329         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1330         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
1331         unsigned long minsz;
1332
1333         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1334
1335         if (cmd == VFIO_DEVICE_GET_INFO) {
1336                 struct vfio_device_info info;
1337
1338                 minsz = offsetofend(struct vfio_device_info, num_irqs);
1339
1340                 if (copy_from_user(&info, (void __user *)arg, minsz))
1341                         return -EFAULT;
1342
1343                 if (info.argsz < minsz)
1344                         return -EINVAL;
1345
1346                 info.flags = VFIO_DEVICE_FLAGS_PCI;
1347                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
1348                 info.num_regions = VFIO_PCI_NUM_REGIONS +
1349                                 vdev->num_regions;
1350                 info.num_irqs = VFIO_PCI_NUM_IRQS;
1351
1352                 return copy_to_user((void __user *)arg, &info, minsz) ?
1353                         -EFAULT : 0;
1354
1355         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1356                 struct vfio_region_info info;
1357                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1358                 unsigned int i;
1359                 int ret;
1360                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1361                 int nr_areas = 1;
1362                 int cap_type_id;
1363
1364                 minsz = offsetofend(struct vfio_region_info, offset);
1365
1366                 if (copy_from_user(&info, (void __user *)arg, minsz))
1367                         return -EFAULT;
1368
1369                 if (info.argsz < minsz)
1370                         return -EINVAL;
1371
1372                 switch (info.index) {
1373                 case VFIO_PCI_CONFIG_REGION_INDEX:
1374                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1375                         info.size = vgpu->gvt->device_info.cfg_space_size;
1376                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1377                                      VFIO_REGION_INFO_FLAG_WRITE;
1378                         break;
1379                 case VFIO_PCI_BAR0_REGION_INDEX:
1380                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1381                         info.size = vgpu->cfg_space.bar[info.index].size;
1382                         if (!info.size) {
1383                                 info.flags = 0;
1384                                 break;
1385                         }
1386
1387                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1388                                      VFIO_REGION_INFO_FLAG_WRITE;
1389                         break;
1390                 case VFIO_PCI_BAR1_REGION_INDEX:
1391                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1392                         info.size = 0;
1393                         info.flags = 0;
1394                         break;
1395                 case VFIO_PCI_BAR2_REGION_INDEX:
1396                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1397                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1398                                         VFIO_REGION_INFO_FLAG_MMAP |
1399                                         VFIO_REGION_INFO_FLAG_READ |
1400                                         VFIO_REGION_INFO_FLAG_WRITE;
1401                         info.size = gvt_aperture_sz(vgpu->gvt);
1402
1403                         sparse = kzalloc(struct_size(sparse, areas, nr_areas),
1404                                          GFP_KERNEL);
1405                         if (!sparse)
1406                                 return -ENOMEM;
1407
1408                         sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1409                         sparse->header.version = 1;
1410                         sparse->nr_areas = nr_areas;
1411                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1412                         sparse->areas[0].offset =
1413                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1414                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1415                         break;
1416
1417                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1418                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1419                         info.size = 0;
1420                         info.flags = 0;
1421
1422                         gvt_dbg_core("get region info bar:%d\n", info.index);
1423                         break;
1424
1425                 case VFIO_PCI_ROM_REGION_INDEX:
1426                 case VFIO_PCI_VGA_REGION_INDEX:
1427                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1428                         info.size = 0;
1429                         info.flags = 0;
1430
1431                         gvt_dbg_core("get region info index:%d\n", info.index);
1432                         break;
1433                 default:
1434                         {
1435                                 struct vfio_region_info_cap_type cap_type = {
1436                                         .header.id = VFIO_REGION_INFO_CAP_TYPE,
1437                                         .header.version = 1 };
1438
1439                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
1440                                                 vdev->num_regions)
1441                                         return -EINVAL;
1442                                 info.index =
1443                                         array_index_nospec(info.index,
1444                                                         VFIO_PCI_NUM_REGIONS +
1445                                                         vdev->num_regions);
1446
1447                                 i = info.index - VFIO_PCI_NUM_REGIONS;
1448
1449                                 info.offset =
1450                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
1451                                 info.size = vdev->region[i].size;
1452                                 info.flags = vdev->region[i].flags;
1453
1454                                 cap_type.type = vdev->region[i].type;
1455                                 cap_type.subtype = vdev->region[i].subtype;
1456
1457                                 ret = vfio_info_add_capability(&caps,
1458                                                         &cap_type.header,
1459                                                         sizeof(cap_type));
1460                                 if (ret)
1461                                         return ret;
1462                         }
1463                 }
1464
1465                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1466                         switch (cap_type_id) {
1467                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1468                                 ret = vfio_info_add_capability(&caps,
1469                                         &sparse->header,
1470                                         struct_size(sparse, areas,
1471                                                     sparse->nr_areas));
1472                                 if (ret) {
1473                                         kfree(sparse);
1474                                         return ret;
1475                                 }
1476                                 break;
1477                         default:
1478                                 kfree(sparse);
1479                                 return -EINVAL;
1480                         }
1481                 }
1482
1483                 if (caps.size) {
1484                         info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1485                         if (info.argsz < sizeof(info) + caps.size) {
1486                                 info.argsz = sizeof(info) + caps.size;
1487                                 info.cap_offset = 0;
1488                         } else {
1489                                 vfio_info_cap_shift(&caps, sizeof(info));
1490                                 if (copy_to_user((void __user *)arg +
1491                                                   sizeof(info), caps.buf,
1492                                                   caps.size)) {
1493                                         kfree(caps.buf);
1494                                         kfree(sparse);
1495                                         return -EFAULT;
1496                                 }
1497                                 info.cap_offset = sizeof(info);
1498                         }
1499
1500                         kfree(caps.buf);
1501                 }
1502
1503                 kfree(sparse);
1504                 return copy_to_user((void __user *)arg, &info, minsz) ?
1505                         -EFAULT : 0;
1506         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1507                 struct vfio_irq_info info;
1508
1509                 minsz = offsetofend(struct vfio_irq_info, count);
1510
1511                 if (copy_from_user(&info, (void __user *)arg, minsz))
1512                         return -EFAULT;
1513
1514                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1515                         return -EINVAL;
1516
1517                 switch (info.index) {
1518                 case VFIO_PCI_INTX_IRQ_INDEX:
1519                 case VFIO_PCI_MSI_IRQ_INDEX:
1520                         break;
1521                 default:
1522                         return -EINVAL;
1523                 }
1524
1525                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1526
1527                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1528
1529                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1530                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1531                                        VFIO_IRQ_INFO_AUTOMASKED);
1532                 else
1533                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1534
1535                 return copy_to_user((void __user *)arg, &info, minsz) ?
1536                         -EFAULT : 0;
1537         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1538                 struct vfio_irq_set hdr;
1539                 u8 *data = NULL;
1540                 int ret = 0;
1541                 size_t data_size = 0;
1542
1543                 minsz = offsetofend(struct vfio_irq_set, count);
1544
1545                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1546                         return -EFAULT;
1547
1548                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1549                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1550
1551                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1552                                                 VFIO_PCI_NUM_IRQS, &data_size);
1553                         if (ret) {
1554                                 gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1555                                 return -EINVAL;
1556                         }
1557                         if (data_size) {
1558                                 data = memdup_user((void __user *)(arg + minsz),
1559                                                    data_size);
1560                                 if (IS_ERR(data))
1561                                         return PTR_ERR(data);
1562                         }
1563                 }
1564
1565                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1566                                         hdr.start, hdr.count, data);
1567                 kfree(data);
1568
1569                 return ret;
1570         } else if (cmd == VFIO_DEVICE_RESET) {
1571                 intel_gvt_ops->vgpu_reset(vgpu);
1572                 return 0;
1573         } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1574                 struct vfio_device_gfx_plane_info dmabuf;
1575                 int ret = 0;
1576
1577                 minsz = offsetofend(struct vfio_device_gfx_plane_info,
1578                                     dmabuf_id);
1579                 if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1580                         return -EFAULT;
1581                 if (dmabuf.argsz < minsz)
1582                         return -EINVAL;
1583
1584                 ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf);
1585                 if (ret != 0)
1586                         return ret;
1587
1588                 return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1589                                                                 -EFAULT : 0;
1590         } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1591                 __u32 dmabuf_id;
1592                 __s32 dmabuf_fd;
1593
1594                 if (get_user(dmabuf_id, (__u32 __user *)arg))
1595                         return -EFAULT;
1596
1597                 dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id);
1598                 return dmabuf_fd;
1599
1600         }
1601
1602         return -ENOTTY;
1603 }
1604
1605 static ssize_t
1606 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1607              char *buf)
1608 {
1609         struct mdev_device *mdev = mdev_from_dev(dev);
1610
1611         if (mdev) {
1612                 struct intel_vgpu *vgpu = (struct intel_vgpu *)
1613                         mdev_get_drvdata(mdev);
1614                 return sprintf(buf, "%d\n", vgpu->id);
1615         }
1616         return sprintf(buf, "\n");
1617 }
1618
1619 static DEVICE_ATTR_RO(vgpu_id);
1620
1621 static struct attribute *intel_vgpu_attrs[] = {
1622         &dev_attr_vgpu_id.attr,
1623         NULL
1624 };
1625
1626 static const struct attribute_group intel_vgpu_group = {
1627         .name = "intel_vgpu",
1628         .attrs = intel_vgpu_attrs,
1629 };
1630
1631 static const struct attribute_group *intel_vgpu_groups[] = {
1632         &intel_vgpu_group,
1633         NULL,
1634 };
1635
1636 static struct mdev_parent_ops intel_vgpu_ops = {
1637         .mdev_attr_groups       = intel_vgpu_groups,
1638         .create                 = intel_vgpu_create,
1639         .remove                 = intel_vgpu_remove,
1640
1641         .open                   = intel_vgpu_open,
1642         .release                = intel_vgpu_release,
1643
1644         .read                   = intel_vgpu_read,
1645         .write                  = intel_vgpu_write,
1646         .mmap                   = intel_vgpu_mmap,
1647         .ioctl                  = intel_vgpu_ioctl,
1648 };
1649
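/*
 * .host_init hook of kvmgt_mpt (see the table near the end of this file):
 * fetch the vGPU type attribute groups from the gvt core, wire them into
 * the mdev parent ops and register the device as an mdev parent, which
 * exposes the supported vGPU types through the standard mdev sysfs
 * interface.
 */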
1650 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
1651 {
1652         struct attribute_group **kvm_vgpu_type_groups;
1653
1654         intel_gvt_ops = ops;
1655         if (!intel_gvt_ops->get_gvt_attrs(&kvm_vgpu_type_groups))
1656                 return -EFAULT;
1657         intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups;
1658
1659         return mdev_register_device(dev, &intel_vgpu_ops);
1660 }
1661
1662 static void kvmgt_host_exit(struct device *dev)
1663 {
1664         mdev_unregister_device(dev);
1665 }
1666
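/*
 * Guest-page write protection is built on KVM's page-track framework.
 * Both helpers below take kvm->srcu to look up the memslot and
 * kvm->mmu_lock while updating the local protect table, and treat
 * re-adding an already tracked gfn (or removing an untracked one) as a
 * no-op.
 */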
1667 static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
1668 {
1669         struct kvmgt_guest_info *info;
1670         struct kvm *kvm;
1671         struct kvm_memory_slot *slot;
1672         int idx;
1673
1674         if (!handle_valid(handle))
1675                 return -ESRCH;
1676
1677         info = (struct kvmgt_guest_info *)handle;
1678         kvm = info->kvm;
1679
1680         idx = srcu_read_lock(&kvm->srcu);
1681         slot = gfn_to_memslot(kvm, gfn);
1682         if (!slot) {
1683                 srcu_read_unlock(&kvm->srcu, idx);
1684                 return -EINVAL;
1685         }
1686
1687         spin_lock(&kvm->mmu_lock);
1688
1689         if (kvmgt_gfn_is_write_protected(info, gfn))
1690                 goto out;
1691
1692         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1693         kvmgt_protect_table_add(info, gfn);
1694
1695 out:
1696         spin_unlock(&kvm->mmu_lock);
1697         srcu_read_unlock(&kvm->srcu, idx);
1698         return 0;
1699 }
1700
1701 static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
1702 {
1703         struct kvmgt_guest_info *info;
1704         struct kvm *kvm;
1705         struct kvm_memory_slot *slot;
1706         int idx;
1707
1708         if (!handle_valid(handle))
1709                 return 0;
1710
1711         info = (struct kvmgt_guest_info *)handle;
1712         kvm = info->kvm;
1713
1714         idx = srcu_read_lock(&kvm->srcu);
1715         slot = gfn_to_memslot(kvm, gfn);
1716         if (!slot) {
1717                 srcu_read_unlock(&kvm->srcu, idx);
1718                 return -EINVAL;
1719         }
1720
1721         spin_lock(&kvm->mmu_lock);
1722
1723         if (!kvmgt_gfn_is_write_protected(info, gfn))
1724                 goto out;
1725
1726         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1727         kvmgt_protect_table_del(info, gfn);
1728
1729 out:
1730         spin_unlock(&kvm->mmu_lock);
1731         srcu_read_unlock(&kvm->srcu, idx);
1732         return 0;
1733 }
1734
1735 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1736                 const u8 *val, int len,
1737                 struct kvm_page_track_notifier_node *node)
1738 {
1739         struct kvmgt_guest_info *info = container_of(node,
1740                                         struct kvmgt_guest_info, track_node);
1741
1742         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1743                 intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
1744                                                      (void *)val, len);
1745 }
1746
1747 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1748                 struct kvm_memory_slot *slot,
1749                 struct kvm_page_track_notifier_node *node)
1750 {
1751         int i;
1752         gfn_t gfn;
1753         struct kvmgt_guest_info *info = container_of(node,
1754                                         struct kvmgt_guest_info, track_node);
1755
1756         spin_lock(&kvm->mmu_lock);
1757         for (i = 0; i < slot->npages; i++) {
1758                 gfn = slot->base_gfn + i;
1759                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1760                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1761                                                 KVM_PAGE_TRACK_WRITE);
1762                         kvmgt_protect_table_del(info, gfn);
1763                 }
1764         }
1765         spin_unlock(&kvm->mmu_lock);
1766 }
1767
1768 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1769 {
1770         struct intel_vgpu *itr;
1771         struct kvmgt_guest_info *info;
1772         int id;
1773         bool ret = false;
1774
1775         mutex_lock(&vgpu->gvt->lock);
1776         for_each_active_vgpu(vgpu->gvt, itr, id) {
1777                 if (!handle_valid(itr->handle))
1778                         continue;
1779
1780                 info = (struct kvmgt_guest_info *)itr->handle;
1781                 if (kvm && kvm == info->kvm) {
1782                         ret = true;
1783                         goto out;
1784                 }
1785         }
1786 out:
1787         mutex_unlock(&vgpu->gvt->lock);
1788         return ret;
1789 }
1790
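/*
 * Bind a vGPU to the KVM instance recorded in its kvmgt_vdev. The opaque
 * vgpu->handle is simply a pointer to the kvmgt_guest_info, which pins
 * the kvm with kvm_get_kvm(), registers the page-track notifier and
 * exposes a debugfs counter for the DMA cache; kvmgt_guest_exit() undoes
 * all of this.
 */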
1791 static int kvmgt_guest_init(struct mdev_device *mdev)
1792 {
1793         struct kvmgt_guest_info *info;
1794         struct intel_vgpu *vgpu;
1795         struct kvmgt_vdev *vdev;
1796         struct kvm *kvm;
1797
1798         vgpu = mdev_get_drvdata(mdev);
1799         if (handle_valid(vgpu->handle))
1800                 return -EEXIST;
1801
1802         vdev = kvmgt_vdev(vgpu);
1803         kvm = vdev->kvm;
1804         if (!kvm || kvm->mm != current->mm) {
1805                 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
1806                 return -ESRCH;
1807         }
1808
1809         if (__kvmgt_vgpu_exist(vgpu, kvm))
1810                 return -EEXIST;
1811
1812         info = vzalloc(sizeof(struct kvmgt_guest_info));
1813         if (!info)
1814                 return -ENOMEM;
1815
1816         vgpu->handle = (unsigned long)info;
1817         info->vgpu = vgpu;
1818         info->kvm = kvm;
1819         kvm_get_kvm(info->kvm);
1820
1821         kvmgt_protect_table_init(info);
1822         gvt_cache_init(vgpu);
1823
1824         info->track_node.track_write = kvmgt_page_track_write;
1825         info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
1826         kvm_page_track_register_notifier(kvm, &info->track_node);
1827
1828         info->debugfs_cache_entries = debugfs_create_ulong(
1829                                                 "kvmgt_nr_cache_entries",
1830                                                 0444, vgpu->debugfs,
1831                                                 &vdev->nr_cache_entries);
1832         return 0;
1833 }
1834
1835 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
1836 {
1837         debugfs_remove(info->debugfs_cache_entries);
1838
1839         kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
1840         kvm_put_kvm(info->kvm);
1841         kvmgt_protect_table_destroy(info);
1842         gvt_cache_destroy(info->vgpu);
1843         vfree(info);
1844
1845         return true;
1846 }
1847
1848 static int kvmgt_attach_vgpu(void *p_vgpu, unsigned long *handle)
1849 {
1850         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
1851
1852         vgpu->vdev = kzalloc(sizeof(struct kvmgt_vdev), GFP_KERNEL);
1853
1854         if (!vgpu->vdev)
1855                 return -ENOMEM;
1856
1857         kvmgt_vdev(vgpu)->vgpu = vgpu;
1858
1859         return 0;
1860 }
1861
1862 static void kvmgt_detach_vgpu(void *p_vgpu)
1863 {
1864         int i;
1865         struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
1866         struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
1867
1868         if (!vdev->region)
1869                 return;
1870
1871         for (i = 0; i < vdev->num_regions; i++)
1872                 if (vdev->region[i].ops->release)
1873                         vdev->region[i].ops->release(vgpu,
1874                                         &vdev->region[i]);
1875         vdev->num_regions = 0;
1876         kfree(vdev->region);
1877         vdev->region = NULL;
1878
1879         kfree(vdev);
1880 }
1881
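/*
 * Deliver a virtual MSI by signalling the eventfd that user space bound
 * to the MSI index via VFIO_DEVICE_SET_IRQS (see the ioctl handler
 * above); with a KVM guest this eventfd is typically wired up as an
 * irqfd, so injection does not need another trip through user space.
 */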
1882 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
1883 {
1884         struct kvmgt_guest_info *info;
1885         struct intel_vgpu *vgpu;
1886         struct kvmgt_vdev *vdev;
1887
1888         if (!handle_valid(handle))
1889                 return -ESRCH;
1890
1891         info = (struct kvmgt_guest_info *)handle;
1892         vgpu = info->vgpu;
1893         vdev = kvmgt_vdev(vgpu);
1894
1895         /*
1896          * When the guest powers off, msi_trigger is set to NULL, but the
1897          * vGPU's config space and MMIO registers are not restored to their
1898          * defaults. If this vGPU is reused by the next VM, its pipes may
1899          * still be enabled, so it will receive vblank interrupt requests as
1900          * soon as it becomes active. However, msi_trigger stays NULL until
1901          * the guest enables MSI, so in that case return success without
1902          * injecting an interrupt into the guest.
1903          */
1904         if (vdev->msi_trigger == NULL)
1905                 return 0;
1906
1907         if (eventfd_signal(vdev->msi_trigger, 1) == 1)
1908                 return 0;
1909
1910         return -EFAULT;
1911 }
1912
1913 static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
1914 {
1915         struct kvmgt_guest_info *info;
1916         kvm_pfn_t pfn;
1917
1918         if (!handle_valid(handle))
1919                 return INTEL_GVT_INVALID_ADDR;
1920
1921         info = (struct kvmgt_guest_info *)handle;
1922
1923         pfn = gfn_to_pfn(info->kvm, gfn);
1924         if (is_error_noslot_pfn(pfn))
1925                 return INTEL_GVT_INVALID_ADDR;
1926
1927         return pfn;
1928 }
1929
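/*
 * Map a guest page for DMA and cache the result. The per-vGPU cache is
 * keyed by gfn: a hit with the same size only takes an extra kref, a hit
 * with a different size is unmapped and re-mapped, and callers are
 * expected to balance each successful map with
 * kvmgt_dma_unmap_guest_page(), which drops the reference.
 */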
1930 static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
1931                 unsigned long size, dma_addr_t *dma_addr)
1932 {
1933         struct intel_vgpu *vgpu;
1934         struct kvmgt_vdev *vdev;
1935         struct gvt_dma *entry;
1936         int ret;
1937
1938         if (!handle_valid(handle))
1939                 return -EINVAL;
1940
1941         vgpu = ((struct kvmgt_guest_info *)handle)->vgpu;
1942         vdev = kvmgt_vdev(vgpu);
1943
1944         mutex_lock(&vdev->cache_lock);
1945
1946         entry = __gvt_cache_find_gfn(vgpu, gfn);
1947         if (!entry) {
1948                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1949                 if (ret)
1950                         goto err_unlock;
1951
1952                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1953                 if (ret)
1954                         goto err_unmap;
1955         } else if (entry->size != size) {
1956                 /* the same gfn with different size: unmap and re-map */
1957                 gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
1958                 __gvt_cache_remove_entry(vgpu, entry);
1959
1960                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1961                 if (ret)
1962                         goto err_unlock;
1963
1964                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1965                 if (ret)
1966                         goto err_unmap;
1967         } else {
1968                 kref_get(&entry->ref);
1969                 *dma_addr = entry->dma_addr;
1970         }
1971
1972         mutex_unlock(&vdev->cache_lock);
1973         return 0;
1974
1975 err_unmap:
1976         gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
1977 err_unlock:
1978         mutex_unlock(&vdev->cache_lock);
1979         return ret;
1980 }
1981
1982 static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
1983 {
1984         struct kvmgt_guest_info *info;
1985         struct kvmgt_vdev *vdev;
1986         struct gvt_dma *entry;
1987         int ret = 0;
1988
1989         if (!handle_valid(handle))
1990                 return -ENODEV;
1991
1992         info = (struct kvmgt_guest_info *)handle;
1993         vdev = kvmgt_vdev(info->vgpu);
1994
1995         mutex_lock(&vdev->cache_lock);
1996         entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
1997         if (entry)
1998                 kref_get(&entry->ref);
1999         else
2000                 ret = -ENOMEM;
2001         mutex_unlock(&vdev->cache_lock);
2002
2003         return ret;
2004 }
2005
2006 static void __gvt_dma_release(struct kref *ref)
2007 {
2008         struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
2009
2010         gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
2011                            entry->size);
2012         __gvt_cache_remove_entry(entry->vgpu, entry);
2013 }
2014
2015 static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
2016 {
2017         struct intel_vgpu *vgpu;
2018         struct kvmgt_vdev *vdev;
2019         struct gvt_dma *entry;
2020
2021         if (!handle_valid(handle))
2022                 return;
2023
2024         vgpu = ((struct kvmgt_guest_info *)handle)->vgpu;
2025         vdev = kvmgt_vdev(vgpu);
2026
2027         mutex_lock(&vdev->cache_lock);
2028         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
2029         if (entry)
2030                 kref_put(&entry->ref, __gvt_dma_release);
2031         mutex_unlock(&vdev->cache_lock);
2032 }
2033
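/*
 * Read or write guest physical memory through KVM. When called from a
 * kernel thread (current->mm == NULL), temporarily adopt the VM owner's
 * mm (kvm->mm) via mmget_not_zero()/use_mm() so that kvm_read_guest()/
 * kvm_write_guest() can resolve the user mappings backing guest memory.
 */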
2034 static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
2035                         void *buf, unsigned long len, bool write)
2036 {
2037         struct kvmgt_guest_info *info;
2038         struct kvm *kvm;
2039         int idx, ret;
2040         bool kthread = current->mm == NULL;
2041
2042         if (!handle_valid(handle))
2043                 return -ESRCH;
2044
2045         info = (struct kvmgt_guest_info *)handle;
2046         kvm = info->kvm;
2047
2048         if (kthread) {
2049                 if (!mmget_not_zero(kvm->mm))
2050                         return -EFAULT;
2051                 use_mm(kvm->mm);
2052         }
2053
2054         idx = srcu_read_lock(&kvm->srcu);
2055         ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
2056                       kvm_read_guest(kvm, gpa, buf, len);
2057         srcu_read_unlock(&kvm->srcu, idx);
2058
2059         if (kthread) {
2060                 unuse_mm(kvm->mm);
2061                 mmput(kvm->mm);
2062         }
2063
2064         return ret;
2065 }
2066
2067 static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
2068                         void *buf, unsigned long len)
2069 {
2070         return kvmgt_rw_gpa(handle, gpa, buf, len, false);
2071 }
2072
2073 static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
2074                         void *buf, unsigned long len)
2075 {
2076         return kvmgt_rw_gpa(handle, gpa, buf, len, true);
2077 }
2078
2079 static unsigned long kvmgt_virt_to_pfn(void *addr)
2080 {
2081         return PFN_DOWN(__pa(addr));
2082 }
2083
2084 static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn)
2085 {
2086         struct kvmgt_guest_info *info;
2087         struct kvm *kvm;
2088         int idx;
2089         bool ret;
2090
2091         if (!handle_valid(handle))
2092                 return false;
2093
2094         info = (struct kvmgt_guest_info *)handle;
2095         kvm = info->kvm;
2096
2097         idx = srcu_read_lock(&kvm->srcu);
2098         ret = kvm_is_visible_gfn(kvm, gfn);
2099         srcu_read_unlock(&kvm->srcu, idx);
2100
2101         return ret;
2102 }
2103
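/*
 * The MPT (mediated pass-through) operations implemented by this KVM
 * backend. kvmgt_init() hands this table to the gvt core via
 * intel_gvt_register_hypervisor(); the gvt core then reaches the helpers
 * above through these hooks.
 */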
2104 static struct intel_gvt_mpt kvmgt_mpt = {
2105         .type = INTEL_GVT_HYPERVISOR_KVM,
2106         .host_init = kvmgt_host_init,
2107         .host_exit = kvmgt_host_exit,
2108         .attach_vgpu = kvmgt_attach_vgpu,
2109         .detach_vgpu = kvmgt_detach_vgpu,
2110         .inject_msi = kvmgt_inject_msi,
2111         .from_virt_to_mfn = kvmgt_virt_to_pfn,
2112         .enable_page_track = kvmgt_page_track_add,
2113         .disable_page_track = kvmgt_page_track_remove,
2114         .read_gpa = kvmgt_read_gpa,
2115         .write_gpa = kvmgt_write_gpa,
2116         .gfn_to_mfn = kvmgt_gfn_to_pfn,
2117         .dma_map_guest_page = kvmgt_dma_map_guest_page,
2118         .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
2119         .dma_pin_guest_page = kvmgt_dma_pin_guest_page,
2120         .set_opregion = kvmgt_set_opregion,
2121         .set_edid = kvmgt_set_edid,
2122         .get_vfio_device = kvmgt_get_vfio_device,
2123         .put_vfio_device = kvmgt_put_vfio_device,
2124         .is_valid_gfn = kvmgt_is_valid_gfn,
2125 };
2126
2127 static int __init kvmgt_init(void)
2128 {
2129         if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0)
2130                 return -ENODEV;
2131         return 0;
2132 }
2133
2134 static void __exit kvmgt_exit(void)
2135 {
2136         intel_gvt_unregister_hypervisor();
2137 }
2138
2139 module_init(kvmgt_init);
2140 module_exit(kvmgt_exit);
2141
2142 MODULE_LICENSE("GPL and additional rights");
2143 MODULE_AUTHOR("Intel Corporation");