// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/dm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

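/*
 * Sentinel stored in vm_private_data to mark a VMA as claimed by a
 * mapping ioctl when no page array is needed (the non-auto-translated
 * cases below).
 */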
#define PRIV_VMA_LOCKED ((void *)1)

static unsigned int privcmd_dm_op_max_num = 16;
module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
MODULE_PARM_DESC(dm_op_max_nr_bufs,
                 "Maximum number of buffers per dm_op hypercall");

static unsigned int privcmd_dm_op_buf_max_size = 4096;
module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
                   0644);
MODULE_PARM_DESC(dm_op_buf_max_size,
                 "Maximum size of a dm_op hypercall buffer");

struct privcmd_data {
        domid_t domid;
};

static int privcmd_vma_range_is_mapped(
               struct vm_area_struct *vma,
               unsigned long addr,
               unsigned long nr_pages);

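/*
 * Pass a raw hypercall through to the hypervisor. Only permitted on
 * handles that have not been restricted to a specific domain, since an
 * arbitrary hypercall could bypass that restriction.
 */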
static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct privcmd_hypercall hypercall;
        long ret;

        /* Disallow arbitrary hypercalls if restricted */
        if (data->domid != DOMID_INVALID)
                return -EPERM;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        xen_preemptible_hcall_begin();
        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);
        xen_preemptible_hcall_end();

        return ret;
}

static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        const void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

/*
 * Similar to traverse_pages, but use each page as a "block" of
 * data to be processed as one unit.
 */
static int traverse_pages_block(unsigned nelem, size_t size,
                                struct list_head *pos,
                                int (*fn)(void *data, int nr, void *state),
                                void *state)
{
        void *pagedata;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        while (nelem) {
                int nr = (PAGE_SIZE/size);
                struct page *page;
                if (nr > nelem)
                        nr = nelem;
                pos = pos->next;
                page = list_entry(pos, struct page, lru);
                pagedata = page_address(page);
                ret = (*fn)(pagedata, nr, state);
                if (ret)
                        break;
                nelem -= nr;
        }

        return ret;
}

struct mmap_gfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

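/*
 * Map one contiguous chunk of foreign frames described by a
 * privcmd_mmap_entry; called once per entry via traverse_pages().
 */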
static int mmap_gfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_gfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_gfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain, NULL);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_gfn_state state;

        /* We only support privcmd_ioctl_mmap_batch for auto translated. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
                return -EPERM;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
                        goto out_up;
                vma->vm_private_data = PRIV_VMA_LOCKED;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_gfn_range, &state);


out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int index;
        /* A tristate:
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
        int version;

        /* User-space gfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_gfn;
        /* User-space int array to store errors in the second pass for V2. */
        int __user *user_err;
};

/* Note for auto-translated dom0: if the domU being created is PV, the gfn
 * is an mfn (address on the bus). If it is auto-translated, the gfn is a
 * pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, int nr, void *state)
{
        xen_pfn_t *gfnp = data;
        struct mmap_batch_state *st = state;
        struct vm_area_struct *vma = st->vma;
        struct page **pages = vma->vm_private_data;
        struct page **cur_pages = NULL;
        int ret;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                cur_pages = &pages[st->index];

        BUG_ON(nr < 0);
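        /*
         * On failure, xen_remap_domain_gfn_array() writes the per-frame
         * error codes over the incoming gfn array in place (note the
         * (int *)gfnp error pointer); mmap_return_errors() copies them
         * back to user space in a second pass.
         */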
        ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
                                         (int *)gfnp, st->vma->vm_page_prot,
                                         st->domain, cur_pages);

        /* Adjust the global_error? */
        if (ret != nr) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
                        /* Record that at least one error has happened. */
                        if (st->global_error == 0)
                                st->global_error = 1;
                }
        }
        st->va += XEN_PAGE_SIZE * nr;
        st->index += nr / XEN_PFN_PER_PAGE;

        return 0;
}

static int mmap_return_error(int err, struct mmap_batch_state *st)
{
        int ret;

        if (st->version == 1) {
                if (err) {
                        xen_pfn_t gfn;

                        ret = get_user(gfn, st->user_gfn);
                        if (ret < 0)
                                return ret;
                        /*
                         * V1 encodes the error codes in the top nibble
                         * of the 32-bit gfn (with its known limitations
                         * vis-a-vis 64-bit callers).
                         */
                        gfn |= (err == -ENOENT) ?
                                PRIVCMD_MMAPBATCH_PAGED_ERROR :
                                PRIVCMD_MMAPBATCH_MFN_ERROR;
                        return __put_user(gfn, st->user_gfn++);
                } else
                        st->user_gfn++;
        } else { /* st->version == 2 */
                if (err)
                        return __put_user(err, st->user_err++);
                else
                        st->user_err++;
        }

        return 0;
}

static int mmap_return_errors(void *data, int nr, void *state)
{
        struct mmap_batch_state *st = state;
        int *errs = data;
        int i;
        int ret;

        for (i = 0; i < nr; i++) {
                ret = mmap_return_error(errs[i], st);
                if (ret < 0)
                        return ret;
        }
        return 0;
}

/* Allocate pfns that are then mapped with gfns from the foreign domid.
 * Update the vma with the page info to use later.
 * Returns 0 on success, otherwise -errno.
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
        int rc;
        struct page **pages;

        pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
        if (pages == NULL)
                return -ENOMEM;

        rc = alloc_xenballooned_pages(numpgs, pages);
        if (rc != 0) {
                pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
                        numpgs, rc);
                kfree(pages);
                return -ENOMEM;
        }
        BUG_ON(vma->vm_private_data != NULL);
        vma->vm_private_data = pages;

        return 0;
}

static const struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(
        struct file *file, void __user *udata, int version)
{
        struct privcmd_data *data = file->private_data;
        int ret;
        struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
                        return -EFAULT;
                /* Returns per-frame error in m.arr. */
                m.err = NULL;
                if (!access_ok(m.arr, m.num * sizeof(*m.arr)))
                        return -EFAULT;
                break;
        case 2:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
                        return -EFAULT;
                /* Returns per-frame error code in m.err. */
                if (!access_ok(m.err, m.num * (sizeof(*m.err))))
                        return -EFAULT;
                break;
        default:
                return -EINVAL;
        }

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != m.dom)
                return -EPERM;

        nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

        if (ret)
                goto out;
        if (list_empty(&pagelist)) {
                ret = -EINVAL;
                goto out;
        }

        if (version == 2) {
                /* Zero error array now to only copy back actual errors. */
                if (clear_user(m.err, sizeof(int) * m.num)) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops) {
                ret = -EINVAL;
                goto out_unlock;
        }

        /*
         * Caller must either:
         *
         * Map the whole VMA range, which will also allocate all the
         * pages required for the auto_translated_physmap case.
         *
         * Or
         *
         * Map unmapped holes left from a previous map attempt (e.g.,
         * because those foreign frames were previously paged out).
         */
        if (vma->vm_private_data == NULL) {
                if (m.addr != vma->vm_start ||
                    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
                        ret = -EINVAL;
                        goto out_unlock;
                }
                if (xen_feature(XENFEAT_auto_translated_physmap)) {
                        ret = alloc_empty_pages(vma, nr_pages);
                        if (ret < 0)
                                goto out_unlock;
                } else
                        vma->vm_private_data = PRIV_VMA_LOCKED;
        } else {
                if (m.addr < vma->vm_start ||
                    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
                        ret = -EINVAL;
                        goto out_unlock;
                }
                if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
                        ret = -EINVAL;
                        goto out_unlock;
                }
        }

        state.domain        = m.dom;
        state.vma           = vma;
        state.va            = m.addr;
        state.index         = 0;
        state.global_error  = 0;
        state.version       = version;

        BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
                                    &pagelist, mmap_batch_fn, &state));

        up_write(&mm->mmap_sem);

        if (state.global_error) {
                /* Write back errors in second pass. */
                state.user_gfn = (xen_pfn_t *)m.arr;
                state.user_err = m.err;
                ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
                                           &pagelist, mmap_return_errors, &state);
        } else
                ret = 0;

        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
        if ((ret == 0) && (state.global_error == -ENOENT))
                ret = -ENOENT;

out:
        free_page_list(&pagelist);
        return ret;

out_unlock:
        up_write(&mm->mmap_sem);
        goto out;
}

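/*
 * Pin the user-space buffers for a dm_op hypercall with
 * get_user_pages_fast() so the hypervisor can access them safely;
 * unlock_pages() drops the references afterwards.
 */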
static int lock_pages(
        struct privcmd_dm_op_buf kbufs[], unsigned int num,
        struct page *pages[], unsigned int nr_pages)
{
        unsigned int i;

        for (i = 0; i < num; i++) {
                unsigned int requested;
                int pinned;

                requested = DIV_ROUND_UP(
                        offset_in_page(kbufs[i].uptr) + kbufs[i].size,
                        PAGE_SIZE);
                if (requested > nr_pages)
                        return -ENOSPC;

                pinned = get_user_pages_fast(
                        (unsigned long) kbufs[i].uptr,
                        requested, FOLL_WRITE, pages);
                if (pinned < 0)
                        return pinned;

                nr_pages -= pinned;
                pages += pinned;
        }

        return 0;
}

static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
        unsigned int i;

        if (!pages)
                return;

        for (i = 0; i < nr_pages; i++) {
                if (pages[i])
                        put_page(pages[i]);
        }
}

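/*
 * Issue a device-model (dm_op) hypercall on behalf of user space: copy
 * in the buffer descriptors, validate them against the module-parameter
 * limits above, pin the buffers in memory, then make the hypercall.
 */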
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct privcmd_dm_op kdata;
        struct privcmd_dm_op_buf *kbufs;
        unsigned int nr_pages = 0;
        struct page **pages = NULL;
        struct xen_dm_op_buf *xbufs = NULL;
        unsigned int i;
        long rc;

        if (copy_from_user(&kdata, udata, sizeof(kdata)))
                return -EFAULT;

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
                return -EPERM;

        if (kdata.num == 0)
                return 0;

        if (kdata.num > privcmd_dm_op_max_num)
                return -E2BIG;

        kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
        if (!kbufs)
                return -ENOMEM;

        if (copy_from_user(kbufs, kdata.ubufs,
                           sizeof(*kbufs) * kdata.num)) {
                rc = -EFAULT;
                goto out;
        }

        for (i = 0; i < kdata.num; i++) {
                if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
                        rc = -E2BIG;
                        goto out;
                }

                if (!access_ok(kbufs[i].uptr,
                               kbufs[i].size)) {
                        rc = -EFAULT;
                        goto out;
                }

                nr_pages += DIV_ROUND_UP(
                        offset_in_page(kbufs[i].uptr) + kbufs[i].size,
                        PAGE_SIZE);
        }

        pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
        if (!pages) {
                rc = -ENOMEM;
                goto out;
        }

        xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
        if (!xbufs) {
                rc = -ENOMEM;
                goto out;
        }

        rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
        if (rc)
                goto out;

        for (i = 0; i < kdata.num; i++) {
                set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
                xbufs[i].size = kbufs[i].size;
        }

        xen_preemptible_hcall_begin();
        rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
        xen_preemptible_hcall_end();

out:
        unlock_pages(pages, nr_pages);
        kfree(xbufs);
        kfree(pages);
        kfree(kbufs);

        return rc;
}

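/*
 * Restrict this file handle to a single domain. The restriction is
 * one-way: once set it cannot be cleared or changed for the lifetime
 * of the handle, only re-asserted with the same domid.
 */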
static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        domid_t dom;

        if (copy_from_user(&dom, udata, sizeof(dom)))
                return -EFAULT;

        /* Set restriction to the specified domain, or check it matches */
        if (data->domid == DOMID_INVALID)
                data->domid = dom;
        else if (data->domid != dom)
                return -EINVAL;

        return 0;
}

struct remap_pfn {
        struct mm_struct *mm;
        struct page **pages;
        pgprot_t prot;
        unsigned long i;
};

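/*
 * Called once per pte by apply_to_page_range(); installs a special
 * mapping to the next ballooned page backing the resource.
 */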
static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data)
{
        struct remap_pfn *r = data;
        struct page *page = r->pages[r->i];
        pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot));

        set_pte_at(r->mm, addr, ptep, pte);
        r->i++;

        return 0;
}

static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        struct privcmd_mmap_resource kdata;
        xen_pfn_t *pfns = NULL;
        struct xen_mem_acquire_resource xdata;
        int rc;

        if (copy_from_user(&kdata, udata, sizeof(kdata)))
                return -EFAULT;

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
                return -EPERM;

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, kdata.addr);
        if (!vma || vma->vm_ops != &privcmd_vm_ops) {
                rc = -EINVAL;
                goto out;
        }

        pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL);
        if (!pfns) {
                rc = -ENOMEM;
                goto out;
        }

        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE);
                struct page **pages;
                unsigned int i;

                rc = alloc_empty_pages(vma, nr);
                if (rc < 0)
                        goto out;

                pages = vma->vm_private_data;
                for (i = 0; i < kdata.num; i++) {
                        xen_pfn_t pfn =
                                page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);

                        pfns[i] = pfn + (i % XEN_PFN_PER_PAGE);
                }
        } else
                vma->vm_private_data = PRIV_VMA_LOCKED;

        memset(&xdata, 0, sizeof(xdata));
        xdata.domid = kdata.dom;
        xdata.type = kdata.type;
        xdata.id = kdata.id;
        xdata.frame = kdata.idx;
        xdata.nr_frames = kdata.num;
        set_xen_guest_handle(xdata.frame_list, pfns);

        xen_preemptible_hcall_begin();
        rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
        xen_preemptible_hcall_end();

        if (rc)
                goto out;

        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                struct remap_pfn r = {
                        .mm = vma->vm_mm,
                        .pages = vma->vm_private_data,
                        .prot = vma->vm_page_prot,
                };

                rc = apply_to_page_range(r.mm, kdata.addr,
                                         kdata.num << PAGE_SHIFT,
                                         remap_pfn_fn, &r);
        } else {
                unsigned int domid =
                        (xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
                        DOMID_SELF : kdata.dom;
                int num;

                num = xen_remap_domain_mfn_array(vma,
                                                 kdata.addr & PAGE_MASK,
                                                 pfns, kdata.num, (int *)pfns,
                                                 vma->vm_page_prot,
                                                 domid,
                                                 vma->vm_private_data);
                if (num < 0)
                        rc = num;
                else if (num != kdata.num) {
                        unsigned int i;

                        for (i = 0; i < num; i++) {
                                rc = pfns[i];
                                if (rc < 0)
                                        break;
                        }
                } else
                        rc = 0;
        }

out:
        up_write(&mm->mmap_sem);
        kfree(pfns);

        return rc;
}

static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOTTY;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(file, udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(file, udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(file, udata, 1);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH_V2:
                ret = privcmd_ioctl_mmap_batch(file, udata, 2);
                break;

        case IOCTL_PRIVCMD_DM_OP:
                ret = privcmd_ioctl_dm_op(file, udata);
                break;

        case IOCTL_PRIVCMD_RESTRICT:
                ret = privcmd_ioctl_restrict(file, udata);
                break;

        case IOCTL_PRIVCMD_MMAP_RESOURCE:
                ret = privcmd_ioctl_mmap_resource(file, udata);
                break;

        default:
                break;
        }

        return ret;
}

static int privcmd_open(struct inode *ino, struct file *file)
{
        struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);

        if (!data)
                return -ENOMEM;

        /* DOMID_INVALID implies no restriction */
        data->domid = DOMID_INVALID;

        file->private_data = data;
        return 0;
}

static int privcmd_release(struct inode *ino, struct file *file)
{
        struct privcmd_data *data = file->private_data;

        kfree(data);
        return 0;
}

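/*
 * VMA teardown for the auto-translated case: unmap the foreign frames
 * and hand the ballooned pages back to the balloon driver.
 */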
static void privcmd_close(struct vm_area_struct *vma)
{
        struct page **pages = vma->vm_private_data;
        int numpgs = vma_pages(vma);
        int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
        int rc;

        if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
                return;

        rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
        if (rc == 0)
                free_xenballooned_pages(numpgs, pages);
        else
                pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
                        numpgs, rc);
        kfree(pages);
}

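/*
 * All valid mappings are established through the ioctls above, so any
 * fault means user space touched an address that was never successfully
 * mapped; report SIGBUS rather than populating the page.
 */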
static vm_fault_t privcmd_fault(struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
               vmf->pgoff, (void *)vmf->address);

        return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct privcmd_vm_ops = {
        .close = privcmd_close,
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
                         VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

/*
 * For MMAPBATCH*. This allows asserting the singleshot mapping
 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 * can then be retried until they succeed.
 */
static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
{
        return pte_none(*pte) ? 0 : -EBUSY;
}

static int privcmd_vma_range_is_mapped(
                   struct vm_area_struct *vma,
                   unsigned long addr,
                   unsigned long nr_pages)
{
        return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
                                   is_mapped_fn, NULL) != 0;
}

const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .open = privcmd_open,
        .release = privcmd_release,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
        int err;

        if (!xen_domain())
                return -ENODEV;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                pr_err("Could not register Xen privcmd device\n");
                return err;
        }

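        /*
         * xen_privcmdbuf_dev (the companion hypercall-buf device) is
         * declared in privcmd.h and defined in privcmd-buf.c.
         */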
        err = misc_register(&xen_privcmdbuf_dev);
        if (err != 0) {
                pr_err("Could not register Xen hypercall-buf device\n");
                misc_deregister(&privcmd_dev);
                return err;
        }

        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
        misc_deregister(&xen_privcmdbuf_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);