Merge tag '5.12-smb3-part1' of git://git.samba.org/sfrench/cifs-2.6
[linux-2.6-microblaze.git] / drivers / vfio / vfio_iommu_type1.c
index 82ed8bf..4bb162c 100644 (file)
@@ -103,6 +103,14 @@ struct vfio_dma {
        unsigned long           *bitmap;
 };
 
+/*
+ * A batch of host pages obtained from a single pin_user_pages_remote()
+ * call, consumed incrementally by the caller via offset/size bookkeeping.
+ */
+struct vfio_batch {
+       struct page             **pages;        /* array filled by pin_user_pages_remote() */
+       struct page             *fallback_page; /* single-entry backing store if pages alloc fails */
+       int                     capacity;       /* number of entries in the pages array */
+       int                     size;           /* pages currently held in the batch */
+       int                     offset;         /* index of next unconsumed entry in pages */
+};
+
 struct vfio_group {
        struct iommu_group      *iommu_group;
        struct list_head        next;
@@ -459,6 +467,45 @@ static int put_pfn(unsigned long pfn, int prot)
        return 0;
 }
 
+/* Largest pages array that fits in a single page of memory. */
+#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
+
+/*
+ * Initialize a batch: try to back the pages array with a freshly
+ * allocated page (VFIO_BATCH_MAX_CAPACITY entries).  When hugepages are
+ * disabled, or when the allocation fails, fall back to a single-entry
+ * batch backed by the embedded fallback_page so callers always have a
+ * usable (if degenerate) batch.
+ */
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+       batch->size = 0;
+       batch->offset = 0;
+
+       /* NOTE(review): with disable_hugepages set, pages are pinned one at
+        * a time anyway, so skip the allocation — confirm against callers. */
+       if (unlikely(disable_hugepages))
+               goto fallback;
+
+       batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
+       if (!batch->pages)
+               goto fallback;
+
+       batch->capacity = VFIO_BATCH_MAX_CAPACITY;
+       return;
+
+fallback:
+       batch->pages = &batch->fallback_page;
+       batch->capacity = 1;
+}
+
+/*
+ * Drop the pin reference (put_pfn) on every page still held in the
+ * batch, consuming the batch in the process (size reaches 0).
+ */
+static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
+{
+       while (batch->size) {
+               unsigned long pfn = page_to_pfn(batch->pages[batch->offset]);
+
+               put_pfn(pfn, dma->prot);
+               batch->offset++;
+               batch->size--;
+       }
+}
+
+/*
+ * Tear down a batch: free the pages array, but only when
+ * vfio_batch_init() allocated one (capacity == VFIO_BATCH_MAX_CAPACITY);
+ * the single-entry fallback uses the embedded fallback_page and needs
+ * no freeing.
+ */
+static void vfio_batch_fini(struct vfio_batch *batch)
+{
+       if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
+               free_page((unsigned long)batch->pages);
+}
+
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                            unsigned long vaddr, unsigned long *pfn,
                            bool write_fault)
@@ -499,10 +546,10 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
  * Returns the positive number of pfns successfully obtained or a negative
  * error code.
  */
-static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
-                        int prot, unsigned long *pfn)
+static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+                         long npages, int prot, unsigned long *pfn,
+                         struct page **pages)
 {
-       struct page *page[1];
        struct vm_area_struct *vma;
        unsigned int flags = 0;
        int ret;
@@ -511,10 +558,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
                flags |= FOLL_WRITE;
 
        mmap_read_lock(mm);
-       ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM,
-                                   page, NULL, NULL);
-       if (ret == 1) {
-               *pfn = page_to_pfn(page[0]);
+       ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
+                                   pages, NULL, NULL);
+       if (ret > 0) {
+               *pfn = page_to_pfn(pages[0]);
                goto done;
        }
 
@@ -602,65 +649,90 @@ static int vfio_wait_all_valid(struct vfio_iommu *iommu)
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
                                  long npage, unsigned long *pfn_base,
-                                 unsigned long limit)
+                                 unsigned long limit, struct vfio_batch *batch)
 {
-       unsigned long pfn = 0;
+       unsigned long pfn;
+       struct mm_struct *mm = current->mm;
        long ret, pinned = 0, lock_acct = 0;
        bool rsvd;
        dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
 
        /* This code path is only user initiated */
-       if (!current->mm)
+       if (!mm)
                return -ENODEV;
 
-       ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base);
-       if (ret < 0)
-               return ret;
-
-       pinned++;
-       rsvd = is_invalid_reserved_pfn(*pfn_base);
-
-       /*
-        * Reserved pages aren't counted against the user, externally pinned
-        * pages are already counted against the user.
-        */
-       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-               if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
-                       put_pfn(*pfn_base, dma->prot);
-                       pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
-                                       limit << PAGE_SHIFT);
-                       return -ENOMEM;
-               }
-               lock_acct++;
+       if (batch->size) {
+               /* Leftover pages in batch from an earlier call. */
+               *pfn_base = page_to_pfn(batch->pages[batch->offset]);
+               pfn = *pfn_base;
+               rsvd = is_invalid_reserved_pfn(*pfn_base);
+       } else {
+               *pfn_base = 0;
        }
 
-       if (unlikely(disable_hugepages))
-               goto out;
+       while (npage) {
+               if (!batch->size) {
+                       /* Empty batch, so refill it. */
+                       long req_pages = min_t(long, npage, batch->capacity);
 
-       /* Lock all the consecutive pages from pfn_base */
-       for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
-            pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-               ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
-               if (ret < 0)
-                       break;
+                       ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
+                                            &pfn, batch->pages);
+                       if (ret < 0)
+                               goto unpin_out;
 
-               if (pfn != *pfn_base + pinned ||
-                   rsvd != is_invalid_reserved_pfn(pfn)) {
-                       put_pfn(pfn, dma->prot);
-                       break;
+                       batch->size = ret;
+                       batch->offset = 0;
+
+                       if (!*pfn_base) {
+                               *pfn_base = pfn;
+                               rsvd = is_invalid_reserved_pfn(*pfn_base);
+                       }
                }
 
-               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-                       if (!dma->lock_cap &&
-                           current->mm->locked_vm + lock_acct + 1 > limit) {
-                               put_pfn(pfn, dma->prot);
-                               pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
-                                       __func__, limit << PAGE_SHIFT);
-                               ret = -ENOMEM;
-                               goto unpin_out;
+               /*
+                * pfn is preset for the first iteration of this inner loop and
+                * updated at the end to handle a VM_PFNMAP pfn.  In that case,
+                * batch->pages isn't valid (there's no struct page), so allow
+                * batch->pages to be touched only when there's more than one
+                * pfn to check, which guarantees the pfns are from a
+                * !VM_PFNMAP vma.
+                */
+               while (true) {
+                       if (pfn != *pfn_base + pinned ||
+                           rsvd != is_invalid_reserved_pfn(pfn))
+                               goto out;
+
+                       /*
+                        * Reserved pages aren't counted against the user,
+                        * externally pinned pages are already counted against
+                        * the user.
+                        */
+                       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+                               if (!dma->lock_cap &&
+                                   mm->locked_vm + lock_acct + 1 > limit) {
+                                       pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
+                                               __func__, limit << PAGE_SHIFT);
+                                       ret = -ENOMEM;
+                                       goto unpin_out;
+                               }
+                               lock_acct++;
                        }
-                       lock_acct++;
+
+                       pinned++;
+                       npage--;
+                       vaddr += PAGE_SIZE;
+                       iova += PAGE_SIZE;
+                       batch->offset++;
+                       batch->size--;
+
+                       if (!batch->size)
+                               break;
+
+                       pfn = page_to_pfn(batch->pages[batch->offset]);
                }
+
+               if (unlikely(disable_hugepages))
+                       break;
        }
 
 out:
@@ -668,10 +740,11 @@ out:
 
 unpin_out:
        if (ret < 0) {
-               if (!rsvd) {
+               if (pinned && !rsvd) {
                        for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
                                put_pfn(pfn, dma->prot);
                }
+               vfio_batch_unpin(batch, dma);
 
                return ret;
        }
@@ -703,6 +776,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
                                  unsigned long *pfn_base, bool do_accounting)
 {
+       struct page *pages[1];
        struct mm_struct *mm;
        int ret;
 
@@ -710,7 +784,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
        if (!mm)
                return -ENODEV;
 
-       ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
+       ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
        if (ret == 1 && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
                ret = vfio_lock_acct(dma, 1, true);
                if (ret) {
@@ -1404,15 +1478,19 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 {
        dma_addr_t iova = dma->iova;
        unsigned long vaddr = dma->vaddr;
+       struct vfio_batch batch;
        size_t size = map_size;
        long npage;
        unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        int ret = 0;
 
+       vfio_batch_init(&batch);
+
        while (size) {
                /* Pin a contiguous chunk of memory */
                npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-                                             size >> PAGE_SHIFT, &pfn, limit);
+                                             size >> PAGE_SHIFT, &pfn, limit,
+                                             &batch);
                if (npage <= 0) {
                        WARN_ON(!npage);
                        ret = (int)npage;
@@ -1425,6 +1503,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
                if (ret) {
                        vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
                                                npage, true);
+                       vfio_batch_unpin(&batch, dma);
                        break;
                }
 
@@ -1432,6 +1511,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
                dma->size += npage << PAGE_SHIFT;
        }
 
+       vfio_batch_fini(&batch);
        dma->iommu_mapped = true;
 
        if (ret)
@@ -1608,6 +1688,7 @@ static int vfio_bus_type(struct device *dev, void *data)
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
                             struct vfio_domain *domain)
 {
+       struct vfio_batch batch;
        struct vfio_domain *d = NULL;
        struct rb_node *n;
        unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1622,6 +1703,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                d = list_first_entry(&iommu->domain_list,
                                     struct vfio_domain, next);
 
+       vfio_batch_init(&batch);
+
        n = rb_first(&iommu->dma_list);
 
        for (; n; n = rb_next(n)) {
@@ -1669,7 +1752,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
                                npage = vfio_pin_pages_remote(dma, vaddr,
                                                              n >> PAGE_SHIFT,
-                                                             &pfn, limit);
+                                                             &pfn, limit,
+                                                             &batch);
                                if (npage <= 0) {
                                        WARN_ON(!npage);
                                        ret = (int)npage;
@@ -1683,11 +1767,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                        ret = iommu_map(domain->domain, iova, phys,
                                        size, dma->prot | domain->prot);
                        if (ret) {
-                               if (!dma->iommu_mapped)
+                               if (!dma->iommu_mapped) {
                                        vfio_unpin_pages_remote(dma, iova,
                                                        phys >> PAGE_SHIFT,
                                                        size >> PAGE_SHIFT,
                                                        true);
+                                       vfio_batch_unpin(&batch, dma);
+                               }
                                goto unwind;
                        }
 
@@ -1702,6 +1788,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                dma->iommu_mapped = true;
        }
 
+       vfio_batch_fini(&batch);
        return 0;
 
 unwind:
@@ -1742,6 +1829,7 @@ unwind:
                }
        }
 
+       vfio_batch_fini(&batch);
        return ret;
 }