Merge tag 'x86_seves_for_v5.10_rc3' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-microblaze.git] / drivers / vhost / vdpa.c
index 62a9bb0..2754f30 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/nospec.h>
 #include <linux/vhost.h>
 #include <linux/virtio_net.h>
-#include <linux/kernel.h>
 
 #include "vhost.h"
 
@@ -48,6 +47,7 @@ struct vhost_vdpa {
        int minor;
        struct eventfd_ctx *config_ctx;
        int in_batch;
+       struct vdpa_iova_range range;
 };
 
 static DEFINE_IDA(vhost_vdpa_ida);
@@ -97,26 +97,23 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
                return;
 
        irq = ops->get_vq_irq(vdpa, qid);
-       spin_lock(&vq->call_ctx.ctx_lock);
        irq_bypass_unregister_producer(&vq->call_ctx.producer);
-       if (!vq->call_ctx.ctx || irq < 0) {
-               spin_unlock(&vq->call_ctx.ctx_lock);
+       if (!vq->call_ctx.ctx || irq < 0)
                return;
-       }
 
        vq->call_ctx.producer.token = vq->call_ctx.ctx;
        vq->call_ctx.producer.irq = irq;
        ret = irq_bypass_register_producer(&vq->call_ctx.producer);
-       spin_unlock(&vq->call_ctx.ctx_lock);
+       if (unlikely(ret))
+               dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
+                        qid, vq->call_ctx.producer.token, ret);
 }
 
 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 {
        struct vhost_virtqueue *vq = &v->vqs[qid];
 
-       spin_lock(&vq->call_ctx.ctx_lock);
        irq_bypass_unregister_producer(&vq->call_ctx.producer);
-       spin_unlock(&vq->call_ctx.ctx_lock);
 }
 
 static void vhost_vdpa_reset(struct vhost_vdpa *v)
@@ -344,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
        return 0;
 }
 
+/*
+ * Copy the IOVA range cached in @v->range out to the user buffer @argp.
+ *
+ * Returns 0 on success, or -EFAULT if the user buffer could not be written.
+ */
+static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
+{
+       struct vhost_vdpa_iova_range range = {
+               .first = v->range.first,
+               .last = v->range.last,
+       };
+
+       /*
+        * copy_to_user() returns the number of bytes left uncopied, not an
+        * errno, so translate any non-zero result into -EFAULT the same way
+        * the backend-features ioctls in this file do.
+        */
+       if (copy_to_user(argp, &range, sizeof(range)))
+               return -EFAULT;
+
+       return 0;
+}
+
 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
                                   void __user *argp)
 {
@@ -428,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
        void __user *argp = (void __user *)arg;
        u64 __user *featurep = argp;
        u64 features;
-       long r;
+       long r = 0;
 
        if (cmd == VHOST_SET_BACKEND_FEATURES) {
-               r = copy_from_user(&features, featurep, sizeof(features));
-               if (r)
-                       return r;
+               if (copy_from_user(&features, featurep, sizeof(features)))
+                       return -EFAULT;
                if (features & ~VHOST_VDPA_BACKEND_FEATURES)
                        return -EOPNOTSUPP;
                vhost_set_backend_features(&v->vdev, features);
@@ -476,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
                break;
        case VHOST_GET_BACKEND_FEATURES:
                features = VHOST_VDPA_BACKEND_FEATURES;
-               r = copy_to_user(featurep, &features, sizeof(features));
+               if (copy_to_user(featurep, &features, sizeof(features)))
+                       r = -EFAULT;
+               break;
+       case VHOST_VDPA_GET_IOVA_RANGE:
+               r = vhost_vdpa_get_iova_range(v, argp);
                break;
        default:
                r = vhost_dev_ioctl(&v->vdev, cmd, argp);
@@ -595,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
        struct vhost_dev *dev = &v->vdev;
        struct vhost_iotlb *iotlb = dev->iotlb;
        struct page **page_list;
-       struct vm_area_struct **vmas;
+       unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
        unsigned int gup_flags = FOLL_LONGTERM;
-       unsigned long map_pfn, last_pfn = 0;
-       unsigned long npages, lock_limit;
-       unsigned long i, nmap = 0;
+       unsigned long npages, cur_base, map_pfn, last_pfn = 0;
+       unsigned long locked, lock_limit, pinned, i;
        u64 iova = msg->iova;
-       long pinned;
        int ret = 0;
 
+       if (msg->iova < v->range.first ||
+           msg->iova + msg->size - 1 > v->range.last)
+               return -EINVAL;
+
        if (vhost_iotlb_itree_first(iotlb, msg->iova,
                                    msg->iova + msg->size - 1))
                return -EEXIST;
 
+       page_list = (struct page **) __get_free_page(GFP_KERNEL);
+       if (!page_list)
+               return -ENOMEM;
+
        if (msg->perm & VHOST_ACCESS_WO)
                gup_flags |= FOLL_WRITE;
 
@@ -615,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
        if (!npages)
                return -EINVAL;
 
-       page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
-       vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
-                             GFP_KERNEL);
-       if (!page_list || !vmas) {
-               ret = -ENOMEM;
-               goto free;
-       }
-
        mmap_read_lock(dev->mm);
 
+       locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-       if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
-               ret = -ENOMEM;
-               goto unlock;
-       }
 
-       pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
-                               page_list, vmas);
-       if (npages != pinned) {
-               if (pinned < 0) {
-                       ret = pinned;
-               } else {
-                       unpin_user_pages(page_list, pinned);
-                       ret = -ENOMEM;
-               }
-               goto unlock;
+       if (locked > lock_limit) {
+               ret = -ENOMEM;
+               goto out;
        }
 
+       cur_base = msg->uaddr & PAGE_MASK;
        iova &= PAGE_MASK;
-       map_pfn = page_to_pfn(page_list[0]);
-
-       /* One more iteration to avoid extra vdpa_map() call out of loop. */
-       for (i = 0; i <= npages; i++) {
-               unsigned long this_pfn;
-               u64 csize;
-
-               /* The last chunk may have no valid PFN next to it */
-               this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
-
-               if (last_pfn && (this_pfn == -1UL ||
-                                this_pfn != last_pfn + 1)) {
-                       /* Pin a contiguous chunk of memory */
-                       csize = last_pfn - map_pfn + 1;
-                       ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
-                                            map_pfn << PAGE_SHIFT,
-                                            msg->perm);
-                       if (ret) {
-                               /*
-                                * Unpin the rest chunks of memory on the
-                                * flight with no corresponding vdpa_map()
-                                * calls having been made yet. On the other
-                                * hand, vdpa_unmap() in the failure path
-                                * is in charge of accounting the number of
-                                * pinned pages for its own.
-                                * This asymmetrical pattern of accounting
-                                * is for efficiency to pin all pages at
-                                * once, while there is no other callsite
-                                * of vdpa_map() than here above.
-                                */
-                               unpin_user_pages(&page_list[nmap],
-                                                npages - nmap);
-                               goto out;
+
+       while (npages) {
+               pinned = min_t(unsigned long, npages, list_size);
+               ret = pin_user_pages(cur_base, pinned,
+                                    gup_flags, page_list, NULL);
+               if (ret != pinned)
+                       goto out;
+
+               if (!last_pfn)
+                       map_pfn = page_to_pfn(page_list[0]);
+
+               for (i = 0; i < ret; i++) {
+                       unsigned long this_pfn = page_to_pfn(page_list[i]);
+                       u64 csize;
+
+                       if (last_pfn && (this_pfn != last_pfn + 1)) {
+                               /* Pin a contiguous chunk of memory */
+                               csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
+                               if (vhost_vdpa_map(v, iova, csize,
+                                                  map_pfn << PAGE_SHIFT,
+                                                  msg->perm))
+                                       goto out;
+                               map_pfn = this_pfn;
+                               iova += csize;
                        }
-                       atomic64_add(csize, &dev->mm->pinned_vm);
-                       nmap += csize;
-                       iova += csize << PAGE_SHIFT;
-                       map_pfn = this_pfn;
+
+                       last_pfn = this_pfn;
                }
-               last_pfn = this_pfn;
+
+               cur_base += ret << PAGE_SHIFT;
+               npages -= ret;
        }
 
-       WARN_ON(nmap != npages);
+       /* Pin the rest chunk */
+       ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
+                            map_pfn << PAGE_SHIFT, msg->perm);
 out:
-       if (ret)
+       if (ret) {
                vhost_vdpa_unmap(v, msg->iova, msg->size);
-unlock:
+               atomic64_sub(npages, &dev->mm->pinned_vm);
+       }
        mmap_read_unlock(dev->mm);
-free:
-       kvfree(vmas);
-       kvfree(page_list);
+       free_page((unsigned long)page_list);
        return ret;
 }
 
@@ -790,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
        v->domain = NULL;
 }
 
+/*
+ * Fill in v->range, trying three sources in order:
+ *  1. the device's get_iova_range() config op, when it is provided;
+ *  2. the geometry of v->domain, when a domain exists, the geometry
+ *     attribute can be read, and force_aperture is set;
+ *  3. otherwise the unrestricted range 0..ULLONG_MAX.
+ */
+static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
+{
+       struct vdpa_device *vdpa = v->vdpa;
+       const struct vdpa_config_ops *ops = vdpa->config;
+       struct iommu_domain_geometry geo;
+
+       /* The device's own answer takes priority. */
+       if (ops->get_iova_range) {
+               v->range = ops->get_iova_range(vdpa);
+               return;
+       }
+
+       /* Next, use the domain aperture, but only when it is enforced. */
+       if (v->domain &&
+           !iommu_domain_get_attr(v->domain, DOMAIN_ATTR_GEOMETRY, &geo) &&
+           geo.force_aperture) {
+               v->range.first = geo.aperture_start;
+               v->range.last = geo.aperture_end;
+               return;
+       }
+
+       /* Nothing constrains the range. */
+       v->range.first = 0;
+       v->range.last = ULLONG_MAX;
+}
+
 static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 {
        struct vhost_vdpa *v;
@@ -830,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
        if (r)
                goto err_init_iotlb;
 
+       vhost_vdpa_set_iova_range(v);
+
        filep->private_data = v;
 
        return 0;