Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdim...
[linux-2.6-microblaze.git] / fs / userfaultfd.c
index 5c2d806..003f0d3 100644 (file)
@@ -33,11 +33,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly;
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
-enum userfaultfd_state {
-       UFFD_STATE_WAIT_API,
-       UFFD_STATE_RUNNING,
-};
-
 /*
  * Start with fault_pending_wqh and fault_wqh so they're more likely
  * to be in the same cacheline.
@@ -69,12 +64,10 @@ struct userfaultfd_ctx {
        unsigned int flags;
        /* features requested from the userspace */
        unsigned int features;
-       /* state machine */
-       enum userfaultfd_state state;
        /* released */
        bool released;
        /* memory mappings are changing because of non-cooperative event */
-       bool mmap_changing;
+       atomic_t mmap_changing;
        /* mm with one ore more vmas attached to this userfaultfd_ctx */
        struct mm_struct *mm;
 };
@@ -104,6 +97,14 @@ struct userfaultfd_wake_range {
        unsigned long len;
 };
 
+/* internal indication that UFFD_API ioctl was successfully executed */
+#define UFFD_FEATURE_INITIALIZED               (1u << 31)
+
+static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
+{
+       return ctx->features & UFFD_FEATURE_INITIALIZED;
+}
+
 static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
                                     int wake_flags, void *key)
 {
@@ -623,7 +624,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
         * already released.
         */
 out:
-       WRITE_ONCE(ctx->mmap_changing, false);
+       atomic_dec(&ctx->mmap_changing);
+       VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0);
        userfaultfd_ctx_put(ctx);
 }
 
@@ -666,15 +668,14 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 
                refcount_set(&ctx->refcount, 1);
                ctx->flags = octx->flags;
-               ctx->state = UFFD_STATE_RUNNING;
                ctx->features = octx->features;
                ctx->released = false;
-               ctx->mmap_changing = false;
+               atomic_set(&ctx->mmap_changing, 0);
                ctx->mm = vma->vm_mm;
                mmgrab(ctx->mm);
 
                userfaultfd_ctx_get(octx);
-               WRITE_ONCE(octx->mmap_changing, true);
+               atomic_inc(&octx->mmap_changing);
                fctx->orig = octx;
                fctx->new = ctx;
                list_add_tail(&fctx->list, fcs);
@@ -721,7 +722,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
        if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
                vm_ctx->ctx = ctx;
                userfaultfd_ctx_get(ctx);
-               WRITE_ONCE(ctx->mmap_changing, true);
+               atomic_inc(&ctx->mmap_changing);
        } else {
                /* Drop uffd context if remap feature not enabled */
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -766,7 +767,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
                return true;
 
        userfaultfd_ctx_get(ctx);
-       WRITE_ONCE(ctx->mmap_changing, true);
+       atomic_inc(&ctx->mmap_changing);
        mmap_read_unlock(mm);
 
        msg_init(&ewq.msg);
@@ -810,7 +811,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
                        return -ENOMEM;
 
                userfaultfd_ctx_get(ctx);
-               WRITE_ONCE(ctx->mmap_changing, true);
+               atomic_inc(&ctx->mmap_changing);
                unmap_ctx->ctx = ctx;
                unmap_ctx->start = start;
                unmap_ctx->end = end;
@@ -943,38 +944,33 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
 
        poll_wait(file, &ctx->fd_wqh, wait);
 
-       switch (ctx->state) {
-       case UFFD_STATE_WAIT_API:
+       if (!userfaultfd_is_initialized(ctx))
                return EPOLLERR;
-       case UFFD_STATE_RUNNING:
-               /*
-                * poll() never guarantees that read won't block.
-                * userfaults can be waken before they're read().
-                */
-               if (unlikely(!(file->f_flags & O_NONBLOCK)))
-                       return EPOLLERR;
-               /*
-                * lockless access to see if there are pending faults
-                * __pollwait last action is the add_wait_queue but
-                * the spin_unlock would allow the waitqueue_active to
-                * pass above the actual list_add inside
-                * add_wait_queue critical section. So use a full
-                * memory barrier to serialize the list_add write of
-                * add_wait_queue() with the waitqueue_active read
-                * below.
-                */
-               ret = 0;
-               smp_mb();
-               if (waitqueue_active(&ctx->fault_pending_wqh))
-                       ret = EPOLLIN;
-               else if (waitqueue_active(&ctx->event_wqh))
-                       ret = EPOLLIN;
 
-               return ret;
-       default:
-               WARN_ON_ONCE(1);
+       /*
+        * poll() never guarantees that read won't block.
+        * userfaults can be waken before they're read().
+        */
+       if (unlikely(!(file->f_flags & O_NONBLOCK)))
                return EPOLLERR;
-       }
+       /*
+        * lockless access to see if there are pending faults
+        * __pollwait last action is the add_wait_queue but
+        * the spin_unlock would allow the waitqueue_active to
+        * pass above the actual list_add inside
+        * add_wait_queue critical section. So use a full
+        * memory barrier to serialize the list_add write of
+        * add_wait_queue() with the waitqueue_active read
+        * below.
+        */
+       ret = 0;
+       smp_mb();
+       if (waitqueue_active(&ctx->fault_pending_wqh))
+               ret = EPOLLIN;
+       else if (waitqueue_active(&ctx->event_wqh))
+               ret = EPOLLIN;
+
+       return ret;
 }
 
 static const struct file_operations userfaultfd_fops;
@@ -1169,7 +1165,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
        int no_wait = file->f_flags & O_NONBLOCK;
        struct inode *inode = file_inode(file);
 
-       if (ctx->state == UFFD_STATE_WAIT_API)
+       if (!userfaultfd_is_initialized(ctx))
                return -EINVAL;
 
        for (;;) {
@@ -1700,7 +1696,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
        user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
        ret = -EAGAIN;
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out;
 
        ret = -EFAULT;
@@ -1757,7 +1753,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
        user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
        ret = -EAGAIN;
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out;
 
        ret = -EFAULT;
@@ -1807,7 +1803,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
        struct userfaultfd_wake_range range;
        bool mode_wp, mode_dontwake;
 
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                return -EAGAIN;
 
        user_uffdio_wp = (struct uffdio_writeprotect __user *) arg;
@@ -1855,7 +1851,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
        user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
        ret = -EAGAIN;
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out;
 
        ret = -EFAULT;
@@ -1908,9 +1904,10 @@ out:
 static inline unsigned int uffd_ctx_features(__u64 user_features)
 {
        /*
-        * For the current set of features the bits just coincide
+        * For the current set of features the bits just coincide. Set
+        * UFFD_FEATURE_INITIALIZED to mark the features as enabled.
         */
-       return (unsigned int)user_features;
+       return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED;
 }
 
 /*
@@ -1923,12 +1920,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 {
        struct uffdio_api uffdio_api;
        void __user *buf = (void __user *)arg;
+       unsigned int ctx_features;
        int ret;
        __u64 features;
 
-       ret = -EINVAL;
-       if (ctx->state != UFFD_STATE_WAIT_API)
-               goto out;
        ret = -EFAULT;
        if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
                goto out;
@@ -1952,9 +1947,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
        ret = -EFAULT;
        if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
                goto out;
-       ctx->state = UFFD_STATE_RUNNING;
+
        /* only enable the requested features for this uffd context */
-       ctx->features = uffd_ctx_features(features);
+       ctx_features = uffd_ctx_features(features);
+       ret = -EINVAL;
+       if (cmpxchg(&ctx->features, 0, ctx_features) != 0)
+               goto err_out;
+
        ret = 0;
 out:
        return ret;
@@ -1971,7 +1970,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
        int ret = -EINVAL;
        struct userfaultfd_ctx *ctx = file->private_data;
 
-       if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API)
+       if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx))
                return -EINVAL;
 
        switch(cmd) {
@@ -2085,9 +2084,8 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
        refcount_set(&ctx->refcount, 1);
        ctx->flags = flags;
        ctx->features = 0;
-       ctx->state = UFFD_STATE_WAIT_API;
        ctx->released = false;
-       ctx->mmap_changing = false;
+       atomic_set(&ctx->mmap_changing, 0);
        ctx->mm = current->mm;
        /* prevent the mm struct to be freed */
        mmgrab(ctx->mm);