Merge tag 'pinctrl-v5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw...
[linux-2.6-microblaze.git] / fs / userfaultfd.c
index 0e4a383..894cc28 100644 (file)
@@ -28,7 +28,7 @@
 #include <linux/security.h>
 #include <linux/hugetlb.h>
 
-int sysctl_unprivileged_userfaultfd __read_mostly = 1;
+int sysctl_unprivileged_userfaultfd __read_mostly;
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
@@ -405,6 +405,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 
        if (ctx->features & UFFD_FEATURE_SIGBUS)
                goto out;
+       if ((vmf->flags & FAULT_FLAG_USER) == 0 &&
+           ctx->flags & UFFD_USER_MODE_ONLY) {
+               printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd "
+                       "sysctl knob to 1 if kernel faults must be handled "
+                       "without obtaining CAP_SYS_PTRACE capability\n");
+               goto out;
+       }
 
        /*
         * If it's already released don't get it. This avoids to loop
@@ -601,8 +608,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 
                /* the various vma->vm_userfaultfd_ctx still points to it */
                mmap_write_lock(mm);
-               /* no task can run (and in turn coredump) yet */
-               VM_WARN_ON(!mmget_still_valid(mm));
                for (vma = mm->mmap; vma; vma = vma->vm_next)
                        if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
                                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -842,7 +847,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
        /* len == 0 means wake all */
        struct userfaultfd_wake_range range = { .len = 0, };
        unsigned long new_flags;
-       bool still_valid;
 
        WRITE_ONCE(ctx->released, true);
 
@@ -858,7 +862,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
         * taking the mmap_lock for writing.
         */
        mmap_write_lock(mm);
-       still_valid = mmget_still_valid(mm);
        prev = NULL;
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                cond_resched();
@@ -869,17 +872,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
                        continue;
                }
                new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
-               if (still_valid) {
-                       prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
-                                        new_flags, vma->anon_vma,
-                                        vma->vm_file, vma->vm_pgoff,
-                                        vma_policy(vma),
-                                        NULL_VM_UFFD_CTX);
-                       if (prev)
-                               vma = prev;
-                       else
-                               prev = vma;
-               }
+               prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+                                new_flags, vma->anon_vma,
+                                vma->vm_file, vma->vm_pgoff,
+                                vma_policy(vma),
+                                NULL_VM_UFFD_CTX);
+               if (prev)
+                       vma = prev;
+               else
+                       prev = vma;
                vma->vm_flags = new_flags;
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
        }
@@ -1309,8 +1310,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                goto out;
 
        mmap_write_lock(mm);
-       if (!mmget_still_valid(mm))
-               goto out_unlock;
        vma = find_vma_prev(mm, start, &prev);
        if (!vma)
                goto out_unlock;
@@ -1511,8 +1510,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
                goto out;
 
        mmap_write_lock(mm);
-       if (!mmget_still_valid(mm))
-               goto out_unlock;
        vma = find_vma_prev(mm, start, &prev);
        if (!vma)
                goto out_unlock;
@@ -1969,16 +1966,23 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
        struct userfaultfd_ctx *ctx;
        int fd;
 
-       if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
+       if (!sysctl_unprivileged_userfaultfd &&
+           (flags & UFFD_USER_MODE_ONLY) == 0 &&
+           !capable(CAP_SYS_PTRACE)) {
+               printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd "
+                       "sysctl knob to 1 if kernel faults must be handled "
+                       "without obtaining CAP_SYS_PTRACE capability\n");
                return -EPERM;
+       }
 
        BUG_ON(!current->mm);
 
        /* Check the UFFD_* constants for consistency.  */
+       BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS);
        BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC);
        BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK);
 
-       if (flags & ~UFFD_SHARED_FCNTL_FLAGS)
+       if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY))
                return -EINVAL;
 
        ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);