mm: remove the now-unnecessary mmget_still_valid() hack
authorJann Horn <jannh@google.com>
Fri, 16 Oct 2020 03:13:00 +0000 (20:13 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 16 Oct 2020 18:11:22 +0000 (11:11 -0700)
The preceding patches have ensured that core dumping properly takes the
mmap_lock.  Thanks to that, we can now remove mmget_still_valid() and all
its users.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-8-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/infiniband/core/uverbs_main.c
drivers/vfio/pci/vfio_pci.c
fs/proc/task_mmu.c
fs/userfaultfd.c
include/linux/sched/mm.h
mm/khugepaged.c
mm/madvise.c
mm/mmap.c

index 37794d8..a4ba0b8 100644 (file)
@@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
                 * will only be one mm, so no big deal.
                 */
                mmap_read_lock(mm);
-               if (!mmget_still_valid(mm))
-                       goto skip_mm;
                mutex_lock(&ufile->umap_lock);
                list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
                                          list) {
@@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
                        }
                }
                mutex_unlock(&ufile->umap_lock);
-       skip_mm:
                mmap_read_unlock(mm);
                mmput(mm);
        }
index 1ab1f5c..b0f4b92 100644 (file)
@@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
                } else {
                        mmap_read_lock(mm);
                }
-               if (mmget_still_valid(mm)) {
-                       if (try) {
-                               if (!mutex_trylock(&vdev->vma_lock)) {
-                                       mmap_read_unlock(mm);
-                                       mmput(mm);
-                                       return 0;
-                               }
-                       } else {
-                               mutex_lock(&vdev->vma_lock);
+               if (try) {
+                       if (!mutex_trylock(&vdev->vma_lock)) {
+                               mmap_read_unlock(mm);
+                               mmput(mm);
+                               return 0;
                        }
-                       list_for_each_entry_safe(mmap_vma, tmp,
-                                                &vdev->vma_list, vma_next) {
-                               struct vm_area_struct *vma = mmap_vma->vma;
+               } else {
+                       mutex_lock(&vdev->vma_lock);
+               }
+               list_for_each_entry_safe(mmap_vma, tmp,
+                                        &vdev->vma_list, vma_next) {
+                       struct vm_area_struct *vma = mmap_vma->vma;
 
-                               if (vma->vm_mm != mm)
-                                       continue;
+                       if (vma->vm_mm != mm)
+                               continue;
 
-                               list_del(&mmap_vma->vma_next);
-                               kfree(mmap_vma);
+                       list_del(&mmap_vma->vma_next);
+                       kfree(mmap_vma);
 
-                               zap_vma_ptes(vma, vma->vm_start,
-                                            vma->vm_end - vma->vm_start);
-                       }
-                       mutex_unlock(&vdev->vma_lock);
+                       zap_vma_ptes(vma, vma->vm_start,
+                                    vma->vm_end - vma->vm_start);
                }
+               mutex_unlock(&vdev->vma_lock);
                mmap_read_unlock(mm);
                mmput(mm);
        }
index 846d43d..217aa27 100644 (file)
@@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                        count = -EINTR;
                                        goto out_mm;
                                }
-                               /*
-                                * Avoid to modify vma->vm_flags
-                                * without locked ops while the
-                                * coredump reads the vm_flags.
-                                */
-                               if (!mmget_still_valid(mm)) {
-                                       /*
-                                        * Silently return "count"
-                                        * like if get_task_mm()
-                                        * failed. FIXME: should this
-                                        * function have returned
-                                        * -ESRCH if get_task_mm()
-                                        * failed like if
-                                        * get_proc_task() fails?
-                                        */
-                                       mmap_write_unlock(mm);
-                                       goto out_mm;
-                               }
                                for (vma = mm->mmap; vma; vma = vma->vm_next) {
                                        vma->vm_flags &= ~VM_SOFTDIRTY;
                                        vma_set_page_prot(vma);
index 0e4a383..000b457 100644 (file)
@@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 
                /* the various vma->vm_userfaultfd_ctx still points to it */
                mmap_write_lock(mm);
-               /* no task can run (and in turn coredump) yet */
-               VM_WARN_ON(!mmget_still_valid(mm));
                for (vma = mm->mmap; vma; vma = vma->vm_next)
                        if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
                                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
        /* len == 0 means wake all */
        struct userfaultfd_wake_range range = { .len = 0, };
        unsigned long new_flags;
-       bool still_valid;
 
        WRITE_ONCE(ctx->released, true);
 
@@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
         * taking the mmap_lock for writing.
         */
        mmap_write_lock(mm);
-       still_valid = mmget_still_valid(mm);
        prev = NULL;
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                cond_resched();
@@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
                        continue;
                }
                new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
-               if (still_valid) {
-                       prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
-                                        new_flags, vma->anon_vma,
-                                        vma->vm_file, vma->vm_pgoff,
-                                        vma_policy(vma),
-                                        NULL_VM_UFFD_CTX);
-                       if (prev)
-                               vma = prev;
-                       else
-                               prev = vma;
-               }
+               prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+                                new_flags, vma->anon_vma,
+                                vma->vm_file, vma->vm_pgoff,
+                                vma_policy(vma),
+                                NULL_VM_UFFD_CTX);
+               if (prev)
+                       vma = prev;
+               else
+                       prev = vma;
                vma->vm_flags = new_flags;
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
        }
@@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                goto out;
 
        mmap_write_lock(mm);
-       if (!mmget_still_valid(mm))
-               goto out_unlock;
        vma = find_vma_prev(mm, start, &prev);
        if (!vma)
                goto out_unlock;
@@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
                goto out;
 
        mmap_write_lock(mm);
-       if (!mmget_still_valid(mm))
-               goto out_unlock;
        vma = find_vma_prev(mm, start, &prev);
        if (!vma)
                goto out_unlock;
index 15bfb06..981e34c 100644 (file)
@@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
                __mmdrop(mm);
 }
 
-/*
- * This has to be called after a get_task_mm()/mmget_not_zero()
- * followed by taking the mmap_lock for writing before modifying the
- * vmas or anything the coredump pretends not to change from under it.
- *
- * It also has to be called when mmgrab() is used in the context of
- * the process, but then the mm_count refcount is transferred outside
- * the context of the process to run down_write() on that pinned mm.
- *
- * NOTE: find_extend_vma() called from GUP context is the only place
- * that can modify the "mm" (notably the vm_start/end) under mmap_lock
- * for reading and outside the context of the process, so it is also
- * the only case that holds the mmap_lock for reading that must call
- * this function. Generally if the mmap_lock is hold for reading
- * there's no need of this check after get_task_mm()/mmget_not_zero().
- *
- * This function can be obsoleted and the check can be removed, after
- * the coredump code will hold the mmap_lock for writing before
- * invoking the ->core_dump methods.
- */
-static inline bool mmget_still_valid(struct mm_struct *mm)
-{
-       return likely(!mm->core_state);
-}
-
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
index 58b0d9c..4e3dff1 100644 (file)
@@ -434,7 +434,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
 
 static inline int khugepaged_test_exit(struct mm_struct *mm)
 {
-       return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
+       return atomic_read(&mm->mm_users) == 0;
 }
 
 static bool hugepage_vma_check(struct vm_area_struct *vma,
index 68b761c..fd1f448 100644 (file)
@@ -1085,23 +1085,6 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
        if (write) {
                if (mmap_write_lock_killable(current->mm))
                        return -EINTR;
-
-               /*
-                * We may have stolen the mm from another process
-                * that is undergoing core dumping.
-                *
-                * Right now that's io_ring, in the future it may
-                * be remote process management and not "current"
-                * at all.
-                *
-                * We need to fix core dumping to not do this,
-                * but for now we have the mmget_still_valid()
-                * model.
-                */
-               if (!mmget_still_valid(current->mm)) {
-                       mmap_write_unlock(current->mm);
-                       return -EINTR;
-               }
        } else {
                mmap_read_lock(current->mm);
        }
index 9fb9f8a..ebb92f5 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2562,7 +2562,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
        if (vma && (vma->vm_start <= addr))
                return vma;
        /* don't alter vm_end if the coredump is running */
-       if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
+       if (!prev || expand_stack(prev, addr))
                return NULL;
        if (prev->vm_flags & VM_LOCKED)
                populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2588,9 +2588,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
                return vma;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                return NULL;
-       /* don't alter vm_start if the coredump is running */
-       if (!mmget_still_valid(mm))
-               return NULL;
        start = vma->vm_start;
        if (expand_stack(vma, addr))
                return NULL;