Merge tag 'char-misc-5.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
index f609e9e..40248d8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -132,7 +132,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
                vm_flags &= ~VM_SHARED;
                vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
        }
-       /* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
+       /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
        WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
 }
 
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        bool downgraded = false;
        LIST_HEAD(uf);
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        origbrk = mm->brk;
@@ -238,14 +238,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
        /*
         * Always allow shrinking brk.
-        * __do_munmap() may downgrade mmap_sem to read.
+        * __do_munmap() may downgrade mmap_lock to read.
         */
        if (brk <= mm->brk) {
                int ret;
 
                /*
-                * mm->brk must to be protected by write mmap_sem so update it
-                * before downgrading mmap_sem. When __do_munmap() fails,
+                * mm->brk must be protected by write mmap_lock so update it
+                * before downgrading mmap_lock. When __do_munmap() fails,
                 * mm->brk will be restored from origbrk.
                 */
                mm->brk = brk;
@@ -272,9 +272,9 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 success:
        populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
        if (downgraded)
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
        else
-               up_write(&mm->mmap_sem);
+               mmap_write_unlock(mm);
        userfaultfd_unmap_complete(mm, &uf);
        if (populate)
                mm_populate(oldbrk, newbrk - oldbrk);
@@ -282,7 +282,7 @@ success:
 
 out:
        retval = origbrk;
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
        return retval;
 }
 
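Throughout this file the patch replaces raw rwsem calls on mm->mmap_sem with the mmap_lock wrappers (mmap_write_lock_killable(), mmap_write_unlock(), mmap_read_unlock(), mmap_write_downgrade(), and so on). A minimal sketch of the write-then-downgrade pattern used by brk() above, assuming the include/linux/mmap_lock.h API; the function itself is illustrative, not part of the patch:

#include <linux/mmap_lock.h>
#include <linux/mm_types.h>
#include <linux/errno.h>

/* Illustrative only: the lock/downgrade shape used by brk() above. */
static int example_shrink_under_lock(struct mm_struct *mm)
{
	if (mmap_write_lock_killable(mm))	/* was down_write_killable(&mm->mmap_sem) */
		return -EINTR;

	/* ... modify the VMA tree while the lock is held exclusively ... */

	mmap_write_downgrade(mm);		/* was downgrade_write(&mm->mmap_sem) */

	/* ... page tables can be zapped under the shared lock ... */

	mmap_read_unlock(mm);			/* was up_read(&mm->mmap_sem) */
	return 0;
}
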
@@ -505,7 +505,7 @@ static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
  * After the update, the vma will be reinserted using
  * anon_vma_interval_tree_post_update_vma().
  *
- * The entire update must be protected by exclusive mmap_sem and by
+ * The entire update must be protected by exclusive mmap_lock and by
  * the root anon_vma's mutex.
  */
 static inline void
@@ -1030,7 +1030,7 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
  *
  * We don't check here for the merged mmap wrapping around the end of pagecache
- * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * indices (16TB on ia32) because do_mmap() does not permit mmaps which
  * wrap, nor mmaps which cover the final page at index -1UL.
  */
 static int
@@ -1207,7 +1207,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
- * Rough compatbility check to quickly see if it's even worth looking
+ * Rough compatibility check to quickly see if it's even worth looking
  * at sharing an anon_vma.
  *
  * They need to have the same vm_file, and the flags can only differ
@@ -1361,15 +1361,15 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
 }
 
 /*
- * The caller must hold down_write(&current->mm->mmap_sem).
+ * The caller must write-lock current->mm->mmap_lock.
  */
 unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, vm_flags_t vm_flags,
-                       unsigned long pgoff, unsigned long *populate,
-                       struct list_head *uf)
+                       unsigned long flags, unsigned long pgoff,
+                       unsigned long *populate, struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
+       vm_flags_t vm_flags;
        int pkey = 0;
 
        *populate = 0;
@@ -1431,7 +1431,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
-       vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+       vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
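
With vm_flags now computed inside do_mmap() itself (hunk above), the do_mmap_pgoff() wrapper that used to pass vm_flags = 0 becomes redundant and callers switch to do_mmap() directly, as the remap_file_pages() hunk further down shows. A sketch of a converted caller; the surrounding function is hypothetical, but the arguments match the new do_mmap() prototype:

#include <linux/mm.h>
#include <linux/mman.h>

/* Hypothetical caller: must hold mmap_write_lock(current->mm), per the
 * comment above do_mmap().
 */
static unsigned long example_map_shared(struct file *file, unsigned long len,
					struct list_head *uf)
{
	unsigned long populate = 0;

	/* Previously: do_mmap_pgoff(file, 0, len, PROT_READ, MAP_SHARED,
	 *			     0, &populate, uf);
	 */
	return do_mmap(file, 0, len, PROT_READ, MAP_SHARED,
		       0 /* pgoff */, &populate, uf);
}
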
@@ -1562,11 +1562,12 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
                file = fget(fd);
                if (!file)
                        return -EBADF;
-               if (is_file_hugepages(file))
+               if (is_file_hugepages(file)) {
                        len = ALIGN(len, huge_page_size(hstate_file(file)));
-               retval = -EINVAL;
-               if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+               } else if (unlikely(flags & MAP_HUGETLB)) {
+                       retval = -EINVAL;
                        goto out_fput;
+               }
        } else if (flags & MAP_HUGETLB) {
                struct user_struct *user = NULL;
                struct hstate *hs;
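
The restructured branch above keeps the behaviour: a hugetlbfs fd has its length aligned to the huge page size, MAP_HUGETLB on any other fd fails with -EINVAL, and the anonymous case (fd == -1, handled by the else branch) creates a hugetlbfs-backed file internally. A small userspace sketch of the anonymous path, assuming huge pages are configured on the system:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2UL * 1024 * 1024;		/* one 2 MiB huge page */

	/* fd == -1: taken by the "else if (flags & MAP_HUGETLB)" branch of
	 * ksys_mmap_pgoff(), which backs the mapping with an anonymous
	 * hugetlbfs file.  Fails if no huge pages are reserved.
	 */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	munmap(p, len);
	return 0;
}
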
@@ -1689,7 +1690,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma, *prev;
+       struct vm_area_struct *vma, *prev, *merge;
        int error;
        struct rb_node **rb_link, *rb_parent;
        unsigned long charged = 0;
@@ -1773,6 +1774,25 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                if (error)
                        goto unmap_and_free_vma;
 
+               /* If vm_flags changed after call_mmap(), we should try to merge
+                * the vma again as we may succeed this time.
+                */
+               if (unlikely(vm_flags != vma->vm_flags && prev)) {
+                       merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
+                               NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
+                       if (merge) {
+                               fput(file);
+                               vm_area_free(vma);
+                               vma = merge;
+                               /* Update vm_flags and possibly addr to pick up the change. We don't
+                                * warn here if addr changed as the vma is not linked by vma_link().
+                                */
+                               addr = vma->vm_start;
+                               vm_flags = vma->vm_flags;
+                               goto unmap_writable;
+                       }
+               }
+
                /* Can addr have changed??
                 *
                 * Answer: Yes, several device drivers can do it in their
@@ -1795,6 +1815,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        vma_link(mm, vma, prev, rb_link, rb_parent);
        /* Once vma denies write, undo our temporary denial count */
        if (file) {
+unmap_writable:
                if (vm_flags & VM_SHARED)
                        mapping_unmap_writable(file->f_mapping);
                if (vm_flags & VM_DENYWRITE)
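
The retry above matters because a driver's ->mmap callback may change vma->vm_flags, which defeated the earlier vma_merge() attempt made with the pre-call_mmap() flags. A hypothetical driver callback of the kind that triggers this path; the names are illustrative:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

/* Hypothetical ->mmap: after it runs, vma->vm_flags no longer equals the
 * vm_flags value mmap_region() computed, so the vma_merge() retry above
 * can kick in.
 */
static int example_drv_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	return 0;
}

static const struct file_operations example_drv_fops = {
	.owner	= THIS_MODULE,
	.mmap	= example_drv_mmap,
};
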
@@ -2209,7 +2230,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                /*
                 * mmap_region() will call shmem_zero_setup() to create a file,
                 * so use shmem's get_unmapped_area in case it can be huge.
-                * do_mmap_pgoff() will clear pgoff, so match alignment.
+                * do_mmap() will clear pgoff, so match alignment.
                 */
                pgoff = 0;
                get_area = shmem_get_unmapped_area;
@@ -2371,7 +2392,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
-        * is required to hold the mmap_sem in read mode.  We need the
+        * is required to hold the mmap_lock in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
        anon_vma_lock_write(vma->anon_vma);
@@ -2389,7 +2410,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                        if (!error) {
                                /*
                                 * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_sem
+                                * updates, but we only hold a shared mmap_lock
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
                                 * anon_vma_lock_write() doesn't help here, as
@@ -2451,7 +2472,7 @@ int expand_downwards(struct vm_area_struct *vma,
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
-        * is required to hold the mmap_sem in read mode.  We need the
+        * is required to hold the mmap_lock in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
        anon_vma_lock_write(vma->anon_vma);
@@ -2469,7 +2490,7 @@ int expand_downwards(struct vm_area_struct *vma,
                        if (!error) {
                                /*
                                 * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_sem
+                                * updates, but we only hold a shared mmap_lock
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
                                 * anon_vma_lock_write() doesn't help here, as
@@ -2620,7 +2641,7 @@ static void unmap_region(struct mm_struct *mm,
  * Create a list of vma's touched by the unmap, removing them from the mm's
  * vma list as we go..
  */
-static void
+static bool
 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        struct vm_area_struct *prev, unsigned long end)
 {
@@ -2645,6 +2666,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Kill the cache */
        vmacache_invalidate(mm);
+
+       /*
+        * Do not downgrade mmap_lock if we are next to a VM_GROWSDOWN or
+        * VM_GROWSUP VMA. Such VMAs can change their size under
+        * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+        */
+       if (vma && (vma->vm_flags & VM_GROWSDOWN))
+               return false;
+       if (prev && (prev->vm_flags & VM_GROWSUP))
+               return false;
+       return true;
 }
 
 /*
@@ -2825,10 +2857,11 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
        }
 
        /* Detach vmas from rbtree */
-       detach_vmas_to_be_unmapped(mm, vma, prev, end);
+       if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
+               downgrade = false;
 
        if (downgrade)
-               downgrade_write(&mm->mmap_sem);
+               mmap_write_downgrade(mm);
 
        unmap_region(mm, vma, prev, start, end);
 
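The bool returned by detach_vmas_to_be_unmapped() guards against the race described in its new comment: stack VMAs can grow while only the read lock is held. A condensed sketch of that racing path, shaped after the common arch do_page_fault() pattern (details vary per architecture; the function name is illustrative):

#include <linux/mm.h>
#include <linux/mmap_lock.h>

/* Illustrative only: a page-fault handler expanding a stack VMA while
 * holding just mmap_read_lock().  If __do_munmap() had downgraded its
 * write lock next to such a VMA, the two could collide.
 */
static void example_fault_path(struct mm_struct *mm, unsigned long address)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = find_vma(mm, address);
	if (vma && vma->vm_start > address && (vma->vm_flags & VM_GROWSDOWN))
		expand_stack(vma, address);	/* resizes the VMA under the read lock */
	mmap_read_unlock(mm);
}
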
@@ -2850,20 +2883,20 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
        struct mm_struct *mm = current->mm;
        LIST_HEAD(uf);
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        ret = __do_munmap(mm, start, len, &uf, downgrade);
        /*
-        * Returning 1 indicates mmap_sem is downgraded.
+        * Returning 1 indicates mmap_lock is downgraded.
         * But 1 is not legal return value of vm_munmap() and munmap(), reset
         * it to 0 before return.
         */
        if (ret == 1) {
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
                ret = 0;
        } else
-               up_write(&mm->mmap_sem);
+               mmap_write_unlock(mm);
 
        userfaultfd_unmap_complete(mm, &uf);
        return ret;
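
Only the munmap() syscall path asks __do_munmap() to downgrade; vm_munmap() keeps the write lock for its whole critical section. The two callers of __vm_munmap() in this file look roughly like this (from memory of the surrounding code, so treat the exact bodies as approximate):

int vm_munmap(unsigned long start, size_t len)
{
	return __vm_munmap(start, len, false);
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	addr = untagged_addr(addr);
	profile_munmap(addr);
	return __vm_munmap(addr, len, true);	/* may return with only the read lock held inside */
}
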
@@ -2911,7 +2944,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        if (pgoff + (size >> PAGE_SHIFT) < pgoff)
                return ret;
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        vma = find_vma(mm, start);
@@ -2970,11 +3003,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        }
 
        file = get_file(vma->vm_file);
-       ret = do_mmap_pgoff(vma->vm_file, start, size,
+       ret = do_mmap(vma->vm_file, start, size,
                        prot, flags, pgoff, &populate, NULL);
        fput(file);
 out:
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
        if (populate)
                mm_populate(ret, populate);
        if (!IS_ERR_VALUE(ret))
@@ -3074,12 +3107,12 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
        if (!len)
                return 0;
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        ret = do_brk_flags(addr, len, flags, &uf);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
        userfaultfd_unmap_complete(mm, &uf);
        if (populate && !ret)
                mm_populate(addr, len);
@@ -3107,12 +3140,12 @@ void exit_mmap(struct mm_struct *mm)
                /*
                 * Manually reap the mm to free as much memory as possible.
                 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
-                * this mm from further consideration.  Taking mm->mmap_sem for
+                * this mm from further consideration.  Taking mm->mmap_lock for
                 * write after setting MMF_OOM_SKIP will guarantee that the oom
-                * reaper will not run on this mm again after mmap_sem is
+                * reaper will not run on this mm again after mmap_lock is
                 * dropped.
                 *
-                * Nothing can be holding mm->mmap_sem here and the above call
+                * Nothing can be holding mm->mmap_lock here and the above call
                 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
                 * __oom_reap_task_mm() will not block.
                 *
@@ -3123,8 +3156,8 @@ void exit_mmap(struct mm_struct *mm)
                (void)__oom_reap_task_mm(mm);
 
                set_bit(MMF_OOM_SKIP, &mm->flags);
-               down_write(&mm->mmap_sem);
-               up_write(&mm->mmap_sem);
+               mmap_write_lock(mm);
+               mmap_write_unlock(mm);
        }
 
        if (mm->locked_vm) {
@@ -3159,6 +3192,7 @@ void exit_mmap(struct mm_struct *mm)
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
                vma = remove_vma(vma);
+               cond_resched();
        }
        vm_unacct_memory(nr_accounted);
 }
@@ -3189,7 +3223,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         * By setting it to reflect the virtual start address of the
         * vma, merges and splits can happen in a seamless way, just
         * using the existing file pgoff checks and manipulations.
-        * Similarly in do_mmap_pgoff and in do_brk.
+        * Similarly in do_mmap and in do_brk.
         */
        if (vma_is_anonymous(vma)) {
                BUG_ON(vma->anon_vma);
@@ -3437,7 +3471,7 @@ bool vma_is_special_mapping(const struct vm_area_struct *vma,
 }
 
 /*
- * Called with mm->mmap_sem held for writing.
+ * Called with mm->mmap_lock held for writing.
  * Insert a new vma covering the given region, with the given flags.
  * Its pages are supplied by the given array of struct page *.
  * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
@@ -3474,7 +3508,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
                 * The LSB of head.next can't change from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
+               down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
                /*
                 * We can safely modify head.next after taking the
                 * anon_vma->root->rwsem. If some other vma in this mm shares
@@ -3504,7 +3538,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
-               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
+               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
        }
 }
 
@@ -3513,11 +3547,11 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * operations that could ever happen on a certain mm. This includes
  * vmtruncate, try_to_unmap, and all page faults.
  *
- * The caller must take the mmap_sem in write mode before calling
+ * The caller must take the mmap_lock in write mode before calling
  * mm_take_all_locks(). The caller isn't allowed to release the
- * mmap_sem until mm_drop_all_locks() returns.
+ * mmap_lock until mm_drop_all_locks() returns.
  *
- * mmap_sem in write mode is required in order to block all operations
+ * mmap_lock in write mode is required in order to block all operations
  * that could modify pagetables and free pages without need of
  * altering the vma layout. It's also needed in write mode to avoid new
  * anon_vmas to be associated with existing vmas.
@@ -3550,7 +3584,7 @@ int mm_take_all_locks(struct mm_struct *mm)
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
 
-       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(mmap_read_trylock(mm));
 
        mutex_lock(&mm_all_locks_mutex);
 
@@ -3622,7 +3656,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
 }
 
 /*
- * The mmap_sem cannot be released by the caller until
+ * The mmap_lock cannot be released by the caller until
  * mm_drop_all_locks() returns.
  */
 void mm_drop_all_locks(struct mm_struct *mm)
@@ -3630,7 +3664,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
 
-       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(mmap_read_trylock(mm));
        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
        for (vma = mm->mmap; vma; vma = vma->vm_next) {