Merge tag 'char-misc-5.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
index f609e9e..40248d8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -132,7 +132,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
                vm_flags &= ~VM_SHARED;
                vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
        }
-       /* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
+       /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
        WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
 }
 
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        bool downgraded = false;
        LIST_HEAD(uf);
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        origbrk = mm->brk;
@@ -238,14 +238,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
        /*
         * Always allow shrinking brk.
-        * __do_munmap() may downgrade mmap_sem to read.
+        * __do_munmap() may downgrade mmap_lock to read.
         */
        if (brk <= mm->brk) {
                int ret;
 
                /*
-                * mm->brk must to be protected by write mmap_sem so update it
-                * before downgrading mmap_sem. When __do_munmap() fails,
+                * mm->brk must be protected by write mmap_lock so update it
+                * before downgrading mmap_lock. When __do_munmap() fails,
                 * mm->brk will be restored from origbrk.
                 */
                mm->brk = brk;
@@ -272,9 +272,9 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 success:
        populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
        if (downgraded)
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
        else
-               up_write(&mm->mmap_sem);
+               mmap_write_unlock(mm);
        userfaultfd_unmap_complete(mm, &uf);
        if (populate)
                mm_populate(oldbrk, newbrk - oldbrk);
@@ -282,7 +282,7 @@ success:
 
 out:
        retval = origbrk;
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
        return retval;
 }
 
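Throughout this file the patch replaces raw rwsem calls on mm->mmap_sem with the mmap_lock wrappers (mmap_write_lock_killable(), mmap_write_unlock(), mmap_read_unlock(), mmap_write_downgrade(), and so on). A minimal sketch of the write-then-downgrade pattern used by brk() above, assuming the include/linux/mmap_lock.h API; the function itself is illustrative, not part of the patch:

#include <linux/mmap_lock.h>
#include <linux/mm_types.h>
#include <linux/errno.h>

/* Illustrative only: the lock/downgrade shape used by brk() above. */
static int example_shrink_under_lock(struct mm_struct *mm)
{
	if (mmap_write_lock_killable(mm))	/* was down_write_killable(&mm->mmap_sem) */
		return -EINTR;

	/* ... modify the VMA tree while the lock is held exclusively ... */

	mmap_write_downgrade(mm);		/* was downgrade_write(&mm->mmap_sem) */

	/* ... page tables can be zapped under the shared lock ... */

	mmap_read_unlock(mm);			/* was up_read(&mm->mmap_sem) */
	return 0;
}
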
@@ -505,7 +505,7 @@ static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
  * After the update, the vma will be reinserted using
  * anon_vma_interval_tree_post_update_vma().
  *
- * The entire update must be protected by exclusive mmap_sem and by
+ * The entire update must be protected by exclusive mmap_lock and by
  * the root anon_vma's mutex.
  */
 static inline void
@@ -1030,7 +1030,7 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
  *
  * We don't check here for the merged mmap wrapping around the end of pagecache
- * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * indices (16TB on ia32) because do_mmap() does not permit mmaps which
  * wrap, nor mmaps which cover the final page at index -1UL.
  */
 static int
@@ -1207,7 +1207,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
- * Rough compatbility check to quickly see if it's even worth looking
+ * Rough compatibility check to quickly see if it's even worth looking
  * at sharing an anon_vma.
  *
  * They need to have the same vm_file, and the flags can only differ
@@ -1361,15 +1361,15 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
 }
 
 /*
- * The caller must hold down_write(&current->mm->mmap_sem).
+ * The caller must write-lock current->mm->mmap_lock.
  */
 unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, vm_flags_t vm_flags,
-                       unsigned long pgoff, unsigned long *populate,
-                       struct list_head *uf)
+                       unsigned long flags, unsigned long pgoff,
+                       unsigned long *populate, struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
+       vm_flags_t vm_flags;
        int pkey = 0;
 
        *populate = 0;
@@ -1431,7 +1431,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
-       vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+       vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
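
With vm_flags now computed inside do_mmap() itself (hunk above), the do_mmap_pgoff() wrapper that used to pass vm_flags = 0 becomes redundant and callers switch to do_mmap() directly, as the remap_file_pages() hunk further down shows. A sketch of a converted caller; the surrounding function is hypothetical, but the arguments match the new do_mmap() prototype:

#include <linux/mm.h>
#include <linux/mman.h>

/* Hypothetical caller: must hold mmap_write_lock(current->mm), per the
 * comment above do_mmap().
 */
static unsigned long example_map_shared(struct file *file, unsigned long len,
					struct list_head *uf)
{
	unsigned long populate = 0;

	/* Previously: do_mmap_pgoff(file, 0, len, PROT_READ, MAP_SHARED,
	 *			     0, &populate, uf);
	 */
	return do_mmap(file, 0, len, PROT_READ, MAP_SHARED,
		       0 /* pgoff */, &populate, uf);
}
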
@@ -1562,11 +1562,12 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
                file = fget(fd);
                if (!file)
                        return -EBADF;
-               if (is_file_hugepages(file))
+               if (is_file_hugepages(file)) {
                        len = ALIGN(len, huge_page_size(hstate_file(file)));
-               retval = -EINVAL;
-               if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+               } else if (unlikely(flags & MAP_HUGETLB)) {
+                       retval = -EINVAL;
                        goto out_fput;
+               }
        } else if (flags & MAP_HUGETLB) {
                struct user_struct *user = NULL;
                struct hstate *hs;
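
The restructured branch above keeps the behaviour: a hugetlbfs fd has its length aligned to the huge page size, MAP_HUGETLB on any other fd fails with -EINVAL, and the anonymous case (fd == -1, handled by the else branch) creates a hugetlbfs-backed file internally. A small userspace sketch of the anonymous path, assuming huge pages are configured on the system:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2UL * 1024 * 1024;		/* one 2 MiB huge page */

	/* fd == -1: taken by the "else if (flags & MAP_HUGETLB)" branch of
	 * ksys_mmap_pgoff(), which backs the mapping with an anonymous
	 * hugetlbfs file.  Fails if no huge pages are reserved.
	 */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	munmap(p, len);
	return 0;
}
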
@@ -1689,7 +1690,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma, *prev;
+       struct vm_area_struct *vma, *prev, *merge;
        int error;
        struct rb_node **rb_link, *rb_parent;
        unsigned long charged = 0;
@@ -1773,6 +1774,25 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                if (error)
                        goto unmap_and_free_vma;
 
+               /* If vm_flags changed after call_mmap(), we should try to merge
+                * the vma again as we may succeed this time.
+                */
+               if (unlikely(vm_flags != vma->vm_flags && prev)) {
+                       merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
+                               NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
+                       if (merge) {
+                               fput(file);
+                               vm_area_free(vma);
+                               vma = merge;
+                               /* Update vm_flags and possibly addr to pick up the change. We don't
+                                * warn here if addr changed as the vma is not linked by vma_link().
+                                */
+                               addr = vma->vm_start;
+                               vm_flags = vma->vm_flags;
+                               goto unmap_writable;
+                       }
+               }
+
                /* Can addr have changed??
                 *
                 * Answer: Yes, several device drivers can do it in their
@@ -1795,6 +1815,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        vma_link(mm, vma, prev, rb_link, rb_parent);
        /* Once vma denies write, undo our temporary denial count */
        if (file) {
+unmap_writable:
                if (vm_flags & VM_SHARED)
                        mapping_unmap_writable(file->f_mapping);
                if (vm_flags & VM_DENYWRITE)
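
The retry above matters because a driver's ->mmap callback may change vma->vm_flags, which defeated the earlier vma_merge() attempt made with the pre-call_mmap() flags. A hypothetical driver callback of the kind that triggers this path; the names are illustrative:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

/* Hypothetical ->mmap: after it runs, vma->vm_flags no longer equals the
 * vm_flags value mmap_region() computed, so the vma_merge() retry above
 * can kick in.
 */
static int example_drv_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	return 0;
}

static const struct file_operations example_drv_fops = {
	.owner	= THIS_MODULE,
	.mmap	= example_drv_mmap,
};
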
@@ -2209,7 +2230,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                /*
                 * mmap_region() will call shmem_zero_setup() to create a file,
                 * so use shmem's get_unmapped_area in case it can be huge.
-                * do_mmap_pgoff() will clear pgoff, so match alignment.
+                * do_mmap() will clear pgoff, so match alignment.
                 */
                pgoff = 0;
                get_area = shmem_get_unmapped_area;
@@ -2371,7 +2392,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
-        * is required to hold the mmap_sem in read mode.  We need the
+        * is required to hold the mmap_lock in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
        anon_vma_lock_write(vma->anon_vma);
@@ -2389,7 +2410,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                        if (!error) {
                                /*
                                 * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_sem
+                                * updates, but we only hold a shared mmap_lock
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
                                 * anon_vma_lock_write() doesn't help here, as
@@ -2451,7 +2472,7 @@ int expand_downwards(struct vm_area_struct *vma,
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
-        * is required to hold the mmap_sem in read mode.  We need the
+        * is required to hold the mmap_lock in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
        anon_vma_lock_write(vma->anon_vma);
@@ -2469,7 +2490,7 @@ int expand_downwards(struct vm_area_struct *vma,
                        if (!error) {
                                /*
                                 * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_sem
+                                * updates, but we only hold a shared mmap_lock
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
                                 * anon_vma_lock_write() doesn't help here, as
@@ -2620,7 +2641,7 @@ static void unmap_region(struct mm_struct *mm,
  * Create a list of vma's touched by the unmap, removing them from the mm's
  * vma list as we go..
  */
-static void
+static bool
 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        struct vm_area_struct *prev, unsigned long end)
 {
@@ -2645,6 +2666,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Kill the cache */
        vmacache_invalidate(mm);
+
+       /*
+        * Do not downgrade mmap_lock if we are next to a VM_GROWSDOWN or
+        * VM_GROWSUP VMA. Such VMAs can change their size under
+        * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+        */
+       if (vma && (vma->vm_flags & VM_GROWSDOWN))
+               return false;
+       if (prev && (prev->vm_flags & VM_GROWSUP))
+               return false;
+       return true;
 }
 
 /*
@@ -2825,10 +2857,11 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
        }
 
        /* Detach vmas from rbtree */
-       detach_vmas_to_be_unmapped(mm, vma, prev, end);
+       if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
+               downgrade = false;
 
        if (downgrade)
-               downgrade_write(&mm->mmap_sem);
+               mmap_write_downgrade(mm);
 
        unmap_region(mm, vma, prev, start, end);
 
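The bool returned by detach_vmas_to_be_unmapped() guards against the race described in its new comment: stack VMAs can grow while only the read lock is held. A condensed sketch of that racing path, shaped after the common arch do_page_fault() pattern (details vary per architecture; the function name is illustrative):

#include <linux/mm.h>
#include <linux/mmap_lock.h>

/* Illustrative only: a page-fault handler expanding a stack VMA while
 * holding just mmap_read_lock().  If __do_munmap() had downgraded its
 * write lock next to such a VMA, the two could collide.
 */
static void example_fault_path(struct mm_struct *mm, unsigned long address)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = find_vma(mm, address);
	if (vma && vma->vm_start > address && (vma->vm_flags & VM_GROWSDOWN))
		expand_stack(vma, address);	/* resizes the VMA under the read lock */
	mmap_read_unlock(mm);
}
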
@@ -2850,20 +2883,20 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
        struct mm_struct *mm = current->mm;
        LIST_HEAD(uf);
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        ret = __do_munmap(mm, start, len, &uf, downgrade);
        /*
-        * Returning 1 indicates mmap_sem is downgraded.
+        * Returning 1 indicates mmap_lock is downgraded.
         * But 1 is not legal return value of vm_munmap() and munmap(), reset
         * it to 0 before return.
         */
        if (ret == 1) {
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
                ret = 0;
        } else
-               up_write(&mm->mmap_sem);
+               mmap_write_unlock(mm);
 
        userfaultfd_unmap_complete(mm, &uf);
        return ret;
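
Only the munmap() syscall path asks __do_munmap() to downgrade; vm_munmap() keeps the write lock for its whole critical section. The two callers of __vm_munmap() in this file look roughly like this (from memory of the surrounding code, so treat the exact bodies as approximate):

int vm_munmap(unsigned long start, size_t len)
{
	return __vm_munmap(start, len, false);
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	addr = untagged_addr(addr);
	profile_munmap(addr);
	return __vm_munmap(addr, len, true);	/* may return with only the read lock held inside */
}
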
@@ -2911,7 +2944,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        if (pgoff + (size >> PAGE_SHIFT) < pgoff)
                return ret;
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        vma = find_vma(mm, start);
@@ -2970,11 +3003,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        }
 
        file = get_file(vma->vm_file);
-       ret = do_mmap_pgoff(vma->vm_file, start, size,
+       ret = do_mmap(vma->vm_file, start, size,
                        prot, flags, pgoff, &populate, NULL);
        fput(file);
 out:
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
        if (populate)
                mm_populate(ret, populate);
        if (!IS_ERR_VALUE(ret))
@@ -3074,12 +3107,12 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
        if (!len)
                return 0;
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        ret = do_brk_flags(addr, len, flags, &uf);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
        userfaultfd_unmap_complete(mm, &uf);
        if (populate && !ret)
                mm_populate(addr, len);
@@ -3107,12 +3140,12 @@ void exit_mmap(struct mm_struct *mm)
                /*
                 * Manually reap the mm to free as much memory as possible.
                 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
-                * this mm from further consideration.  Taking mm->mmap_sem for
+                * this mm from further consideration.  Taking mm->mmap_lock for
                 * write after setting MMF_OOM_SKIP will guarantee that the oom
-                * reaper will not run on this mm again after mmap_sem is
+                * reaper will not run on this mm again after mmap_lock is
                 * dropped.
                 *
-                * Nothing can be holding mm->mmap_sem here and the above call
+                * Nothing can be holding mm->mmap_lock here and the above call
                 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
                 * __oom_reap_task_mm() will not block.
                 *
@@ -3123,8 +3156,8 @@ void exit_mmap(struct mm_struct *mm)
                (void)__oom_reap_task_mm(mm);
 
                set_bit(MMF_OOM_SKIP, &mm->flags);
-               down_write(&mm->mmap_sem);
-               up_write(&mm->mmap_sem);
+               mmap_write_lock(mm);
+               mmap_write_unlock(mm);
        }
 
        if (mm->locked_vm) {
@@ -3159,6 +3192,7 @@ void exit_mmap(struct mm_struct *mm)
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
                vma = remove_vma(vma);
+               cond_resched();
        }
        vm_unacct_memory(nr_accounted);
 }
@@ -3189,7 +3223,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         * By setting it to reflect the virtual start address of the
         * vma, merges and splits can happen in a seamless way, just
         * using the existing file pgoff checks and manipulations.
-        * Similarly in do_mmap_pgoff and in do_brk.
+        * Similarly in do_mmap and in do_brk.
         */
        if (vma_is_anonymous(vma)) {
                BUG_ON(vma->anon_vma);
@@ -3437,7 +3471,7 @@ bool vma_is_special_mapping(const struct vm_area_struct *vma,
 }
 
 /*
- * Called with mm->mmap_sem held for writing.
+ * Called with mm->mmap_lock held for writing.
  * Insert a new vma covering the given region, with the given flags.
  * Its pages are supplied by the given array of struct page *.
  * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
@@ -3474,7 +3508,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
                 * The LSB of head.next can't change from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
+               down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
                /*
                 * We can safely modify head.next after taking the
                 * anon_vma->root->rwsem. If some other vma in this mm shares
@@ -3504,7 +3538,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
-               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
+               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
        }
 }
 
@@ -3513,11 +3547,11 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * operations that could ever happen on a certain mm. This includes
  * vmtruncate, try_to_unmap, and all page faults.
  *
- * The caller must take the mmap_sem in write mode before calling
+ * The caller must take the mmap_lock in write mode before calling
  * mm_take_all_locks(). The caller isn't allowed to release the
- * mmap_sem until mm_drop_all_locks() returns.
+ * mmap_lock until mm_drop_all_locks() returns.
  *
- * mmap_sem in write mode is required in order to block all operations
+ * mmap_lock in write mode is required in order to block all operations
  * that could modify pagetables and free pages without need of
  * altering the vma layout. It's also needed in write mode to avoid new
  * anon_vmas to be associated with existing vmas.
@@ -3550,7 +3584,7 @@ int mm_take_all_locks(struct mm_struct *mm)
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
 
-       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(mmap_read_trylock(mm));
 
        mutex_lock(&mm_all_locks_mutex);
 
@@ -3622,7 +3656,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
 }
 
 /*
- * The mmap_sem cannot be released by the caller until
+ * The mmap_lock cannot be released by the caller until
  * mm_drop_all_locks() returns.
  */
 void mm_drop_all_locks(struct mm_struct *mm)
@@ -3630,7 +3664,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
 
-       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(mmap_read_trylock(mm));
        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
        for (vma = mm->mmap; vma; vma = vma->vm_next) {