index 9d780f4..6e44754 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -14,7 +14,6 @@
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
-#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -39,7 +38,6 @@
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
-#include <linux/rbtree_augmented.h>
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/printk.h>
@@ -77,9 +75,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
-               unsigned long start, unsigned long end);
+               struct vm_area_struct *next, unsigned long start,
+               unsigned long end);
 
 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 {
@@ -132,12 +131,10 @@ void unlink_file_vma(struct vm_area_struct *vma)
 }
 
 /*
- * Close a vm structure and free it, returning the next.
+ * Close a vm structure and free it.
  */
-static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+static void remove_vma(struct vm_area_struct *vma)
 {
-       struct vm_area_struct *next = vma->vm_next;
-
        might_sleep();
        if (vma->vm_ops && vma->vm_ops->close)
                vma->vm_ops->close(vma);
@@ -145,20 +142,41 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
                fput(vma->vm_file);
        mpol_put(vma_policy(vma));
        vm_area_free(vma);
-       return next;
 }
 
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
-               struct list_head *uf);
+/*
+ * check_brk_limits() - Use platform specific check of range & verify mlock
+ * limits.
+ * @addr: The address to check
+ * @len: The size of increase.
+ *
+ * Return: 0 on success.
+ */
+static int check_brk_limits(unsigned long addr, unsigned long len)
+{
+       unsigned long mapped_addr;
+
+       mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+       if (IS_ERR_VALUE(mapped_addr))
+               return mapped_addr;
+
+       return mlock_future_check(current->mm, current->mm->def_flags, len);
+}
+static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+                        unsigned long newbrk, unsigned long oldbrk,
+                        struct list_head *uf);
+static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *brkvma,
+               unsigned long addr, unsigned long request, unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
        unsigned long newbrk, oldbrk, origbrk;
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *next;
+       struct vm_area_struct *brkvma, *next = NULL;
        unsigned long min_brk;
        bool populate;
        bool downgraded = false;
        LIST_HEAD(uf);
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        if (mmap_write_lock_killable(mm))
                return -EINTR;
@@ -200,35 +218,51 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
        /*
         * Always allow shrinking brk.
-        * __do_munmap() may downgrade mmap_lock to read.
+        * do_brk_munmap() may downgrade mmap_lock to read.
         */
        if (brk <= mm->brk) {
                int ret;
 
+               /* Search one past newbrk */
+               mas_set(&mas, newbrk);
+               brkvma = mas_find(&mas, oldbrk);
+               BUG_ON(brkvma == NULL);
+               if (brkvma->vm_start >= oldbrk)
+                       goto out; /* mapping intersects with an existing non-brk vma. */
                /*
-                * mm->brk must to be protected by write mmap_lock so update it
-                * before downgrading mmap_lock. When __do_munmap() fails,
-                * mm->brk will be restored from origbrk.
+                * mm->brk must be protected by write mmap_lock.
+                * do_brk_munmap() may downgrade the lock,  so update it
+                * before calling do_brk_munmap().
                 */
                mm->brk = brk;
-               ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
-               if (ret < 0) {
-                       mm->brk = origbrk;
-                       goto out;
-               } else if (ret == 1) {
+               ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
+               if (ret == 1)  {
                        downgraded = true;
-               }
-               goto success;
+                       goto success;
+               } else if (!ret)
+                       goto success;
+
+               mm->brk = origbrk;
+               goto out;
        }
 
-       /* Check against existing mmap mappings. */
-       next = find_vma(mm, oldbrk);
+       if (check_brk_limits(oldbrk, newbrk - oldbrk))
+               goto out;
+
+       /*
+        * Only check if the next VMA is within the stack_guard_gap of the
+        * expansion area
+        */
+       mas_set(&mas, oldbrk);
+       next = mas_find(&mas, newbrk - 1 + PAGE_SIZE + stack_guard_gap);
        if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                goto out;
 
+       brkvma = mas_prev(&mas, mm->start_brk);
        /* Ok, looks good - let it rip. */
-       if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
+       if (do_brk_flags(&mas, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
                goto out;
+
        mm->brk = brk;
 
 success:
@@ -247,104 +281,45 @@ out:
        return origbrk;
 }
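
The shrinking-brk path above no longer calls __do_munmap(); it walks the maple tree directly through a maple state. A minimal sketch of that lookup pattern, assuming mmap_lock is held for writing; find_brk_vma() is an illustrative helper, not part of the patch:

    #include <linux/mm.h>
    #include <linux/maple_tree.h>

    /* Illustrative only: locate the VMA covering the shrink range [newbrk, oldbrk). */
    static struct vm_area_struct *find_brk_vma(struct mm_struct *mm,
                                               unsigned long newbrk,
                                               unsigned long oldbrk)
    {
            MA_STATE(mas, &mm->mm_mt, newbrk, newbrk);
            struct vm_area_struct *vma;

            /* Walk forward from newbrk, but never past oldbrk. */
            vma = mas_find(&mas, oldbrk);
            if (!vma || vma->vm_start >= oldbrk)
                    return NULL;    /* found VMA does not cover the brk range */
            return vma;
    }
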
 
-static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
-{
-       unsigned long gap, prev_end;
-
-       /*
-        * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
-        * allow two stack_guard_gaps between them here, and when choosing
-        * an unmapped area; whereas when expanding we only require one.
-        * That's a little inconsistent, but keeps the code here simpler.
-        */
-       gap = vm_start_gap(vma);
-       if (vma->vm_prev) {
-               prev_end = vm_end_gap(vma->vm_prev);
-               if (gap > prev_end)
-                       gap -= prev_end;
-               else
-                       gap = 0;
-       }
-       return gap;
-}
-
-#ifdef CONFIG_DEBUG_VM_RB
-static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
-{
-       unsigned long max = vma_compute_gap(vma), subtree_gap;
-       if (vma->vm_rb.rb_left) {
-               subtree_gap = rb_entry(vma->vm_rb.rb_left,
-                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
-               if (subtree_gap > max)
-                       max = subtree_gap;
-       }
-       if (vma->vm_rb.rb_right) {
-               subtree_gap = rb_entry(vma->vm_rb.rb_right,
-                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
-               if (subtree_gap > max)
-                       max = subtree_gap;
-       }
-       return max;
-}
-
-static int browse_rb(struct mm_struct *mm)
-{
-       struct rb_root *root = &mm->mm_rb;
-       int i = 0, j, bug = 0;
-       struct rb_node *nd, *pn = NULL;
-       unsigned long prev = 0, pend = 0;
-
-       for (nd = rb_first(root); nd; nd = rb_next(nd)) {
-               struct vm_area_struct *vma;
-               vma = rb_entry(nd, struct vm_area_struct, vm_rb);
-               if (vma->vm_start < prev) {
-                       pr_emerg("vm_start %lx < prev %lx\n",
-                                 vma->vm_start, prev);
-                       bug = 1;
-               }
-               if (vma->vm_start < pend) {
-                       pr_emerg("vm_start %lx < pend %lx\n",
-                                 vma->vm_start, pend);
-                       bug = 1;
-               }
-               if (vma->vm_start > vma->vm_end) {
-                       pr_emerg("vm_start %lx > vm_end %lx\n",
-                                 vma->vm_start, vma->vm_end);
-                       bug = 1;
-               }
-               spin_lock(&mm->page_table_lock);
-               if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
-                       pr_emerg("free gap %lx, correct %lx\n",
-                              vma->rb_subtree_gap,
-                              vma_compute_subtree_gap(vma));
-                       bug = 1;
+#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
+extern void mt_validate(struct maple_tree *mt);
+extern void mt_dump(const struct maple_tree *mt);
+
+/* Validate the maple tree */
+static void validate_mm_mt(struct mm_struct *mm)
+{
+       struct maple_tree *mt = &mm->mm_mt;
+       struct vm_area_struct *vma_mt;
+
+       MA_STATE(mas, mt, 0, 0);
+
+       mt_validate(&mm->mm_mt);
+       mas_for_each(&mas, vma_mt, ULONG_MAX) {
+               if ((vma_mt->vm_start != mas.index) ||
+                   (vma_mt->vm_end - 1 != mas.last)) {
+                       pr_emerg("issue in %s\n", current->comm);
+                       dump_stack();
+                       dump_vma(vma_mt);
+                       pr_emerg("mt piv: %p %lu - %lu\n", vma_mt,
+                                mas.index, mas.last);
+                       pr_emerg("mt vma: %p %lu - %lu\n", vma_mt,
+                                vma_mt->vm_start, vma_mt->vm_end);
+
+                       mt_dump(mas.tree);
+                       if (vma_mt->vm_end != mas.last + 1) {
+                               pr_err("vma: %p vma_mt %lu-%lu\tmt %lu-%lu\n",
+                                               mm, vma_mt->vm_start, vma_mt->vm_end,
+                                               mas.index, mas.last);
+                               mt_dump(mas.tree);
+                       }
+                       VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
+                       if (vma_mt->vm_start != mas.index) {
+                               pr_err("vma: %p vma_mt %p %lu - %lu doesn't match\n",
+                                               mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
+                               mt_dump(mas.tree);
+                       }
+                       VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
                }
-               spin_unlock(&mm->page_table_lock);
-               i++;
-               pn = nd;
-               prev = vma->vm_start;
-               pend = vma->vm_end;
-       }
-       j = 0;
-       for (nd = pn; nd; nd = rb_prev(nd))
-               j++;
-       if (i != j) {
-               pr_emerg("backwards %d, forwards %d\n", j, i);
-               bug = 1;
-       }
-       return bug ? -1 : i;
-}
-
-static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
-{
-       struct rb_node *nd;
-
-       for (nd = rb_first(root); nd; nd = rb_next(nd)) {
-               struct vm_area_struct *vma;
-               vma = rb_entry(nd, struct vm_area_struct, vm_rb);
-               VM_BUG_ON_VMA(vma != ignore &&
-                       vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
-                       vma);
        }
 }
 
@@ -352,10 +327,13 @@ static void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
        int i = 0;
-       unsigned long highest_address = 0;
-       struct vm_area_struct *vma = mm->mmap;
+       struct vm_area_struct *vma;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
-       while (vma) {
+       validate_mm_mt(mm);
+
+       mas_for_each(&mas, vma, ULONG_MAX) {
+#ifdef CONFIG_DEBUG_VM_RB
                struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
@@ -365,93 +343,20 @@ static void validate_mm(struct mm_struct *mm)
                                anon_vma_interval_tree_verify(avc);
                        anon_vma_unlock_read(anon_vma);
                }
-
-               highest_address = vm_end_gap(vma);
-               vma = vma->vm_next;
+#endif
                i++;
        }
        if (i != mm->map_count) {
-               pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
-               bug = 1;
-       }
-       if (highest_address != mm->highest_vm_end) {
-               pr_emerg("mm->highest_vm_end %lx, found %lx\n",
-                         mm->highest_vm_end, highest_address);
-               bug = 1;
-       }
-       i = browse_rb(mm);
-       if (i != mm->map_count) {
-               if (i != -1)
-                       pr_emerg("map_count %d rb %d\n", mm->map_count, i);
+               pr_emerg("map_count %d mas_for_each %d\n", mm->map_count, i);
                bug = 1;
        }
        VM_BUG_ON_MM(bug, mm);
 }
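
The reworked validate_mm() above iterates the maple tree with mas_for_each() instead of following vm_next. A minimal sketch of that walk, assuming mmap_lock is held; dump_all_vmas() is a hypothetical helper, not part of the patch:

    #include <linux/mm.h>
    #include <linux/maple_tree.h>
    #include <linux/printk.h>
    #include <linux/limits.h>

    /* Illustrative only: print every VMA range via a maple state walk. */
    static void dump_all_vmas(struct mm_struct *mm)
    {
            struct vm_area_struct *vma;
            MA_STATE(mas, &mm->mm_mt, 0, 0);

            mas_for_each(&mas, vma, ULONG_MAX)
                    pr_info("vma %lx-%lx\n", vma->vm_start, vma->vm_end);
    }
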
-#else
-#define validate_mm_rb(root, ignore) do { } while (0)
-#define validate_mm(mm) do { } while (0)
-#endif
-
-RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
-                        struct vm_area_struct, vm_rb,
-                        unsigned long, rb_subtree_gap, vma_compute_gap)
-
-/*
- * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
- * vma->vm_prev->vm_end values changed, without modifying the vma's position
- * in the rbtree.
- */
-static void vma_gap_update(struct vm_area_struct *vma)
-{
-       /*
-        * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
-        * a callback function that does exactly what we want.
-        */
-       vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
-}
-
-static inline void vma_rb_insert(struct vm_area_struct *vma,
-                                struct rb_root *root)
-{
-       /* All rb_subtree_gap values must be consistent prior to insertion */
-       validate_mm_rb(root, NULL);
-
-       rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
-}
-
-static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
-{
-       /*
-        * Note rb_erase_augmented is a fairly large inline function,
-        * so make sure we instantiate it only once with our desired
-        * augmented rbtree callbacks.
-        */
-       rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
-}
-
-static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
-                                               struct rb_root *root,
-                                               struct vm_area_struct *ignore)
-{
-       /*
-        * All rb_subtree_gap values must be consistent prior to erase,
-        * with the possible exception of
-        *
-        * a. the "next" vma being erased if next->vm_start was reduced in
-        *    __vma_adjust() -> __vma_unlink()
-        * b. the vma being erased in detach_vmas_to_be_unmapped() ->
-        *    vma_rb_erase()
-        */
-       validate_mm_rb(root, ignore);
-
-       __vma_rb_erase(vma, root);
-}
 
-static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
-                                        struct rb_root *root)
-{
-       vma_rb_erase_ignore(vma, root, vma);
-}
+#else /* !CONFIG_DEBUG_VM_MAPLE_TREE */
+#define validate_mm_mt(root) do { } while (0)
+#define validate_mm(mm) do { } while (0)
+#endif /* CONFIG_DEBUG_VM_MAPLE_TREE */
 
 /*
  * vma has some anon_vma assigned, and is already inserted on that
@@ -485,208 +390,220 @@ anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
                anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
 }
 
-static int find_vma_links(struct mm_struct *mm, unsigned long addr,
-               unsigned long end, struct vm_area_struct **pprev,
-               struct rb_node ***rb_link, struct rb_node **rb_parent)
+static unsigned long count_vma_pages_range(struct mm_struct *mm,
+               unsigned long addr, unsigned long end)
 {
-       struct rb_node **__rb_link, *__rb_parent, *rb_prev;
+       VMA_ITERATOR(vmi, mm, addr);
+       struct vm_area_struct *vma;
+       unsigned long nr_pages = 0;
 
-       mmap_assert_locked(mm);
-       __rb_link = &mm->mm_rb.rb_node;
-       rb_prev = __rb_parent = NULL;
+       for_each_vma_range(vmi, vma, end) {
+               unsigned long vm_start = max(addr, vma->vm_start);
+               unsigned long vm_end = min(end, vma->vm_end);
 
-       while (*__rb_link) {
-               struct vm_area_struct *vma_tmp;
+               nr_pages += PHYS_PFN(vm_end - vm_start);
+       }
 
-               __rb_parent = *__rb_link;
-               vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
+       return nr_pages;
+}
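
count_vma_pages_range() above is rewritten on top of the new VMA iterator. A minimal sketch of the same for_each_vma_range() pattern, assuming mmap_lock is held; count_vmas_in_range() is a hypothetical helper:

    #include <linux/mm.h>

    /* Illustrative only: count the VMAs overlapping [addr, end). */
    static unsigned long count_vmas_in_range(struct mm_struct *mm,
                                             unsigned long addr,
                                             unsigned long end)
    {
            VMA_ITERATOR(vmi, mm, addr);
            struct vm_area_struct *vma;
            unsigned long n = 0;

            for_each_vma_range(vmi, vma, end)
                    n++;    /* one iteration per overlapping VMA */

            return n;
    }
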
 
-               if (vma_tmp->vm_end > addr) {
-                       /* Fail if an existing vma overlaps the area */
-                       if (vma_tmp->vm_start < end)
-                               return -ENOMEM;
-                       __rb_link = &__rb_parent->rb_left;
-               } else {
-                       rb_prev = __rb_parent;
-                       __rb_link = &__rb_parent->rb_right;
-               }
-       }
+static void __vma_link_file(struct vm_area_struct *vma,
+                           struct address_space *mapping)
+{
+       if (vma->vm_flags & VM_SHARED)
+               mapping_allow_writable(mapping);
 
-       *pprev = NULL;
-       if (rb_prev)
-               *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
-       *rb_link = __rb_link;
-       *rb_parent = __rb_parent;
-       return 0;
+       flush_dcache_mmap_lock(mapping);
+       vma_interval_tree_insert(vma, &mapping->i_mmap);
+       flush_dcache_mmap_unlock(mapping);
 }
 
 /*
- * vma_next() - Get the next VMA.
- * @mm: The mm_struct.
- * @vma: The current vma.
+ * vma_mas_store() - Store a VMA in the maple tree.
+ * @vma: The vm_area_struct
+ * @mas: The maple state
  *
- * If @vma is NULL, return the first vma in the mm.
+ * Efficient way to store a VMA in the maple tree when the @mas has already
+ * walked to the correct location.
  *
- * Returns: The next VMA after @vma.
+ * Note: the end address is inclusive in the maple tree.
  */
-static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
-                                        struct vm_area_struct *vma)
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
 {
-       if (!vma)
-               return mm->mmap;
-
-       return vma->vm_next;
+       trace_vma_store(mas->tree, vma);
+       mas_set_range(mas, vma->vm_start, vma->vm_end - 1);
+       mas_store_prealloc(mas, vma);
 }
 
 /*
- * munmap_vma_range() - munmap VMAs that overlap a range.
- * @mm: The mm struct
- * @start: The start of the range.
- * @len: The length of the range.
- * @pprev: pointer to the pointer that will be set to previous vm_area_struct
- * @rb_link: the rb_node
- * @rb_parent: the parent rb_node
- *
- * Find all the vm_area_struct that overlap from @start to
- * @end and munmap them.  Set @pprev to the previous vm_area_struct.
+ * vma_mas_remove() - Remove a VMA from the maple tree.
+ * @vma: The vm_area_struct
+ * @mas: The maple state
  *
- * Returns: -ENOMEM on munmap failure or 0 on success.
+ * Efficient way to remove a VMA from the maple tree when the @mas has already
+ * been established and points to the correct location.
+ * Note: the end address is inclusive in the maple tree.
  */
-static inline int
-munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
-                struct vm_area_struct **pprev, struct rb_node ***link,
-                struct rb_node **parent, struct list_head *uf)
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
 {
-
-       while (find_vma_links(mm, start, start + len, pprev, link, parent))
-               if (do_munmap(mm, start, len, uf))
-                       return -ENOMEM;
-
-       return 0;
+       trace_vma_mas_szero(mas->tree, vma->vm_start, vma->vm_end - 1);
+       mas->index = vma->vm_start;
+       mas->last = vma->vm_end - 1;
+       mas_store_prealloc(mas, NULL);
 }
-static unsigned long count_vma_pages_range(struct mm_struct *mm,
-               unsigned long addr, unsigned long end)
+
+/*
+ * vma_mas_szero() - Set a given range to zero.  Used when modifying a
+ * vm_area_struct start or end.
+ *
+ * @mas: The maple state
+ * @start: The start address to zero
+ * @end: The end address to zero.
+ */
+static inline void vma_mas_szero(struct ma_state *mas, unsigned long start,
+                               unsigned long end)
 {
-       unsigned long nr_pages = 0;
-       struct vm_area_struct *vma;
+       trace_vma_mas_szero(mas->tree, start, end - 1);
+       mas_set_range(mas, start, end - 1);
+       mas_store_prealloc(mas, NULL);
+}
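
All three helpers above rely on the maple tree's inclusive last index: a VMA covering [vm_start, vm_end) occupies slots vm_start through vm_end - 1. A minimal sketch of that convention, assuming mmap_lock is held for writing; the helper and its probes are purely illustrative:

    #include <linux/mm.h>
    #include <linux/maple_tree.h>

    /* Illustrative only: store a VMA and probe both sides of its end address. */
    static void show_inclusive_end(struct mm_struct *mm, struct vm_area_struct *vma)
    {
            MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);

            if (mas_store_gfp(&mas, vma, GFP_KERNEL))
                    return;

            /* The last byte of the mapping resolves to the VMA... */
            WARN_ON(mtree_load(&mm->mm_mt, vma->vm_end - 1) != vma);
            /* ...but the first byte past vm_end does not. */
            WARN_ON(mtree_load(&mm->mm_mt, vma->vm_end) == vma);
    }
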
 
-       /* Find first overlapping mapping */
-       vma = find_vma_intersection(mm, addr, end);
-       if (!vma)
-               return 0;
+static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       struct address_space *mapping = NULL;
 
-       nr_pages = (min(end, vma->vm_end) -
-               max(addr, vma->vm_start)) >> PAGE_SHIFT;
+       if (mas_preallocate(&mas, vma, GFP_KERNEL))
+               return -ENOMEM;
 
-       /* Iterate over the rest of the overlaps */
-       for (vma = vma->vm_next; vma; vma = vma->vm_next) {
-               unsigned long overlap_len;
+       if (vma->vm_file) {
+               mapping = vma->vm_file->f_mapping;
+               i_mmap_lock_write(mapping);
+       }
 
-               if (vma->vm_start > end)
-                       break;
+       vma_mas_store(vma, &mas);
 
-               overlap_len = min(end, vma->vm_end) - vma->vm_start;
-               nr_pages += overlap_len >> PAGE_SHIFT;
+       if (mapping) {
+               __vma_link_file(vma, mapping);
+               i_mmap_unlock_write(mapping);
        }
 
-       return nr_pages;
+       mm->map_count++;
+       validate_mm(mm);
+       return 0;
 }
 
-void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct rb_node **rb_link, struct rb_node *rb_parent)
+/*
+ * vma_expand - Expand an existing VMA
+ *
+ * @mas: The maple state
+ * @vma: The vma to expand
+ * @start: The start of the vma
+ * @end: The exclusive end of the vma
+ * @pgoff: The page offset of vma
+ * @next: The vma following @vma, if @vma is to expand over it.
+ *
+ * Expand @vma to @start and @end.  Can expand off the start and end.  Will
+ * expand over @next if it's different from @vma and @end == @next->vm_end.
+ * Checking if the @vma can expand and merge with @next needs to be handled by
+ * the caller.
+ *
+ * Returns: 0 on success
+ */
+inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
+                     unsigned long start, unsigned long end, pgoff_t pgoff,
+                     struct vm_area_struct *next)
 {
-       /* Update tracking information for the gap following the new vma. */
-       if (vma->vm_next)
-               vma_gap_update(vma->vm_next);
-       else
-               mm->highest_vm_end = vm_end_gap(vma);
+       struct mm_struct *mm = vma->vm_mm;
+       struct address_space *mapping = NULL;
+       struct rb_root_cached *root = NULL;
+       struct anon_vma *anon_vma = vma->anon_vma;
+       struct file *file = vma->vm_file;
+       bool remove_next = false;
 
-       /*
-        * vma->vm_prev wasn't known when we followed the rbtree to find the
-        * correct insertion point for that vma. As a result, we could not
-        * update the vma vm_rb parents rb_subtree_gap values on the way down.
-        * So, we first insert the vma with a zero rb_subtree_gap value
-        * (to be consistent with what we did on the way down), and then
-        * immediately update the gap to the correct value. Finally we
-        * rebalance the rbtree after all augmented values have been set.
-        */
-       rb_link_node(&vma->vm_rb, rb_parent, rb_link);
-       vma->rb_subtree_gap = 0;
-       vma_gap_update(vma);
-       vma_rb_insert(vma, &mm->mm_rb);
-}
+       if (next && (vma != next) && (end == next->vm_end)) {
+               remove_next = true;
+               if (next->anon_vma && !vma->anon_vma) {
+                       int error;
 
-static void __vma_link_file(struct vm_area_struct *vma)
-{
-       struct file *file;
+                       anon_vma = next->anon_vma;
+                       vma->anon_vma = anon_vma;
+                       error = anon_vma_clone(vma, next);
+                       if (error)
+                               return error;
+               }
+       }
+
+       /* Not merging but overwriting any part of next is not handled. */
+       VM_BUG_ON(next && !remove_next && next != vma && end > next->vm_start);
+       /* Only handles expanding */
+       VM_BUG_ON(vma->vm_start < start || vma->vm_end > end);
+
+       if (mas_preallocate(mas, vma, GFP_KERNEL))
+               goto nomem;
+
+       vma_adjust_trans_huge(vma, start, end, 0);
 
-       file = vma->vm_file;
        if (file) {
-               struct address_space *mapping = file->f_mapping;
+               mapping = file->f_mapping;
+               root = &mapping->i_mmap;
+               uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+               i_mmap_lock_write(mapping);
+       }
 
-               if (vma->vm_flags & VM_SHARED)
-                       mapping_allow_writable(mapping);
+       if (anon_vma) {
+               anon_vma_lock_write(anon_vma);
+               anon_vma_interval_tree_pre_update_vma(vma);
+       }
 
+       if (file) {
                flush_dcache_mmap_lock(mapping);
-               vma_interval_tree_insert(vma, &mapping->i_mmap);
-               flush_dcache_mmap_unlock(mapping);
+               vma_interval_tree_remove(vma, root);
        }
-}
 
-static void
-__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
-       struct vm_area_struct *prev, struct rb_node **rb_link,
-       struct rb_node *rb_parent)
-{
-       __vma_link_list(mm, vma, prev);
-       __vma_link_rb(mm, vma, rb_link, rb_parent);
-}
+       vma->vm_start = start;
+       vma->vm_end = end;
+       vma->vm_pgoff = pgoff;
+       /* Note: mas must be pointing to the expanding VMA */
+       vma_mas_store(vma, mas);
 
-static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
-                       struct vm_area_struct *prev, struct rb_node **rb_link,
-                       struct rb_node *rb_parent)
-{
-       struct address_space *mapping = NULL;
+       if (file) {
+               vma_interval_tree_insert(vma, root);
+               flush_dcache_mmap_unlock(mapping);
+       }
 
-       if (vma->vm_file) {
-               mapping = vma->vm_file->f_mapping;
-               i_mmap_lock_write(mapping);
+       /* Expanding over the next vma */
+       if (remove_next && file) {
+               __remove_shared_vm_struct(next, file, mapping);
        }
 
-       __vma_link(mm, vma, prev, rb_link, rb_parent);
-       __vma_link_file(vma);
+       if (anon_vma) {
+               anon_vma_interval_tree_post_update_vma(vma);
+               anon_vma_unlock_write(anon_vma);
+       }
 
-       if (mapping)
+       if (file) {
                i_mmap_unlock_write(mapping);
+               uprobe_mmap(vma);
+       }
 
-       mm->map_count++;
-       validate_mm(mm);
-}
-
-/*
- * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
- * mm's list and rbtree.  It has already been inserted into the interval tree.
- */
-static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
-{
-       struct vm_area_struct *prev;
-       struct rb_node **rb_link, *rb_parent;
+       if (remove_next) {
+               if (file) {
+                       uprobe_munmap(next, next->vm_start, next->vm_end);
+                       fput(file);
+               }
+               if (next->anon_vma)
+                       anon_vma_merge(vma, next);
+               mm->map_count--;
+               mpol_put(vma_policy(next));
+               vm_area_free(next);
+       }
 
-       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
-                          &prev, &rb_link, &rb_parent))
-               BUG();
-       __vma_link(mm, vma, prev, rb_link, rb_parent);
-       mm->map_count++;
-}
+       validate_mm(mm);
+       return 0;
 
-static __always_inline void __vma_unlink(struct mm_struct *mm,
-                                               struct vm_area_struct *vma,
-                                               struct vm_area_struct *ignore)
-{
-       vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
-       __vma_unlink_list(mm, vma);
-       /* Kill the cache */
-       vmacache_invalidate(mm);
+nomem:
+       return -ENOMEM;
 }
 
 /*
@@ -701,18 +618,19 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        struct vm_area_struct *expand)
 {
        struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
+       struct vm_area_struct *next_next, *next = find_vma(mm, vma->vm_end);
+       struct vm_area_struct *orig_vma = vma;
        struct address_space *mapping = NULL;
        struct rb_root_cached *root = NULL;
        struct anon_vma *anon_vma = NULL;
        struct file *file = vma->vm_file;
-       bool start_changed = false, end_changed = false;
+       bool vma_changed = false;
        long adjust_next = 0;
        int remove_next = 0;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       struct vm_area_struct *exporter = NULL, *importer = NULL;
 
        if (next && !insert) {
-               struct vm_area_struct *exporter = NULL, *importer = NULL;
-
                if (end >= next->vm_end) {
                        /*
                         * vma expands, overlapping all the next, and
@@ -741,10 +659,11 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                                 * remove_next == 1 is case 1 or 7.
                                 */
                                remove_next = 1 + (end > next->vm_end);
+                               if (remove_next == 2)
+                                       next_next = find_vma(mm, next->vm_end);
+
                                VM_WARN_ON(remove_next == 2 &&
-                                          end != next->vm_next->vm_end);
-                               /* trim end to next, for case 6 first pass */
-                               end = next->vm_end;
+                                          end != next_next->vm_end);
                        }
 
                        exporter = next;
@@ -755,7 +674,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                         * next, if the vma overlaps with it.
                         */
                        if (remove_next == 2 && !next->anon_vma)
-                               exporter = next->vm_next;
+                               exporter = next_next;
 
                } else if (end > next->vm_start) {
                        /*
@@ -792,9 +711,11 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                                return error;
                }
        }
-again:
-       vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
 
+       if (mas_preallocate(&mas, vma, GFP_KERNEL))
+               return -ENOMEM;
+
+       vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
        if (file) {
                mapping = file->f_mapping;
                root = &mapping->i_mmap;
@@ -804,14 +725,14 @@ again:
                        uprobe_munmap(next, next->vm_start, next->vm_end);
 
                i_mmap_lock_write(mapping);
-               if (insert) {
+               if (insert && insert->vm_file) {
                        /*
                         * Put into interval tree now, so instantiated pages
                         * are visible to arm/parisc __flush_dcache_page
                         * throughout; but we cannot insert into address
                         * space until vma start or end is updated.
                         */
-                       __vma_link_file(insert);
+                       __vma_link_file(insert, insert->vm_file->f_mapping);
                }
        }
 
@@ -835,17 +756,37 @@ again:
        }
 
        if (start != vma->vm_start) {
+               if ((vma->vm_start < start) &&
+                   (!insert || (insert->vm_end != start))) {
+                       vma_mas_szero(&mas, vma->vm_start, start);
+                       VM_WARN_ON(insert && insert->vm_start > vma->vm_start);
+               } else {
+                       vma_changed = true;
+               }
                vma->vm_start = start;
-               start_changed = true;
        }
        if (end != vma->vm_end) {
+               if (vma->vm_end > end) {
+                       if (!insert || (insert->vm_start != end)) {
+                               vma_mas_szero(&mas, end, vma->vm_end);
+                               mas_reset(&mas);
+                               VM_WARN_ON(insert &&
+                                          insert->vm_end < vma->vm_end);
+                       }
+               } else {
+                       vma_changed = true;
+               }
                vma->vm_end = end;
-               end_changed = true;
        }
+
+       if (vma_changed)
+               vma_mas_store(vma, &mas);
+
        vma->vm_pgoff = pgoff;
        if (adjust_next) {
                next->vm_start += adjust_next;
                next->vm_pgoff += adjust_next >> PAGE_SHIFT;
+               vma_mas_store(next, &mas);
        }
 
        if (file) {
@@ -855,42 +796,19 @@ again:
                flush_dcache_mmap_unlock(mapping);
        }
 
-       if (remove_next) {
-               /*
-                * vma_merge has merged next into vma, and needs
-                * us to remove next before dropping the locks.
-                */
-               if (remove_next != 3)
-                       __vma_unlink(mm, next, next);
-               else
-                       /*
-                        * vma is not before next if they've been
-                        * swapped.
-                        *
-                        * pre-swap() next->vm_start was reduced so
-                        * tell validate_mm_rb to ignore pre-swap()
-                        * "next" (which is stored in post-swap()
-                        * "vma").
-                        */
-                       __vma_unlink(mm, next, vma);
-               if (file)
-                       __remove_shared_vm_struct(next, file, mapping);
+       if (remove_next && file) {
+               __remove_shared_vm_struct(next, file, mapping);
+               if (remove_next == 2)
+                       __remove_shared_vm_struct(next_next, file, mapping);
        } else if (insert) {
                /*
                 * split_vma has split insert from vma, and needs
                 * us to insert it before dropping the locks
                 * (it may either follow vma or precede it).
                 */
-               __insert_vm_struct(mm, insert);
-       } else {
-               if (start_changed)
-                       vma_gap_update(vma);
-               if (end_changed) {
-                       if (!next)
-                               mm->highest_vm_end = vm_end_gap(vma);
-                       else if (!adjust_next)
-                               vma_gap_update(next);
-               }
+               mas_reset(&mas);
+               vma_mas_store(insert, &mas);
+               mm->map_count++;
        }
 
        if (anon_vma) {
@@ -909,6 +827,7 @@ again:
        }
 
        if (remove_next) {
+again:
                if (file) {
                        uprobe_munmap(next, next->vm_start, next->vm_end);
                        fput(file);
@@ -917,66 +836,24 @@ again:
                        anon_vma_merge(vma, next);
                mm->map_count--;
                mpol_put(vma_policy(next));
+               if (remove_next != 2)
+                       BUG_ON(vma->vm_end < next->vm_end);
                vm_area_free(next);
+
                /*
                 * In mprotect's case 6 (see comments on vma_merge),
-                * we must remove another next too. It would clutter
-                * up the code too much to do both in one go.
+                * we must remove next_next too.
                 */
-               if (remove_next != 3) {
-                       /*
-                        * If "next" was removed and vma->vm_end was
-                        * expanded (up) over it, in turn
-                        * "next->vm_prev->vm_end" changed and the
-                        * "vma->vm_next" gap must be updated.
-                        */
-                       next = vma->vm_next;
-               } else {
-                       /*
-                        * For the scope of the comment "next" and
-                        * "vma" considered pre-swap(): if "vma" was
-                        * removed, next->vm_start was expanded (down)
-                        * over it and the "next" gap must be updated.
-                        * Because of the swap() the post-swap() "vma"
-                        * actually points to pre-swap() "next"
-                        * (post-swap() "next" as opposed is now a
-                        * dangling pointer).
-                        */
-                       next = vma;
-               }
                if (remove_next == 2) {
                        remove_next = 1;
-                       end = next->vm_end;
+                       next = next_next;
                        goto again;
                }
-               else if (next)
-                       vma_gap_update(next);
-               else {
-                       /*
-                        * If remove_next == 2 we obviously can't
-                        * reach this path.
-                        *
-                        * If remove_next == 3 we can't reach this
-                        * path because pre-swap() next is always not
-                        * NULL. pre-swap() "next" is not being
-                        * removed and its next->vm_end is not altered
-                        * (and furthermore "end" already matches
-                        * next->vm_end in remove_next == 3).
-                        *
-                        * We reach this only in the remove_next == 1
-                        * case if the "next" vma that was removed was
-                        * the highest vma of the mm. However in such
-                        * case next->vm_end == "end" and the extended
-                        * "vma" has vma->vm_end == next->vm_end so
-                        * mm->highest_vm_end doesn't need any update
-                        * in remove_next == 1 case.
-                        */
-                       VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
-               }
        }
        if (insert && file)
                uprobe_mmap(insert);
 
+       mas_destroy(&mas);
        validate_mm(mm);
 
        return 0;
@@ -1128,8 +1005,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        struct anon_vma_name *anon_name)
 {
        pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
-       struct vm_area_struct *area, *next;
-       int err;
+       struct vm_area_struct *mid, *next, *res;
+       int err = -1;
+       bool merge_prev = false;
+       bool merge_next = false;
 
        /*
         * We later require that vma->vm_flags == vm_flags,
@@ -1138,76 +1017,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (vm_flags & VM_SPECIAL)
                return NULL;
 
-       next = vma_next(mm, prev);
-       area = next;
-       if (area && area->vm_end == end)                /* cases 6, 7, 8 */
-               next = next->vm_next;
+       next = find_vma(mm, prev ? prev->vm_end : 0);
+       mid = next;
+       if (next && next->vm_end == end)                /* cases 6, 7, 8 */
+               next = find_vma(mm, next->vm_end);
 
        /* verify some invariant that must be enforced by the caller */
        VM_WARN_ON(prev && addr <= prev->vm_start);
-       VM_WARN_ON(area && end > area->vm_end);
+       VM_WARN_ON(mid && end > mid->vm_end);
        VM_WARN_ON(addr >= end);
 
-       /*
-        * Can it merge with the predecessor?
-        */
+       /* Can we merge the predecessor? */
        if (prev && prev->vm_end == addr &&
                        mpol_equal(vma_policy(prev), policy) &&
                        can_vma_merge_after(prev, vm_flags,
                                            anon_vma, file, pgoff,
                                            vm_userfaultfd_ctx, anon_name)) {
-               /*
-                * OK, it can.  Can we now merge in the successor as well?
-                */
-               if (next && end == next->vm_start &&
-                               mpol_equal(policy, vma_policy(next)) &&
-                               can_vma_merge_before(next, vm_flags,
-                                                    anon_vma, file,
-                                                    pgoff+pglen,
-                                                    vm_userfaultfd_ctx, anon_name) &&
-                               is_mergeable_anon_vma(prev->anon_vma,
-                                                     next->anon_vma, NULL)) {
-                                                       /* cases 1, 6 */
-                       err = __vma_adjust(prev, prev->vm_start,
-                                        next->vm_end, prev->vm_pgoff, NULL,
-                                        prev);
-               } else                                  /* cases 2, 5, 7 */
-                       err = __vma_adjust(prev, prev->vm_start,
-                                        end, prev->vm_pgoff, NULL, prev);
-               if (err)
-                       return NULL;
-               khugepaged_enter_vma(prev, vm_flags);
-               return prev;
+               merge_prev = true;
        }
-
-       /*
-        * Can this new request be merged in front of next?
-        */
+       /* Can we merge the successor? */
        if (next && end == next->vm_start &&
                        mpol_equal(policy, vma_policy(next)) &&
                        can_vma_merge_before(next, vm_flags,
                                             anon_vma, file, pgoff+pglen,
                                             vm_userfaultfd_ctx, anon_name)) {
+               merge_next = true;
+       }
+       /* Can we merge both the predecessor and the successor? */
+       if (merge_prev && merge_next &&
+                       is_mergeable_anon_vma(prev->anon_vma,
+                               next->anon_vma, NULL)) {         /* cases 1, 6 */
+               err = __vma_adjust(prev, prev->vm_start,
+                                       next->vm_end, prev->vm_pgoff, NULL,
+                                       prev);
+               res = prev;
+       } else if (merge_prev) {                        /* cases 2, 5, 7 */
+               err = __vma_adjust(prev, prev->vm_start,
+                                       end, prev->vm_pgoff, NULL, prev);
+               res = prev;
+       } else if (merge_next) {
                if (prev && addr < prev->vm_end)        /* case 4 */
                        err = __vma_adjust(prev, prev->vm_start,
-                                        addr, prev->vm_pgoff, NULL, next);
-               else {                                  /* cases 3, 8 */
-                       err = __vma_adjust(area, addr, next->vm_end,
-                                        next->vm_pgoff - pglen, NULL, next);
-                       /*
-                        * In case 3 area is already equal to next and
-                        * this is a noop, but in case 8 "area" has
-                        * been removed and next was expanded over it.
-                        */
-                       area = next;
-               }
-               if (err)
-                       return NULL;
-               khugepaged_enter_vma(area, vm_flags);
-               return area;
+                                       addr, prev->vm_pgoff, NULL, next);
+               else                                    /* cases 3, 8 */
+                       err = __vma_adjust(mid, addr, next->vm_end,
+                                       next->vm_pgoff - pglen, NULL, next);
+               res = next;
        }
 
-       return NULL;
+       /*
+        * Could not merge with the predecessor or successor, or
+        * __vma_adjust() failed.
+        */
+       if (err)
+               return NULL;
+       khugepaged_enter_vma(res, vm_flags);
+       return res;
 }
 
 /*
@@ -1275,18 +1139,24 @@ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+       MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
        struct anon_vma *anon_vma = NULL;
+       struct vm_area_struct *prev, *next;
 
        /* Try next first. */
-       if (vma->vm_next) {
-               anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
+       next = mas_walk(&mas);
+       if (next) {
+               anon_vma = reusable_anon_vma(next, vma, next);
                if (anon_vma)
                        return anon_vma;
        }
 
+       prev = mas_prev(&mas, 0);
+       VM_BUG_ON_VMA(prev != vma, vma);
+       prev = mas_prev(&mas, 0);
        /* Try prev next. */
-       if (vma->vm_prev)
-               anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
+       if (prev)
+               anon_vma = reusable_anon_vma(prev, prev, vma);
 
        /*
         * We might reach here with anon_vma == NULL if we can't find
@@ -1375,6 +1245,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
        vm_flags_t vm_flags;
        int pkey = 0;
 
+       validate_mm(mm);
        *populate = 0;
 
        if (!len)
@@ -1678,388 +1549,63 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
        return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
-               unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-               struct list_head *uf)
-{
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma, *prev, *merge;
-       int error;
-       struct rb_node **rb_link, *rb_parent;
-       unsigned long charged = 0;
-
-       /* Check against address space limit. */
-       if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
-               unsigned long nr_pages;
-
-               /*
-                * MAP_FIXED may remove pages of mappings that intersects with
-                * requested mapping. Account for the pages it would unmap.
-                */
-               nr_pages = count_vma_pages_range(mm, addr, addr + len);
-
-               if (!may_expand_vm(mm, vm_flags,
-                                       (len >> PAGE_SHIFT) - nr_pages))
-                       return -ENOMEM;
-       }
-
-       /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
-       if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
-               return -ENOMEM;
-       /*
-        * Private writable mapping: check memory availability
-        */
-       if (accountable_mapping(file, vm_flags)) {
-               charged = len >> PAGE_SHIFT;
-               if (security_vm_enough_memory_mm(mm, charged))
-                       return -ENOMEM;
-               vm_flags |= VM_ACCOUNT;
-       }
-
-       /*
-        * Can we just expand an old mapping?
-        */
-       vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-                       NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-       if (vma)
-               goto out;
-
-       /*
-        * Determine the object being mapped and call the appropriate
-        * specific mapper. the address has already been validated, but
-        * not unmapped, but the maps are removed from the list.
-        */
-       vma = vm_area_alloc(mm);
-       if (!vma) {
-               error = -ENOMEM;
-               goto unacct_error;
-       }
-
-       vma->vm_start = addr;
-       vma->vm_end = addr + len;
-       vma->vm_flags = vm_flags;
-       vma->vm_page_prot = vm_get_page_prot(vm_flags);
-       vma->vm_pgoff = pgoff;
-
-       if (file) {
-               if (vm_flags & VM_SHARED) {
-                       error = mapping_map_writable(file->f_mapping);
-                       if (error)
-                               goto free_vma;
-               }
-
-               vma->vm_file = get_file(file);
-               error = call_mmap(file, vma);
-               if (error)
-                       goto unmap_and_free_vma;
-
-               /* Can addr have changed??
-                *
-                * Answer: Yes, several device drivers can do it in their
-                *         f_op->mmap method. -DaveM
-                * Bug: If addr is changed, prev, rb_link, rb_parent should
-                *      be updated for vma_link()
-                */
-               WARN_ON_ONCE(addr != vma->vm_start);
-
-               addr = vma->vm_start;
-
-               /* If vm_flags changed after call_mmap(), we should try merge vma again
-                * as we may succeed this time.
-                */
-               if (unlikely(vm_flags != vma->vm_flags && prev)) {
-                       merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
-                               NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-                       if (merge) {
-                               /* ->mmap() can change vma->vm_file and fput the original file. So
-                                * fput the vma->vm_file here or we would add an extra fput for file
-                                * and cause general protection fault ultimately.
-                                */
-                               fput(vma->vm_file);
-                               vm_area_free(vma);
-                               vma = merge;
-                               /* Update vm_flags to pick up the change. */
-                               vm_flags = vma->vm_flags;
-                               goto unmap_writable;
-                       }
-               }
-
-               vm_flags = vma->vm_flags;
-       } else if (vm_flags & VM_SHARED) {
-               error = shmem_zero_setup(vma);
-               if (error)
-                       goto free_vma;
-       } else {
-               vma_set_anonymous(vma);
-       }
-
-       /* Allow architectures to sanity-check the vm_flags */
-       if (!arch_validate_flags(vma->vm_flags)) {
-               error = -EINVAL;
-               if (file)
-                       goto unmap_and_free_vma;
-               else
-                       goto free_vma;
-       }
-
-       vma_link(mm, vma, prev, rb_link, rb_parent);
-
-       /*
-        * vma_merge() calls khugepaged_enter_vma() either, the below
-        * call covers the non-merge case.
-        */
-       khugepaged_enter_vma(vma, vma->vm_flags);
-
-       /* Once vma denies write, undo our temporary denial count */
-unmap_writable:
-       if (file && vm_flags & VM_SHARED)
-               mapping_unmap_writable(file->f_mapping);
-       file = vma->vm_file;
-out:
-       perf_event_mmap(vma);
-
-       vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
-       if (vm_flags & VM_LOCKED) {
-               if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
-                                       is_vm_hugetlb_page(vma) ||
-                                       vma == get_gate_vma(current->mm))
-                       vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
-               else
-                       mm->locked_vm += (len >> PAGE_SHIFT);
-       }
-
-       if (file)
-               uprobe_mmap(vma);
-
-       /*
-        * New (or expanded) vma always get soft dirty status.
-        * Otherwise user-space soft-dirty page tracker won't
-        * be able to distinguish situation when vma area unmapped,
-        * then new mapped in-place (which must be aimed as
-        * a completely new data area).
-        */
-       vma->vm_flags |= VM_SOFTDIRTY;
-
-       vma_set_page_prot(vma);
-
-       return addr;
-
-unmap_and_free_vma:
-       fput(vma->vm_file);
-       vma->vm_file = NULL;
-
-       /* Undo any partial mapping done by a device driver. */
-       unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
-       if (vm_flags & VM_SHARED)
-               mapping_unmap_writable(file->f_mapping);
-free_vma:
-       vm_area_free(vma);
-unacct_error:
-       if (charged)
-               vm_unacct_memory(charged);
-       return error;
-}
-
+/**
+ * unmapped_area() - Find an area between the low_limit and the high_limit with
+ * the correct alignment and offset, all from @info. Note: current->mm is used
+ * for the search.
+ *
+ * @info: The unmapped area information including the range (low_limit -
+ * high_limit), the alignment offset and mask.
+ *
+ * Return: A memory address or -ENOMEM.
+ */
 static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
-       /*
-        * We implement the search by looking for an rbtree node that
-        * immediately follows a suitable gap. That is,
-        * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
-        * - gap_end   = vma->vm_start        >= info->low_limit  + length;
-        * - gap_end - gap_start >= length
-        */
+       unsigned long length, gap;
 
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
-       unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       MA_STATE(mas, &current->mm->mm_mt, 0, 0);
 
        /* Adjust search length to account for worst case alignment overhead */
        length = info->length + info->align_mask;
        if (length < info->length)
                return -ENOMEM;
 
-       /* Adjust search limits by the desired length */
-       if (info->high_limit < length)
-               return -ENOMEM;
-       high_limit = info->high_limit - length;
-
-       if (info->low_limit > high_limit)
-               return -ENOMEM;
-       low_limit = info->low_limit + length;
-
-       /* Check if rbtree root looks promising */
-       if (RB_EMPTY_ROOT(&mm->mm_rb))
-               goto check_highest;
-       vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
-       if (vma->rb_subtree_gap < length)
-               goto check_highest;
-
-       while (true) {
-               /* Visit left subtree if it looks promising */
-               gap_end = vm_start_gap(vma);
-               if (gap_end >= low_limit && vma->vm_rb.rb_left) {
-                       struct vm_area_struct *left =
-                               rb_entry(vma->vm_rb.rb_left,
-                                        struct vm_area_struct, vm_rb);
-                       if (left->rb_subtree_gap >= length) {
-                               vma = left;
-                               continue;
-                       }
-               }
-
-               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
-check_current:
-               /* Check if current node has a suitable gap */
-               if (gap_start > high_limit)
-                       return -ENOMEM;
-               if (gap_end >= low_limit &&
-                   gap_end > gap_start && gap_end - gap_start >= length)
-                       goto found;
-
-               /* Visit right subtree if it looks promising */
-               if (vma->vm_rb.rb_right) {
-                       struct vm_area_struct *right =
-                               rb_entry(vma->vm_rb.rb_right,
-                                        struct vm_area_struct, vm_rb);
-                       if (right->rb_subtree_gap >= length) {
-                               vma = right;
-                               continue;
-                       }
-               }
-
-               /* Go back up the rbtree to find next candidate node */
-               while (true) {
-                       struct rb_node *prev = &vma->vm_rb;
-                       if (!rb_parent(prev))
-                               goto check_highest;
-                       vma = rb_entry(rb_parent(prev),
-                                      struct vm_area_struct, vm_rb);
-                       if (prev == vma->vm_rb.rb_left) {
-                               gap_start = vm_end_gap(vma->vm_prev);
-                               gap_end = vm_start_gap(vma);
-                               goto check_current;
-                       }
-               }
-       }
-
-check_highest:
-       /* Check highest gap, which does not precede any rbtree node */
-       gap_start = mm->highest_vm_end;
-       gap_end = ULONG_MAX;  /* Only for VM_BUG_ON below */
-       if (gap_start > high_limit)
+       if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
+                                 length))
                return -ENOMEM;
 
-found:
-       /* We found a suitable gap. Clip it with the original low_limit. */
-       if (gap_start < info->low_limit)
-               gap_start = info->low_limit;
-
-       /* Adjust gap address to the desired alignment */
-       gap_start += (info->align_offset - gap_start) & info->align_mask;
-
-       VM_BUG_ON(gap_start + info->length > info->high_limit);
-       VM_BUG_ON(gap_start + info->length > gap_end);
-       return gap_start;
+       gap = mas.index;
+       gap += (info->align_offset - gap) & info->align_mask;
+       return gap;
 }
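
Editor's sketch (not part of the patch): the bottom-up search above is normally reached through vm_unmapped_area(), with arch code filling in struct vm_unmapped_area_info first. The field values below are illustrative assumptions only.

static unsigned long example_bottom_up(unsigned long len)
{
	struct vm_unmapped_area_info info;

	info.flags = 0;				/* bottom-up search */
	info.length = len;
	info.low_limit = current->mm->mmap_base;
	info.high_limit = TASK_SIZE;
	info.align_mask = 0;			/* no extra alignment needed */
	info.align_offset = 0;

	return vm_unmapped_area(&info);		/* ends up in unmapped_area() */
}
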
 
+/**
+ * unmapped_area_topdown() - Find an area between the low_limit and the
+ * high_limit with the correct alignment and offset at the highest available
+ * address, all from @info. Note: current->mm is used for the search.
+ *
+ * @info: The unmapped area information including the range (low_limit -
+ * high_limit), the alignment offset and mask.
+ *
+ * Return: A memory address or -ENOMEM.
+ */
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
-       unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       unsigned long length, gap;
 
+       MA_STATE(mas, &current->mm->mm_mt, 0, 0);
        /* Adjust search length to account for worst case alignment overhead */
        length = info->length + info->align_mask;
        if (length < info->length)
                return -ENOMEM;
 
-       /*
-        * Adjust search limits by the desired length.
-        * See implementation comment at top of unmapped_area().
-        */
-       gap_end = info->high_limit;
-       if (gap_end < length)
-               return -ENOMEM;
-       high_limit = gap_end - length;
-
-       if (info->low_limit > high_limit)
-               return -ENOMEM;
-       low_limit = info->low_limit + length;
-
-       /* Check highest gap, which does not precede any rbtree node */
-       gap_start = mm->highest_vm_end;
-       if (gap_start <= high_limit)
-               goto found_highest;
-
-       /* Check if rbtree root looks promising */
-       if (RB_EMPTY_ROOT(&mm->mm_rb))
-               return -ENOMEM;
-       vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
-       if (vma->rb_subtree_gap < length)
+       if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+                               length))
                return -ENOMEM;
 
-       while (true) {
-               /* Visit right subtree if it looks promising */
-               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
-               if (gap_start <= high_limit && vma->vm_rb.rb_right) {
-                       struct vm_area_struct *right =
-                               rb_entry(vma->vm_rb.rb_right,
-                                        struct vm_area_struct, vm_rb);
-                       if (right->rb_subtree_gap >= length) {
-                               vma = right;
-                               continue;
-                       }
-               }
-
-check_current:
-               /* Check if current node has a suitable gap */
-               gap_end = vm_start_gap(vma);
-               if (gap_end < low_limit)
-                       return -ENOMEM;
-               if (gap_start <= high_limit &&
-                   gap_end > gap_start && gap_end - gap_start >= length)
-                       goto found;
-
-               /* Visit left subtree if it looks promising */
-               if (vma->vm_rb.rb_left) {
-                       struct vm_area_struct *left =
-                               rb_entry(vma->vm_rb.rb_left,
-                                        struct vm_area_struct, vm_rb);
-                       if (left->rb_subtree_gap >= length) {
-                               vma = left;
-                               continue;
-                       }
-               }
-
-               /* Go back up the rbtree to find next candidate node */
-               while (true) {
-                       struct rb_node *prev = &vma->vm_rb;
-                       if (!rb_parent(prev))
-                               return -ENOMEM;
-                       vma = rb_entry(rb_parent(prev),
-                                      struct vm_area_struct, vm_rb);
-                       if (prev == vma->vm_rb.rb_right) {
-                               gap_start = vma->vm_prev ?
-                                       vm_end_gap(vma->vm_prev) : 0;
-                               goto check_current;
-                       }
-               }
-       }
-
-found:
-       /* We found a suitable gap. Clip it with the original high_limit. */
-       if (gap_end > info->high_limit)
-               gap_end = info->high_limit;
-
-found_highest:
-       /* Compute highest gap address at the desired alignment */
-       gap_end -= info->length;
-       gap_end -= (gap_end - info->align_offset) & info->align_mask;
-
-       VM_BUG_ON(gap_end < info->low_limit);
-       VM_BUG_ON(gap_end < gap_start);
-       return gap_end;
+       gap = mas.last + 1 - info->length;
+       gap -= (gap - info->align_offset) & info->align_mask;
+       return gap;
 }
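
Editor's sketch (not part of the patch): a quick numeric check of the top-down rounding above, using made-up values.

static unsigned long example_topdown_round(void)
{
	unsigned long last = 0x7ffff0004fffUL;	/* hypothetical mas.last */
	unsigned long len  = 0x3000UL;		/* hypothetical info->length */
	unsigned long mask = 0xffffUL;		/* hypothetical info->align_mask (64K - 1) */
	unsigned long gap  = last + 1 - len;	/* 0x7ffff0002000 */

	gap -= (gap - 0) & mask;	/* align_offset 0: rounds down to 0x7ffff0000000 */
	return gap;
}
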
 
 /*
@@ -2232,6 +1778,9 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                 */
                pgoff = 0;
                get_area = shmem_get_unmapped_area;
+       } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+               /* Ensures that larger anonymous mappings are THP aligned. */
+               get_area = thp_get_unmapped_area;
        }
 
        addr = get_area(file, addr, len, pgoff, flags);
@@ -2249,58 +1798,67 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
 EXPORT_SYMBOL(get_unmapped_area);
 
-/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
-struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+/**
+ * find_vma_intersection() - Look up the first VMA which intersects the interval
+ * @mm: The process address space.
+ * @start_addr: The inclusive start user address.
+ * @end_addr: The exclusive end user address.
+ *
+ * Returns: The first VMA within the provided range, %NULL otherwise.  Assumes
+ * start_addr < end_addr.
+ */
+struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+                                            unsigned long start_addr,
+                                            unsigned long end_addr)
 {
-       struct rb_node *rb_node;
-       struct vm_area_struct *vma;
+       unsigned long index = start_addr;
 
        mmap_assert_locked(mm);
-       /* Check the cache first. */
-       vma = vmacache_find(mm, addr);
-       if (likely(vma))
-               return vma;
-
-       rb_node = mm->mm_rb.rb_node;
-
-       while (rb_node) {
-               struct vm_area_struct *tmp;
-
-               tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+       return mt_find(&mm->mm_mt, &index, end_addr - 1);
+}
+EXPORT_SYMBOL(find_vma_intersection);
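
Editor's sketch (not part of the patch): a minimal, hypothetical caller of the new helper; the interval is half-open, so end itself is not checked.

static bool example_range_is_free(struct mm_struct *mm, unsigned long start,
				  unsigned long end)
{
	bool free;

	mmap_read_lock(mm);
	free = !find_vma_intersection(mm, start, end);
	mmap_read_unlock(mm);

	return free;
}
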
 
-               if (tmp->vm_end > addr) {
-                       vma = tmp;
-                       if (tmp->vm_start <= addr)
-                               break;
-                       rb_node = rb_node->rb_left;
-               } else
-                       rb_node = rb_node->rb_right;
-       }
+/**
+ * find_vma() - Find the VMA for a given address, or the next VMA.
+ * @mm: The mm_struct to check
+ * @addr: The address
+ *
+ * Returns: The VMA associated with addr, or the next VMA.
+ * May return %NULL in the case of no VMA at addr or above.
+ */
+struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+{
+       unsigned long index = addr;
 
-       if (vma)
-               vmacache_update(addr, vma);
-       return vma;
+       mmap_assert_locked(mm);
+       return mt_find(&mm->mm_mt, &index, ULONG_MAX);
 }
-
 EXPORT_SYMBOL(find_vma);
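
Editor's sketch (not part of the patch): because find_vma() may return the next VMA above @addr, a caller that wants an exact hit still needs the vm_start check.

static bool example_addr_is_mapped(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);
	vma = find_vma(mm, addr);

	return vma && vma->vm_start <= addr;
}
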
 
-/*
- * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
+/**
+ * find_vma_prev() - Find the VMA for a given address, or the next vma and
+ * set %pprev to the previous VMA, if any.
+ * @mm: The mm_struct to check
+ * @addr: The address
+ * @pprev: The pointer to set to the previous VMA
+ *
+ * Note that the RCU lock is not taken here since the external mmap_lock is
+ * used instead.
+ *
+ * Returns: The VMA associated with @addr, or the next vma.
+ * May return %NULL in the case of no vma at addr or above.
  */
 struct vm_area_struct *
 find_vma_prev(struct mm_struct *mm, unsigned long addr,
                        struct vm_area_struct **pprev)
 {
        struct vm_area_struct *vma;
+       MA_STATE(mas, &mm->mm_mt, addr, addr);
 
-       vma = find_vma(mm, addr);
-       if (vma) {
-               *pprev = vma->vm_prev;
-       } else {
-               struct rb_node *rb_node = rb_last(&mm->mm_rb);
-
-               *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
-       }
+       vma = mas_walk(&mas);
+       *pprev = mas_prev(&mas, 0);
+       if (!vma)
+               vma = mas_next(&mas, ULONG_MAX);
        return vma;
 }
 
@@ -2354,6 +1912,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        struct vm_area_struct *next;
        unsigned long gap_addr;
        int error = 0;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
@@ -2371,16 +1930,21 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        if (gap_addr < address || gap_addr > TASK_SIZE)
                gap_addr = TASK_SIZE;
 
-       next = vma->vm_next;
-       if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
+       next = find_vma_intersection(mm, vma->vm_end, gap_addr);
+       if (next && vma_is_accessible(next)) {
                if (!(next->vm_flags & VM_GROWSUP))
                        return -ENOMEM;
                /* Check that both stack segments have the same anon_vma? */
        }
 
+       if (mas_preallocate(&mas, vma, GFP_KERNEL))
+               return -ENOMEM;
+
        /* We must make sure the anon_vma is allocated. */
-       if (unlikely(anon_vma_prepare(vma)))
+       if (unlikely(anon_vma_prepare(vma))) {
+               mas_destroy(&mas);
                return -ENOMEM;
+       }
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
@@ -2401,15 +1965,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                        error = acct_stack_growth(vma, size, grow);
                        if (!error) {
                                /*
-                                * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_lock
-                                * lock here, so we need to protect against
-                                * concurrent vma expansions.
-                                * anon_vma_lock_write() doesn't help here, as
-                                * we don't guarantee that all growable vmas
-                                * in a mm share the same root anon vma.
-                                * So, we reuse mm->page_table_lock to guard
-                                * against concurrent vma expansions.
+                                * We only hold a shared mmap_lock lock here, so
+                                * we need to protect against concurrent vma
+                                * expansions.  anon_vma_lock_write() doesn't
+                                * help here, as we don't guarantee that all
+                                * growable vmas in a mm share the same root
+                                * anon vma.  So, we reuse mm->page_table_lock
+                                * to guard against concurrent vma expansions.
                                 */
                                spin_lock(&mm->page_table_lock);
                                if (vma->vm_flags & VM_LOCKED)
@@ -2417,11 +1979,9 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                vm_stat_account(mm, vma->vm_flags, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
+                               /* Overwrite old entry in mtree. */
+                               vma_mas_store(vma, &mas);
                                anon_vma_interval_tree_post_update_vma(vma);
-                               if (vma->vm_next)
-                                       vma_gap_update(vma->vm_next);
-                               else
-                                       mm->highest_vm_end = vm_end_gap(vma);
                                spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
@@ -2430,7 +1990,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        }
        anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma(vma, vma->vm_flags);
-       validate_mm(mm);
+       mas_destroy(&mas);
        return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2438,10 +1998,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-int expand_downwards(struct vm_area_struct *vma,
-                                  unsigned long address)
+int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
+       MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);
        struct vm_area_struct *prev;
        int error = 0;
 
@@ -2450,7 +2010,7 @@ int expand_downwards(struct vm_area_struct *vma,
                return -EPERM;
 
        /* Enforce stack_guard_gap */
-       prev = vma->vm_prev;
+       prev = mas_prev(&mas, 0);
        /* Check that both stack segments have the same anon_vma? */
        if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
                        vma_is_accessible(prev)) {
@@ -2458,9 +2018,14 @@ int expand_downwards(struct vm_area_struct *vma,
                        return -ENOMEM;
        }
 
+       if (mas_preallocate(&mas, vma, GFP_KERNEL))
+               return -ENOMEM;
+
        /* We must make sure the anon_vma is allocated. */
-       if (unlikely(anon_vma_prepare(vma)))
+       if (unlikely(anon_vma_prepare(vma))) {
+               mas_destroy(&mas);
                return -ENOMEM;
+       }
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
@@ -2481,15 +2046,13 @@ int expand_downwards(struct vm_area_struct *vma,
                        error = acct_stack_growth(vma, size, grow);
                        if (!error) {
                                /*
-                                * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_lock
-                                * lock here, so we need to protect against
-                                * concurrent vma expansions.
-                                * anon_vma_lock_write() doesn't help here, as
-                                * we don't guarantee that all growable vmas
-                                * in a mm share the same root anon vma.
-                                * So, we reuse mm->page_table_lock to guard
-                                * against concurrent vma expansions.
+                                * We only hold a shared mmap_lock lock here, so
+                                * we need to protect against concurrent vma
+                                * expansions.  anon_vma_lock_write() doesn't
+                                * help here, as we don't guarantee that all
+                                * growable vmas in a mm share the same root
+                                * anon vma.  So, we reuse mm->page_table_lock
+                                * to guard against concurrent vma expansions.
                                 */
                                spin_lock(&mm->page_table_lock);
                                if (vma->vm_flags & VM_LOCKED)
@@ -2498,8 +2061,9 @@ int expand_downwards(struct vm_area_struct *vma,
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
+                               /* Overwrite old entry in mtree. */
+                               vma_mas_store(vma, &mas);
                                anon_vma_interval_tree_post_update_vma(vma);
-                               vma_gap_update(vma);
                                spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
@@ -2508,7 +2072,7 @@ int expand_downwards(struct vm_area_struct *vma,
        }
        anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma(vma, vma->vm_flags);
-       validate_mm(mm);
+       mas_destroy(&mas);
        return error;
 }
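
Editor's sketch (not part of the patch): both expansion paths above follow the same preallocate-then-store pattern, so the tree write cannot fail after the VMA has been modified. A stripped-down version of that pattern, assuming mmap_lock is held for write:

static int example_store_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);

	if (mas_preallocate(&mas, vma, GFP_KERNEL))
		return -ENOMEM;		/* nothing has been modified yet */

	mas_store_prealloc(&mas, vma);	/* cannot fail once preallocated */
	return 0;
}
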
 
@@ -2581,25 +2145,26 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 EXPORT_SYMBOL_GPL(find_extend_vma);
 
 /*
- * Ok - we have the memory areas we should free on the vma list,
- * so release them, and do the vma updates.
+ * Ok - we have the memory areas we should free on a maple tree, so release
+ * them and do the vma updates.
  *
  * Called with the mm semaphore held.
  */
-static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
 {
        unsigned long nr_accounted = 0;
+       struct vm_area_struct *vma;
 
        /* Update high watermark before we lower total_vm */
        update_hiwater_vm(mm);
-       do {
+       mas_for_each(mas, vma, ULONG_MAX) {
                long nrpages = vma_pages(vma);
 
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += nrpages;
                vm_stat_account(mm, vma->vm_flags, -nrpages);
-               vma = remove_vma(vma);
-       } while (vma);
+               remove_vma(vma);
+       }
        vm_unacct_memory(nr_accounted);
        validate_mm(mm);
 }
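
Editor's sketch (not part of the patch): the mas_for_each() idiom used in remove_mt() generalizes to any walk of the VMA tree. A hypothetical example that only counts VMAs, assuming mmap_lock is held:

static int example_count_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	int count = 0;
	MA_STATE(mas, &mm->mm_mt, 0, 0);

	mmap_assert_locked(mm);
	mas_for_each(&mas, vma, ULONG_MAX)
		count++;

	return count;
}
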
@@ -2609,66 +2174,22 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
  *
  * Called with the mm semaphore held.
  */
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
+               struct vm_area_struct *next,
                unsigned long start, unsigned long end)
 {
-       struct vm_area_struct *next = vma_next(mm, prev);
        struct mmu_gather tlb;
 
        lru_add_drain();
        tlb_gather_mmu(&tlb, mm);
        update_hiwater_rss(mm);
-       unmap_vmas(&tlb, vma, start, end);
-       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+       unmap_vmas(&tlb, mt, vma, start, end);
+       free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
                                 next ? next->vm_start : USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb);
 }
 
-/*
- * Create a list of vma's touched by the unmap, removing them from the mm's
- * vma list as we go..
- */
-static bool
-detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
-       struct vm_area_struct *prev, unsigned long end)
-{
-       struct vm_area_struct **insertion_point;
-       struct vm_area_struct *tail_vma = NULL;
-
-       insertion_point = (prev ? &prev->vm_next : &mm->mmap);
-       vma->vm_prev = NULL;
-       do {
-               vma_rb_erase(vma, &mm->mm_rb);
-               if (vma->vm_flags & VM_LOCKED)
-                       mm->locked_vm -= vma_pages(vma);
-               mm->map_count--;
-               tail_vma = vma;
-               vma = vma->vm_next;
-       } while (vma && vma->vm_start < end);
-       *insertion_point = vma;
-       if (vma) {
-               vma->vm_prev = prev;
-               vma_gap_update(vma);
-       } else
-               mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
-       tail_vma->vm_next = NULL;
-
-       /* Kill the cache */
-       vmacache_invalidate(mm);
-
-       /*
-        * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
-        * VM_GROWSUP VMA. Such VMAs can change their size under
-        * down_read(mmap_lock) and collide with the VMA we are about to unmap.
-        */
-       if (vma && (vma->vm_flags & VM_GROWSDOWN))
-               return false;
-       if (prev && (prev->vm_flags & VM_GROWSUP))
-               return false;
-       return true;
-}
-
 /*
  * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
  * has already been checked or doesn't make sense to fail.
@@ -2678,6 +2199,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct vm_area_struct *new;
        int err;
+       validate_mm_mt(mm);
 
        if (vma->vm_ops && vma->vm_ops->may_split) {
                err = vma->vm_ops->may_split(vma, addr);
@@ -2720,6 +2242,9 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!err)
                return 0;
 
+       /* Avoid vm accounting in close() operation */
+       new->vm_start = new->vm_end;
+       new->vm_pgoff = 0;
        /* Clean everything up if vma_adjust failed. */
        if (new->vm_ops && new->vm_ops->close)
                new->vm_ops->close(new);
@@ -2730,6 +2255,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        mpol_put(vma_policy(new));
  out_free_vma:
        vm_area_free(new);
+       validate_mm_mt(mm);
        return err;
 }
 
@@ -2746,38 +2272,48 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        return __split_vma(mm, vma, addr, new_below);
 }
 
-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work.  This now handles partial unmappings.
- * Jeremy Fitzhardinge <jeremy@goop.org>
- */
-int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
-               struct list_head *uf, bool downgrade)
+static inline int munmap_sidetree(struct vm_area_struct *vma,
+                                  struct ma_state *mas_detach)
 {
-       unsigned long end;
-       struct vm_area_struct *vma, *prev, *last;
-
-       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
-               return -EINVAL;
+       mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1);
+       if (mas_store_gfp(mas_detach, vma, GFP_KERNEL))
+               return -ENOMEM;
 
-       len = PAGE_ALIGN(len);
-       end = start + len;
-       if (len == 0)
-               return -EINVAL;
+       if (vma->vm_flags & VM_LOCKED)
+               vma->vm_mm->locked_vm -= vma_pages(vma);
 
-       /*
-        * arch_unmap() might do unmaps itself.  It must be called
-        * and finish any rbtree manipulation before this code
-        * runs and also starts to manipulate the rbtree.
-        */
-       arch_unmap(mm, start, end);
+       return 0;
+}
 
-       /* Find the first overlapping VMA where start < vma->vm_end */
-       vma = find_vma_intersection(mm, start, end);
-       if (!vma)
-               return 0;
-       prev = vma->vm_prev;
+/*
+ * do_mas_align_munmap() - munmap the aligned region from @start to @end.
+ * @mas: The maple_state, ideally set up to alter the correct tree location.
+ * @vma: The starting vm_area_struct
+ * @mm: The mm_struct
+ * @start: The aligned start address to munmap.
+ * @end: The aligned end address to munmap.
+ * @uf: The userfaultfd list_head
+ * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
+ *
+ * If @downgrade is true, check return code for potential release of the lock.
+ */
+static int
+do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+                   struct mm_struct *mm, unsigned long start,
+                   unsigned long end, struct list_head *uf, bool downgrade)
+{
+       struct vm_area_struct *prev, *next = NULL;
+       struct maple_tree mt_detach;
+       int count = 0;
+       int error = -ENOMEM;
+       MA_STATE(mas_detach, &mt_detach, 0, 0);
+       mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN);
+       mt_set_external_lock(&mt_detach, &mm->mmap_lock);
+
+       if (mas_preallocate(mas, vma, GFP_KERNEL))
+               return -ENOMEM;
 
+       mas->last = end - 1;
        /*
         * If we need to split any vma, do it now to save pain later.
         *
@@ -2785,8 +2321,9 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
         * unmapped vm_area_struct will remain in use: so lower split_vma
         * places tmp vma above, and higher split_vma places tmp vma below.
         */
+
+       /* Does it split the first one? */
        if (start > vma->vm_start) {
-               int error;
 
                /*
                 * Make sure that map_count on return from munmap() will
@@ -2794,22 +2331,61 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
                 * its limit temporarily, to help free resources as expected.
                 */
                if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
-                       return -ENOMEM;
+                       goto map_count_exceeded;
 
+               /*
+                * mas_pause() is not needed since mas->index needs to be set
+                * differently from vma->vm_end anyway.
+                */
                error = __split_vma(mm, vma, start, 0);
                if (error)
-                       return error;
-               prev = vma;
+                       goto start_split_failed;
+
+               mas_set(mas, start);
+               vma = mas_walk(mas);
        }
 
-       /* Does it split the last one? */
-       last = find_vma(mm, end);
-       if (last && end > last->vm_start) {
-               int error = __split_vma(mm, last, end, 1);
+       prev = mas_prev(mas, 0);
+       if (unlikely((!prev)))
+               mas_set(mas, start);
+
+       /*
+        * Detach a range of VMAs from the mm. Using next as a temp variable as
+        * it is always overwritten.
+        */
+       mas_for_each(mas, next, end - 1) {
+               /* Does it split the end? */
+               if (next->vm_end > end) {
+                       struct vm_area_struct *split;
+
+                       error = __split_vma(mm, next, end, 1);
+                       if (error)
+                               goto end_split_failed;
+
+                       mas_set(mas, end);
+                       split = mas_prev(mas, 0);
+                       error = munmap_sidetree(split, &mas_detach);
+                       if (error)
+                               goto munmap_sidetree_failed;
+
+                       count++;
+                       if (vma == next)
+                               vma = split;
+                       break;
+               }
+               error = munmap_sidetree(next, &mas_detach);
                if (error)
-                       return error;
+                       goto munmap_sidetree_failed;
+
+               count++;
+#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
+               BUG_ON(next->vm_start < start);
+               BUG_ON(next->vm_start > end);
+#endif
        }
-       vma = vma_next(mm, prev);
+
+       if (!next)
+               next = mas_next(mas, ULONG_MAX);
 
        if (unlikely(uf)) {
                /*
@@ -2821,30 +2397,366 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
                 * split, despite we could. This is unlikely enough
                 * failure that it's not worth optimizing it for.
                 */
-               int error = userfaultfd_unmap_prep(vma, start, end, uf);
+               error = userfaultfd_unmap_prep(mm, start, end, uf);
+
                if (error)
-                       return error;
+                       goto userfaultfd_error;
+       }
+
+       /* Point of no return */
+       mas_set_range(mas, start, end - 1);
+#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
+       /* Make sure no VMAs are about to be lost. */
+       {
+               MA_STATE(test, &mt_detach, start, end - 1);
+               struct vm_area_struct *vma_mas, *vma_test;
+               int test_count = 0;
+
+               rcu_read_lock();
+               vma_test = mas_find(&test, end - 1);
+               mas_for_each(mas, vma_mas, end - 1) {
+                       BUG_ON(vma_mas != vma_test);
+                       test_count++;
+                       vma_test = mas_next(&test, end - 1);
+               }
+               rcu_read_unlock();
+               BUG_ON(count != test_count);
+               mas_set_range(mas, start, end - 1);
+       }
+#endif
+       mas_store_prealloc(mas, NULL);
+       mm->map_count -= count;
+       /*
+        * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+        * VM_GROWSUP VMA. Such VMAs can change their size under
+        * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+        */
+       if (downgrade) {
+               if (next && (next->vm_flags & VM_GROWSDOWN))
+                       downgrade = false;
+               else if (prev && (prev->vm_flags & VM_GROWSUP))
+                       downgrade = false;
+               else
+                       mmap_write_downgrade(mm);
        }
 
-       /* Detach vmas from rbtree */
-       if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
-               downgrade = false;
+       unmap_region(mm, &mt_detach, vma, prev, next, start, end);
+       /* Statistics and freeing VMAs */
+       mas_set(&mas_detach, start);
+       remove_mt(mm, &mas_detach);
+       __mt_destroy(&mt_detach);
 
-       if (downgrade)
-               mmap_write_downgrade(mm);
 
-       unmap_region(mm, vma, prev, start, end);
+       validate_mm(mm);
+       return downgrade ? 1 : 0;
 
-       /* Fix up all other VM information */
-       remove_vma_list(mm, vma);
+userfaultfd_error:
+munmap_sidetree_failed:
+end_split_failed:
+       __mt_destroy(&mt_detach);
+start_split_failed:
+map_count_exceeded:
+       mas_destroy(mas);
+       return error;
+}
 
-       return downgrade ? 1 : 0;
+/*
+ * do_mas_munmap() - munmap a given range.
+ * @mas: The maple state
+ * @mm: The mm_struct
+ * @start: The start address to munmap
+ * @len: The length of the range to munmap
+ * @uf: The userfaultfd list_head
+ * @downgrade: set to true if the caller wants to attempt a write downgrade of
+ * the mmap_lock
+ *
+ * This function takes a @mas that is either pointing to the previous VMA or set
+ * to MA_START and sets it up to remove the mapping(s).  The @len will be
+ * aligned and any arch_unmap work will be performed.
+ *
+ * Returns: -EINVAL on failure, 1 on success with the lock downgraded,
+ * 0 otherwise.
+ */
+int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+                 unsigned long start, size_t len, struct list_head *uf,
+                 bool downgrade)
+{
+       unsigned long end;
+       struct vm_area_struct *vma;
+
+       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
+               return -EINVAL;
+
+       end = start + PAGE_ALIGN(len);
+       if (end == start)
+               return -EINVAL;
+
+        /* arch_unmap() might do unmaps itself.  */
+       arch_unmap(mm, start, end);
+
+       /* Find the first overlapping VMA */
+       vma = mas_find(mas, end - 1);
+       if (!vma)
+               return 0;
+
+       return do_mas_align_munmap(mas, vma, mm, start, end, uf, downgrade);
 }
 
+/* do_munmap() - Wrapper around do_mas_munmap() for non-maple tree aware callers.
+ * @mm: The mm_struct
+ * @start: The start address to munmap
+ * @len: The length to be munmapped.
+ * @uf: The userfaultfd list_head
+ */
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
              struct list_head *uf)
 {
-       return __do_munmap(mm, start, len, uf, false);
+       MA_STATE(mas, &mm->mm_mt, start, start);
+
+       return do_mas_munmap(&mas, mm, start, len, uf, false);
+}
+
+unsigned long mmap_region(struct file *file, unsigned long addr,
+               unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+               struct list_head *uf)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma = NULL;
+       struct vm_area_struct *next, *prev, *merge;
+       pgoff_t pglen = len >> PAGE_SHIFT;
+       unsigned long charged = 0;
+       unsigned long end = addr + len;
+       unsigned long merge_start = addr, merge_end = end;
+       pgoff_t vm_pgoff;
+       int error;
+       MA_STATE(mas, &mm->mm_mt, addr, end - 1);
+
+       /* Check against address space limit. */
+       if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
+               unsigned long nr_pages;
+
+               /*
+                * MAP_FIXED may remove pages of mappings that intersects with
+                * requested mapping. Account for the pages it would unmap.
+                */
+               nr_pages = count_vma_pages_range(mm, addr, end);
+
+               if (!may_expand_vm(mm, vm_flags,
+                                       (len >> PAGE_SHIFT) - nr_pages))
+                       return -ENOMEM;
+       }
+
+       /* Unmap any existing mapping in the area */
+       if (do_mas_munmap(&mas, mm, addr, len, uf, false))
+               return -ENOMEM;
+
+       /*
+        * Private writable mapping: check memory availability
+        */
+       if (accountable_mapping(file, vm_flags)) {
+               charged = len >> PAGE_SHIFT;
+               if (security_vm_enough_memory_mm(mm, charged))
+                       return -ENOMEM;
+               vm_flags |= VM_ACCOUNT;
+       }
+
+       next = mas_next(&mas, ULONG_MAX);
+       prev = mas_prev(&mas, 0);
+       if (vm_flags & VM_SPECIAL)
+               goto cannot_expand;
+
+       /* Attempt to expand an old mapping */
+       /* Check next */
+       if (next && next->vm_start == end && !vma_policy(next) &&
+           can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
+                                NULL_VM_UFFD_CTX, NULL)) {
+               merge_end = next->vm_end;
+               vma = next;
+               vm_pgoff = next->vm_pgoff - pglen;
+       }
+
+       /* Check prev */
+       if (prev && prev->vm_end == addr && !vma_policy(prev) &&
+           (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
+                                      pgoff, vma->vm_userfaultfd_ctx, NULL) :
+                  can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
+                                      NULL_VM_UFFD_CTX, NULL))) {
+               merge_start = prev->vm_start;
+               vma = prev;
+               vm_pgoff = prev->vm_pgoff;
+       }
+
+
+       /* Actually expand, if possible */
+       if (vma &&
+           !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
+               khugepaged_enter_vma(vma, vm_flags);
+               goto expanded;
+       }
+
+       mas.index = addr;
+       mas.last = end - 1;
+cannot_expand:
+       /*
+        * Determine the object being mapped and call the appropriate
+        * specific mapper.  The address has already been validated, but not
+        * unmapped; any overlapping mappings have already been removed.
+        */
+       vma = vm_area_alloc(mm);
+       if (!vma) {
+               error = -ENOMEM;
+               goto unacct_error;
+       }
+
+       vma->vm_start = addr;
+       vma->vm_end = end;
+       vma->vm_flags = vm_flags;
+       vma->vm_page_prot = vm_get_page_prot(vm_flags);
+       vma->vm_pgoff = pgoff;
+
+       if (file) {
+               if (vm_flags & VM_SHARED) {
+                       error = mapping_map_writable(file->f_mapping);
+                       if (error)
+                               goto free_vma;
+               }
+
+               vma->vm_file = get_file(file);
+               error = call_mmap(file, vma);
+               if (error)
+                       goto unmap_and_free_vma;
+
+               /* Can addr have changed??
+                *
+                * Answer: Yes, several device drivers can do it in their
+                *         f_op->mmap method. -DaveM
+                */
+               WARN_ON_ONCE(addr != vma->vm_start);
+
+               addr = vma->vm_start;
+               mas_reset(&mas);
+
+               /*
+                * If vm_flags changed after call_mmap(), we should try merge
+                * vma again as we may succeed this time.
+                */
+               if (unlikely(vm_flags != vma->vm_flags && prev)) {
+                       merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
+                               NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
+                       if (merge) {
+                               /*
+                                * ->mmap() can change vma->vm_file and fput
+                                * the original file. So fput the vma->vm_file
+                                * here or we would add an extra fput for file
+                                * and cause general protection fault
+                                * ultimately.
+                                */
+                               fput(vma->vm_file);
+                               vm_area_free(vma);
+                               vma = merge;
+                               /* Update vm_flags to pick up the change. */
+                               addr = vma->vm_start;
+                               vm_flags = vma->vm_flags;
+                               goto unmap_writable;
+                       }
+               }
+
+               vm_flags = vma->vm_flags;
+       } else if (vm_flags & VM_SHARED) {
+               error = shmem_zero_setup(vma);
+               if (error)
+                       goto free_vma;
+       } else {
+               vma_set_anonymous(vma);
+       }
+
+       /* Allow architectures to sanity-check the vm_flags */
+       if (!arch_validate_flags(vma->vm_flags)) {
+               error = -EINVAL;
+               if (file)
+                       goto unmap_and_free_vma;
+               else
+                       goto free_vma;
+       }
+
+       if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
+               error = -ENOMEM;
+               if (file)
+                       goto unmap_and_free_vma;
+               else
+                       goto free_vma;
+       }
+
+       if (vma->vm_file)
+               i_mmap_lock_write(vma->vm_file->f_mapping);
+
+       vma_mas_store(vma, &mas);
+       mm->map_count++;
+       if (vma->vm_file) {
+               if (vma->vm_flags & VM_SHARED)
+                       mapping_allow_writable(vma->vm_file->f_mapping);
+
+               flush_dcache_mmap_lock(vma->vm_file->f_mapping);
+               vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
+               flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
+               i_mmap_unlock_write(vma->vm_file->f_mapping);
+       }
+
+       /*
+        * vma_merge() calls khugepaged_enter_vma() as well; the call below
+        * covers the non-merge case.
+        */
+       khugepaged_enter_vma(vma, vma->vm_flags);
+
+       /* Once vma denies write, undo our temporary denial count */
+unmap_writable:
+       if (file && vm_flags & VM_SHARED)
+               mapping_unmap_writable(file->f_mapping);
+       file = vma->vm_file;
+expanded:
+       perf_event_mmap(vma);
+
+       vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
+       if (vm_flags & VM_LOCKED) {
+               if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
+                                       is_vm_hugetlb_page(vma) ||
+                                       vma == get_gate_vma(current->mm))
+                       vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+               else
+                       mm->locked_vm += (len >> PAGE_SHIFT);
+       }
+
+       if (file)
+               uprobe_mmap(vma);
+
+       /*
+        * New (or expanded) vma always get soft dirty status.
+        * Otherwise user-space soft-dirty page tracker won't
+        * be able to distinguish situation when vma area unmapped,
+        * then new mapped in-place (which must be aimed as
+        * a completely new data area).
+        */
+       vma->vm_flags |= VM_SOFTDIRTY;
+
+       vma_set_page_prot(vma);
+
+       validate_mm(mm);
+       return addr;
+
+unmap_and_free_vma:
+       fput(vma->vm_file);
+       vma->vm_file = NULL;
+
+       /* Undo any partial mapping done by a device driver. */
+       unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
+       if (vm_flags & VM_SHARED)
+               mapping_unmap_writable(file->f_mapping);
+free_vma:
+       vm_area_free(vma);
+unacct_error:
+       if (charged)
+               vm_unacct_memory(charged);
+       validate_mm(mm);
+       return error;
 }
 
 static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
@@ -2852,11 +2764,12 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
        int ret;
        struct mm_struct *mm = current->mm;
        LIST_HEAD(uf);
+       MA_STATE(mas, &mm->mm_mt, start, start);
 
        if (mmap_write_lock_killable(mm))
                return -EINTR;
 
-       ret = __do_munmap(mm, start, len, &uf, downgrade);
+       ret = do_mas_munmap(&mas, mm, start, len, &uf, downgrade);
        /*
         * Returning 1 indicates mmap_lock is downgraded.
         * But 1 is not legal return value of vm_munmap() and munmap(), reset
@@ -2922,11 +2835,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
                goto out;
 
        if (start + size > vma->vm_end) {
-               struct vm_area_struct *next;
+               VMA_ITERATOR(vmi, mm, vma->vm_end);
+               struct vm_area_struct *next, *prev = vma;
 
-               for (next = vma->vm_next; next; next = next->vm_next) {
+               for_each_vma_range(vmi, next, start + size) {
                        /* hole between vmas ? */
-                       if (next->vm_start != next->vm_prev->vm_end)
+                       if (next->vm_start != prev->vm_end)
                                goto out;
 
                        if (next->vm_file != vma->vm_file)
@@ -2935,8 +2849,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
                        if (next->vm_flags != vma->vm_flags)
                                goto out;
 
-                       if (start + size <= next->vm_end)
-                               break;
+                       prev = next;
                }
 
                if (!next)
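
Editor's sketch (not part of the patch): the loop above is the new VMA_ITERATOR idiom that replaces vm_next walks. A hypothetical, self-contained use of the same pattern, assuming mmap_lock is held by the caller:

static void example_dump_range(struct mm_struct *mm, unsigned long start,
			       unsigned long end)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, start);

	mmap_assert_locked(mm);
	for_each_vma_range(vmi, vma, end)
		pr_debug("vma %lx-%lx\n", vma->vm_start, vma->vm_end);
}
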
@@ -2966,37 +2879,53 @@ out:
 }
 
 /*
- *  this is really a simplified "do_mmap".  it only handles
- *  anonymous maps.  eventually we may be able to do some
- *  brk-specific accounting here.
+ * do_brk_munmap() - Unmap a partial vma.
+ * @mas: The maple tree state.
+ * @vma: The vma to be modified
+ * @newbrk: The start address of the range to unmap
+ * @oldbrk: The end address of the range to unmap
+ * @uf: The userfaultfd list_head
+ *
+ * Returns: 1 on success.
+ * Unmaps a partial VMA mapping.  Does not handle alignment; downgrades the
+ * lock if possible.
  */
-static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
+static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+                        unsigned long newbrk, unsigned long oldbrk,
+                        struct list_head *uf)
 {
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma, *prev;
-       struct rb_node **rb_link, *rb_parent;
-       pgoff_t pgoff = addr >> PAGE_SHIFT;
-       int error;
-       unsigned long mapped_addr;
-
-       /* Until we need other flags, refuse anything except VM_EXEC. */
-       if ((flags & (~VM_EXEC)) != 0)
-               return -EINVAL;
-       flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
-       mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
-       if (IS_ERR_VALUE(mapped_addr))
-               return mapped_addr;
+       struct mm_struct *mm = vma->vm_mm;
+       int ret;
 
-       error = mlock_future_check(mm, mm->def_flags, len);
-       if (error)
-               return error;
+       arch_unmap(mm, newbrk, oldbrk);
+       ret = do_mas_align_munmap(mas, vma, mm, newbrk, oldbrk, uf, true);
+       validate_mm_mt(mm);
+       return ret;
+}
 
-       /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
-       if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
-               return -ENOMEM;
+/*
+ * do_brk_flags() - Increase the brk vma if the flags match.
+ * @mas: The maple tree state.
+ * @vma: The vma to extend, or NULL
+ * @addr: The start address
+ * @len: The length of the increase
+ * @flags: The VMA flags
+ *
+ * Extend the brk VMA from addr to addr + len.  If the VMA is NULL or the flags
+ * do not match then create a new anonymous VMA.  Eventually we may be able to
+ * do some brk-specific accounting here.
+ */
+static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
+               unsigned long addr, unsigned long len, unsigned long flags)
+{
+       struct mm_struct *mm = current->mm;
 
-       /* Check against address space limits *after* clearing old maps... */
+       validate_mm_mt(mm);
+       /*
+        * Check against address space limits by the changed size
+        * Note: This happens *after* clearing old mappings in some code paths.
+        */
+       flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
        if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
                return -ENOMEM;
 
@@ -3006,28 +2935,49 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
        if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
-       /* Can we just expand an old private anonymous mapping? */
-       vma = vma_merge(mm, prev, addr, addr + len, flags,
-                       NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-       if (vma)
-               goto out;
-
        /*
-        * create a vma struct for an anonymous mapping
+        * Expand the existing vma if possible; note that singular lists do not
+        * occur after forking, so the expand will only happen on new VMAs.
         */
-       vma = vm_area_alloc(mm);
-       if (!vma) {
-               vm_unacct_memory(len >> PAGE_SHIFT);
-               return -ENOMEM;
+       if (vma &&
+           (!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) &&
+           ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
+               mas->index = vma->vm_start;
+               mas->last = addr + len - 1;
+               vma_adjust_trans_huge(vma, addr, addr + len, 0);
+               if (vma->anon_vma) {
+                       anon_vma_lock_write(vma->anon_vma);
+                       anon_vma_interval_tree_pre_update_vma(vma);
+               }
+               vma->vm_end = addr + len;
+               vma->vm_flags |= VM_SOFTDIRTY;
+               if (mas_store_gfp(mas, vma, GFP_KERNEL))
+                       goto mas_expand_failed;
+
+               if (vma->anon_vma) {
+                       anon_vma_interval_tree_post_update_vma(vma);
+                       anon_vma_unlock_write(vma->anon_vma);
+               }
+               khugepaged_enter_vma(vma, flags);
+               goto out;
        }
 
+       /* create a vma struct for an anonymous mapping */
+       vma = vm_area_alloc(mm);
+       if (!vma)
+               goto vma_alloc_fail;
+
        vma_set_anonymous(vma);
        vma->vm_start = addr;
        vma->vm_end = addr + len;
-       vma->vm_pgoff = pgoff;
+       vma->vm_pgoff = addr >> PAGE_SHIFT;
        vma->vm_flags = flags;
        vma->vm_page_prot = vm_get_page_prot(flags);
-       vma_link(mm, vma, prev, rb_link, rb_parent);
+       mas_set_range(mas, vma->vm_start, addr + len - 1);
+       if (mas_store_gfp(mas, vma, GFP_KERNEL))
+               goto mas_store_fail;
+
+       mm->map_count++;
 out:
        perf_event_mmap(vma);
        mm->total_vm += len >> PAGE_SHIFT;
@@ -3035,16 +2985,32 @@ out:
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
        vma->vm_flags |= VM_SOFTDIRTY;
+       validate_mm(mm);
        return 0;
+
+mas_store_fail:
+       vm_area_free(vma);
+vma_alloc_fail:
+       vm_unacct_memory(len >> PAGE_SHIFT);
+       return -ENOMEM;
+
+mas_expand_failed:
+       if (vma->anon_vma) {
+               anon_vma_interval_tree_post_update_vma(vma);
+               anon_vma_unlock_write(vma->anon_vma);
+       }
+       return -ENOMEM;
 }
 
 int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma = NULL;
        unsigned long len;
        int ret;
        bool populate;
        LIST_HEAD(uf);
+       MA_STATE(mas, &mm->mm_mt, addr, addr);
 
        len = PAGE_ALIGN(request);
        if (len < request)
@@ -3055,13 +3021,36 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
        if (mmap_write_lock_killable(mm))
                return -EINTR;
 
-       ret = do_brk_flags(addr, len, flags, &uf);
+       /* Until we need other flags, refuse anything except VM_EXEC. */
+       if ((flags & (~VM_EXEC)) != 0)
+               return -EINVAL;
+
+       ret = check_brk_limits(addr, len);
+       if (ret)
+               goto limits_failed;
+
+       ret = do_mas_munmap(&mas, mm, addr, len, &uf, 0);
+       if (ret)
+               goto munmap_failed;
+
+       vma = mas_prev(&mas, 0);
+       if (!vma || vma->vm_end != addr || vma_policy(vma) ||
+           !can_vma_merge_after(vma, flags, NULL, NULL,
+                                addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL))
+               vma = NULL;
+
+       ret = do_brk_flags(&mas, vma, addr, len, flags);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
        mmap_write_unlock(mm);
        userfaultfd_unmap_complete(mm, &uf);
        if (populate && !ret)
                mm_populate(addr, len);
        return ret;
+
+munmap_failed:
+limits_failed:
+       mmap_write_unlock(mm);
+       return ret;
 }
 EXPORT_SYMBOL(vm_brk_flags);
 
@@ -3077,34 +3066,19 @@ void exit_mmap(struct mm_struct *mm)
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       int count = 0;
 
        /* mm's last user has gone, and its about to be pulled down */
        mmu_notifier_release(mm);
 
-       if (unlikely(mm_is_oom_victim(mm))) {
-               /*
-                * Manually reap the mm to free as much memory as possible.
-                * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
-                * this mm from further consideration.  Taking mm->mmap_lock for
-                * write after setting MMF_OOM_SKIP will guarantee that the oom
-                * reaper will not run on this mm again after mmap_lock is
-                * dropped.
-                *
-                * Nothing can be holding mm->mmap_lock here and the above call
-                * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
-                * __oom_reap_task_mm() will not block.
-                */
-               (void)__oom_reap_task_mm(mm);
-               set_bit(MMF_OOM_SKIP, &mm->flags);
-       }
-
-       mmap_write_lock(mm);
+       mmap_read_lock(mm);
        arch_exit_mmap(mm);
 
-       vma = mm->mmap;
+       vma = mas_find(&mas, ULONG_MAX);
        if (!vma) {
                /* Can happen if dup_mmap() received an OOM */
-               mmap_write_unlock(mm);
+               mmap_read_unlock(mm);
                return;
        }
 
@@ -3112,19 +3086,37 @@ void exit_mmap(struct mm_struct *mm)
        flush_cache_mm(mm);
        tlb_gather_mmu_fullmm(&tlb, mm);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
-       /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       unmap_vmas(&tlb, vma, 0, -1);
-       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
+       /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
+       unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX);
+       mmap_read_unlock(mm);
+
+       /*
+        * Set MMF_OOM_SKIP to hide this task from the oom killer/reaper
+        * because the memory has been already freed.
+        * because the memory has already been freed.
+       set_bit(MMF_OOM_SKIP, &mm->flags);
+       mmap_write_lock(mm);
+       free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
+                     USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb);
 
-       /* Walk the list again, actually closing and freeing it. */
-       while (vma) {
+       /*
+        * Walk the list again, actually closing and freeing it, with preemption
+        * enabled, without holding any MM locks besides the unreachable
+        * mmap_write_lock.
+        */
+       do {
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
-               vma = remove_vma(vma);
+               remove_vma(vma);
+               count++;
                cond_resched();
-       }
-       mm->mmap = NULL;
+       } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
+
+       BUG_ON(count != mm->map_count);
+
+       trace_exit_mmap(mm);
+       __mt_destroy(&mm->mm_mt);
        mmap_write_unlock(mm);
        vm_unacct_memory(nr_accounted);
 }
@@ -3135,14 +3127,14 @@ void exit_mmap(struct mm_struct *mm)
  */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-       struct vm_area_struct *prev;
-       struct rb_node **rb_link, *rb_parent;
+       unsigned long charged = vma_pages(vma);
+
 
-       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
-                          &prev, &rb_link, &rb_parent))
+       if (find_vma_intersection(mm, vma->vm_start, vma->vm_end))
                return -ENOMEM;
+
        if ((vma->vm_flags & VM_ACCOUNT) &&
-            security_vm_enough_memory_mm(mm, vma_pages(vma)))
+            security_vm_enough_memory_mm(mm, charged))
                return -ENOMEM;
 
        /*
@@ -3162,7 +3154,11 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
 
-       vma_link(mm, vma, prev, rb_link, rb_parent);
+       if (vma_link(mm, vma)) {
+               vm_unacct_memory(charged);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
@@ -3178,9 +3174,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        unsigned long vma_start = vma->vm_start;
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *new_vma, *prev;
-       struct rb_node **rb_link, *rb_parent;
        bool faulted_in_anon_vma = true;
 
+       validate_mm_mt(mm);
        /*
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
@@ -3190,8 +3186,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                faulted_in_anon_vma = false;
        }
 
-       if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
+       new_vma = find_vma_prev(mm, addr, &prev);
+       if (new_vma && new_vma->vm_start < addr + len)
                return NULL;    /* should never get here */
+
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
                            vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
                            vma->vm_userfaultfd_ctx, anon_vma_name(vma));
@@ -3232,16 +3230,22 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        get_file(new_vma->vm_file);
                if (new_vma->vm_ops && new_vma->vm_ops->open)
                        new_vma->vm_ops->open(new_vma);
-               vma_link(mm, new_vma, prev, rb_link, rb_parent);
+               if (vma_link(mm, new_vma))
+                       goto out_vma_link;
                *need_rmap_locks = false;
        }
+       validate_mm_mt(mm);
        return new_vma;
 
+out_vma_link:
+       if (new_vma->vm_ops && new_vma->vm_ops->close)
+               new_vma->vm_ops->close(new_vma);
 out_free_mempol:
        mpol_put(vma_policy(new_vma));
 out_free_vma:
        vm_area_free(new_vma);
 out:
+       validate_mm_mt(mm);
        return NULL;
 }
 
@@ -3378,6 +3382,7 @@ static struct vm_area_struct *__install_special_mapping(
        int ret;
        struct vm_area_struct *vma;
 
+       validate_mm_mt(mm);
        vma = vm_area_alloc(mm);
        if (unlikely(vma == NULL))
                return ERR_PTR(-ENOMEM);
@@ -3400,10 +3405,12 @@ static struct vm_area_struct *__install_special_mapping(
 
        perf_event_mmap(vma);
 
+       validate_mm_mt(mm);
        return vma;
 
 out:
        vm_area_free(vma);
+       validate_mm_mt(mm);
        return ERR_PTR(ret);
 }
 
@@ -3528,12 +3535,13 @@ int mm_take_all_locks(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        mmap_assert_write_locked(mm);
 
        mutex_lock(&mm_all_locks_mutex);
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3541,7 +3549,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3549,7 +3558,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->anon_vma)
@@ -3608,11 +3618,12 @@ void mm_drop_all_locks(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        mmap_assert_write_locked(mm);
        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (vma->anon_vma)
                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                                vm_unlock_anon_vma(avc->anon_vma);