 #include <linux/compiler.h>
 #include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
                else \
                        mm->mmap = NULL; \
                rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
-               mm->mmap_cache = NULL; \
+               vmacache_invalidate(mm); \
                mm->map_count--; \
                remove_vma(high_vma); \
        } \
 
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/swap.h>
 static int exec_mmap(struct mm_struct *mm)
 {
        struct task_struct *tsk;
-       struct mm_struct * old_mm, *active_mm;
+       struct mm_struct *old_mm, *active_mm;
 
        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
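+       /* give the new mm a fresh cache generation and drop stale entries */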
+       tsk->mm->vmacache_seqnum = 0;
+       vmacache_flush(tsk);
        task_unlock(tsk);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
 
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
 #include <linux/mount.h>
 
        /*
         * We remember last_addr rather than next_addr to hit with
-        * mmap_cache most of the time. We have zero last_addr at
+        * vmacache most of the time. We have zero last_addr at
         * the beginning and also after lseek. We will have -1 last_addr
         * after the end of the vmas.
         */
 
 
 struct kioctx_table;
 struct mm_struct {
-       struct vm_area_struct * mmap;           /* list of VMAs */
+       struct vm_area_struct *mmap;            /* list of VMAs */
        struct rb_root mm_rb;
-       struct vm_area_struct * mmap_cache;     /* last find_vma result */
+       u32 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
        unsigned long (*get_unmapped_area) (struct file *filp,
                                unsigned long addr, unsigned long len,
 
 struct blk_plug;
 struct filename;
 
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
 #ifdef CONFIG_COMPAT_BRK
        unsigned brk_randomized:1;
 #endif
+       /* per-thread vma caching */
+       u32 vmacache_seqnum;
+       struct vm_area_struct *vmacache[VMACACHE_SIZE];
 #if defined(SPLIT_RSS_COUNTING)
        struct task_rss_stat    rss_stat;
 #endif
 
--- /dev/null
+#ifndef __LINUX_VMACACHE_H
+#define __LINUX_VMACACHE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+/*
+ * Hash based on the page number. Provides a good hit rate both for
+ * workloads with good locality and for those with random accesses.
+ */
+#define VMACACHE_HASH(addr) (((addr) >> PAGE_SHIFT) & VMACACHE_MASK)
+
+static inline void vmacache_flush(struct task_struct *tsk)
+{
+       memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+}
+
+extern void vmacache_flush_all(struct mm_struct *mm);
+extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+                                                   unsigned long addr);
+
+#ifndef CONFIG_MMU
+extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+                                                 unsigned long start,
+                                                 unsigned long end);
+#endif
+
+static inline void vmacache_invalidate(struct mm_struct *mm)
+{
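+       /*
+        * Bumping the mm-wide sequence number lazily invalidates every
+        * thread's cache: each task compares its own seqnum against the
+        * mm's on its next lookup and flushes on mismatch.
+        */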
+       mm->vmacache_seqnum++;
+
+       /* deal with overflows */
+       if (unlikely(mm->vmacache_seqnum == 0))
+               vmacache_flush_all(mm);
+}
+
+#endif /* __LINUX_VMACACHE_H */
 
 #include <linux/pid.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/rcupdate.h>
 
 #include <asm/cacheflush.h>
        if (!CACHE_FLUSH_IS_SAFE)
                return;
 
-       if (current->mm && current->mm->mmap_cache) {
-               flush_cache_range(current->mm->mmap_cache,
-                                 addr, addr + BREAK_INSTR_SIZE);
+       if (current->mm) {
+               int i;
+
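+               /*
+                * Any of the cached vmas may map the breakpoint, so
+                * flush the range through each of them.
+                */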
+               for (i = 0; i < VMACACHE_SIZE; i++) {
+                       if (!current->vmacache[i])
+                               continue;
+                       flush_cache_range(current->vmacache[i],
+                                         addr, addr + BREAK_INSTR_SIZE);
+               }
        }
+
        /* Force flush instruction cache if it was outside the mm */
        flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
 }
 
 #include <linux/mman.h>
 #include <linux/mmu_notifier.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 
        mm->locked_vm = 0;
        mm->mmap = NULL;
-       mm->mmap_cache = NULL;
+       mm->vmacache_seqnum = 0;
        mm->map_count = 0;
        cpumask_clear(mm_cpumask(mm));
        mm->mm_rb = RB_ROOT;
        if (!oldmm)
                return 0;
 
+       /* initialize the new vmacache entries */
+       vmacache_flush(tsk);
+
        if (clone_flags & CLONE_VM) {
                atomic_inc(&oldmm->mm_users);
                mm = oldmm;
 
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
-                          compaction.o balloon_compaction.o \
+                          compaction.o balloon_compaction.o vmacache.o \
                           interval_tree.o list_lru.o workingset.o $(mmu-y)
 
 obj-y += init-mm.o
 
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
        prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = prev;
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-       struct vm_area_struct *vma = NULL;
+       struct rb_node *rb_node;
+       struct vm_area_struct *vma;
 
        /* Check the cache first. */
-       /* (Cache hit rate is typically around 35%.) */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-               struct rb_node *rb_node;
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
+               return vma;
 
-               rb_node = mm->mm_rb.rb_node;
-               vma = NULL;
+       rb_node = mm->mm_rb.rb_node;
+       vma = NULL;
 
-               while (rb_node) {
-                       struct vm_area_struct *vma_tmp;
-
-                       vma_tmp = rb_entry(rb_node,
-                                          struct vm_area_struct, vm_rb);
-
-                       if (vma_tmp->vm_end > addr) {
-                               vma = vma_tmp;
-                               if (vma_tmp->vm_start <= addr)
-                                       break;
-                               rb_node = rb_node->rb_left;
-                       } else
-                               rb_node = rb_node->rb_right;
-               }
-               if (vma)
-                       mm->mmap_cache = vma;
+       while (rb_node) {
+               struct vm_area_struct *tmp;
+
+               tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+               if (tmp->vm_end > addr) {
+                       vma = tmp;
+                       if (tmp->vm_start <= addr)
+                               break;
+                       rb_node = rb_node->rb_left;
+               } else
+                       rb_node = rb_node->rb_right;
        }
+
+       if (vma)
+               vmacache_update(addr, vma);
        return vma;
 }
 
        } else
                mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
-       mm->mmap_cache = NULL;          /* Kill the cache. */
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
 
 
 #include <linux/export.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/file.h>
  */
 static void delete_vma_from_mm(struct vm_area_struct *vma)
 {
+       int i;
        struct address_space *mapping;
        struct mm_struct *mm = vma->vm_mm;
+       struct task_struct *curr = current;
 
        kenter("%p", vma);
 
        protect_vma(vma, 0);
 
        mm->map_count--;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = NULL;
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               /* if the vma is cached, invalidate the entire cache */
+               if (curr->vmacache[i] == vma) {
+                       vmacache_invalidate(curr->mm);
+                       break;
+               }
+       }
 
        /* remove the VMA from the mapping */
        if (vma->vm_file) {
        struct vm_area_struct *vma;
 
        /* check the cache first */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (vma && vma->vm_start <= addr && vma->vm_end > addr)
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
                return vma;
 
        /* trawl the list (there may be multiple mappings in which addr
                if (vma->vm_start > addr)
                        return NULL;
                if (vma->vm_end > addr) {
-                       mm->mmap_cache = vma;
+                       vmacache_update(addr, vma);
                        return vma;
                }
        }
        unsigned long end = addr + len;
 
        /* check the cache first */
-       vma = mm->mmap_cache;
-       if (vma && vma->vm_start == addr && vma->vm_end == end)
+       vma = vmacache_find_exact(mm, addr, end);
+       if (vma)
                return vma;
 
        /* trawl the list (there may be multiple mappings in which addr
                if (vma->vm_start > addr)
                        return NULL;
                if (vma->vm_end == end) {
-                       mm->mmap_cache = vma;
+                       vmacache_update(addr, vma);
                        return vma;
                }
        }
 
--- /dev/null
+/*
+ * Copyright (C) 2014 Davidlohr Bueso.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
+
+/*
+ * Flush vma caches for threads that share a given mm.
+ *
+ * The operation is safe because the caller holds the mmap_sem
+ * exclusively and other threads accessing the vma cache will
+ * have mmap_sem held at least for read, so no extra locking
+ * is required to maintain the vma cache.
+ */
+void vmacache_flush_all(struct mm_struct *mm)
+{
+       struct task_struct *g, *p;
+
+       rcu_read_lock();
+       for_each_process_thread(g, p) {
+               /*
+                * Only the cached vma pointers need flushing here: the
+                * mm's seqnum has already been updated, and each task's
+                * own seqnum is resynchronized on its next lookup
+                * (see vmacache_valid()).
+                */
+               if (mm == p->mm)
+                       vmacache_flush(p);
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * This task may be accessing a foreign mm via (for example)
+ * get_user_pages()->find_vma().  The vmacache is task-local and this
+ * task's vmacache pertains to a different mm (i.e. its own). There is
+ * nothing we can do here.
+ *
+ * Also handle the case where a kernel thread has adopted this mm via use_mm().
+ * That kernel thread's vmacache is not applicable to this mm.
+ */
+static bool vmacache_valid_mm(struct mm_struct *mm)
+{
+       return current->mm == mm && !(current->flags & PF_KTHREAD);
+}
+
+void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
+{
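+       /*
+        * The slot is picked by hashing the page number of addr:
+        * repeated faults on one page reuse the same entry, while
+        * neighbouring pages spread across the cache.
+        */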
+       if (vmacache_valid_mm(newvma->vm_mm))
+               current->vmacache[VMACACHE_HASH(addr)] = newvma;
+}
+
+static bool vmacache_valid(struct mm_struct *mm)
+{
+       struct task_struct *curr;
+
+       if (!vmacache_valid_mm(mm))
+               return false;
+
+       curr = current;
+       if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
+               /*
+                * The first attempt will always be invalid; initialize
+                * the new cache for this task here.
+                */
+               curr->vmacache_seqnum = mm->vmacache_seqnum;
+               vmacache_flush(curr);
+               return false;
+       }
+       return true;
+}
+
+struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
+{
+       int i;
+
+       if (!vmacache_valid(mm))
+               return NULL;
+
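+       /*
+        * Scan every slot rather than only VMACACHE_HASH(addr): the
+        * cache is tiny and a hit in any slot saves the caller a full
+        * tree or list lookup.
+        */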
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               struct vm_area_struct *vma = current->vmacache[i];
+
+               if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
+                       BUG_ON(vma->vm_mm != mm);
+                       return vma;
+               }
+       }
+
+       return NULL;
+}
+
+#ifndef CONFIG_MMU
+struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+                                          unsigned long start,
+                                          unsigned long end)
+{
+       int i;
+
+       if (!vmacache_valid(mm))
+               return NULL;
+
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               struct vm_area_struct *vma = current->vmacache[i];
+
+               if (vma && vma->vm_start == start && vma->vm_end == end)
+                       return vma;
+       }
+
+       return NULL;
+}
+#endif