mm: per-thread vma caching

author Davidlohr Bueso <davidlohr@hp.com>

Mon, 7 Apr 2014 22:37:25 +0000 (15:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 7 Apr 2014 23:35:53 +0000 (16:35 -0700)
author Davidlohr Bueso <davidlohr@hp.com>
Mon, 7 Apr 2014 22:37:25 +0000 (15:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 7 Apr 2014 23:35:53 +0000 (16:35 -0700)
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h

index fb5e4c6..ef470a7 100644 (file)
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -14,6 +14,8 @@
  
  #include <linux/compiler.h>
  #include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/io.h>
  
  #include <asm/cacheflush.h>
@@ -73,7 +75,7 @@ do { \
                 else \
                         mm->mmap = NULL; \
                 rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
-               mm->mmap_cache = NULL; \
+               vmacache_invalidate(mm); \
                 mm->map_count--; \
                 remove_vma(high_vma); \
         } \
diff --git a/fs/exec.c b/fs/exec.c

index 25dfeba..b60ccf9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
  #include <linux/file.h>
  #include <linux/fdtable.h>
  #include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/swap.h>
@@ -822,7 +823,7 @@ EXPORT_SYMBOL(read_code);
  static int exec_mmap(struct mm_struct *mm)
  {
         struct task_struct *tsk;
-       struct mm_struct * old_mm, *active_mm;
+       struct mm_struct *old_mm, *active_mm;
  
         /* Notify parent that we're no longer interested in the old VM */
         tsk = current;
@@ -848,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm)
         tsk->mm = mm;
         tsk->active_mm = mm;
         activate_mm(active_mm, mm);
+       tsk->mm->vmacache_seqnum = 0;
+       vmacache_flush(tsk);
         task_unlock(tsk);
         if (old_mm) {
                 up_read(&old_mm->mmap_sem);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c

index fb52b54..442177b 100644 (file)
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
  #include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/hugetlb.h>
  #include <linux/huge_mm.h>
  #include <linux/mount.h>
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
  
         /*
          * We remember last_addr rather than next_addr to hit with
-        * mmap_cache most of the time. We have zero last_addr at
+        * vmacache most of the time. We have zero last_addr at
          * the beginning and also after lseek. We will have -1 last_addr
          * after the end of the vmas.
          */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 290901a..2b58d19 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,9 +342,9 @@ struct mm_rss_stat {
  
  struct kioctx_table;
  struct mm_struct {
-       struct vm_area_struct * mmap;           /* list of VMAs */
+       struct vm_area_struct *mmap;            /* list of VMAs */
         struct rb_root mm_rb;
-       struct vm_area_struct * mmap_cache;     /* last find_vma result */
+       u32 vmacache_seqnum;                   /* per-thread vmacache */
  #ifdef CONFIG_MMU
         unsigned long (*get_unmapped_area) (struct file *filp,
                                 unsigned long addr, unsigned long len,
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 7cb07fd..642477d 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -132,6 +132,10 @@ struct perf_event_context;
  struct blk_plug;
  struct filename;
  
+/*
+ * Geometry of the per-thread vma cache: the cache has
+ * 2^VMACACHE_BITS slots (VMACACHE_SIZE), and VMACACHE_MASK is the
+ * bit mask that keeps only the low VMACACHE_BITS bits of a value.
+ */
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
  /*
   * List of flags we want to share for kernel threads,
   * if only because they are not used by them anyway.
@@ -1235,6 +1239,9 @@ struct task_struct {
  #ifdef CONFIG_COMPAT_BRK
         unsigned brk_randomized:1;
  #endif
+       /* per-thread vma caching */
+       u32 vmacache_seqnum;
+       struct vm_area_struct *vmacache[VMACACHE_SIZE];
  #if defined(SPLIT_RSS_COUNTING)
         struct task_rss_stat    rss_stat;
  #endif
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h

new file mode 100644 (file)

index 0000000..c3fa0fd
--- /dev/null
+++ b/include/linux/vmacache.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_VMACACHE_H
+#define __LINUX_VMACACHE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+/*
+ * Hash on the page number of @addr, masked with VMACACHE_MASK. Gives a good
+ * hit rate for workloads with good locality and for random accesses alike.
+ */
+#define VMACACHE_HASH(addr) (((addr) >> PAGE_SHIFT) & VMACACHE_MASK)
+
+/*
+ * Empty @tsk's per-thread vma cache by zeroing every slot of
+ * tsk->vmacache[]. tsk->vmacache_seqnum is left untouched.
+ */
+static inline void vmacache_flush(struct task_struct *tsk)
+{
+       memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+}
+
+extern void vmacache_flush_all(struct mm_struct *mm);
+extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+                                                   unsigned long addr);
+
+#ifndef CONFIG_MMU
+extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+                                                 unsigned long start,
+                                                 unsigned long end);
+#endif
+
+/*
+ * Invalidate the vma caches associated with @mm by bumping the mm's
+ * sequence number. No per-task state is touched here in the common
+ * case; mm->vmacache_seqnum is simply advanced.
+ */
+static inline void vmacache_invalidate(struct mm_struct *mm)
+{
+       mm->vmacache_seqnum++;
+
+       /*
+        * The u32 counter wrapped around to 0: explicitly clear the
+        * cached pointers of the tasks using this mm instead of relying
+        * on the sequence number alone.
+        */
+       if (unlikely(mm->vmacache_seqnum == 0))
+               vmacache_flush_all(mm);
+}
+
+#endif /* __LINUX_VMACACHE_H */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c

index 99982a7..2956c8d 100644 (file)
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -49,6 +49,7 @@
  #include <linux/pid.h>
  #include <linux/smp.h>
  #include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/rcupdate.h>
  
  #include <asm/cacheflush.h>
@@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
         if (!CACHE_FLUSH_IS_SAFE)
                 return;
  
-       if (current->mm && current->mm->mmap_cache) {
-               flush_cache_range(current->mm->mmap_cache,
-                                 addr, addr + BREAK_INSTR_SIZE);
+       if (current->mm) {
+               int i;
+
+               for (i = 0; i < VMACACHE_SIZE; i++) {
+                       if (!current->vmacache[i])
+                               continue;
+                       flush_cache_range(current->vmacache[i],
+                                         addr, addr + BREAK_INSTR_SIZE);
+               }
         }
+
         /* Force flush instruction cache if it was outside the mm */
         flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
  }
diff --git a/kernel/fork.c b/kernel/fork.c

index e40c0a0..bc0e96b 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -28,6 +28,8 @@
  #include <linux/mman.h>
  #include <linux/mmu_notifier.h>
  #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/nsproxy.h>
  #include <linux/capability.h>
  #include <linux/cpu.h>
@@ -364,7 +366,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
  
         mm->locked_vm = 0;
         mm->mmap = NULL;
-       mm->mmap_cache = NULL;
+       mm->vmacache_seqnum = 0;
         mm->map_count = 0;
         cpumask_clear(mm_cpumask(mm));
         mm->mm_rb = RB_ROOT;
@@ -882,6 +884,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
         if (!oldmm)
                 return 0;
  
+       /* initialize the new vmacache entries */
+       vmacache_flush(tsk);
+
         if (clone_flags & CLONE_VM) {
                 atomic_inc(&oldmm->mm_users);
                 mm = oldmm;
diff --git a/mm/Makefile b/mm/Makefile

index cdd7415..23a6f7e 100644 (file)
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
                            readahead.o swap.o truncate.o vmscan.o shmem.o \
                            util.o mmzone.o vmstat.o backing-dev.o \
                            mm_init.o mmu_context.o percpu.o slab_common.o \
-                          compaction.o balloon_compaction.o \
+                          compaction.o balloon_compaction.o vmacache.o \
                            interval_tree.o list_lru.o workingset.o $(mmu-y)
  
  obj-y += init-mm.o
diff --git a/mm/mmap.c b/mm/mmap.c

index 46433e1..b1202cf 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -10,6 +10,7 @@
  #include <linux/slab.h>
  #include <linux/backing-dev.h>
  #include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/shm.h>
  #include <linux/mman.h>
  #include <linux/pagemap.h>
@@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
         prev->vm_next = next = vma->vm_next;
         if (next)
                 next->vm_prev = prev;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = prev;
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
  }
  
  /*
@@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area);
  /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
  struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
  {
-       struct vm_area_struct *vma = NULL;
+       struct rb_node *rb_node;
+       struct vm_area_struct *vma;
  
         /* Check the cache first. */
-       /* (Cache hit rate is typically around 35%.) */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-               struct rb_node *rb_node;
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
+               return vma;
  
-               rb_node = mm->mm_rb.rb_node;
-               vma = NULL;
+       rb_node = mm->mm_rb.rb_node;
+       vma = NULL;
  
-               while (rb_node) {
-                       struct vm_area_struct *vma_tmp;
-
-                       vma_tmp = rb_entry(rb_node,
-                                          struct vm_area_struct, vm_rb);
-
-                       if (vma_tmp->vm_end > addr) {
-                               vma = vma_tmp;
-                               if (vma_tmp->vm_start <= addr)
-                                       break;
-                               rb_node = rb_node->rb_left;
-                       } else
-                               rb_node = rb_node->rb_right;
-               }
-               if (vma)
-                       mm->mmap_cache = vma;
+       while (rb_node) {
+               struct vm_area_struct *tmp;
+
+               tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+               if (tmp->vm_end > addr) {
+                       vma = tmp;
+                       if (tmp->vm_start <= addr)
+                               break;
+                       rb_node = rb_node->rb_left;
+               } else
+                       rb_node = rb_node->rb_right;
         }
+
+       if (vma)
+               vmacache_update(addr, vma);
         return vma;
  }
  
@@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
         } else
                 mm->highest_vm_end = prev ? prev->vm_end : 0;
         tail_vma->vm_next = NULL;
-       mm->mmap_cache = NULL;          /* Kill the cache. */
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
  }
  
  /*
diff --git a/mm/nommu.c b/mm/nommu.c

index e194825..5d3f352 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -15,6 +15,7 @@
  
  #include <linux/export.h>
  #include <linux/mm.h>
+#include <linux/vmacache.h>
  #include <linux/mman.h>
  #include <linux/swap.h>
  #include <linux/file.h>
@@ -768,16 +769,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
   */
  static void delete_vma_from_mm(struct vm_area_struct *vma)
  {
+       int i;
         struct address_space *mapping;
         struct mm_struct *mm = vma->vm_mm;
+       struct task_struct *curr = current;
  
         kenter("%p", vma);
  
         protect_vma(vma, 0);
  
         mm->map_count--;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = NULL;
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               /*
+                * If the vma is cached by the current task, invalidate the
+                * entire cache of the mm that owns the vma. Use vma->vm_mm
+                * (mm) rather than current->mm: current->mm may be a
+                * different mm, or NULL, when the vma is being deleted.
+                * NOTE(review): only current's cache slots are inspected.
+                */
+               if (curr->vmacache[i] == vma) {
+                       vmacache_invalidate(mm);
+                       break;
+               }
+       }
  
         /* remove the VMA from the mapping */
         if (vma->vm_file) {
@@ -825,8 +833,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
         struct vm_area_struct *vma;
  
         /* check the cache first */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (vma && vma->vm_start <= addr && vma->vm_end > addr)
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
                 return vma;
  
         /* trawl the list (there may be multiple mappings in which addr
@@ -835,7 +843,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
                 if (vma->vm_start > addr)
                         return NULL;
                 if (vma->vm_end > addr) {
-                       mm->mmap_cache = vma;
+                       vmacache_update(addr, vma);
                         return vma;
                 }
         }
@@ -874,8 +882,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
         unsigned long end = addr + len;
  
         /* check the cache first */
-       vma = mm->mmap_cache;
-       if (vma && vma->vm_start == addr && vma->vm_end == end)
+       vma = vmacache_find_exact(mm, addr, end);
+       if (vma)
                 return vma;
  
         /* trawl the list (there may be multiple mappings in which addr
@@ -886,7 +894,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
                 if (vma->vm_start > addr)
                         return NULL;
                 if (vma->vm_end == end) {
-                       mm->mmap_cache = vma;
+                       vmacache_update(addr, vma);
                         return vma;
                 }
         }
diff --git a/mm/vmacache.c b/mm/vmacache.c

new file mode 100644 (file)

index 0000000..d4224b3
--- /dev/null
+++ b/mm/vmacache.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2014 Davidlohr Bueso.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
+
+/*
+ * Flush the vma caches of all threads that share the given mm.
+ *
+ * The operation is safe because the caller holds the mmap_sem
+ * exclusively and other threads accessing the vma cache will
+ * have mmap_sem held at least for read, so no extra locking
+ * is required to maintain the vma cache.
+ *
+ * Tasks are visited with for_each_process_thread() inside an
+ * rcu_read_lock()/rcu_read_unlock() section.
+ */
+void vmacache_flush_all(struct mm_struct *mm)
+{
+       struct task_struct *g, *p;
+
+       rcu_read_lock();
+       for_each_process_thread(g, p) {
+               /*
+                * Only the cached vma pointers are cleared here; the
+                * task's vmacache_seqnum is not modified. Each task
+                * adopts the mm's sequence number in vmacache_valid()
+                * when it performs its next lookup.
+                */
+               if (mm == p->mm)
+                       vmacache_flush(p);
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * Tell whether the current task's vmacache may be used for @mm.
+ *
+ * This task may be accessing a foreign mm via (for example)
+ * get_user_pages()->find_vma().  The vmacache is task-local and this
+ * task's vmacache pertains to a different mm (ie, its own).  There is
+ * nothing we can do here.
+ *
+ * Also handle the case where a kernel thread has adopted this mm via use_mm().
+ * That kernel thread's vmacache is not applicable to this mm.
+ *
+ * Returns true only when @mm is current->mm and current does not have
+ * PF_KTHREAD set.
+ */
+static bool vmacache_valid_mm(struct mm_struct *mm)
+{
+       return current->mm == mm && !(current->flags & PF_KTHREAD);
+}
+
+/*
+ * Remember @newvma in the current task's vma cache, in the slot selected
+ * by hashing @addr. Nothing is stored when the current task's cache does
+ * not apply to the mm that owns @newvma (see vmacache_valid_mm()).
+ */
+void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
+{
+       if (!vmacache_valid_mm(newvma->vm_mm))
+               return;
+
+       current->vmacache[VMACACHE_HASH(addr)] = newvma;
+}
+
+/*
+ * Check whether the current task's vma cache holds usable entries for @mm.
+ *
+ * Returns false when the cache does not apply to @mm at all, or when the
+ * task's sequence number differs from the mm's. In the latter case the
+ * task's cache is emptied and re-synchronised as a side effect, so a
+ * subsequent call returns true until the mm's sequence number changes.
+ */
+static bool vmacache_valid(struct mm_struct *mm)
+{
+       struct task_struct *curr;
+
+       if (!vmacache_valid_mm(mm))
+               return false;
+
+       curr = current;
+       if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
+               /*
+                * Sequence numbers differ, so whatever this task has
+                * cached cannot be trusted. Adopt the mm's sequence
+                * number and start over from an empty cache.
+                */
+               curr->vmacache_seqnum = mm->vmacache_seqnum;
+               vmacache_flush(curr);
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Look up @addr in the current task's vma cache.
+ *
+ * Returns the cached vma whose [vm_start, vm_end) range contains @addr,
+ * or NULL on a miss or when the cache is not valid for @mm. A NULL
+ * return only means "not cached"; the caller is expected to fall back
+ * to the full lookup.
+ */
+struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
+{
+       int i;
+
+       if (!vmacache_valid(mm))
+               return NULL;
+
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               struct vm_area_struct *vma = current->vmacache[i];
+
+               if (!vma)
+                       continue;
+               /*
+                * A cached vma from another mm is a bug, but this is only
+                * a cache: warn once and report a miss instead of halting
+                * the machine with BUG_ON().
+                */
+               if (WARN_ON_ONCE(vma->vm_mm != mm))
+                       break;
+               if (vma->vm_start <= addr && vma->vm_end > addr)
+                       return vma;
+       }
+
+       return NULL;
+}
+
+#ifndef CONFIG_MMU
+/*
+ * Look in the current task's vma cache for a vma that spans exactly
+ * [@start, @end). Returns that vma, or NULL when the cache is not
+ * valid for @mm or holds no such entry.
+ */
+struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+                                          unsigned long start,
+                                          unsigned long end)
+{
+       int slot;
+
+       if (!vmacache_valid(mm))
+               return NULL;
+
+       for (slot = 0; slot < VMACACHE_SIZE; slot++) {
+               struct vm_area_struct *cached = current->vmacache[slot];
+
+               /* empty slot */
+               if (!cached)
+                       continue;
+               if (cached->vm_start == start && cached->vm_end == end)
+                       return cached;
+       }
+
+       return NULL;
+}
+#endif
author	Davidlohr Bueso <davidlohr@hp.com>
	Mon, 7 Apr 2014 22:37:25 +0000 (15:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 7 Apr 2014 23:35:53 +0000 (16:35 -0700)
arch/unicore32/include/asm/mmu_context.h		patch \| blob \| history
fs/exec.c		patch \| blob \| history
fs/proc/task_mmu.c		patch \| blob \| history
include/linux/mm_types.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/vmacache.h	[new file with mode: 0644]	patch \| blob
kernel/debug/debug_core.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
mm/Makefile		patch \| blob \| history
mm/mmap.c		patch \| blob \| history
mm/nommu.c		patch \| blob \| history
mm/vmacache.c	[new file with mode: 0644]	patch \| blob