Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

[linux-2.6-microblaze.git] / include / asm-generic / tlb.h
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h

index 075b353..480e5b2 100644 (file)
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -19,6 +19,7 @@
  #include <linux/swap.h>
  #include <asm/pgalloc.h>
  #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
  
  /*
   * Blindly accessing user memory from NMI context can be dangerous
@@ -31,6 +32,127 @@
  
  #ifdef CONFIG_MMU
  
+/*
+ * Generic MMU-gather implementation.
+ *
+ * The mmu_gather data structure is used by the mm code to implement the
+ * correct and efficient ordering of freeing pages and TLB invalidations.
+ *
+ * This correct ordering is:
+ *
+ *  1) unhook page
+ *  2) TLB invalidate page
+ *  3) free page
+ *
+ * That is, we must never free a page before we have ensured there are no live
+ * translations left to it. Otherwise it might be possible to observe (or
+ * worse, change) the page content after it has been reused.
+ *
+ * The mmu_gather API consists of:
+ *
+ *  - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
+ *
+ *    Finish in particular will issue a (final) TLB invalidate and free
+ *    all (remaining) queued pages.
+ *
+ *  - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA
+ *
+ *    Defaults to flushing at tlb_end_vma() to reset the range; helps when
+ *    there's large holes between the VMAs.
+ *
+ *  - tlb_remove_page() / __tlb_remove_page()
+ *  - tlb_remove_page_size() / __tlb_remove_page_size()
+ *
+ *    __tlb_remove_page_size() is the basic primitive that queues a page for
+ *    freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
+ *    boolean indicating if the queue is (now) full and a call to
+ *    tlb_flush_mmu() is required.
+ *
+ *    tlb_remove_page() and tlb_remove_page_size() imply the call to
+ *    tlb_flush_mmu() when required and has no return value.
+ *
+ *  - tlb_change_page_size()
+ *
+ *    call before __tlb_remove_page*() to set the current page-size; implies a
+ *    possible tlb_flush_mmu() call.
+ *
+ *  - tlb_flush_mmu() / tlb_flush_mmu_tlbonly()
+ *
+ *    tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets
+ *                              related state, like the range)
+ *
+ *    tlb_flush_mmu() - in addition to the above TLB invalidate, also frees
+ *                     whatever pages are still batched.
+ *
+ *  - mmu_gather::fullmm
+ *
+ *    A flag set by tlb_gather_mmu() to indicate we're going to free
+ *    the entire mm; this allows a number of optimizations.
+ *
+ *    - We can ignore tlb_{start,end}_vma(); because we don't
+ *      care about ranges. Everything will be shot down.
+ *
+ *    - (RISC) architectures that use ASIDs can cycle to a new ASID
+ *      and delay the invalidation until ASID space runs out.
+ *
+ *  - mmu_gather::need_flush_all
+ *
+ *    A flag that can be set by the arch code if it wants to force
+ *    flush the entire TLB irrespective of the range. For instance
+ *    x86-PAE needs this when changing top-level entries.
+ *
+ * And allows the architecture to provide and implement tlb_flush():
+ *
+ * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make
+ * use of:
+ *
+ *  - mmu_gather::start / mmu_gather::end
+ *
+ *    which provides the range that needs to be flushed to cover the pages to
+ *    be freed.
+ *
+ *  - mmu_gather::freed_tables
+ *
+ *    set when we freed page table pages
+ *
+ *  - tlb_get_unmap_shift() / tlb_get_unmap_size()
+ *
+ *    returns the smallest TLB entry size unmapped in this range.
+ *
+ * If an architecture does not provide tlb_flush() a default implementation
+ * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is
+ * specified, in which case we'll default to flush_tlb_mm().
+ *
+ * Additionally there are a few opt-in features:
+ *
+ *  HAVE_MMU_GATHER_PAGE_SIZE
+ *
+ *  This ensures we call tlb_flush() every time tlb_change_page_size() actually
+ *  changes the size and provides mmu_gather::page_size to tlb_flush().
+ *
+ *  HAVE_RCU_TABLE_FREE
+ *
+ *  This provides tlb_remove_table(), to be used instead of tlb_remove_page()
+ *  for page directores (__p*_free_tlb()). This provides separate freeing of
+ *  the page-table pages themselves in a semi-RCU fashion (see comment below).
+ *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
+ *  and therefore doesn't naturally serialize with software page-table walkers.
+ *
+ *  When used, an architecture is expected to provide __tlb_remove_table()
+ *  which does the actual freeing of these pages.
+ *
+ *  HAVE_RCU_TABLE_NO_INVALIDATE
+ *
+ *  This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
+ *  freeing the page-table pages. This can be avoided if you use
+ *  HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
+ *  page-tables natively.
+ *
+ *  MMU_GATHER_NO_RANGE
+ *
+ *  Use this if your architecture lacks an efficient flush_tlb_range().
+ */
+
  #ifdef CONFIG_HAVE_RCU_TABLE_FREE
  /*
   * Semi RCU freeing of the page directories.
@@ -69,11 +191,11 @@ struct mmu_table_batch {
  #define MAX_TABLE_BATCH                \
         ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
  
-extern void tlb_table_flush(struct mmu_gather *tlb);
  extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
  
  #endif
  
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
  /*
   * If we can't allocate a page to make a big batch of page pointers
   * to work on, then just handle a few from the on-stack structure.
@@ -98,14 +220,21 @@ struct mmu_gather_batch {
   */
  #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH)
  
-/* struct mmu_gather is an opaque type used by the mm code for passing around
+extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+                                  int page_size);
+#endif
+
+/*
+ * struct mmu_gather is an opaque type used by the mm code for passing around
   * any data needed by arch specific code for tlb_remove_page.
   */
  struct mmu_gather {
         struct mm_struct        *mm;
+
  #ifdef CONFIG_HAVE_RCU_TABLE_FREE
         struct mmu_table_batch  *batch;
  #endif
+
         unsigned long           start;
         unsigned long           end;
         /*
@@ -133,23 +262,30 @@ struct mmu_gather {
         unsigned int            cleared_puds : 1;
         unsigned int            cleared_p4ds : 1;
  
+       /*
+        * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma
+        */
+       unsigned int            vma_exec : 1;
+       unsigned int            vma_huge : 1;
+
+       unsigned int            batch_count;
+
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
         struct mmu_gather_batch *active;
         struct mmu_gather_batch local;
         struct page             *__pages[MMU_GATHER_BUNDLE];
-       unsigned int            batch_count;
-       int page_size;
-};
  
-#define HAVE_GENERIC_MMU_GATHER
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+       unsigned int page_size;
+#endif
+#endif
+};
  
  void arch_tlb_gather_mmu(struct mmu_gather *tlb,
         struct mm_struct *mm, unsigned long start, unsigned long end);
  void tlb_flush_mmu(struct mmu_gather *tlb);
  void arch_tlb_finish_mmu(struct mmu_gather *tlb,
                          unsigned long start, unsigned long end, bool force);
-void tlb_flush_mmu_free(struct mmu_gather *tlb);
-extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
-                                  int page_size);
  
  static inline void __tlb_adjust_range(struct mmu_gather *tlb,
                                       unsigned long address,
@@ -172,8 +308,94 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
         tlb->cleared_pmds = 0;
         tlb->cleared_puds = 0;
         tlb->cleared_p4ds = 0;
+       /*
+        * Do not reset mmu_gather::vma_* fields here, we do not
+        * call into tlb_start_vma() again to set them if there is an
+        * intermediate flush.
+        */
+}
+
+#ifdef CONFIG_MMU_GATHER_NO_RANGE
+
+#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#endif
+
+/*
+ * When an architecture does not have efficient means of range flushing TLBs
+ * there is no point in doing intermediate flushes on tlb_end_vma() to keep the
+ * range small. We equally don't have to worry about page granularity or other
+ * things.
+ *
+ * All we need to do is issue a full flush for any !0 range.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+       if (tlb->end)
+               flush_tlb_mm(tlb->mm);
+}
+
+static inline void
+tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+
+#define tlb_end_vma tlb_end_vma
+static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+
+#else /* CONFIG_MMU_GATHER_NO_RANGE */
+
+#ifndef tlb_flush
+
+#if defined(tlb_start_vma) || defined(tlb_end_vma)
+#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
+#endif
+
+/*
+ * When an architecture does not provide its own tlb_flush() implementation
+ * but does have a reasonably efficient flush_vma_range() implementation
+ * use that.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+       if (tlb->fullmm || tlb->need_flush_all) {
+               flush_tlb_mm(tlb->mm);
+       } else if (tlb->end) {
+               struct vm_area_struct vma = {
+                       .vm_mm = tlb->mm,
+                       .vm_flags = (tlb->vma_exec ? VM_EXEC    : 0) |
+                                   (tlb->vma_huge ? VM_HUGETLB : 0),
+               };
+
+               flush_tlb_range(&vma, tlb->start, tlb->end);
+       }
  }
  
+static inline void
+tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+       /*
+        * flush_tlb_range() implementations that look at VM_HUGETLB (tile,
+        * mips-4k) flush only large pages.
+        *
+        * flush_tlb_range() implementations that flush I-TLB also flush D-TLB
+        * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing
+        * range.
+        *
+        * We rely on tlb_end_vma() to issue a flush, such that when we reset
+        * these values the batch is empty.
+        */
+       tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB);
+       tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+}
+
+#else
+
+static inline void
+tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
+
  static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
  {
         if (!tlb->end)
@@ -205,21 +427,18 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
         return tlb_remove_page_size(tlb, page, PAGE_SIZE);
  }
  
-#ifndef tlb_remove_check_page_size_change
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+static inline void tlb_change_page_size(struct mmu_gather *tlb,
                                                      unsigned int page_size)
  {
-       /*
-        * We don't care about page size change, just update
-        * mmu_gather page size here so that debug checks
-        * doesn't throw false warning.
-        */
-#ifdef CONFIG_DEBUG_VM
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+       if (tlb->page_size && tlb->page_size != page_size) {
+               if (!tlb->fullmm)
+                       tlb_flush_mmu(tlb);
+       }
+
         tlb->page_size = page_size;
  #endif
  }
-#endif
  
  static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
  {
@@ -246,17 +465,30 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
   * the vmas are adjusted to only cover the region to be torn down.
   */
  #ifndef tlb_start_vma
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#endif
+static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+       if (tlb->fullmm)
+               return;
  
-#define __tlb_end_vma(tlb, vma)                                        \
-       do {                                                    \
-               if (!tlb->fullmm)                               \
-                       tlb_flush_mmu_tlbonly(tlb);             \
-       } while (0)
+       tlb_update_vma_flags(tlb, vma);
+       flush_cache_range(vma, vma->vm_start, vma->vm_end);
+}
+#endif
  
  #ifndef tlb_end_vma
-#define tlb_end_vma    __tlb_end_vma
+static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+       if (tlb->fullmm)
+               return;
+
+       /*
+        * Do a TLB flush and reset the range at VMA boundaries; this avoids
+        * the ranges growing with the unused space between consecutive VMAs,
+        * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
+        * this.
+        */
+       tlb_flush_mmu_tlbonly(tlb);
+}
  #endif
  
  #ifndef __tlb_remove_tlb_entry
@@ -381,6 +613,4 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
  
  #endif /* CONFIG_MMU */
  
-#define tlb_migrate_finish(mm) do {} while (0)
-
  #endif /* _ASM_GENERIC__TLB_H */