x86/mm/tlb: Add freed_tables element to flush_tlb_info

[linux-2.6-microblaze.git] / arch / x86 / include / asm / tlbflush.h
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h

index 29c9da6..323a313 100644 (file)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,22 +148,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
  #define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
  #endif
  
-static inline bool tlb_defer_switch_to_init_mm(void)
-{
-       /*
-        * If we have PCID, then switching to init_mm is reasonably
-        * fast.  If we don't have PCID, then switching to init_mm is
-        * quite slow, so we try to defer it in the hopes that we can
-        * avoid it entirely.  The latter approach runs the risk of
-        * receiving otherwise unnecessary IPIs.
-        *
-        * This choice is just a heuristic.  The tlb code can handle this
-        * function returning true or false regardless of whether we have
-        * PCID.
-        */
-       return !static_cpu_has(X86_FEATURE_PCID);
-}
-
  struct tlb_context {
         u64 ctx_id;
         u64 tlb_gen;
@@ -175,8 +159,16 @@ struct tlb_state {
          * are on.  This means that it may not match current->active_mm,
          * which will contain the previous user mm when we're in lazy TLB
          * mode even if we've already switched back to swapper_pg_dir.
+        *
+        * During switch_mm_irqs_off(), loaded_mm will be set to
+        * LOADED_MM_SWITCHING during the brief interrupts-off window
+        * when CR3 and loaded_mm would otherwise be inconsistent.  This
+        * is for nmi_uaccess_okay()'s benefit.
          */
         struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
         u16 loaded_mm_asid;
         u16 next_asid;
         /* last user mm's ctx id */
@@ -246,6 +238,38 @@ struct tlb_state {
  };
  DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
  
+/*
+ * Blindly accessing user memory from NMI context can be dangerous if
+ * we're in the middle of switching the current user task or the loaded
+ * mm, or if we interrupted kernel code temporarily using a different mm.
+ *
+ * Returns true only when this CPU's loaded_mm is the same as current->mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+       struct mm_struct *current_mm = current->mm;
+
+       VM_WARN_ON_ONCE(!loaded_mm);
+
+       /*
+        * The condition we want to check is
+        * current_mm->pgd == __va(read_cr3_pa()).  This may be slow, though,
+        * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+        * is supposed to be reasonably fast.
+        *
+        * Instead, we check the almost equivalent but somewhat conservative
+        * condition below, and we rely on the fact that switch_mm_irqs_off()
+        * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+        */
+       if (loaded_mm != current_mm)
+               return false;
+
+       VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+       return true;
+}
+
  /* Initialize cr4 shadow for this CPU. */
  static inline void cr4_init_shadow(void)
  {
@@ -507,23 +531,30 @@ struct flush_tlb_info {
         unsigned long           start;
         unsigned long           end;
         u64                     new_tlb_gen;
+       unsigned int            stride_shift;
+       bool                    freed_tables;
  };
  
  #define local_flush_tlb() __flush_tlb()
  
-#define flush_tlb_mm(mm)       flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+#define flush_tlb_mm(mm)                                               \
+               flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)
  
-#define flush_tlb_range(vma, start, end)       \
-               flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+#define flush_tlb_range(vma, start, end)                               \
+       flush_tlb_mm_range((vma)->vm_mm, start, end,                    \
+                          ((vma)->vm_flags & VM_HUGETLB)               \
+                               ? huge_page_shift(hstate_vma(vma))      \
+                               : PAGE_SHIFT, false)
  
  extern void flush_tlb_all(void);
  extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-                               unsigned long end, unsigned long vmflag);
+                               unsigned long end, unsigned int stride_shift,
+                               bool freed_tables);
  extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
  
  static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
  {
-       flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+       flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
  }
  
  void native_flush_tlb_others(const struct cpumask *cpumask,