Merge tag 'mm-stable-2024-03-13-20-04' of git://git.kernel.org/pub/scm/linux/kernel...

[linux-2.6-microblaze.git] / arch / x86 / mm / tlb.c
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index 4af9309..44ac64f 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -299,7 +299,7 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
         write_cr3(new_mm_cr3);
  }
  
-void leave_mm(int cpu)
+void leave_mm(void)
  {
         struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
  
@@ -327,7 +327,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
         unsigned long flags;
  
         local_irq_save(flags);
-       switch_mm_irqs_off(prev, next, tsk);
+       switch_mm_irqs_off(NULL, next, tsk);
         local_irq_restore(flags);
  }
  
@@ -492,10 +492,16 @@ void cr4_update_pce(void *ignored)
  static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
  #endif
  
-void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+/*
+ * This function optimizes the case where the mm is not actually changing.
+ * Some architectures use the 'unused' (previous mm) argument to detect that
+ * case, but x86 must use 'cpu_tlbstate.loaded_mm' instead because it does
+ * not always keep 'current->active_mm' up to date.
+ */
+void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
                         struct task_struct *tsk)
  {
-       struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+       struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);
         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
         unsigned long new_lam = mm_lam_cr3_mask(next);
         bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
@@ -504,15 +510,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         bool need_flush;
         u16 new_asid;
  
-       /*
-        * NB: The scheduler will call us with prev == next when switching
-        * from lazy TLB mode to normal mode if active_mm isn't changing.
-        * When this happens, we don't assume that CR3 (and hence
-        * cpu_tlbstate.loaded_mm) matches next.
-        *
-        * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
-        */
-
         /* We don't want flush_tlb_func() to run concurrently with us. */
         if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                 WARN_ON_ONCE(!irqs_disabled());
@@ -527,7 +524,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
          * isn't free.
          */
  #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(prev->pgd, prev_asid,
                                                    tlbstate_lam_cr3_mask()))) {
                 /*
                  * If we were to BUG here, we'd be very likely to kill
@@ -559,7 +556,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
          * provides that full memory barrier and core serializing
          * instruction.
          */
-       if (real_prev == next) {
+       if (prev == next) {
                 /* Not actually switching mm's */
                 VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
                            next->context.ctx_id);
@@ -574,7 +571,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                  * mm_cpumask. The TLB shootdown code can figure out from
                  * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
                  */
-               if (WARN_ON_ONCE(real_prev != &init_mm &&
+               if (WARN_ON_ONCE(prev != &init_mm &&
                                  !cpumask_test_cpu(cpu, mm_cpumask(next))))
                         cpumask_set_cpu(cpu, mm_cpumask(next));
  
@@ -616,10 +613,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                  * Skip kernel threads; we never send init_mm TLB flushing IPIs,
                  * but the bitmap manipulation can cause cache line contention.
                  */
-               if (real_prev != &init_mm) {
+               if (prev != &init_mm) {
                         VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
-                                               mm_cpumask(real_prev)));
-                       cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+                                               mm_cpumask(prev)));
+                       cpumask_clear_cpu(cpu, mm_cpumask(prev));
                 }
  
                 /*
@@ -656,9 +653,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         this_cpu_write(cpu_tlbstate.loaded_mm, next);
         this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
  
-       if (next != real_prev) {
+       if (next != prev) {
                 cr4_update_pce_mm(next);
-               switch_ldt(real_prev, next);
+               switch_ldt(prev, next);
         }
  }