mm/rmap: fix potential batched TLB flush race

author Huang Ying <ying.huang@intel.com>

Fri, 14 Jan 2022 22:09:16 +0000 (14:09 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 15 Jan 2022 14:30:31 +0000 (16:30 +0200)
author Huang Ying <ying.huang@intel.com>
Fri, 14 Jan 2022 22:09:16 +0000 (14:09 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Jan 2022 14:30:31 +0000 (16:30 +0200)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 6a89f12..e3b0476 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -647,7 +647,7 @@ struct mm_struct {
                 atomic_t tlb_flush_pending;
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
                 /* See flush_tlb_batched_pending() */
-               bool tlb_flush_batched;
+               atomic_t tlb_flush_batched;
  #endif
                 struct uprobes_state uprobes_state;
  #ifdef CONFIG_PREEMPT_RT
diff --git a/mm/rmap.c b/mm/rmap.c

index 163ac4e..6a1e8c7 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -621,9 +621,20 @@ void try_to_unmap_flush_dirty(void)
                 try_to_unmap_flush();
  }
  
+/*
+ * Bits 0-14 of mm->tlb_flush_batched record pending generations.
+ * Bits 16-30 of mm->tlb_flush_batched record flushed generations.
+ */
+#define TLB_FLUSH_BATCH_FLUSHED_SHIFT  16
+#define TLB_FLUSH_BATCH_PENDING_MASK                   \
+       ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1)
+#define TLB_FLUSH_BATCH_PENDING_LARGE                  \
+       (TLB_FLUSH_BATCH_PENDING_MASK / 2)
+
  static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
  {
         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+       int batch, nbatch;
  
         arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
         tlb_ubc->flush_required = true;
@@ -633,7 +644,22 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
          * before the PTE is cleared.
          */
         barrier();
-       mm->tlb_flush_batched = true;
+       batch = atomic_read(&mm->tlb_flush_batched);
+retry:
+       if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) {
+               /*
+                * Prevent `pending' from catching up with `flushed' because of
+                * overflow.  Reset `pending' and `flushed' to be 1 and 0 if
+                * `pending' becomes large.
+                */
+               nbatch = atomic_cmpxchg(&mm->tlb_flush_batched, batch, 1);
+               if (nbatch != batch) {
+                       batch = nbatch;
+                       goto retry;
+               }
+       } else {
+               atomic_inc(&mm->tlb_flush_batched);
+       }
  
         /*
          * If the PTE was dirty then it's best to assume it's writable. The
@@ -680,15 +706,18 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
   */
  void flush_tlb_batched_pending(struct mm_struct *mm)
  {
-       if (data_race(mm->tlb_flush_batched)) {
-               flush_tlb_mm(mm);
+       int batch = atomic_read(&mm->tlb_flush_batched);
+       int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK;
+       int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT;
  
+       if (pending != flushed) {
+               flush_tlb_mm(mm);
                 /*
-                * Do not allow the compiler to re-order the clearing of
-                * tlb_flush_batched before the tlb is flushed.
+                * If a new flush became pending during this flush, the cmpxchg
+                * fails and mm->tlb_flush_batched is left as is, so none is lost.
                  */
-               barrier();
-               mm->tlb_flush_batched = false;
+               atomic_cmpxchg(&mm->tlb_flush_batched, batch,
+                              pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT));
         }
  }
  #else
author	Huang Ying <ying.huang@intel.com>
	Fri, 14 Jan 2022 22:09:16 +0000 (14:09 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 15 Jan 2022 14:30:31 +0000 (16:30 +0200)
include/linux/mm_types.h		patch \| blob \| history
mm/rmap.c		patch \| blob \| history