RISC-V: Flush I$ when making a dirty page executable

author Andrew Waterman <andrew@sifive.com>

Wed, 25 Oct 2017 21:30:32 +0000 (14:30 -0700)

committer Palmer Dabbelt <palmer@sifive.com>

Thu, 30 Nov 2017 20:58:25 +0000 (12:58 -0800)
author Andrew Waterman <andrew@sifive.com>
Wed, 25 Oct 2017 21:30:32 +0000 (14:30 -0700)
committer Palmer Dabbelt <palmer@sifive.com>
Thu, 30 Nov 2017 20:58:25 +0000 (12:58 -0800)
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h

index 0595585..5c9ed39 100644 (file)
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -18,21 +18,37 @@
  
  #undef flush_icache_range
  #undef flush_icache_user_range
+#undef flush_dcache_page
  
  static inline void local_flush_icache_all(void)
  {
         asm volatile ("fence.i" ::: "memory");
  }
  
+#define PG_dcache_clean PG_arch_1
+
+static inline void flush_dcache_page(struct page *page)
+{
+       if (test_bit(PG_dcache_clean, &page->flags))
+               clear_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+ * so instead we just flush the whole thing.
+ */
+#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+
  #ifndef CONFIG_SMP
  
-#define flush_icache_range(start, end) local_flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
+#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_mm(mm, local) flush_icache_all()
  
  #else /* CONFIG_SMP */
  
-#define flush_icache_range(start, end) sbi_remote_fence_i(0)
-#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(0)
+void flush_icache_mm(struct mm_struct *mm, bool local);
  
  #endif /* CONFIG_SMP */
  
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h

index 66805cb..5df2dcc 100644 (file)
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,6 +19,10 @@
  
  typedef struct {
         void *vdso;
+#ifdef CONFIG_SMP
+       /* A local icache flush is needed before user execution can resume. */
+       cpumask_t icache_stale_mask;
+#endif
  } mm_context_t;
  
  #endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h

index de1fc16..b15b169 100644 (file)
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -1,5 +1,6 @@
  /*
   * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
   *
   *   This program is free software; you can redistribute it and/or
   *   modify it under the terms of the GNU General Public License
@@ -19,6 +20,7 @@
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
  
  static inline void enter_lazy_tlb(struct mm_struct *mm,
         struct task_struct *task)
@@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd)
         csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
  }
  
+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU.  RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches.  To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, i.e. 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart.  This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+       unsigned int cpu = smp_processor_id();
+       cpumask_t *mask = &mm->context.icache_stale_mask;
+
+       if (cpumask_test_cpu(cpu, mask)) {
+               cpumask_clear_cpu(cpu, mask);
+               /*
+                * Ensure the remote hart's writes are visible to this hart.
+                * This pairs with a barrier in flush_icache_mm.
+                */
+               smp_mb();
+               local_flush_icache_all();
+       }
+#endif
+}
+
  static inline void switch_mm(struct mm_struct *prev,
         struct mm_struct *next, struct task_struct *task)
  {
         if (likely(prev != next)) {
+               /*
+                * Mark the current MM context as inactive, and the next as
+                * active.  This is at least used by the icache flushing
+                * routines in order to determine who should be flushed.
+                */
+               unsigned int cpu = smp_processor_id();
+
+               cpumask_clear_cpu(cpu, mm_cpumask(prev));
+               cpumask_set_cpu(cpu, mm_cpumask(next));
+
                 set_pgdir(next->pgd);
                 local_flush_tlb_all();
+
+               flush_icache_deferred(next);
         }
  }
  
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h

index 3399257..2cbd92e 100644 (file)
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
  #define pte_offset_map(dir, addr)      pte_offset_kernel((dir), (addr))
  #define pte_unmap(pte)                 ((void)(pte))
  
-/*
- * Certain architectures need to do special things when PTEs within
- * a page table are directly modified.  Thus, the following hook is
- * made available.
- */
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-       *ptep = pteval;
-}
-
-static inline void set_pte_at(struct mm_struct *mm,
-       unsigned long addr, pte_t *ptep, pte_t pteval)
-{
-       set_pte(ptep, pteval);
-}
-
-static inline void pte_clear(struct mm_struct *mm,
-       unsigned long addr, pte_t *ptep)
-{
-       set_pte_at(mm, addr, ptep, __pte(0));
-}
-
  static inline int pte_present(pte_t pte)
  {
         return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
         return (pte_val(pte) == 0);
  }
  
-/* static inline int pte_read(pte_t pte) */
-
  static inline int pte_write(pte_t pte)
  {
         return pte_val(pte) & _PAGE_WRITE;
  }
  
+static inline int pte_exec(pte_t pte)
+{
+       return pte_val(pte) & _PAGE_EXEC;
+}
+
  static inline int pte_huge(pte_t pte)
  {
         return pte_present(pte)
                 && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
  }
  
-/* static inline int pte_exec(pte_t pte) */
-
  static inline int pte_dirty(pte_t pte)
  {
         return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
         return pte_val(pte_a) == pte_val(pte_b);
  }
  
+/*
+ * Certain architectures need to do special things when PTEs within
+ * a page table are directly modified.  Thus, the following hook is
+ * made available.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+       *ptep = pteval;
+}
+
+void flush_icache_pte(pte_t pte);
+
+static inline void set_pte_at(struct mm_struct *mm,
+       unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+       if (pte_present(pteval) && pte_exec(pteval))
+               flush_icache_pte(pteval);
+
+       set_pte(ptep, pteval);
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+       unsigned long addr, pte_t *ptep)
+{
+       set_pte_at(mm, addr, ptep, __pte(0));
+}
+
  #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  static inline int ptep_set_access_flags(struct vm_area_struct *vma,
                                         unsigned long address, pte_t *ptep,
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h

index 5ee4ae3..77edf28 100644 (file)
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -17,6 +17,8 @@
  
  #ifdef CONFIG_MMU
  
+#include <linux/mm_types.h>
+
  /* Flush entire local TLB */
  static inline void local_flush_tlb_all(void)
  {
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c

index b4a71ec..1b27ade 100644 (file)
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu)
  {
         send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
  }
+
+/*
+ * Performs an icache flush for the given MM context.  RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, i.e. 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+       unsigned int cpu;
+       cpumask_t others, *mask;
+
+       preempt_disable();
+
+       /* Mark every hart's icache as needing a flush for this MM. */
+       mask = &mm->context.icache_stale_mask;
+       cpumask_setall(mask);
+       /* Flush this hart's I$ now, and mark it as flushed. */
+       cpu = smp_processor_id();
+       cpumask_clear_cpu(cpu, mask);
+       local_flush_icache_all();
+
+       /*
+        * Flush the I$ of other harts concurrently executing, and mark them as
+        * flushed.
+        */
+       cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+       local |= cpumask_empty(&others);
+       if (mm != current->active_mm || !local)
+               sbi_remote_fence_i(others.bits);
+       else {
+               /*
+                * It's assumed that at least one strongly ordered operation is
+                * performed on this hart between setting a hart's cpumask bit
+                * and scheduling this MM context on that hart.  Sending an SBI
+                * remote message will do this, but in the case where no
+                * messages are sent we still need to order this hart's writes
+                * with flush_icache_deferred().
+                */
+               smp_mb();
+       }
+
+       preempt_enable();
+}
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile

index 81f7d9c..eb22ab4 100644 (file)
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,3 +2,4 @@ obj-y += init.o
  obj-y += fault.o
  obj-y += extable.o
  obj-y += ioremap.o
+obj-y += cacheflush.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c

new file mode 100644 (file)

index 0000000..498c0a0
--- /dev/null
+++ b/arch/riscv/mm/cacheflush.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+
+void flush_icache_pte(pte_t pte)
+{
+       struct page *page = pte_page(pte);
+
+       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+               flush_icache_all();
+}
author	Andrew Waterman <andrew@sifive.com>
	Wed, 25 Oct 2017 21:30:32 +0000 (14:30 -0700)
committer	Palmer Dabbelt <palmer@sifive.com>
	Thu, 30 Nov 2017 20:58:25 +0000 (12:58 -0800)
arch/riscv/include/asm/cacheflush.h		patch \| blob \| history
arch/riscv/include/asm/mmu.h		patch \| blob \| history
arch/riscv/include/asm/mmu_context.h		patch \| blob \| history
arch/riscv/include/asm/pgtable.h		patch \| blob \| history
arch/riscv/include/asm/tlbflush.h		patch \| blob \| history
arch/riscv/kernel/smp.c		patch \| blob \| history
arch/riscv/mm/Makefile		patch \| blob \| history
arch/riscv/mm/cacheflush.c	[new file with mode: 0644]	patch \| blob