Merge tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 17 Dec 2022 20:06:53 +0000 (14:06 -0600)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 17 Dec 2022 20:06:53 +0000 (14:06 -0600)
diff --git a/arch/arm/mach-omap1/sram-init.c b/arch/arm/mach-omap1/sram-init.c

index 27c42e2..dabf0c4 100644 (file)
--- a/arch/arm/mach-omap1/sram-init.c
+++ b/arch/arm/mach-omap1/sram-init.c
@@ -10,11 +10,11 @@
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/io.h>
+#include <linux/set_memory.h>
  
  #include <asm/fncpy.h>
  #include <asm/tlb.h>
  #include <asm/cacheflush.h>
-#include <asm/set_memory.h>
  
  #include <asm/mach/map.h>
  
@@ -74,8 +74,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
  
         dst = fncpy(sram, funcp, size);
  
-       set_memory_ro(base, pages);
-       set_memory_x(base, pages);
+       set_memory_rox(base, pages);
  
         return dst;
  }
@@ -126,8 +125,7 @@ static void __init omap_detect_and_map_sram(void)
         base = (unsigned long)omap_sram_base;
         pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
  
-       set_memory_ro(base, pages);
-       set_memory_x(base, pages);
+       set_memory_rox(base, pages);
  }
  
  static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
diff --git a/arch/arm/mach-omap2/sram.c b/arch/arm/mach-omap2/sram.c

index 39cf270..815d390 100644 (file)
--- a/arch/arm/mach-omap2/sram.c
+++ b/arch/arm/mach-omap2/sram.c
@@ -14,11 +14,11 @@
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/io.h>
+#include <linux/set_memory.h>
  
  #include <asm/fncpy.h>
  #include <asm/tlb.h>
  #include <asm/cacheflush.h>
-#include <asm/set_memory.h>
  
  #include <asm/mach/map.h>
  
@@ -96,8 +96,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
  
         dst = fncpy(sram, funcp, size);
  
-       set_memory_ro(base, pages);
-       set_memory_x(base, pages);
+       set_memory_rox(base, pages);
  
         return dst;
  }
@@ -217,8 +216,7 @@ static void __init omap2_map_sram(void)
         base = (unsigned long)omap_sram_base;
         pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
  
-       set_memory_ro(base, pages);
-       set_memory_x(base, pages);
+       set_memory_rox(base, pages);
  }
  
  static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig

index b26b776..15cb692 100644 (file)
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -46,7 +46,7 @@ config MIPS
         select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
         select GENERIC_SMP_IDLE_THREAD
         select GENERIC_TIME_VSYSCALL
-       select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
+       select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
         select HAVE_ARCH_COMPILER_H
         select HAVE_ARCH_JUMP_LABEL
         select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h

index 0d40b33..cb1ac02 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -263,7 +263,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
  }
  
  #ifdef CONFIG_PPC_16K_PAGES
-#define __HAVE_ARCH_PTEP_GET
+#define ptep_get ptep_get
  static inline pte_t ptep_get(pte_t *ptep)
  {
         pte_basic_t val = READ_ONCE(ptep->pte);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c

index bd7b1a0..7a89de3 100644 (file)
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -20,12 +20,12 @@
  #include <linux/kdebug.h>
  #include <linux/slab.h>
  #include <linux/moduleloader.h>
+#include <linux/set_memory.h>
  #include <asm/code-patching.h>
  #include <asm/cacheflush.h>
  #include <asm/sstep.h>
  #include <asm/sections.h>
  #include <asm/inst.h>
-#include <asm/set_memory.h>
  #include <linux/uaccess.h>
  
  DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -134,10 +134,9 @@ void *alloc_insn_page(void)
         if (!page)
                 return NULL;
  
-       if (strict_module_rwx_enabled()) {
-               set_memory_ro((unsigned long)page, 1);
-               set_memory_x((unsigned long)page, 1);
-       }
+       if (strict_module_rwx_enabled())
+               set_memory_rox((unsigned long)page, 1);
+
         return page;
  }
  
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig

index 5f220e9..0665ac0 100644 (file)
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -24,7 +24,7 @@ config SUPERH
         select GENERIC_PCI_IOMAP if PCI
         select GENERIC_SCHED_CLOCK
         select GENERIC_SMP_IDLE_THREAD
-       select GUP_GET_PTE_LOW_HIGH if X2TLB
+       select GUP_GET_PXX_LOW_HIGH if X2TLB
         select HAVE_ARCH_AUDITSYSCALL
         select HAVE_ARCH_KGDB
         select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h

index cdced80..a889a3a 100644 (file)
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -28,9 +28,15 @@
  #define pmd_ERROR(e) \
         printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
  
-typedef struct { unsigned long long pmd; } pmd_t;
+typedef union {        /* union, not struct: pmd_low/pmd_high must overlay pmd */
+       struct {
+               unsigned long pmd_low;
+               unsigned long pmd_high;
+       };
+       unsigned long long pmd; /* whole entry; read by pmd_val(), set by __pmd() */
+} pmd_t;
  #define pmd_val(x)     ((x).pmd)
-#define __pmd(x)       ((pmd_t) { (x) } )
+#define __pmd(x)       ((pmd_t) { .pmd = (x) } )
  
  static inline pmd_t *pud_pgtable(pud_t pud)
  {
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h

index cb896e6..8a5032e 100644 (file)
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -58,11 +58,7 @@
  #define pud_populate(mm, pud, pmd) \
         set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
  
-#ifdef CONFIG_64BIT
-#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
-#else
  #define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
-#endif
  
  static inline int pgd_newpage(pgd_t pgd)
  {
@@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
  
  static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
  
-#ifdef CONFIG_64BIT
-#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
-#else
  #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
-#endif
  
  static inline void pud_clear (pud_t *pud)
  {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 7577c33..edeee31 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -159,7 +159,7 @@ config X86
         select GENERIC_TIME_VSYSCALL
         select GENERIC_GETTIMEOFDAY
         select GENERIC_VDSO_TIME_NS
-       select GUP_GET_PTE_LOW_HIGH             if X86_PAE
+       select GUP_GET_PXX_LOW_HIGH             if X86_PAE
         select HARDIRQS_SW_RESEND
         select HARDLOCKUP_CHECK_TIMESTAMP       if X86_64
         select HAVE_ACPI_APEI                   if ACPI
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h

index 215f5a6..6ba80ce 100644 (file)
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -7,34 +7,6 @@
   *       you need to test for the feature in boot_cpu_data.
   */
  
-/*
- * CMPXCHG8B only writes to the target if we had the previous
- * value in registers, otherwise it acts as a read and gives us the
- * "new previous" value.  That is why there is a loop.  Preloading
- * EDX:EAX is a performance optimization: in the common case it means
- * we need only one locked operation.
- *
- * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
- * least an FPU save and/or %cr0.ts manipulation.
- *
- * cmpxchg8b must be used with the lock prefix here to allow the
- * instruction to be executed atomically.  We need to have the reader
- * side to see the coherent 64bit value.
- */
-static inline void set_64bit(volatile u64 *ptr, u64 value)
-{
-       u32 low  = value;
-       u32 high = value >> 32;
-       u64 prev = *ptr;
-
-       asm volatile("\n1:\t"
-                    LOCK_PREFIX "cmpxchg8b %0\n\t"
-                    "jnz 1b"
-                    : "=m" (*ptr), "+A" (prev)
-                    : "b" (low), "c" (high)
-                    : "memory");
-}
-
  #ifdef CONFIG_X86_CMPXCHG64
  #define arch_cmpxchg64(ptr, o, n)                                      \
         ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h

index 250187a..0d3beb2 100644 (file)
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -2,11 +2,6 @@
  #ifndef _ASM_X86_CMPXCHG_64_H
  #define _ASM_X86_CMPXCHG_64_H
  
-static inline void set_64bit(volatile u64 *ptr, u64 val)
-{
-       *ptr = val;
-}
-
  #define arch_cmpxchg64(ptr, o, n)                                      \
  ({                                                                     \
         BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h

index 75efc4c..462fc34 100644 (file)
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
  };
  
  #define CPU_ENTRY_AREA_SIZE            (sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE      (CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE      (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
  
  DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
  DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h

index 13e70da..de75306 100644 (file)
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -28,9 +28,12 @@
  #ifdef CONFIG_KASAN
  void __init kasan_early_init(void);
  void __init kasan_init(void);
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
  #else
  static inline void kasan_early_init(void) { }
  static inline void kasan_init(void) { }
+static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
+                                                  int nid) { }
  #endif
  
  #endif
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h

index a506a41..86bd431 100644 (file)
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -11,20 +11,14 @@
  #define PAGE_SIZE              (_AC(1,UL) << PAGE_SHIFT)
  #define PAGE_MASK              (~(PAGE_SIZE-1))
  
-#define PMD_PAGE_SIZE          (_AC(1, UL) << PMD_SHIFT)
-#define PMD_PAGE_MASK          (~(PMD_PAGE_SIZE-1))
-
-#define PUD_PAGE_SIZE          (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK          (~(PUD_PAGE_SIZE-1))
-
  #define __VIRTUAL_MASK         ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
  
-/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
+/* Cast P*D_MASK to a signed type so that it is sign-extended if
     virtual addresses are 32-bits but physical addresses are larger
     (ie, 32-bit PAE). */
  #define PHYSICAL_PAGE_MASK     (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
-#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK)
  
  #define HPAGE_SHIFT            PMD_SHIFT
  #define HPAGE_SIZE             (_AC(1,UL) << HPAGE_SHIFT)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h

index 28421a8..967b135 100644 (file)
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -2,8 +2,6 @@
  #ifndef _ASM_X86_PGTABLE_3LEVEL_H
  #define _ASM_X86_PGTABLE_3LEVEL_H
  
-#include <asm/atomic64_32.h>
-
  /*
   * Intel Physical Address Extension (PAE) Mode - three-level page
   * tables on PPro+ CPUs.
@@ -21,7 +19,15 @@
         pr_err("%s:%d: bad pgd %p(%016Lx)\n",                           \
                __FILE__, __LINE__, &(e), pgd_val(e))
  
-/* Rules for using set_pte: the pte being assigned *must* be
+#define pxx_xchg64(_pxx, _ptr, _val) ({                                        \
+       _pxx##val_t *_p = (_pxx##val_t *)_ptr;                          \
+       _pxx##val_t _o = *_p;                                           \
+       do { } while (!try_cmpxchg64(_p, &_o, (_val)));                 \
+       native_make_##_pxx(_o);                                         \
+})
+
+/*
+ * Rules for using set_pte: the pte being assigned *must* be
   * either not present or in a state where the hardware will
   * not attempt to update the pte.  In places where this is
   * not possible, use pte_get_and_clear to obtain the old pte
@@ -29,75 +35,19 @@
   */
  static inline void native_set_pte(pte_t *ptep, pte_t pte)
  {
-       ptep->pte_high = pte.pte_high;
+       WRITE_ONCE(ptep->pte_high, pte.pte_high);
         smp_wmb();
-       ptep->pte_low = pte.pte_low;
-}
-
-#define pmd_read_atomic pmd_read_atomic
-/*
- * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by GCC. Problem is, in certain places
- * where pte_offset_map_lock() is called, concurrent page faults are
- * allowed, if the mmap_lock is hold for reading. An example is mincore
- * vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate() rightfully does a set_64bit(), but if we're reading the
- * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because GCC will not read the 64-bit value of the pmd atomically.
- *
- * To fix this all places running pte_offset_map_lock() while holding the
- * mmap_lock in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null or not, and in turn to know if
- * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
- * operations.
- *
- * Without THP if the mmap_lock is held for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic() runs. So
- * we can always return atomic pmd values with this function.
- *
- * With THP if the mmap_lock is held for reading, the pmd can become
- * trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic(). We could read it truly
- * atomically here with an atomic64_read() for the THP enabled case (and
- * it would be a whole lot simpler), but to avoid using cmpxchg8b we
- * only return an atomic pmdval if the low part of the pmdval is later
- * found to be stable (i.e. pointing to a pte). We are also returning a
- * 'none' (zero) pmdval if the low part of the pmd is zero.
- *
- * In some cases the high and low part of the pmdval returned may not be
- * consistent if THP is enabled (the low part may point to previously
- * mapped hugepage, while the high part may point to a more recently
- * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
- * needs the low part of the pmd to be read atomically to decide if the
- * pmd is unstable or not, with the only exception when the low part
- * of the pmd is zero, in which case we return a 'none' pmd.
- */
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
-       pmdval_t ret;
-       u32 *tmp = (u32 *)pmdp;
-
-       ret = (pmdval_t) (*tmp);
-       if (ret) {
-               /*
-                * If the low part is null, we must not read the high part
-                * or we can end up with a partial pmd.
-                */
-               smp_rmb();
-               ret |= ((pmdval_t)*(tmp + 1)) << 32;
-       }
-
-       return (pmd_t) { ret };
+       WRITE_ONCE(ptep->pte_low, pte.pte_low);
  }
  
  static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
  {
-       set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
+       pxx_xchg64(pte, ptep, native_pte_val(pte));
  }
  
  static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
  {
-       set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
+       pxx_xchg64(pmd, pmdp, native_pmd_val(pmd));
  }
  
  static inline void native_set_pud(pud_t *pudp, pud_t pud)
@@ -105,7 +55,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
  #ifdef CONFIG_PAGE_TABLE_ISOLATION
         pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
  #endif
-       set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
+       pxx_xchg64(pud, pudp, native_pud_val(pud));
  }
  
  /*
@@ -116,17 +66,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
  static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
                                     pte_t *ptep)
  {
-       ptep->pte_low = 0;
+       WRITE_ONCE(ptep->pte_low, 0);
         smp_wmb();
-       ptep->pte_high = 0;
+       WRITE_ONCE(ptep->pte_high, 0);
  }
  
-static inline void native_pmd_clear(pmd_t *pmd)
+static inline void native_pmd_clear(pmd_t *pmdp)
  {
-       u32 *tmp = (u32 *)pmd;
-       *tmp = 0;
+       WRITE_ONCE(pmdp->pmd_low, 0);
         smp_wmb();
-       *(tmp + 1) = 0;
+       WRITE_ONCE(pmdp->pmd_high, 0);
  }
  
  static inline void native_pud_clear(pud_t *pudp)
@@ -149,41 +98,26 @@ static inline void pud_clear(pud_t *pudp)
          */
  }
  
+
  #ifdef CONFIG_SMP
  static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
  {
-       pte_t res;
-
-       res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
-
-       return res;
+       return pxx_xchg64(pte, ptep, 0ULL);
  }
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
  
-union split_pmd {
-       struct {
-               u32 pmd_low;
-               u32 pmd_high;
-       };
-       pmd_t pmd;
-};
-
-#ifdef CONFIG_SMP
  static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
  {
-       union split_pmd res, *orig = (union split_pmd *)pmdp;
-
-       /* xchg acts as a barrier before setting of the high bits */
-       res.pmd_low = xchg(&orig->pmd_low, 0);
-       res.pmd_high = orig->pmd_high;
-       orig->pmd_high = 0;
+       return pxx_xchg64(pmd, pmdp, 0ULL);
+}
  
-       return res.pmd;
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+       return pxx_xchg64(pud, pudp, 0ULL);
  }
  #else
+#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
  #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
  #endif
  
  #ifndef pmdp_establish
@@ -199,53 +133,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
          * anybody.
          */
         if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
-               union split_pmd old, new, *ptr;
-
-               ptr = (union split_pmd *)pmdp;
-
-               new.pmd = pmd;
-
                 /* xchg acts as a barrier before setting of the high bits */
-               old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low);
-               old.pmd_high = ptr->pmd_high;
-               ptr->pmd_high = new.pmd_high;
-               return old.pmd;
-       }
-
-       do {
-               old = *pmdp;
-       } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
-
-       return old;
-}
-#endif
-
-#ifdef CONFIG_SMP
-union split_pud {
-       struct {
-               u32 pud_low;
-               u32 pud_high;
-       };
-       pud_t pud;
-};
-
-static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
-{
-       union split_pud res, *orig = (union split_pud *)pudp;
+               old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low);
+               old.pmd_high = READ_ONCE(pmdp->pmd_high);
+               WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high);
  
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-       pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0));
-#endif
-
-       /* xchg acts as a barrier before setting of the high bits */
-       res.pud_low = xchg(&orig->pud_low, 0);
-       res.pud_high = orig->pud_high;
-       orig->pud_high = 0;
+               return old;
+       }
  
-       return res.pud;
+       return pxx_xchg64(pmd, pmdp, pmd.pmd);
  }
-#else
-#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
  #endif
  
  /* Encode and de-code a swap entry */
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h

index 56baf43..8091134 100644 (file)
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -18,6 +18,13 @@ typedef union {
         };
         pteval_t pte;
  } pte_t;
+
+typedef union {
+       struct {
+               unsigned long pmd_low, pmd_high;
+       };
+       pmdval_t pmd;
+} pmd_t;
  #endif /* !__ASSEMBLY__ */
  
  #define SHARED_KERNEL_PMD      (!static_cpu_has(X86_FEATURE_PTI))
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h

index 04f3606..38bf837 100644 (file)
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -19,6 +19,7 @@ typedef unsigned long pgdval_t;
  typedef unsigned long  pgprotval_t;
  
  typedef struct { pteval_t pte; } pte_t;
+typedef struct { pmdval_t pmd; } pmd_t;
  
  #ifdef CONFIG_X86_5LEVEL
  extern unsigned int __pgtable_l5_enabled;
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h

index d34cce1..4f056fb 100644 (file)
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@
  
  #define CPU_ENTRY_AREA_RO_IDT_VADDR    ((void *)CPU_ENTRY_AREA_RO_IDT)
  
-#define CPU_ENTRY_AREA_MAP_SIZE                (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE                (CPU_ENTRY_AREA_PER_CPU +               \
+                                        (CPU_ENTRY_AREA_SIZE * NR_CPUS) -      \
+                                        CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE                P4D_SIZE
+#endif
  
  #endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h

index aa174fe..447d4be 100644 (file)
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -361,11 +361,9 @@ static inline pudval_t native_pud_val(pud_t pud)
  #endif
  
  #if CONFIG_PGTABLE_LEVELS > 2
-typedef struct { pmdval_t pmd; } pmd_t;
-
  static inline pmd_t native_make_pmd(pmdval_t val)
  {
-       return (pmd_t) { val };
+       return (pmd_t) { .pmd = val };
  }
  
  static inline pmdval_t native_pmd_val(pmd_t pmd)
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h

index 02c2cbd..a7f3d91 100644 (file)
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -35,7 +35,7 @@
   */
  #ifdef CONFIG_X86_64
  /* Mask off the address space ID and SME encryption bits. */
-#define CR3_ADDR_MASK  __sme_clr(0x7FFFFFFFFFFFF000ull)
+#define CR3_ADDR_MASK  __sme_clr(PHYSICAL_PAGE_MASK)
  #define CR3_PCID_MASK  0xFFFull
  #define CR3_NOFLUSH    BIT_ULL(63)
  
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h

index b45c4d2..a5e8964 100644 (file)
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -6,6 +6,9 @@
  #include <asm/page.h>
  #include <asm-generic/set_memory.h>
  
+#define set_memory_rox set_memory_rox
+int set_memory_rox(unsigned long addr, int numpages);
+
  /*
   * The set_memory_* API can be used to change various attributes of a virtual
   * address range. The attributes include:
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c

index 23cbfa8..7d8c3cb 100644 (file)
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -2142,11 +2142,6 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi
  {
         struct text_poke_loc *tp;
  
-       if (unlikely(system_state == SYSTEM_BOOTING)) {
-               text_poke_early(addr, opcode, len);
-               return;
-       }
-
         text_poke_flush(addr);
  
         tp = &tp_vec[tp_vec_nr++];
@@ -2168,11 +2163,6 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
  {
         struct text_poke_loc tp;
  
-       if (unlikely(system_state == SYSTEM_BOOTING)) {
-               text_poke_early(addr, opcode, len);
-               return;
-       }
-
         text_poke_loc_init(&tp, addr, opcode, len, emulate);
         text_poke_bp_batch(&tp, 1);
  }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c

index 19a0207..56a917d 100644 (file)
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
         }
  
         a = aper + iommu_size;
-       iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
+       iommu_size -= round_up(a, PMD_SIZE) - a;
  
         if (iommu_size < 64*1024*1024) {
                 pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c

index ae0a8e5..5e7ead5 100644 (file)
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,10 +24,10 @@
  #include <linux/module.h>
  #include <linux/memory.h>
  #include <linux/vmalloc.h>
+#include <linux/set_memory.h>
  
  #include <trace/syscall.h>
  
-#include <asm/set_memory.h>
  #include <asm/kprobes.h>
  #include <asm/ftrace.h>
  #include <asm/nops.h>
@@ -423,9 +423,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
         /* ALLOC_TRAMP flags lets us know we created it */
         ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
  
-       if (likely(system_state != SYSTEM_BOOTING))
-               set_memory_ro((unsigned long)trampoline, npages);
-       set_memory_x((unsigned long)trampoline, npages);
+       set_memory_rox((unsigned long)trampoline, npages);
         return (unsigned long)trampoline;
  fail:
         tramp_free(trampoline);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c

index 6a3cfaf..387e4b1 100644 (file)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
         load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
  
         /* Is the address not 2M aligned? */
-       if (load_delta & ~PMD_PAGE_MASK)
+       if (load_delta & ~PMD_MASK)
                 for (;;);
  
         /* Include the SME encryption mask in the fixup value */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c

index 668a4a6..bbb0f73 100644 (file)
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
  
         /* CPU entry erea is always used for CPU entry */
         if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-                       CPU_ENTRY_AREA_TOTAL_SIZE))
+                       CPU_ENTRY_AREA_MAP_SIZE))
                 return true;
  
         /*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c

index 01b8d95..6629968 100644 (file)
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -43,6 +43,7 @@
  #include <linux/objtool.h>
  #include <linux/vmalloc.h>
  #include <linux/pgtable.h>
+#include <linux/set_memory.h>
  
  #include <asm/text-patching.h>
  #include <asm/cacheflush.h>
@@ -51,7 +52,6 @@
  #include <asm/alternative.h>
  #include <asm/insn.h>
  #include <asm/debugreg.h>
-#include <asm/set_memory.h>
  #include <asm/ibt.h>
  
  #include "common.h"
@@ -414,17 +414,11 @@ void *alloc_insn_page(void)
         if (!page)
                 return NULL;
  
-       /*
-        * First make the page read-only, and only then make it executable to
-        * prevent it from being W+X in between.
-        */
-       set_memory_ro((unsigned long)page, 1);
-
         /*
          * TODO: Once additional kernel code protection mechanisms are set, ensure
          * that the page was not maliciously altered and it is still zeroed.
          */
-       set_memory_x((unsigned long)page, 1);
+       set_memory_rox((unsigned long)page, 1);
  
         return page;
  }
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c

index 42cd96e..8bb1aa6 100644 (file)
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -9,22 +9,60 @@
  #include <asm/cpu_entry_area.h>
  #include <asm/fixmap.h>
  #include <asm/desc.h>
+#include <asm/kasan.h>
  
  static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
  
  #ifdef CONFIG_X86_64
  static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
  DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif
  
-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+       return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+       unsigned int max_cea;
+       unsigned int i, j;
+
+       max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+       /* O(sodding terrible) */
+       for_each_possible_cpu(i) {
+               unsigned int cea;
+
+again:
+               cea = prandom_u32_max(max_cea);
+
+               for_each_possible_cpu(j) {
+                       if (cea_offset(j) == cea)
+                               goto again;
+
+                       if (i == j)
+                               break;
+               }
+
+               per_cpu(_cea_offset, i) = cea;
+       }
+}
+#else /* !X86_64 */
  DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+       return cpu;
+}
+static inline void init_cea_offsets(void) { }
  #endif
  
  /* Is called from entry code, so must be noinstr */
  noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
  {
-       unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+       unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
         BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
  
         return (struct cpu_entry_area *) va;
@@ -148,6 +186,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
         pgprot_t tss_prot = PAGE_KERNEL;
  #endif
  
+       kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
+                                       early_cpu_to_node(cpu));
+
         cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
  
         cea_map_percpu_pages(&cea->entry_stack_page,
@@ -201,7 +242,6 @@ static __init void setup_cpu_entry_area_ptes(void)
  
         /* The +1 is for the readonly IDT: */
         BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-       BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
         BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
  
         start = CPU_ENTRY_AREA_BASE;
@@ -217,6 +257,8 @@ void __init setup_cpu_entry_areas(void)
  {
         unsigned int cpu;
  
+       init_cea_offsets();
+
         setup_cpu_entry_area_ptes();
  
         for_each_possible_cpu(cpu)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c

index 9121bc1..d398735 100644 (file)
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -801,7 +801,7 @@ void __init poking_init(void)
         spinlock_t *ptl;
         pte_t *ptep;
  
-       poking_mm = copy_init_mm();
+       poking_mm = mm_alloc();
         BUG_ON(!poking_mm);
  
         /*
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c

index e7b9b46..0302491 100644 (file)
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -316,10 +316,33 @@ void __init kasan_early_init(void)
         kasan_map_early_shadow(init_top_pgt);
  }
  
+static unsigned long kasan_mem_to_shadow_align_down(unsigned long va)
+{
+       unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
+
+       return round_down(shadow, PAGE_SIZE);
+}
+
+static unsigned long kasan_mem_to_shadow_align_up(unsigned long va)
+{
+       unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
+
+       return round_up(shadow, PAGE_SIZE);
+}
+
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
+{
+       unsigned long shadow_start, shadow_end;
+
+       shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va);
+       shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size);
+       kasan_populate_shadow(shadow_start, shadow_end, nid);
+}
+
  void __init kasan_init(void)
  {
+       unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end;
         int i;
-       void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
  
         memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
  
@@ -360,16 +383,10 @@ void __init kasan_init(void)
                 map_range(&pfn_mapped[i]);
         }
  
-       shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
-       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
-       shadow_cpu_entry_begin = (void *)round_down(
-                       (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE);
-
-       shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
-                                       CPU_ENTRY_AREA_MAP_SIZE);
-       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
-       shadow_cpu_entry_end = (void *)round_up(
-                       (unsigned long)shadow_cpu_entry_end, PAGE_SIZE);
+       shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE);
+       shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU);
+       shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE +
+                                                     CPU_ENTRY_AREA_MAP_SIZE);
  
         kasan_populate_early_shadow(
                 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
@@ -391,12 +408,18 @@ void __init kasan_init(void)
  
         kasan_populate_early_shadow(
                 kasan_mem_to_shadow((void *)VMALLOC_END + 1),
-               shadow_cpu_entry_begin);
+               (void *)shadow_cea_begin);
  
-       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
-                             (unsigned long)shadow_cpu_entry_end, 0);
+       /*
+        * Populate the shadow for the shared portion of the CPU entry area.
+        * Shadows for the per-CPU areas are mapped on-demand, as each CPU's
+        * area is randomly placed somewhere in the 512GiB range and mapping
+        * the entire 512GiB range is prohibitively expensive.
+        */
+       kasan_populate_shadow(shadow_cea_begin,
+                             shadow_cea_per_cpu_begin, 0);
  
-       kasan_populate_early_shadow(shadow_cpu_entry_end,
+       kasan_populate_early_shadow((void *)shadow_cea_end,
                         kasan_mem_to_shadow((void *)__START_KERNEL_map));
  
         kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S

index 9de3d90..e25288e 100644 (file)
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute)
          *   RCX - virtual address of the encryption workarea, including:
          *     - stack page (PAGE_SIZE)
          *     - encryption routine page (PAGE_SIZE)
-        *     - intermediate copy buffer (PMD_PAGE_SIZE)
+        *     - intermediate copy buffer (PMD_SIZE)
          *    R8 - physical address of the pagetables to use for encryption
          */
  
@@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy)
         wbinvd                          /* Invalidate any cache entries */
  
         /* Copy/encrypt up to 2MB at a time */
-       movq    $PMD_PAGE_SIZE, %r12
+       movq    $PMD_SIZE, %r12
  1:
         cmpq    %r12, %r9
         jnb     2f
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c

index f415498..88cccd6 100644 (file)
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -93,7 +93,7 @@ struct sme_populate_pgd_data {
   * section is 2MB aligned to allow for simple pagetable setup using only
   * PMD entries (see vmlinux.lds.S).
   */
-static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch");
+static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
  
  static char sme_cmdline_arg[] __initdata = "mem_encrypt";
  static char sme_cmdline_on[]  __initdata = "on";
@@ -198,8 +198,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
         while (ppd->vaddr < ppd->vaddr_end) {
                 sme_populate_pgd_large(ppd);
  
-               ppd->vaddr += PMD_PAGE_SIZE;
-               ppd->paddr += PMD_PAGE_SIZE;
+               ppd->vaddr += PMD_SIZE;
+               ppd->paddr += PMD_SIZE;
         }
  }
  
@@ -225,11 +225,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
         vaddr_end = ppd->vaddr_end;
  
         /* If start is not 2MB aligned, create PTE entries */
-       ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+       ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE);
         __sme_map_range_pte(ppd);
  
         /* Create PMD entries */
-       ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+       ppd->vaddr_end = vaddr_end & PMD_MASK;
         __sme_map_range_pmd(ppd);
  
         /* If end is not 2MB aligned, create PTE entries */
@@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
  
         /* Physical addresses gives us the identity mapped virtual addresses */
         kernel_start = __pa_symbol(_text);
-       kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+       kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
         kernel_len = kernel_end - kernel_start;
  
         initrd_start = 0;
@@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
          *   executable encryption area size:
          *     stack page (PAGE_SIZE)
          *     encryption routine page (PAGE_SIZE)
-        *     intermediate copy buffer (PMD_PAGE_SIZE)
+        *     intermediate copy buffer (PMD_SIZE)
          *   pagetable structures for the encryption of the kernel
          *   pagetable structures for workarea (in case not currently mapped)
          */
         execute_start = workarea_start;
-       execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+       execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
         execute_len = execute_end - execute_start;
  
         /*
@@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
          * before it is mapped.
          */
         workarea_len = execute_len + pgtable_area_len;
-       workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
+       workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE);
  
         /*
          * Set the address to the start of where newly created pagetable
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c

index ef34ba2..356758b 100644 (file)
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -220,6 +220,23 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
  
  #ifdef CONFIG_X86_64
  
+/*
+ * The kernel image is mapped into two places in the virtual address space
+ * (addresses without KASLR, of course):
+ *
+ * 1. The kernel direct map (0xffff880000000000)
+ * 2. The "high kernel map" (0xffffffff81000000)
+ *
+ * We actually execute out of #2. If we get the address of a kernel symbol, it
+ * points to #2, but almost all physical-to-virtual translations point to #1.
+ *
+ * This is so that we can have both a directmap of all physical memory *and*
+ * take full advantage of the limited (s32) immediate addressing range (2G)
+ * of x86_64.
+ *
+ * See Documentation/x86/x86_64/mm.rst for more detail.
+ */
+
  static inline unsigned long highmap_start_pfn(void)
  {
         return __pa_symbol(_text) >> PAGE_SHIFT;
@@ -605,10 +622,6 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
  {
         unsigned long end;
  
-       /* Kernel text is rw at boot up */
-       if (system_state == SYSTEM_BOOTING)
-               return new;
-
         /*
          * 32-bit has some unfixable W+X issues, like EFI code
          * and writeable data being in the same page.  Disable
@@ -765,11 +778,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
         switch (level) {
         case PG_LEVEL_1G:
                 phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
-               offset = virt_addr & ~PUD_PAGE_MASK;
+               offset = virt_addr & ~PUD_MASK;
                 break;
         case PG_LEVEL_2M:
                 phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
-               offset = virt_addr & ~PMD_PAGE_MASK;
+               offset = virt_addr & ~PMD_MASK;
                 break;
         default:
                 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
@@ -1059,7 +1072,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
         case PG_LEVEL_1G:
                 ref_prot = pud_pgprot(*(pud_t *)kpte);
                 ref_pfn = pud_pfn(*(pud_t *)kpte);
-               pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+               pfninc = PMD_SIZE >> PAGE_SHIFT;
                 lpaddr = address & PUD_MASK;
                 lpinc = PMD_SIZE;
                 /*
@@ -1646,8 +1659,11 @@ repeat:
         return err;
  }
  
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary);
  
+/*
+ * Check the directmap and "high kernel map" 'aliases'.
+ */
  static int cpa_process_alias(struct cpa_data *cpa)
  {
         struct cpa_data alias_cpa;
@@ -1671,6 +1687,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
                 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
                 alias_cpa.curpage = 0;
  
+               /* Directmap always has NX set, do not modify. */
+               if (__supported_pte_mask & _PAGE_NX) {
+                       alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
+                       alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
+               }
+
                 cpa->force_flush_all = 1;
  
                 ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -1693,6 +1715,15 @@ static int cpa_process_alias(struct cpa_data *cpa)
                 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
                 alias_cpa.curpage = 0;
  
+               /*
+                * [_text, _brk_end) also covers data, do not modify NX except
+                * in cases where the highmap is the primary target.
+                */
+               if (__supported_pte_mask & _PAGE_NX) {
+                       alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
+                       alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
+               }
+
                 cpa->force_flush_all = 1;
                 /*
                  * The high mapping range is imprecise, so ignore the
@@ -1705,12 +1736,19 @@ static int cpa_process_alias(struct cpa_data *cpa)
         return 0;
  }
  
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary)
  {
         unsigned long numpages = cpa->numpages;
         unsigned long rempages = numpages;
         int ret = 0;
  
+       /*
+        * No changes, easy!
+        */
+       if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) &&
+           !cpa->force_split)
+               return ret;
+
         while (rempages) {
                 /*
                  * Store the remaining nr of pages for the large page
@@ -1723,13 +1761,13 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
  
                 if (!debug_pagealloc_enabled())
                         spin_lock(&cpa_lock);
-               ret = __change_page_attr(cpa, checkalias);
+               ret = __change_page_attr(cpa, primary);
                 if (!debug_pagealloc_enabled())
                         spin_unlock(&cpa_lock);
                 if (ret)
                         goto out;
  
-               if (checkalias) {
+               if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) {
                         ret = cpa_process_alias(cpa);
                         if (ret)
                                 goto out;
@@ -1757,7 +1795,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                     struct page **pages)
  {
         struct cpa_data cpa;
-       int ret, cache, checkalias;
+       int ret, cache;
  
         memset(&cpa, 0, sizeof(cpa));
  
@@ -1803,20 +1841,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
         cpa.numpages = numpages;
         cpa.mask_set = mask_set;
         cpa.mask_clr = mask_clr;
-       cpa.flags = 0;
+       cpa.flags = in_flag;
         cpa.curpage = 0;
         cpa.force_split = force_split;
  
-       if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
-               cpa.flags |= in_flag;
-
-       /* No alias checking for _NX bit modifications */
-       checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
-       /* Has caller explicitly disabled alias checking? */
-       if (in_flag & CPA_NO_CHECK_ALIAS)
-               checkalias = 0;
-
-       ret = __change_page_attr_set_clr(&cpa, checkalias);
+       ret = __change_page_attr_set_clr(&cpa, 1);
  
         /*
          * Check whether we really changed something:
@@ -2047,6 +2076,16 @@ int set_memory_ro(unsigned long addr, int numpages)
         return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
  }
  
+int set_memory_rox(unsigned long addr, int numpages)
+{
+       pgprot_t clr = __pgprot(_PAGE_RW);
+
+       if (__supported_pte_mask & _PAGE_NX)
+               clr.pgprot |= _PAGE_NX;
+
+       return change_page_attr_clear(&addr, numpages, clr, 0);
+}
+
  int set_memory_rw(unsigned long addr, int numpages)
  {
         return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
@@ -2059,11 +2098,9 @@ int set_memory_np(unsigned long addr, int numpages)
  
  int set_memory_np_noalias(unsigned long addr, int numpages)
  {
-       int cpa_flags = CPA_NO_CHECK_ALIAS;
-
         return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
                                         __pgprot(_PAGE_PRESENT), 0,
-                                       cpa_flags, NULL);
+                                       CPA_NO_CHECK_ALIAS, NULL);
  }
  
  int set_memory_4k(unsigned long addr, int numpages)
@@ -2280,7 +2317,7 @@ static int __set_pages_p(struct page *page, int numpages)
                                 .numpages = numpages,
                                 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
                                 .mask_clr = __pgprot(0),
-                               .flags = 0};
+                               .flags = CPA_NO_CHECK_ALIAS };
  
         /*
          * No alias checking needed for setting present flag. otherwise,
@@ -2288,7 +2325,7 @@ static int __set_pages_p(struct page *page, int numpages)
          * mappings (this adds to complexity if we want to do this from
          * atomic context especially). Let's keep it simple!
          */
-       return __change_page_attr_set_clr(&cpa, 0);
+       return __change_page_attr_set_clr(&cpa, 1);
  }
  
  static int __set_pages_np(struct page *page, int numpages)
@@ -2299,7 +2336,7 @@ static int __set_pages_np(struct page *page, int numpages)
                                 .numpages = numpages,
                                 .mask_set = __pgprot(0),
                                 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-                               .flags = 0};
+                               .flags = CPA_NO_CHECK_ALIAS };
  
         /*
          * No alias checking needed for setting not present flag. otherwise,
@@ -2307,7 +2344,7 @@ static int __set_pages_np(struct page *page, int numpages)
          * mappings (this adds to complexity if we want to do this from
          * atomic context especially). Let's keep it simple!
          */
-       return __change_page_attr_set_clr(&cpa, 0);
+       return __change_page_attr_set_clr(&cpa, 1);
  }
  
  int set_direct_map_invalid_noflush(struct page *page)
@@ -2378,7 +2415,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
                 .numpages = numpages,
                 .mask_set = __pgprot(0),
                 .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
-               .flags = 0,
+               .flags = CPA_NO_CHECK_ALIAS,
         };
  
         WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
@@ -2391,7 +2428,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
  
         cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
  
-       retval = __change_page_attr_set_clr(&cpa, 0);
+       retval = __change_page_attr_set_clr(&cpa, 1);
         __flush_tlb_all();
  
  out:
@@ -2421,12 +2458,12 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
                 .numpages       = numpages,
                 .mask_set       = __pgprot(0),
                 .mask_clr       = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-               .flags          = 0,
+               .flags          = CPA_NO_CHECK_ALIAS,
         };
  
         WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
  
-       retval = __change_page_attr_set_clr(&cpa, 0);
+       retval = __change_page_attr_set_clr(&cpa, 1);
         __flush_tlb_all();
  
         return retval;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c

index ffe3b3a..78414c6 100644 (file)
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void)
          * of the image.
          */
         unsigned long start = PFN_ALIGN(_text);
-       unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+       unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE);
  
         /*
          * This clears _PAGE_GLOBAL from the entire kernel image.
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c

index a723f53..f58f5f5 100644 (file)
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -174,7 +174,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
         index = irq_iommu->irte_index + irq_iommu->sub_handle;
         irte = &iommu->ir_table->base[index];
  
-#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
         if ((irte->pst == 1) || (irte_modified->pst == 1)) {
                 bool ret;
  
@@ -188,11 +187,9 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
                  * same as the old value.
                  */
                 WARN_ON(!ret);
-       } else
-#endif
-       {
-               set_64bit(&irte->low, irte_modified->low);
-               set_64bit(&irte->high, irte_modified->high);
+       } else {
+               WRITE_ONCE(irte->low, irte_modified->low);
+               WRITE_ONCE(irte->high, irte_modified->high);
         }
         __iommu_flush_cache(iommu, irte, sizeof(*irte));
  
@@ -250,8 +247,8 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
         end = start + (1 << irq_iommu->irte_mask);
  
         for (entry = start; entry < end; entry++) {
-               set_64bit(&entry->low, 0);
-               set_64bit(&entry->high, 0);
+               WRITE_ONCE(entry->low, 0);
+               WRITE_ONCE(entry->high, 0);
         }
         bitmap_release_region(iommu->ir_table->bitmap, index,
                               irq_iommu->irte_mask);
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c

index a948e95..b71dbbd 100644 (file)
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -10,9 +10,9 @@
  #include <linux/genalloc.h>
  #include <linux/mm.h>
  #include <linux/sram.h>
+#include <linux/set_memory.h>
  
  #include <asm/fncpy.h>
-#include <asm/set_memory.h>
  
  #include "sram.h"
  
@@ -106,10 +106,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
  
         dst_cpy = fncpy(dst, src, size);
  
-       ret = set_memory_ro((unsigned long)base, pages);
-       if (ret)
-               goto error_out;
-       ret = set_memory_x((unsigned long)base, pages);
+       ret = set_memory_rox((unsigned long)base, pages);
         if (ret)
                 goto error_out;
  
diff --git a/include/linux/filter.h b/include/linux/filter.h

index bf70197..ccc4a4a 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -860,8 +860,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
  static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
  {
         set_vm_flush_reset_perms(hdr);
-       set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
-       set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
+       set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
  }
  
  int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h

index dfabd54..1159b25 100644 (file)
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -309,24 +309,28 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
         ptep_get_and_clear(mm, addr, ptep);
  }
  
-#ifndef __HAVE_ARCH_PTEP_GET
+#ifndef ptep_get
  static inline pte_t ptep_get(pte_t *ptep)
  {
         return READ_ONCE(*ptep);
  }
  #endif
  
-#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
+#ifndef pmdp_get
+static inline pmd_t pmdp_get(pmd_t *pmdp)
+{
+       return READ_ONCE(*pmdp);
+}
+#endif
+
+#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
  /*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking any
- * locks.  For this we would like to load the pointers atomically, but sometimes
- * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE).  What
- * we do have is the guarantee that a PTE will only either go from not present
- * to present, or present to not present or both -- it will not switch to a
- * completely different present page without a TLB flush in between; something
- * that we are blocking by holding interrupts off.
+ * For walking the pagetables without holding any locks.  Some architectures
+ * (e.g. x86-32 PAE) cannot load the entries atomically without using expensive
+ * instructions.  We are guaranteed that a PTE will only either go from not
+ * present to present, or present to not present -- it will not switch to a
+ * completely different present page without a TLB flush in between, which we
+ * are blocking by holding interrupts off.
   *
   * Setting ptes from not present to present goes:
   *
@@ -361,15 +365,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep)
  
         return pte;
  }
-#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#define ptep_get_lockless ptep_get_lockless
+
+#if CONFIG_PGTABLE_LEVELS > 2
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+       pmd_t pmd;
+
+       do {
+               pmd.pmd_low = pmdp->pmd_low;
+               smp_rmb();
+               pmd.pmd_high = pmdp->pmd_high;
+               smp_rmb();
+       } while (unlikely(pmd.pmd_low != pmdp->pmd_low));
+
+       return pmd;
+}
+#define pmdp_get_lockless pmdp_get_lockless
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
+
  /*
   * We require that the PTE can be read atomically.
   */
+#ifndef ptep_get_lockless
  static inline pte_t ptep_get_lockless(pte_t *ptep)
  {
         return ptep_get(ptep);
  }
-#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#endif
+
+#ifndef pmdp_get_lockless
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+       return pmdp_get(pmdp);
+}
+#endif
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -1313,18 +1344,6 @@ static inline int pud_trans_unstable(pud_t *pud)
  #endif
  }
  
-#ifndef pmd_read_atomic
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
-       /*
-        * Depend on compiler for an atomic pmd read. NOTE: this is
-        * only going to work, if the pmdval_t isn't larger than
-        * an unsigned long.
-        */
-       return *pmdp;
-}
-#endif
-
  #ifndef arch_needs_pgtable_deposit
  #define arch_needs_pgtable_deposit() (false)
  #endif
@@ -1351,13 +1370,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
   */
  static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
  {
-       pmd_t pmdval = pmd_read_atomic(pmd);
+       pmd_t pmdval = pmdp_get_lockless(pmd);
         /*
          * The barrier will stabilize the pmdval in a register or on
          * the stack so that it will stop changing under the code.
          *
          * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
-        * pmd_read_atomic is allowed to return a not atomic pmdval
+        * pmdp_get_lockless is allowed to return a non-atomic pmdval
          * (for example pointing to an hugepage that has never been
          * mapped in the pmd). The below checks will only care about
          * the low part of the pmd with 32bit PAE x86 anyway, with the
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h

index d6c4816..357e006 100644 (file)
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -65,6 +65,7 @@ extern void sched_dead(struct task_struct *p);
  void __noreturn do_task_dead(void);
  void __noreturn make_task_dead(int signr);
  
+extern void mm_cache_init(void);
  extern void proc_caches_init(void);
  
  extern void fork_init(void);
@@ -90,7 +91,6 @@ extern void exit_itimers(struct task_struct *);
  extern pid_t kernel_clone(struct kernel_clone_args *kargs);
  struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
  struct task_struct *fork_idle(int);
-struct mm_struct *copy_init_mm(void);
  extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
  extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
  extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h

index 369769c..95ac839 100644 (file)
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -14,6 +14,16 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
  #endif
  
+#ifndef set_memory_rox
+static inline int set_memory_rox(unsigned long addr, int numpages)
+{
+       int ret = set_memory_ro(addr, numpages);
+       if (ret)
+               return ret;
+       return set_memory_x(addr, numpages);
+}
+#endif
+
  #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP
  static inline int set_direct_map_invalid_noflush(struct page *page)
  {
diff --git a/init/main.c b/init/main.c

index d213371..e1c3911 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -863,6 +863,7 @@ static void __init mm_init(void)
         /* Should be run after espfix64 is set up. */
         pti_init();
         kmsan_init_runtime();
+       mm_cache_init();
  }
  
  #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
@@ -998,7 +999,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
         sort_main_extable();
         trap_init();
         mm_init();
-
+       poking_init();
         ftrace_init();
  
         /* trace_printk can be enabled here */
@@ -1137,7 +1138,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
         taskstats_init_early();
         delayacct_init();
  
-       poking_init();
         check_bugs();
  
         acpi_subsystem_init();
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c

index 84b2d9d..ece9870 100644 (file)
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -494,8 +494,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
         refcount_set(&kvalue->refcnt, 1);
         bpf_map_inc(map);
  
-       set_memory_ro((long)st_map->image, 1);
-       set_memory_x((long)st_map->image, 1);
+       set_memory_rox((long)st_map->image, 1);
         err = st_ops->reg(kdata);
         if (likely(!err)) {
                 /* Pair with smp_load_acquire() during lookup_elem().
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index 7f98dec..6cca66b 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -868,8 +868,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
         list_add_tail(&pack->list, &pack_list);
  
         set_vm_flush_reset_perms(pack->ptr);
-       set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
-       set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+       set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
         return pack;
  }
  
@@ -887,8 +886,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
                 if (ptr) {
                         bpf_fill_ill_insns(ptr, size);
                         set_vm_flush_reset_perms(ptr);
-                       set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
-                       set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
+                       set_memory_rox((unsigned long)ptr, size / PAGE_SIZE);
                 }
                 goto out;
         }
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c

index d639521..11f5ec0 100644 (file)
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -468,8 +468,7 @@ again:
         if (err < 0)
                 goto out;
  
-       set_memory_ro((long)im->image, 1);
-       set_memory_x((long)im->image, 1);
+       set_memory_rox((long)im->image, 1);
  
         WARN_ON(tr->cur_image && tr->selector == 0);
         WARN_ON(!tr->cur_image && tr->selector);
diff --git a/kernel/events/core.c b/kernel/events/core.c

index e47914a..eacc370 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7493,7 +7493,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
                 return pud_leaf_size(pud);
  
         pmdp = pmd_offset_lockless(pudp, pud, addr);
-       pmd = READ_ONCE(*pmdp);
+       pmd = pmdp_get_lockless(pmdp);
         if (!pmd_present(pmd))
                 return 0;
  
diff --git a/kernel/fork.c b/kernel/fork.c

index 7a08025..9f7fe35 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2607,11 +2607,6 @@ struct task_struct * __init fork_idle(int cpu)
         return task;
  }
  
-struct mm_struct *copy_init_mm(void)
-{
-       return dup_mm(NULL, &init_mm);
-}
-
  /*
   * This is like kernel_clone(), but shaved down and tailored to just
   * creating io_uring workers. It returns a created task, or an error pointer.
@@ -3030,10 +3025,27 @@ static void sighand_ctor(void *data)
         init_waitqueue_head(&sighand->signalfd_wqh);
  }
  
-void __init proc_caches_init(void)
+void __init mm_cache_init(void)
  {
         unsigned int mm_size;
  
+       /*
+        * The mm_cpumask is located at the end of mm_struct, and is
+        * dynamically sized based on the maximum CPU number this system
+        * can have, taking hotplug into account (nr_cpu_ids).
+        */
+       mm_size = sizeof(struct mm_struct) + cpumask_size();
+
+       mm_cachep = kmem_cache_create_usercopy("mm_struct",
+                       mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+                       offsetof(struct mm_struct, saved_auxv),
+                       sizeof_field(struct mm_struct, saved_auxv),
+                       NULL);
+}
+
+void __init proc_caches_init(void)
+{
         sighand_cachep = kmem_cache_create("sighand_cache",
                         sizeof(struct sighand_struct), 0,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3051,19 +3063,6 @@ void __init proc_caches_init(void)
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         NULL);
  
-       /*
-        * The mm_cpumask is located at the end of mm_struct, and is
-        * dynamically sized based on the maximum CPU number this system
-        * can have, taking hotplug into account (nr_cpu_ids).
-        */
-       mm_size = sizeof(struct mm_struct) + cpumask_size();
-
-       mm_cachep = kmem_cache_create_usercopy("mm_struct",
-                       mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
-                       offsetof(struct mm_struct, saved_auxv),
-                       sizeof_field(struct mm_struct, saved_auxv),
-                       NULL);
         vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
         mmap_init();
         nsproxy_cache_init();
diff --git a/mm/Kconfig b/mm/Kconfig

index 3425708..ff7b209 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1078,7 +1078,7 @@ config GUP_TEST
  comment "GUP_TEST needs to have DEBUG_FS enabled"
         depends on !GUP_TEST && !DEBUG_FS
  
-config GUP_GET_PTE_LOW_HIGH
+config GUP_GET_PXX_LOW_HIGH
         bool
  
  config ARCH_HAS_PTE_SPECIAL
diff --git a/mm/gup.c b/mm/gup.c

index 2b45d78..f45a3a5 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2721,7 +2721,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
  
         pmdp = pmd_offset_lockless(pudp, pud, addr);
         do {
-               pmd_t pmd = READ_ONCE(*pmdp);
+               pmd_t pmd = pmdp_get_lockless(pmdp);
  
                 next = pmd_addr_end(addr, end);
                 if (!pmd_present(pmd))
diff --git a/mm/hmm.c b/mm/hmm.c

index 3850fb6..601a99c 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -361,8 +361,7 @@ again:
                  * huge or device mapping one and compute corresponding pfn
                  * values.
                  */
-               pmd = pmd_read_atomic(pmdp);
-               barrier();
+               pmd = pmdp_get_lockless(pmdp);
                 if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
                         goto again;
  
diff --git a/mm/khugepaged.c b/mm/khugepaged.c

index 5a7d2d5..5cb401a 100644 (file)
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -857,7 +857,7 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
         if (!*pmd)
                 return SCAN_PMD_NULL;
  
-       pmde = pmd_read_atomic(*pmd);
+       pmde = pmdp_get_lockless(*pmd);
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
         /* See comments in pmd_none_or_trans_huge_or_clear_bad() */
diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c

index 1b0ab8f..175e424 100644 (file)
--- a/mm/mapping_dirty_helpers.c
+++ b/mm/mapping_dirty_helpers.c
@@ -126,7 +126,7 @@ static int clean_record_pte(pte_t *pte, unsigned long addr,
  static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
                               struct mm_walk *walk)
  {
-       pmd_t pmdval = pmd_read_atomic(pmd);
+       pmd_t pmdval = pmdp_get_lockless(pmd);
  
         if (!pmd_trans_unstable(&pmdval))
                 return 0;
diff --git a/mm/mprotect.c b/mm/mprotect.c

index 093cb50..908df12 100644 (file)
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -297,7 +297,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
   */
  static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
  {
-       pmd_t pmdval = pmd_read_atomic(pmd);
+       pmd_t pmdval = pmdp_get_lockless(pmd);
  
         /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c

index b7a9479..0499907 100644 (file)
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -632,7 +632,7 @@ retry:
                         break;
                 }
  
-               dst_pmdval = pmd_read_atomic(dst_pmd);
+               dst_pmdval = pmdp_get_lockless(dst_pmd);
                 /*
                  * If the dst_pmd is mapped as THP don't
                  * override it and just be strict.
diff --git a/mm/vmscan.c b/mm/vmscan.c

index aba991c..bd6637f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4084,10 +4084,7 @@ restart:
         /* walk_pte_range() may call get_next_vma() */
         vma = args->vma;
         for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) {
-               pmd_t val = pmd_read_atomic(pmd + i);
-
-               /* for pmd_read_atomic() */
-               barrier();
+               pmd_t val = pmdp_get_lockless(pmd + i);
  
                 next = pmd_addr_end(addr, end);
  
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c

index 2d434c1..1ac4467 100644 (file)
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -124,8 +124,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
         if (err < 0)
                 goto out;
  
-       set_memory_ro((long)image, 1);
-       set_memory_x((long)image, 1);
+       set_memory_rox((long)image, 1);
         prog_ret = dummy_ops_call_op(image, args);
  
         err = dummy_ops_copy_args(args);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 17 Dec 2022 20:06:53 +0000 (14:06 -0600)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 17 Dec 2022 20:06:53 +0000 (14:06 -0600)
arch/arm/mach-omap1/sram-init.c		patch \| blob \| history
arch/arm/mach-omap2/sram.c		patch \| blob \| history
arch/mips/Kconfig		patch \| blob \| history
arch/powerpc/include/asm/nohash/32/pgtable.h		patch \| blob \| history
arch/powerpc/kernel/kprobes.c		patch \| blob \| history
arch/sh/Kconfig		patch \| blob \| history
arch/sh/include/asm/pgtable-3level.h		patch \| blob \| history
arch/um/include/asm/pgtable-3level.h		patch \| blob \| history
arch/x86/Kconfig		patch \| blob \| history
arch/x86/include/asm/cmpxchg_32.h		patch \| blob \| history
arch/x86/include/asm/cmpxchg_64.h		patch \| blob \| history
arch/x86/include/asm/cpu_entry_area.h		patch \| blob \| history
arch/x86/include/asm/kasan.h		patch \| blob \| history
arch/x86/include/asm/page_types.h		patch \| blob \| history
arch/x86/include/asm/pgtable-3level.h		patch \| blob \| history
arch/x86/include/asm/pgtable-3level_types.h		patch \| blob \| history
arch/x86/include/asm/pgtable_64_types.h		patch \| blob \| history
arch/x86/include/asm/pgtable_areas.h		patch \| blob \| history
arch/x86/include/asm/pgtable_types.h		patch \| blob \| history
arch/x86/include/asm/processor-flags.h		patch \| blob \| history
arch/x86/include/asm/set_memory.h		patch \| blob \| history
arch/x86/kernel/alternative.c		patch \| blob \| history
arch/x86/kernel/amd_gart_64.c		patch \| blob \| history
arch/x86/kernel/ftrace.c		patch \| blob \| history
arch/x86/kernel/head64.c		patch \| blob \| history
arch/x86/kernel/hw_breakpoint.c		patch \| blob \| history
arch/x86/kernel/kprobes/core.c		patch \| blob \| history
arch/x86/mm/cpu_entry_area.c		patch \| blob \| history
arch/x86/mm/init.c		patch \| blob \| history
arch/x86/mm/kasan_init_64.c		patch \| blob \| history
arch/x86/mm/mem_encrypt_boot.S		patch \| blob \| history
arch/x86/mm/mem_encrypt_identity.c		patch \| blob \| history
arch/x86/mm/pat/set_memory.c		patch \| blob \| history
arch/x86/mm/pti.c		patch \| blob \| history
drivers/iommu/intel/irq_remapping.c		patch \| blob \| history
drivers/misc/sram-exec.c		patch \| blob \| history
include/linux/filter.h		patch \| blob \| history
include/linux/pgtable.h		patch \| blob \| history
include/linux/sched/task.h		patch \| blob \| history
include/linux/set_memory.h		patch \| blob \| history
init/main.c		patch \| blob \| history
kernel/bpf/bpf_struct_ops.c		patch \| blob \| history
kernel/bpf/core.c		patch \| blob \| history
kernel/bpf/trampoline.c		patch \| blob \| history
kernel/events/core.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
mm/Kconfig		patch \| blob \| history
mm/gup.c		patch \| blob \| history
mm/hmm.c		patch \| blob \| history
mm/khugepaged.c		patch \| blob \| history
mm/mapping_dirty_helpers.c		patch \| blob \| history
mm/mprotect.c		patch \| blob \| history
mm/userfaultfd.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history
net/bpf/bpf_dummy_struct_ops.c		patch \| blob \| history