powerpc/64s: Fix entry flush patching w/strict RWX & hash
author:    Michael Ellerman <mpe@ellerman.id.au>
           Thu, 13 May 2021 14:07:59 +0000 (00:07 +1000)
committer: Michael Ellerman <mpe@ellerman.id.au>
           Fri, 14 May 2021 07:27:36 +0000 (17:27 +1000)
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.

With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
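
Roughly, with strict RWX the patching path does something like the
following. This is only a sketch: map_patch_writable() and unmap_patch()
are hypothetical stand-ins for the real powerpc helpers, but the shape is
the same -- the kernel text stays read-only and the store goes through a
temporary writable alias, which on hash can fault.

  /* Sketch only; the helper names here are illustrative, not the kernel's. */
  static int patch_one_insn(u32 *exec_addr, u32 insn)
  {
          /* Map the page at a scratch VA, writable (hypothetical helper). */
          u32 *writable = map_patch_writable(exec_addr);

          /*
           * With the hash MMU the scratch VA may not be in the hash table
           * yet, so this store can take a hash fault -- and the exception
           * entry it runs includes the entry flush sequence we are in the
           * middle of patching.
           */
          *writable = insn;

          unmap_patch(writable);                  /* hypothetical helper */
          flush_icache_range((unsigned long)exec_addr,
                             (unsigned long)exec_addr + sizeof(insn));
          return 0;
  }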

This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.

On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.

However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions: the branch still clobbers LR, and the mtlr
then reloads LR from a register the mflr never wrote, corrupting LR and
leading to an oops, for example:

  # echo 0 > /sys/kernel/debug/powerpc/entry_flush
  kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
  BUG: Unable to handle kernel instruction fetch
  Faulting instruction address: 0xc000000002971000
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
  NIP:  c000000002971000 LR: c000000002971000 CTR: c000000000120c40
  REGS: c000000013243840 TRAP: 0400   Not tainted  (5.13.0-rc1-00010-gda3bb206c9ce)
  MSR:  8000000010009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 48428482  XER: 00000000
  ...
  NIP  0xc000000002971000
  LR   0xc000000002971000
  Call Trace:
    do_patch_instruction+0xc4/0x340 (unreliable)
    do_entry_flush_fixups+0x100/0x3b0
    entry_flush_set+0x50/0xe0
    simple_attr_write+0x160/0x1a0
    full_proxy_write+0x8c/0x110
    vfs_write+0xf0/0x340
    ksys_write+0x84/0x140
    system_call_exception+0x164/0x2d0
    system_call_common+0xec/0x278

The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
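
Concretely, for the fallback case the order becomes the following (a
condensed sketch of the hunks below, using the dest/instrs[] names and
patch_* calls already present in __do_entry_flush_fixups()):

  /* Patching IN the fallback: write the branch last, so we can never
   * execute a bl whose surrounding mflr/mtlr are not yet in place. */
  patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));        /* mflr */
  patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));  /* mtlr */
  patch_branch((struct ppc_inst *)(dest + 1),
               (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);

  /* Patching OUT the fallback: remove the branch first, the mtlr next,
   * and only then the mflr. */
  patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
  patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
  patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));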

Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 0aefa6a..5d12e37 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -325,6 +325,31 @@ static int __do_entry_flush_fixups(void *data)
        if (types & L1D_FLUSH_MTTRIG)
                instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
 
+       /*
+        * If we're patching in or out the fallback flush we need to be careful about the
+        * order in which we patch instructions. That's because it's possible we could
+        * take a page fault after patching one instruction, so the sequence of
+        * instructions must be safe even in a half patched state.
+        *
+        * To make that work, when patching in the fallback flush we patch in this order:
+        *  - the mflr          (dest)
+        *  - the mtlr          (dest + 2)
+        *  - the branch        (dest + 1)
+        *
+        * That ensures the sequence is safe to execute at any point. In contrast if we
+        * patch the mtlr last, it's possible we could return from the branch and not
+        * restore LR, leading to a crash later.
+        *
+        * When patching out the fallback flush (either with nops or another flush type),
+        * we patch in this order:
+        *  - the branch        (dest + 1)
+        *  - the mtlr          (dest + 2)
+        *  - the mflr          (dest)
+        *
+        * Note we are protected by stop_machine() from other CPUs executing the code in a
+        * semi-patched state.
+        */
+
        start = PTRRELOC(&__start___entry_flush_fixup);
        end = PTRRELOC(&__stop___entry_flush_fixup);
        for (i = 0; start < end; start++, i++) {
@@ -332,15 +357,16 @@ static int __do_entry_flush_fixups(void *data)
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-               if (types == L1D_FLUSH_FALLBACK)
-                       patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
-                                    BRANCH_SET_LINK);
-               else
+               if (types == L1D_FLUSH_FALLBACK) {
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_branch((struct ppc_inst *)(dest + 1),
+                                    (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+               } else {
                        patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+               }
        }
 
        start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -350,15 +376,16 @@ static int __do_entry_flush_fixups(void *data)
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-               if (types == L1D_FLUSH_FALLBACK)
-                       patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
-                                    BRANCH_SET_LINK);
-               else
+               if (types == L1D_FLUSH_FALLBACK) {
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_branch((struct ppc_inst *)(dest + 1),
+                                    (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+               } else {
                        patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+               }
        }
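
With the reordering in place, flipping the debugfs switch from the
reproducer above (which is what reaches do_entry_flush_fixups() via
entry_flush_set() in the call trace) should no longer oops, even with
strict RWX and the hash MMU:

  # echo 0 > /sys/kernel/debug/powerpc/entry_flush
  # echo 1 > /sys/kernel/debug/powerpc/entry_flush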