Merge branch 'x86/pti' into x86/mm, to pick up dependencies
author Ingo Molnar <mingo@kernel.org>
Mon, 12 Mar 2018 11:10:03 +0000 (12:10 +0100)
committer Ingo Molnar <mingo@kernel.org>
Mon, 12 Mar 2018 11:10:03 +0000 (12:10 +0100)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
12 files changed:
Makefile
arch/x86/Kconfig
arch/x86/entry/entry_64.S
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_64.h
arch/x86/kernel/head_64.S
arch/x86/kernel/setup.c
arch/x86/mm/fault.c
include/linux/compiler-gcc.h

diff --combined Makefile
+++ b/Makefile
@@@ -2,7 -2,7 +2,7 @@@
  VERSION = 4
  PATCHLEVEL = 16
  SUBLEVEL = 0
 -EXTRAVERSION = -rc1
 +EXTRAVERSION = -rc3
  NAME = Fearless Coyote
  
  # *DOCUMENTATION*
@@@ -489,6 -489,11 +489,11 @@@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLAN
  KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
  endif
  
+ RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+ RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+ export RETPOLINE_CFLAGS
  ifeq ($(config-targets),1)
  # ===========================================================================
  # *config targets only - make sure prerequisites are updated, and descend
diff --combined arch/x86/Kconfig
@@@ -423,6 -423,12 +423,6 @@@ config X86_MPPARSE
          For old smp systems that do not have proper acpi support. Newer systems
          (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
  
 -config X86_BIGSMP
 -      bool "Support for big SMP systems with more than 8 CPUs"
 -      depends on X86_32 && SMP
 -      ---help---
 -        This option is needed for the systems that have more than 8 CPUs
 -
  config GOLDFISH
         def_bool y
         depends on X86_GOLDFISH
  config RETPOLINE
        bool "Avoid speculative indirect branches in kernel"
        default y
+       select STACK_VALIDATION if HAVE_STACK_VALIDATION
        help
          Compile kernel with the retpoline compiler options to guard against
          kernel-to-user data leaks by avoiding speculative indirect
@@@ -454,12 -461,6 +455,12 @@@ config INTEL_RDT
          Say N if unsure.
  
  if X86_32
 +config X86_BIGSMP
 +      bool "Support for big SMP systems with more than 8 CPUs"
 +      depends on SMP
 +      ---help---
 +        This option is needed for the systems that have more than 8 CPUs
 +
  config X86_EXTENDED_PLATFORM
        bool "Support for extended (non-PC) x86 platforms"
        default y
@@@ -949,66 -950,25 +950,66 @@@ config MAXSMP
          Enable maximum number of CPUS and NUMA Nodes for this architecture.
          If unsure, say N.
  
 +#
 +# The maximum number of CPUs supported:
 +#
 +# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
 +# and which can be configured interactively in the
 +# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
 +#
 +# The ranges are different on 32-bit and 64-bit kernels, depending on
 +# hardware capabilities and scalability features of the kernel.
 +#
 +# ( If MAXSMP is enabled we just use the highest possible value and disable
 +#   interactive configuration. )
 +#
 +
 +config NR_CPUS_RANGE_BEGIN
 +      int
 +      default NR_CPUS_RANGE_END if MAXSMP
 +      default    1 if !SMP
 +      default    2
 +
 +config NR_CPUS_RANGE_END
 +      int
 +      depends on X86_32
 +      default   64 if  SMP &&  X86_BIGSMP
 +      default    8 if  SMP && !X86_BIGSMP
 +      default    1 if !SMP
 +
 +config NR_CPUS_RANGE_END
 +      int
 +      depends on X86_64
 +      default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
 +      default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
 +      default    1 if !SMP
 +
 +config NR_CPUS_DEFAULT
 +      int
 +      depends on X86_32
 +      default   32 if  X86_BIGSMP
 +      default    8 if  SMP
 +      default    1 if !SMP
 +
 +config NR_CPUS_DEFAULT
 +      int
 +      depends on X86_64
 +      default 8192 if  MAXSMP
 +      default   64 if  SMP
 +      default    1 if !SMP
 +
  config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
 -      range 2 8 if SMP && X86_32 && !X86_BIGSMP
 -      range 2 64 if SMP && X86_32 && X86_BIGSMP
 -      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
 -      range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
 -      default "1" if !SMP
 -      default "8192" if MAXSMP
 -      default "32" if SMP && X86_BIGSMP
 -      default "8" if SMP && X86_32
 -      default "64" if SMP
 +      range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
 +      default NR_CPUS_DEFAULT
        ---help---
          This allows you to specify the maximum number of CPUs which this
          kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
          supported value is 8192, otherwise the maximum value is 512.  The
          minimum value which makes sense is 2.
  
 -        This is purely to save memory - each supported CPU adds
 -        approximately eight kilobytes to the kernel image.
 +        This is purely to save memory: each supported CPU adds about 8KB
 +        to the kernel image.
  
  config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
@@@ -1404,7 -1364,7 +1405,7 @@@ config HIGHMEM4G
  
  config HIGHMEM64G
        bool "64GB"
 -      depends on !M486
 +      depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
        select X86_PAE
        ---help---
          Select this if you have a 32-bit processor and more than 4
@@@ -1471,8 -1431,6 +1472,8 @@@ config X86_PAE
  
  config X86_5LEVEL
        bool "Enable 5-level page tables support"
 +      select DYNAMIC_MEMORY_LAYOUT
 +      select SPARSEMEM_VMEMMAP
        depends on X86_64
        ---help---
          5-level paging enables access to larger address space:
  
          It will be supported by future Intel CPUs.
  
 -        Note: a kernel with this option enabled can only be booted
 -        on machines that support the feature.
 +        A kernel with the option enabled can be booted on machines that
 +        support 4- or 5-level paging.
  
          See Documentation/x86/x86_64/5level-paging.txt for more
          information.
@@@ -2186,17 -2144,10 +2187,17 @@@ config PHYSICAL_ALIGN
  
          Don't change this unless you know what you are doing.
  
 +config DYNAMIC_MEMORY_LAYOUT
 +      bool
 +      ---help---
 +        This option makes base addresses of vmalloc and vmemmap as well as
 +        __PAGE_OFFSET movable during boot.
 +
  config RANDOMIZE_MEMORY
        bool "Randomize the kernel memory sections"
        depends on X86_64
        depends on RANDOMIZE_BASE
 +      select DYNAMIC_MEMORY_LAYOUT
        default RANDOMIZE_BASE
        ---help---
           Randomizes the base virtual address of kernel memory sections
@@@ -2315,7 -2266,7 +2316,7 @@@ choice
          it can be used to assist security vulnerability exploitation.
  
          This setting can be changed at boot time via the kernel command
-         line parameter vsyscall=[native|emulate|none].
+         line parameter vsyscall=[emulate|none].
  
          On a system with recent enough glibc (2.14 or newer) and no
          static binaries, you can say None without a performance penalty
  
          If unsure, select "Emulate".
  
-       config LEGACY_VSYSCALL_NATIVE
-               bool "Native"
-               help
-                 Actual executable code is located in the fixed vsyscall
-                 address mapping, implementing time() efficiently. Since
-                 this makes the mapping executable, it can be used during
-                 security vulnerability exploitation (traditionally as
-                 ROP gadgets). This configuration is not recommended.
        config LEGACY_VSYSCALL_EMULATE
                bool "Emulate"
                help
diff --combined arch/x86/entry/entry_64.S
@@@ -260,13 -260,8 +260,13 @@@ GLOBAL(entry_SYSCALL_64_after_hwframe)
         * Change top bits to match most significant bit (47th or 56th bit
         * depending on paging mode) in the address.
         */
 +#ifdef CONFIG_X86_5LEVEL
 +      ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
 +              "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
 +#else
        shl     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
        sar     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
 +#endif
  
        /* If this changed %rcx, it was not canonical */
        cmpq    %rcx, %r11
@@@ -369,8 -364,7 +369,7 @@@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       /* Clobbers %rbx */
-       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
  #endif
  
        /* restore callee-saved registers */
@@@ -454,9 -448,19 +453,19 @@@ END(irq_entries_start)
   *
   * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
   */
- .macro ENTER_IRQ_STACK regs=1 old_rsp
+ .macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
        DEBUG_ENTRY_ASSERT_IRQS_OFF
+       .if \save_ret
+       /*
+        * If save_ret is set, the original stack contains one additional
+        * entry -- the return address. Therefore, move the address one
+        * entry below %rsp to \old_rsp.
+        */
+       leaq    8(%rsp), \old_rsp
+       .else
        movq    %rsp, \old_rsp
+       .endif
  
        .if \regs
        UNWIND_HINT_REGS base=\old_rsp
        .if \regs
        UNWIND_HINT_REGS indirect=1
        .endif
+       .if \save_ret
+       /*
+        * Push the return address to the stack. This return address can
+        * be found at the "real" original RSP, which was offset by 8 at
+        * the beginning of this macro.
+        */
+       pushq   -8(\old_rsp)
+       .endif
  .endm
  
  /*
  .endm
  
  /*
-  * Interrupt entry/exit.
-  *
-  * Interrupt entry points save only callee clobbered registers in fast path.
+  * Interrupt entry helper function.
   *
-  * Entry runs with interrupts off.
+  * Entry runs with interrupts off. Stack layout at entry:
+  * +----------------------------------------+
+  * | regs->ss                               |
+  * | regs->rsp                              |
+  * | regs->eflags                           |
+  * | regs->cs                               |
+  * | regs->ip                               |
+  * +----------------------------------------+
+  * | regs->orig_ax = ~(interrupt number)    |
+  * +----------------------------------------+
+  * | return address                         |
+  * +----------------------------------------+
   */
- /* 0(%rsp): ~(interrupt number) */
-       .macro interrupt func
+ ENTRY(interrupt_entry)
+       UNWIND_HINT_FUNC
+       ASM_CLAC
        cld
  
-       testb   $3, CS-ORIG_RAX(%rsp)
+       testb   $3, CS-ORIG_RAX+8(%rsp)
        jz      1f
        SWAPGS
-       call    switch_to_thread_stack
+       /*
+        * Switch to the thread stack. The IRET frame and orig_ax are
+        * on the stack, as well as the return address. RDI..R12 are
+        * not (yet) on the stack and space has not (yet) been
+        * allocated for them.
+        */
+       pushq   %rdi
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+        /*
+         * We have RDI, return address, and orig_ax on the stack on
+         * top of the IRET frame. That means offset=24
+         */
+       UNWIND_HINT_IRET_REGS base=%rdi offset=24
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+       movq    (%rdi), %rdi
  1:
  
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
  
-       testb   $3, CS(%rsp)
+       testb   $3, CS+8(%rsp)
        jz      1f
  
        /*
         *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
-        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+        * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,
         * the simplest way to handle it is to just call it twice if
         * we enter from user mode.  There's no reason to optimize this since
         * TRACE_IRQS_OFF is a no-op if lockdep is off.
        CALL_enter_from_user_mode
  
  1:
-       ENTER_IRQ_STACK old_rsp=%rdi
+       ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
        /* We entered an interrupt context - irqs are off: */
        TRACE_IRQS_OFF
  
-       call    \func   /* rdi points to pt_regs */
-       .endm
+       ret
+ END(interrupt_entry)
+ /* Interrupt entry/exit. */
  
        /*
         * The interrupt stubs push (~vector+0x80) onto the stack and
         */
        .p2align CONFIG_X86_L1_CACHE_SHIFT
  common_interrupt:
-       ASM_CLAC
        addq    $-0x80, (%rsp)                  /* Adjust vector to [-256, -1] range */
-       interrupt do_IRQ
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    do_IRQ  /* rdi points to pt_regs */
        /* 0(%rsp): old RSP */
  ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_ANY)
@@@ -771,10 -826,11 +831,11 @@@ END(common_interrupt)
  .macro apicinterrupt3 num sym do_sym
  ENTRY(\sym)
        UNWIND_HINT_IRET_REGS
-       ASM_CLAC
        pushq   $~(\num)
  .Lcommon_\sym:
-       interrupt \do_sym
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    \do_sym /* rdi points to pt_regs */
        jmp     ret_from_intr
  END(\sym)
  .endm
@@@ -837,34 -893,6 +898,6 @@@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
   */
  #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
  
- /*
-  * Switch to the thread stack.  This is called with the IRET frame and
-  * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
-  * space has not been allocated for them.)
-  */
- ENTRY(switch_to_thread_stack)
-       UNWIND_HINT_FUNC
-       pushq   %rdi
-       /* Need to switch before accessing the thread stack. */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-       movq    %rsp, %rdi
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-       pushq   7*8(%rdi)               /* regs->ss */
-       pushq   6*8(%rdi)               /* regs->rsp */
-       pushq   5*8(%rdi)               /* regs->eflags */
-       pushq   4*8(%rdi)               /* regs->cs */
-       pushq   3*8(%rdi)               /* regs->ip */
-       pushq   2*8(%rdi)               /* regs->orig_ax */
-       pushq   8(%rdi)                 /* return address */
-       UNWIND_HINT_FUNC
-       movq    (%rdi), %rdi
-       ret
- END(switch_to_thread_stack)
  .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
  ENTRY(\sym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
        .endif
  
-       /* Save all registers in pt_regs */
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
        .if \paranoid < 2
-       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
+       testb   $3, CS-ORIG_RAX(%rsp)           /* If coming from userspace, switch stacks */
        jnz     .Lfrom_usermode_switch_stack_\@
        .endif
  
@@@ -1135,13 -1159,15 +1164,15 @@@ idtentry machine_check do_mce has_error_code=0 paranoid=1
  #endif
  
  /*
-  * Switch gs if needed.
+  * Save all registers in pt_regs, and switch gs if needed.
   * Use slow, but surefire "are we in kernel?" check.
   * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
   */
  ENTRY(paranoid_entry)
        UNWIND_HINT_FUNC
        cld
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@@ -1186,12 -1212,14 +1217,14 @@@ ENTRY(paranoid_exit)
  END(paranoid_exit)
  
  /*
-  * Switch gs if needed.
+  * Save all registers in pt_regs, and switch GS if needed.
   * Return: EBX=0: came from user mode; EBX=1: otherwise
   */
  ENTRY(error_entry)
-       UNWIND_HINT_REGS offset=8
+       UNWIND_HINT_FUNC
        cld
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
  
@@@ -1582,8 -1610,6 +1615,6 @@@ end_repeat_nmi
         * frame to point back to repeat_nmi.
         */
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
  
        /*
         * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
diff --combined arch/x86/include/asm/nospec-branch.h
@@@ -8,6 -8,50 +8,50 @@@
  #include <asm/cpufeatures.h>
  #include <asm/msr-index.h>
  
+ /*
+  * Fill the CPU return stack buffer.
+  *
+  * Each entry in the RSB, if used for a speculative 'ret', contains an
+  * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+  *
+  * This is required in various cases for retpoline and IBRS-based
+  * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+  * eliminate potentially bogus entries from the RSB, and sometimes
+  * purely to ensure that it doesn't get empty, which on some CPUs would
+  * allow predictions from other (unwanted!) sources to be used.
+  *
+  * We define a CPP macro such that it can be used from both .S files and
+  * inline assembly. It's possible to do a .macro and then include that
+  * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+  */
+ #define RSB_CLEAR_LOOPS               32      /* To forcibly overwrite all entries */
+ #define RSB_FILL_LOOPS                16      /* To avoid underflow */
+ /*
+  * Google experimented with loop-unrolling and this turned out to be
+  * the optimal version - two calls, each with their own speculation
+  * trap should their return address end up getting used, in a loop.
+  */
+ #define __FILL_RETURN_BUFFER(reg, nr, sp)     \
+       mov     $(nr/2), reg;                   \
+ 771:                                          \
+       call    772f;                           \
+ 773:  /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     773b;                           \
+ 772:                                          \
+       call    774f;                           \
+ 775:  /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     775b;                           \
+ 774:                                          \
+       dec     reg;                            \
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
  #ifdef __ASSEMBLY__
  
  /*
        .popsection
  .endm
  
+ /*
+  * This should be used immediately before an indirect jump/call. It tells
+  * objtool the subsequent indirect jump/call is vouched safe for retpoline
+  * builds.
+  */
+ .macro ANNOTATE_RETPOLINE_SAFE
+       .Lannotate_\@:
+       .pushsection .discard.retpoline_safe
+       _ASM_PTR .Lannotate_\@
+       .popsection
+ .endm
  /*
   * These are the bare retpoline primitives for indirect jmp and call.
   * Do not use these directly; they only exist to make the ALTERNATIVE
  .macro JMP_NOSPEC reg:req
  #ifdef CONFIG_RETPOLINE
        ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE_2 __stringify(jmp *\reg),                           \
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),  \
                __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
-               __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+               __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
  #else
        jmp     *\reg
  #endif
  .macro CALL_NOSPEC reg:req
  #ifdef CONFIG_RETPOLINE
        ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE_2 __stringify(call *\reg),                          \
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
                __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-               __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+               __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
  #else
        call    *\reg
  #endif
  .endm
  
- /* This clobbers the BX register */
- .macro FILL_RETURN_BUFFER nr:req ftr:req
+  /*
+   * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+   * monstrosity above, manually.
+   */
+ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
  #ifdef CONFIG_RETPOLINE
-       ALTERNATIVE "", "call __clear_rsb", \ftr
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
+               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
+               \ftr
+ .Lskip_rsb_\@:
  #endif
  .endm
  
        ".long 999b - .\n\t"                                    \
        ".popsection\n\t"
  
+ #define ANNOTATE_RETPOLINE_SAFE                                       \
+       "999:\n\t"                                              \
+       ".pushsection .discard.retpoline_safe\n\t"              \
+       _ASM_PTR " 999b\n\t"                                    \
+       ".popsection\n\t"
  #if defined(CONFIG_X86_64) && defined(RETPOLINE)
  
  /*
  # define CALL_NOSPEC                                          \
        ANNOTATE_NOSPEC_ALTERNATIVE                             \
        ALTERNATIVE(                                            \
+       ANNOTATE_RETPOLINE_SAFE                                 \
        "call *%[thunk_target]\n",                              \
        "call __x86_indirect_thunk_%V[thunk_target]\n",         \
        X86_FEATURE_RETPOLINE)
@@@ -156,62 -226,53 +226,90 @@@ extern char __indirect_thunk_end[]
  static inline void vmexit_fill_RSB(void)
  {
  #ifdef CONFIG_RETPOLINE
-       alternative_input("",
-                         "call __fill_rsb",
-                         X86_FEATURE_RETPOLINE,
-                         ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+       unsigned long loops;
+       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+                     ALTERNATIVE("jmp 910f",
+                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+                                 X86_FEATURE_RETPOLINE)
+                     "910:"
+                     : "=r" (loops), ASM_CALL_CONSTRAINT
+                     : : "memory" );
  #endif
  }
  
+ #define alternative_msr_write(_msr, _val, _feature)           \
+       asm volatile(ALTERNATIVE("",                            \
+                                "movl %[msr], %%ecx\n\t"       \
+                                "movl %[val], %%eax\n\t"       \
+                                "movl $0, %%edx\n\t"           \
+                                "wrmsr",                       \
+                                _feature)                      \
+                    : : [msr] "i" (_msr), [val] "i" (_val)     \
+                    : "eax", "ecx", "edx", "memory")
  static inline void indirect_branch_prediction_barrier(void)
  {
-       asm volatile(ALTERNATIVE("",
-                                "movl %[msr], %%ecx\n\t"
-                                "movl %[val], %%eax\n\t"
-                                "movl $0, %%edx\n\t"
-                                "wrmsr",
-                                X86_FEATURE_USE_IBPB)
-                    : : [msr] "i" (MSR_IA32_PRED_CMD),
-                        [val] "i" (PRED_CMD_IBPB)
-                    : "eax", "ecx", "edx", "memory");
+       alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+                             X86_FEATURE_USE_IBPB);
  }
  
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+  * before calling into firmware.
+  *
+  * (Implemented as CPP macros due to header hell.)
+  */
+ #define firmware_restrict_branch_speculation_start()                  \
+ do {                                                                  \
+       preempt_disable();                                              \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,       \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+ } while (0)
+ #define firmware_restrict_branch_speculation_end()                    \
+ do {                                                                  \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,                    \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+       preempt_enable();                                               \
+ } while (0)
  #endif /* __ASSEMBLY__ */
 +
 +/*
 + * Below is used in the eBPF JIT compiler and emits the byte sequence
 + * for the following assembly:
 + *
 + * With retpolines configured:
 + *
 + *    callq do_rop
 + *  spec_trap:
 + *    pause
 + *    lfence
 + *    jmp spec_trap
 + *  do_rop:
 + *    mov %rax,(%rsp)
 + *    retq
 + *
 + * Without retpolines configured:
 + *
 + *    jmp *%rax
 + */
 +#ifdef CONFIG_RETPOLINE
 +# define RETPOLINE_RAX_BPF_JIT_SIZE   17
 +# define RETPOLINE_RAX_BPF_JIT()                              \
 +      EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
 +      /* spec_trap: */                                        \
 +      EMIT2(0xF3, 0x90);       /* pause */                    \
 +      EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
 +      EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
 +      /* do_rop: */                                           \
 +      EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */    \
 +      EMIT1(0xC3);             /* retq */
 +#else
 +# define RETPOLINE_RAX_BPF_JIT_SIZE   2
 +# define RETPOLINE_RAX_BPF_JIT()                              \
 +      EMIT2(0xFF, 0xE0);       /* jmp *%rax */
 +#endif
 +
  #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
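
A minimal usage sketch for the firmware_restrict_branch_speculation_start()/_end() helpers introduced above. This is not part of the patch; do_firmware_call() is a hypothetical stand-in for a real EFI runtime service invocation:

static void protected_firmware_call(void)
{
	/* Enables IBRS for the firmware window; also disables preemption. */
	firmware_restrict_branch_speculation_start();

	do_firmware_call();	/* hypothetical firmware/EFI call */

	/* Drops IBRS again and re-enables preemption. */
	firmware_restrict_branch_speculation_end();
}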
diff --combined arch/x86/include/asm/paravirt.h
@@@ -7,6 -7,7 +7,7 @@@
  #ifdef CONFIG_PARAVIRT
  #include <asm/pgtable_types.h>
  #include <asm/asm.h>
+ #include <asm/nospec-branch.h>
  
  #include <asm/paravirt_types.h>
  
@@@ -567,22 -568,17 +568,22 @@@ static inline p4dval_t p4d_val(p4d_t p4d)
        return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
  }
  
 -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 +static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
  {
 -      pgdval_t val = native_pgd_val(pgd);
 -
 -      PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
 +      PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd));
  }
  
 -static inline void pgd_clear(pgd_t *pgdp)
 -{
 -      set_pgd(pgdp, __pgd(0));
 -}
 +#define set_pgd(pgdp, pgdval) do {                                    \
 +      if (pgtable_l5_enabled)                                         \
 +              __set_pgd(pgdp, pgdval);                                \
 +      else                                                            \
 +              set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd });     \
 +} while (0)
 +
 +#define pgd_clear(pgdp) do {                                          \
 +      if (pgtable_l5_enabled)                                         \
 +              set_pgd(pgdp, __pgd(0));                                \
 +} while (0)
  
  #endif  /* CONFIG_PGTABLE_LEVELS == 5 */
  
@@@ -884,23 -880,27 +885,27 @@@ extern void default_banner(void)
  
  #define INTERRUPT_RETURN                                              \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,       \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
  
  #define DISABLE_INTERRUPTS(clobbers)                                  \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
  
  #define ENABLE_INTERRUPTS(clobbers)                                   \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
  
  #ifdef CONFIG_X86_32
  #define GET_CR0_INTO_EAX                              \
        push %ecx; push %edx;                           \
+       ANNOTATE_RETPOLINE_SAFE;                                \
        call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
        pop %edx; pop %ecx
  #else /* !CONFIG_X86_32 */
   */
  #define SWAPGS                                                                \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
-                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)          \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
                 )
  
  #define GET_CR2_INTO_RAX                              \
-       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+       ANNOTATE_RETPOLINE_SAFE;                                \
+       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
  
  #define USERGS_SYSRET64                                                       \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
  
  #ifdef CONFIG_DEBUG_ENTRY
  #define SAVE_FLAGS(clobbers)                                        \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 ANNOTATE_RETPOLINE_SAFE;                                  \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
  #endif
diff --combined arch/x86/include/asm/pgtable.h
@@@ -65,7 -65,7 +65,7 @@@ extern pmdval_t early_pmd_flags
  
  #ifndef __PAGETABLE_P4D_FOLDED
  #define set_pgd(pgdp, pgd)            native_set_pgd(pgdp, pgd)
 -#define pgd_clear(pgd)                        native_pgd_clear(pgd)
 +#define pgd_clear(pgd)                        (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
  #endif
  
  #ifndef set_p4d
@@@ -350,14 -350,14 +350,14 @@@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
  {
        pmdval_t v = native_pmd_val(pmd);
  
-       return __pmd(v | set);
+       return native_make_pmd(v | set);
  }
  
  static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
  {
        pmdval_t v = native_pmd_val(pmd);
  
-       return __pmd(v & ~clear);
+       return native_make_pmd(v & ~clear);
  }
  
  static inline pmd_t pmd_mkold(pmd_t pmd)
@@@ -409,14 -409,14 +409,14 @@@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
  {
        pudval_t v = native_pud_val(pud);
  
-       return __pud(v | set);
+       return native_make_pud(v | set);
  }
  
  static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
  {
        pudval_t v = native_pud_val(pud);
  
-       return __pud(v & ~clear);
+       return native_make_pud(v & ~clear);
  }
  
  static inline pud_t pud_mkold(pud_t pud)
@@@ -859,8 -859,6 +859,8 @@@ static inline unsigned long p4d_index(unsigned long address)
  #if CONFIG_PGTABLE_LEVELS > 4
  static inline int pgd_present(pgd_t pgd)
  {
 +      if (!pgtable_l5_enabled)
 +              return 1;
        return pgd_flags(pgd) & _PAGE_PRESENT;
  }
  
@@@ -878,8 -876,6 +878,8 @@@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
  /* to find an entry in a page-table-directory. */
  static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
  {
 +      if (!pgtable_l5_enabled)
 +              return (p4d_t *)pgd;
        return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
  }
  
@@@ -887,9 -883,6 +887,9 @@@ static inline int pgd_bad(pgd_t pgd)
  {
        unsigned long ignore_flags = _PAGE_USER;
  
 +      if (!pgtable_l5_enabled)
 +              return 0;
 +
        if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
                ignore_flags |= _PAGE_NX;
  
  
  static inline int pgd_none(pgd_t pgd)
  {
 +      if (!pgtable_l5_enabled)
 +              return 0;
        /*
         * There is no need to do a workaround for the KNL stray
         * A/D bit erratum here.  PGDs only point to page tables
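
A hedged sketch (not part of this patch) of why the pgtable_l5_enabled checks above keep generic walkers unchanged: with 5-level paging off, p4d_offset() returns the pgd slot itself and pgd_present()/pgd_bad()/pgd_none() become no-ops, so one walk covers both 4- and 5-level kernels. walk_to_pte() is an illustrative helper, not a kernel function:

static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);	/* top level, folded or not */
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd) || pgd_bad(*pgd))	/* always "present" with 4 levels */
		return NULL;
	p4d = p4d_offset(pgd, addr);		/* == (p4d_t *)pgd when l5 is off */
	if (p4d_none(*p4d) || p4d_bad(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud) || pud_bad(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);
}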
diff --combined arch/x86/include/asm/pgtable_32.h
@@@ -32,9 -32,8 +32,10 @@@ extern pmd_t initial_pg_pmd[]
  static inline void pgtable_cache_init(void) { }
  static inline void check_pgt_cache(void) { }
  void paging_init(void);
+ void sync_initial_page_table(void);
  
 +static inline int pgd_large(pgd_t pgd) { return 0; }
 +
  /*
   * Define this if things work differently on an i386 and an i486:
   * it will (on an i486) warn about kernel memory accesses that are
diff --combined arch/x86/include/asm/pgtable_64.h
@@@ -28,6 -28,7 +28,7 @@@ extern pgd_t init_top_pgt[]
  #define swapper_pg_dir init_top_pgt
  
  extern void paging_init(void);
+ static inline void sync_initial_page_table(void) { }
  
  #define pte_ERROR(e)                                  \
        pr_err("%s:%d: bad pte %p(%016lx)\n",           \
@@@ -217,26 -218,29 +218,26 @@@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  
  static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
  {
 -#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
 -      p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
 -#else
 -      *p4dp = p4d;
 -#endif
 +      pgd_t pgd;
 +
 +      if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
 +              *p4dp = p4d;
 +              return;
 +      }
 +
 +      pgd = native_make_pgd(native_p4d_val(p4d));
 +      pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd);
 +      *p4dp = native_make_p4d(native_pgd_val(pgd));
  }
  
  static inline void native_p4d_clear(p4d_t *p4d)
  {
 -#ifdef CONFIG_X86_5LEVEL
        native_set_p4d(p4d, native_make_p4d(0));
 -#else
 -      native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
 -#endif
  }
  
  static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
  {
 -#ifdef CONFIG_PAGE_TABLE_ISOLATION
        *pgdp = pti_set_user_pgd(pgdp, pgd);
 -#else
 -      *pgdp = pgd;
 -#endif
  }
  
  static inline void native_pgd_clear(pgd_t *pgd)
diff --combined arch/x86/kernel/head_64.S
@@@ -23,6 -23,7 +23,7 @@@
  #include <asm/nops.h>
  #include "../entry/calling.h"
  #include <asm/export.h>
+ #include <asm/nospec-branch.h>
  
  #ifdef CONFIG_PARAVIRT
  #include <asm/asm-offsets.h>
   *
   */
  
 +#define l4_index(x)   (((x) >> 39) & 511)
  #define pud_index(x)  (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
  
 -#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
 -PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
 -PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
 -#endif
 +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
 +L4_START_KERNEL = l4_index(__START_KERNEL_map)
 +
  L3_START_KERNEL = pud_index(__START_KERNEL_map)
  
        .text
@@@ -124,10 -125,7 +125,10 @@@ ENTRY(secondary_startup_64)
        /* Enable PAE mode, PGE and LA57 */
        movl    $(X86_CR4_PAE | X86_CR4_PGE), %ecx
  #ifdef CONFIG_X86_5LEVEL
 +      testl   $1, __pgtable_l5_enabled(%rip)
 +      jz      1f
        orl     $X86_CR4_LA57, %ecx
 +1:
  #endif
        movq    %rcx, %cr4
  
  
        /* Ensure I am executing from virtual addresses */
        movq    $1f, %rax
+       ANNOTATE_RETPOLINE_SAFE
        jmp     *%rax
  1:
        UNWIND_HINT_EMPTY
@@@ -375,7 -374,12 +377,7 @@@ GLOBAL(name
  
        __INITDATA
  NEXT_PGD_PAGE(early_top_pgt)
 -      .fill   511,8,0
 -#ifdef CONFIG_X86_5LEVEL
 -      .quad   level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 -#else
 -      .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 -#endif
 +      .fill   512,8,0
        .fill   PTI_USER_PGD_FILL,8,0
  
  NEXT_PAGE(early_dynamic_pgts)
  #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
  NEXT_PGD_PAGE(init_top_pgt)
        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 -      .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
 +      .org    init_top_pgt + L4_PAGE_OFFSET*8, 0
        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 -      .org    init_top_pgt + PGD_START_KERNEL*8, 0
 +      .org    init_top_pgt + L4_START_KERNEL*8, 0
        /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
        .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
        .fill   PTI_USER_PGD_FILL,8,0
diff --combined arch/x86/kernel/setup.c
@@@ -189,7 -189,9 +189,7 @@@ struct ist_info ist_info
  #endif
  
  #else
 -struct cpuinfo_x86 boot_cpu_data __read_mostly = {
 -      .x86_phys_bits = MAX_PHYSMEM_BITS,
 -};
 +struct cpuinfo_x86 boot_cpu_data __read_mostly;
  EXPORT_SYMBOL(boot_cpu_data);
  #endif
  
@@@ -849,7 -851,6 +849,7 @@@ void __init setup_arch(char **cmdline_p)
        __flush_tlb_all();
  #else
        printk(KERN_INFO "Command line: %s\n", boot_command_line);
 +      boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
  #endif
  
        /*
  
        kasan_init();
  
- #ifdef CONFIG_X86_32
-       /* sync back kernel address range */
-       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       KERNEL_PGD_PTRS);
        /*
-        * sync back low identity map too.  It is used for example
-        * in the 32-bit EFI stub.
+        * Sync back kernel address range.
+        *
+        * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+        * this call?
         */
-       clone_pgd_range(initial_page_table,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- #endif
+       sync_initial_page_table();
  
        tboot_probe();
  
diff --combined arch/x86/mm/fault.c
@@@ -439,7 -439,7 +439,7 @@@ static noinline int vmalloc_fault(unsigned long address)
        if (pgd_none(*pgd_ref))
                return -1;
  
 -      if (CONFIG_PGTABLE_LEVELS > 4) {
 +      if (pgtable_l5_enabled) {
                if (pgd_none(*pgd)) {
                        set_pgd(pgd, *pgd_ref);
                        arch_flush_lazy_mmu_mode();
        if (p4d_none(*p4d_ref))
                return -1;
  
 -      if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
 +      if (p4d_none(*p4d) && !pgtable_l5_enabled) {
                set_p4d(p4d, *p4d_ref);
                arch_flush_lazy_mmu_mode();
        } else {
@@@ -1248,10 -1248,6 +1248,6 @@@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
        tsk = current;
        mm = tsk->mm;
  
-       /*
-        * Detect and handle instructions that would cause a page fault for
-        * both a tracked kernel page and a userspace page.
-        */
        prefetchw(&mm->mmap_sem);
  
        if (unlikely(kmmio_fault(regs, address)))
diff --combined include/linux/compiler-gcc.h
  #define __weak                __attribute__((weak))
  #define __alias(symbol)       __attribute__((alias(#symbol)))
  
+ #ifdef RETPOLINE
+ #define __noretpoline __attribute__((indirect_branch("keep")))
+ #endif
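
A hedged sketch of what __noretpoline is for (not part of this patch): it maps to GCC's indirect_branch("keep") function attribute, letting a single function opt out of the external retpoline thunks, e.g. code that must run before the thunks are reachable. The function and callback below are hypothetical:

static void __noretpoline early_indirect_call(void (*cb)(void))
{
	cb();	/* compiled as a plain indirect call, not a thunk call */
}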
  /*
   * it doesn't make sense on ARM (currently the only user of __naked)
   * to trace naked functions because then mcount is called without
  #endif
  #endif
  
 +/*
 + * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
 + * confuse the stack allocation in gcc, leading to overly large stack
 + * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
 + *
 + * Adding an empty inline assembly before it works around the problem
 + */
 +#define barrier_before_unreachable() asm volatile("")
 +
  /*
   * Mark a position in code as unreachable.  This can be used to
   * suppress control flow warnings after asm blocks that transfer
   * unreleased.  Really, we need to have autoconf for the kernel.
   */
  #define unreachable() \
 -      do { annotate_unreachable(); __builtin_unreachable(); } while (0)
 +      do {                                    \
 +              annotate_unreachable();         \
 +              barrier_before_unreachable();   \
 +              __builtin_unreachable();        \
 +      } while (0)
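
A hedged sketch of a typical unreachable() caller (not part of this patch): the switch covers every case, the empty asm from barrier_before_unreachable() keeps gcc (PR 82365) from over-sizing the caller's stack frame, and __builtin_unreachable() still suppresses the missing-return warning. classify() is an illustrative function:

static int classify(int x)
{
	switch (x & 1) {
	case 0:
		return 0;
	case 1:
		return 1;
	}
	unreachable();	/* every case returns above; tell the compiler so */
}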
  
  /* Mark a function definition as prohibited from being cloned. */
  #define __noclone     __attribute__((__noclone__, __optimize__("no-tracer")))