Merge tag 'v5.12-rc5' into WIP.x86/core, to pick up recent NOP related changes
author	Ingo Molnar <mingo@kernel.org>
Fri, 2 Apr 2021 10:33:16 +0000 (12:33 +0200)
committer	Ingo Molnar <mingo@kernel.org>
Fri, 2 Apr 2021 10:33:16 +0000 (12:33 +0200)
In particular we want to have this upstream commit:

  b90829704780: ("bpf: Use NOP_ATOMIC5 instead of emit_nops(&prog, 5) for BPF_TRAMP_F_CALL_ORIG")

... before merging in x86/cpu changes and the removal of the NOP optimizations, and
applying PeterZ's !retpoline objtool series.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
88 files changed:
arch/arm/include/asm/paravirt.h
arch/arm/kernel/paravirt.c
arch/arm64/include/asm/paravirt.h
arch/arm64/kernel/paravirt.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/boot/compressed/sev-es.c
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/vdso/vdso32/system_call.S
arch/x86/events/intel/ds.c
arch/x86/events/intel/lbr.c
arch/x86/include/asm/alternative-asm.h [deleted file]
arch/x86/include/asm/alternative.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/inat.h
arch/x86/include/asm/insn-eval.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kprobes.h
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/smap.h
arch/x86/include/asm/stackprotector.h
arch/x86/include/asm/suspend_32.h
arch/x86/kernel/Makefile
arch/x86/kernel/alternative.c
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mce/severity.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/doublefault_32.c
arch/x86/kernel/head_32.S
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/paravirt-spinlocks.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/paravirt_patch.c [deleted file]
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/sev-es.c
arch/x86/kernel/tls.c
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/umip.c
arch/x86/kernel/uprobes.c
arch/x86/lib/atomic64_386_32.S
arch/x86/lib/atomic64_cx8_32.S
arch/x86/lib/copy_page_64.S
arch/x86/lib/copy_user_64.S
arch/x86/lib/inat.c
arch/x86/lib/insn-eval.c
arch/x86/lib/insn.c
arch/x86/lib/memcpy_64.S
arch/x86/lib/memmove_64.S
arch/x86/lib/memset_64.S
arch/x86/lib/retpoline.S
arch/x86/mm/mem_encrypt.c
arch/x86/platform/pvh/head.S
arch/x86/power/cpu.c
arch/x86/tools/insn_decoder_test.c
arch/x86/tools/insn_sanity.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/time.c
drivers/xen/time.c
include/linux/static_call.h
include/linux/static_call_types.h
scripts/gcc-x86_32-has-stack-protector.sh
tools/arch/x86/include/asm/inat.h
tools/arch/x86/include/asm/insn.h
tools/arch/x86/lib/inat.c
tools/arch/x86/lib/insn.c
tools/include/linux/kconfig.h [new file with mode: 0644]
tools/include/linux/static_call_types.h
tools/objtool/arch/x86/decode.c
tools/objtool/sync-check.sh
tools/perf/arch/x86/tests/insn-x86.c
tools/perf/arch/x86/util/archinsn.c
tools/perf/check-headers.sh
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c

diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index cdbf02d..95d5b0d 100644 (file)
@@ -3,23 +3,19 @@
 #define _ASM_ARM_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-       unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-       struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-       return pv_ops.time.steal_clock(cpu);
+       return static_call(pv_steal_clock)(cpu);
 }
 #endif
 
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 4cfed91..7dd9806 100644 (file)
@@ -9,10 +9,15 @@
 #include <linux/export.h>
 #include <linux/jump_label.h>
 #include <linux/types.h>
+#include <linux/static_call.h>
 #include <asm/paravirt.h>
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+       return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index cf3a0fd..9aa193e 100644 (file)
@@ -3,23 +3,19 @@
 #define _ASM_ARM64_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-       unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-       struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-       return pv_ops.time.steal_clock(cpu);
+       return static_call(pv_steal_clock)(cpu);
 }
 
 int __init pv_time_init(void);
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c07d7a0..75fed44 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/reboot.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/static_call.h>
 
 #include <asm/paravirt.h>
 #include <asm/pvclock-abi.h>
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+       return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
        struct pvclock_vcpu_stolen_time *kaddr;
@@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 /* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
 {
        struct pv_time_stolen_time_region *reg;
 
@@ -150,7 +155,7 @@ int __init pv_time_init(void)
        if (ret)
                return ret;
 
-       pv_ops.time.steal_clock = pv_steal_clock;
+       static_call_update(pv_steal_clock, para_steal_clock);
 
        static_key_slow_inc(&paravirt_steal_enabled);
        if (steal_acc)
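
For illustration only (not part of the patch): the four arm/arm64 hunks above all follow the same static_call conversion pattern, condensed here into one C sketch. The names prefixed with "example_" and the para_steal_clock() body are stand-ins; the real declarations live in the paravirt.h/paravirt.c files shown above.

    #include <linux/types.h>
    #include <linux/init.h>
    #include <linux/static_call.h>

    /* Default implementation, bound at build time. */
    static u64 native_steal_clock(int cpu)
    {
            return 0;       /* no steal time known */
    }

    DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);

    /* Stand-in for a hypervisor-specific implementation. */
    static u64 para_steal_clock(int cpu)
    {
            return 0;
    }

    static int __init example_pv_time_init(void)
    {
            /* Retarget the call site at boot; callers are unchanged. */
            static_call_update(pv_steal_clock, para_steal_clock);
            return 0;
    }

    /* Callers go through the static call, which is patched to a direct call. */
    static inline u64 paravirt_steal_clock(int cpu)
    {
            return static_call(pv_steal_clock)(cpu);
    }
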
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2792879..513895a 100644 (file)
@@ -360,10 +360,6 @@ config X86_64_SMP
        def_bool y
        depends on X86_64 && SMP
 
-config X86_32_LAZY_GS
-       def_bool y
-       depends on X86_32 && !STACKPROTECTOR
-
 config ARCH_SUPPORTS_UPROBES
        def_bool y
 
@@ -386,7 +382,8 @@ config CC_HAS_SANE_STACKPROTECTOR
        default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC))
        help
           We have to make sure stack protector is unconditionally disabled if
-          the compiler produces broken code.
+          the compiler produces broken code or if it does not let us control
+          the segment on 32-bit kernels.
 
 menu "Processor type and features"
 
@@ -777,6 +774,7 @@ if HYPERVISOR_GUEST
 
 config PARAVIRT
        bool "Enable paravirtualization code"
+       depends on HAVE_STATIC_CALL
        help
          This changes the kernel so it can modify itself when it is run
          under a hypervisor, potentially improving performance significantly
@@ -1518,6 +1516,7 @@ config AMD_MEM_ENCRYPT
        select ARCH_USE_MEMREMAP_PROT
        select ARCH_HAS_FORCE_DMA_UNENCRYPTED
        select INSTRUCTION_DECODER
+       select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
        help
          Say yes to enable support for the encryption of system memory.
          This requires an AMD processor that supports Secure Memory
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9a85eae..c84d0dc 100644 (file)
@@ -79,6 +79,14 @@ ifeq ($(CONFIG_X86_32),y)
 
         # temporary until string.h is fixed
         KBUILD_CFLAGS += -ffreestanding
+
+       ifeq ($(CONFIG_STACKPROTECTOR),y)
+               ifeq ($(CONFIG_SMP),y)
+                       KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
+               else
+                       KBUILD_CFLAGS += -mstack-protector-guard=global
+               endif
+       endif
 else
         BITS := 64
         UTS_MACHINE := x86_64
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
index 27826c2..801c626 100644 (file)
@@ -78,16 +78,15 @@ static inline void sev_es_wr_ghcb_msr(u64 val)
 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 {
        char buffer[MAX_INSN_SIZE];
-       enum es_result ret;
+       int ret;
 
        memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
 
-       insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
-       insn_get_length(&ctxt->insn);
+       ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+       if (ret < 0)
+               return ES_DECODE_FAILED;
 
-       ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
-
-       return ret;
+       return ES_OK;
 }
 
 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..8096b86 100644 (file)
@@ -20,7 +20,7 @@
  *     1C(%esp) - %ds
  *     20(%esp) - %es
  *     24(%esp) - %fs
- *     28(%esp) - %gs          saved iff !CONFIG_X86_32_LAZY_GS
+ *     28(%esp) - unused -- was %gs on old stackprotector kernels
  *     2C(%esp) - orig_eax
  *     30(%esp) - %eip
  *     34(%esp) - %cs
@@ -40,7 +40,7 @@
 #include <asm/processor-flags.h>
 #include <asm/irq_vectors.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
 
 #define PTI_SWITCH_MASK         (1 << PAGE_SHIFT)
 
-/*
- * User gs save/restore
- *
- * %gs is used for userland TLS and kernel only uses it for stack
- * canary which is required to be at %gs:20 by gcc.  Read the comment
- * at the top of stackprotector.h for more info.
- *
- * Local labels 98 and 99 are used.
- */
-#ifdef CONFIG_X86_32_LAZY_GS
-
- /* unfortunately push/pop can't be no-op */
-.macro PUSH_GS
-       pushl   $0
-.endm
-.macro POP_GS pop=0
-       addl    $(4 + \pop), %esp
-.endm
-.macro POP_GS_EX
-.endm
-
- /* all the rest are no-op */
-.macro PTGS_TO_GS
-.endm
-.macro PTGS_TO_GS_EX
-.endm
-.macro GS_TO_REG reg
-.endm
-.macro REG_TO_PTGS reg
-.endm
-.macro SET_KERNEL_GS reg
-.endm
-
-#else  /* CONFIG_X86_32_LAZY_GS */
-
-.macro PUSH_GS
-       pushl   %gs
-.endm
-
-.macro POP_GS pop=0
-98:    popl    %gs
-  .if \pop <> 0
-       add     $\pop, %esp
-  .endif
-.endm
-.macro POP_GS_EX
-.pushsection .fixup, "ax"
-99:    movl    $0, (%esp)
-       jmp     98b
-.popsection
-       _ASM_EXTABLE(98b, 99b)
-.endm
-
-.macro PTGS_TO_GS
-98:    mov     PT_GS(%esp), %gs
-.endm
-.macro PTGS_TO_GS_EX
-.pushsection .fixup, "ax"
-99:    movl    $0, PT_GS(%esp)
-       jmp     98b
-.popsection
-       _ASM_EXTABLE(98b, 99b)
-.endm
-
-.macro GS_TO_REG reg
-       movl    %gs, \reg
-.endm
-.macro REG_TO_PTGS reg
-       movl    \reg, PT_GS(%esp)
-.endm
-.macro SET_KERNEL_GS reg
-       movl    $(__KERNEL_STACK_CANARY), \reg
-       movl    \reg, %gs
-.endm
-
-#endif /* CONFIG_X86_32_LAZY_GS */
-
 /* Unconditionally switch to user cr3 */
 .macro SWITCH_TO_USER_CR3 scratch_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 .macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
        cld
 .if \skip_gs == 0
-       PUSH_GS
+       pushl   $0
 .endif
        pushl   %fs
 
        movl    $(__USER_DS), %edx
        movl    %edx, %ds
        movl    %edx, %es
-.if \skip_gs == 0
-       SET_KERNEL_GS %edx
-.endif
        /* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
        SWITCH_TO_KERNEL_STACK
 1:     popl    %ds
 2:     popl    %es
 3:     popl    %fs
-       POP_GS \pop
+       addl    $(4 + \pop), %esp       /* pop the unused "gs" slot */
        IRET_FRAME
 .pushsection .fixup, "ax"
 4:     movl    $0, (%esp)
        _ASM_EXTABLE(1b, 4b)
        _ASM_EXTABLE(2b, 5b)
        _ASM_EXTABLE(3b, 6b)
-       POP_GS_EX
 .endm
 
 .macro RESTORE_ALL_NMI cr3_reg:req pop=0
         * will soon execute iret and the tracer was already set to
         * the irqstate after the IRET:
         */
-       DISABLE_INTERRUPTS(CLBR_ANY)
+       cli
        lss     (%esp), %esp                    /* switch to espfix segment */
 .Lend_\@:
 #endif /* CONFIG_X86_ESPFIX32 */
@@ -779,7 +698,7 @@ SYM_CODE_START(__switch_to_asm)
 
 #ifdef CONFIG_STACKPROTECTOR
        movl    TASK_stack_canary(%edx), %ebx
-       movl    %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+       movl    %ebx, PER_CPU_VAR(__stack_chk_guard)
 #endif
 
 #ifdef CONFIG_RETPOLINE
@@ -976,7 +895,6 @@ SYM_FUNC_START(entry_SYSENTER_32)
        movl    PT_EIP(%esp), %edx      /* pt_regs->ip */
        movl    PT_OLDESP(%esp), %ecx   /* pt_regs->sp */
 1:     mov     PT_FS(%esp), %fs
-       PTGS_TO_GS
 
        popl    %ebx                    /* pt_regs->bx */
        addl    $2*4, %esp              /* skip pt_regs->cx and pt_regs->dx */
@@ -1012,7 +930,6 @@ SYM_FUNC_START(entry_SYSENTER_32)
        jmp     1b
 .popsection
        _ASM_EXTABLE(1b, 2b)
-       PTGS_TO_GS_EX
 
 .Lsysenter_fix_flags:
        pushl   $X86_EFLAGS_FIXED
@@ -1077,7 +994,7 @@ restore_all_switch_stack:
         * when returning from IPI handler and when returning from
         * scheduler to user-space.
         */
-       INTERRUPT_RETURN
+       iret
 
 .section .fixup, "ax"
 SYM_CODE_START(asm_iret_error)
@@ -1154,11 +1071,7 @@ SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
        SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
        ENCODE_FRAME_POINTER
 
-       /* fixup %gs */
-       GS_TO_REG %ecx
        movl    PT_GS(%esp), %edi               # get the function address
-       REG_TO_PTGS %ecx
-       SET_KERNEL_GS %ecx
 
        /* fixup orig %eax */
        movl    PT_ORIG_EAX(%esp), %edx         # get the error code
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 400908d..12e2e3c 100644 (file)
@@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork)
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
        pushq %rax
-       SAVE_FLAGS(CLBR_RAX)
+       SAVE_FLAGS
        testl $X86_EFLAGS_IF, %eax
        jz .Lokay_\@
        ud2
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index de1fff7..d6a6080 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 
        .text
        .globl __kernel_vsyscall
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index d32b302..16f226f 100644 (file)
@@ -1353,14 +1353,13 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
                is_64bit = kernel_ip(to) || any_64bit_mode(regs);
 #endif
                insn_init(&insn, kaddr, size, is_64bit);
-               insn_get_length(&insn);
+
                /*
-                * Make sure there was not a problem decoding the
-                * instruction and getting the length.  This is
-                * doubly important because we have an infinite
-                * loop if insn.length=0.
+                * Make sure there was not a problem decoding the instruction.
+                * This is doubly important because we have an infinite loop if
+                * insn.length=0.
                 */
-               if (!insn.length)
+               if (insn_get_length(&insn))
                        break;
 
                to += insn.length;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 21890da..9ecf502 100644 (file)
@@ -1224,8 +1224,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
        is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
 #endif
        insn_init(&insn, addr, bytes_read, is64);
-       insn_get_opcode(&insn);
-       if (!insn.opcode.got)
+       if (insn_get_opcode(&insn))
                return X86_BR_ABORT;
 
        switch (insn.opcode.bytes[0]) {
@@ -1262,8 +1261,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
                ret = X86_BR_INT;
                break;
        case 0xe8: /* call near rel */
-               insn_get_immediate(&insn);
-               if (insn.immediate1.value == 0) {
+               if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
                        /* zero length call */
                        ret = X86_BR_ZERO_CALL;
                        break;
@@ -1279,7 +1277,9 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
                ret = X86_BR_JMP;
                break;
        case 0xff: /* call near absolute, call far absolute ind */
-               insn_get_modrm(&insn);
+               if (insn_get_modrm(&insn))
+                       return X86_BR_ABORT;
+
                ext = (insn.modrm.bytes[0] >> 3) & 0x7;
                switch (ext) {
                case 2: /* near ind call */
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644 (file)
index 464034d..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-
-#ifdef CONFIG_SMP
-       .macro LOCK_PREFIX
-672:   lock
-       .pushsection .smp_locks,"a"
-       .balign 4
-       .long 672b - .
-       .popsection
-       .endm
-#else
-       .macro LOCK_PREFIX
-       .endm
-#endif
-
-/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
-       .Lannotate_\@:
-       .pushsection .discard.ignore_alts
-       .long .Lannotate_\@ - .
-       .popsection
-.endm
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
-       .long \orig - .
-       .long \alt - .
-       .word \feature
-       .byte \orig_len
-       .byte \alt_len
-       .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
-       \oldinstr
-141:
-       .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
-       .pushsection .altinstructions,"a"
-       altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
-       .popsection
-
-       .pushsection .altinstr_replacement,"ax"
-143:
-       \newinstr
-144:
-       .popsection
-.endm
-
-#define old_len                        141b-140b
-#define new_len1               144f-143f
-#define new_len2               145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b)    ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
-       \oldinstr
-141:
-       .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
-               (alt_max_short(new_len1, new_len2) - (old_len)),0x90
-142:
-
-       .pushsection .altinstructions,"a"
-       altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-       altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
-       .popsection
-
-       .pushsection .altinstr_replacement,"ax"
-143:
-       \newinstr1
-144:
-       \newinstr2
-145:
-       .popsection
-.endm
-
-#endif  /*  __ASSEMBLY__  */
-
-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13adca3..17b3609 100644 (file)
@@ -2,13 +2,17 @@
 #ifndef _ASM_X86_ALTERNATIVE_H
 #define _ASM_X86_ALTERNATIVE_H
 
-#ifndef __ASSEMBLY__
-
 #include <linux/types.h>
-#include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
 
+#define ALTINSTR_FLAG_INV      (1 << 15)
+#define ALT_NOT(feat)          ((feat) | ALTINSTR_FLAG_INV)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stddef.h>
+
 /*
  * Alternative inline assembly for SMP.
  *
@@ -150,7 +154,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
        " .byte " alt_rlen(num) "\n"                    /* replacement len */ \
        " .byte " alt_pad_len "\n"                      /* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num)   /* replacement */       \
+#define ALTINSTR_REPLACEMENT(newinstr, num)            /* replacement */       \
        "# ALT: replacement " #num "\n"                                         \
        b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
 
@@ -161,7 +165,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
        ALTINSTR_ENTRY(feature, 1)                                      \
        ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
-       ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
+       ALTINSTR_REPLACEMENT(newinstr, 1)                               \
        ".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
@@ -171,10 +175,15 @@ static inline int alternatives_text_reserved(void *start, void *end)
        ALTINSTR_ENTRY(feature2, 2)                                     \
        ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
-       ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
-       ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
+       ALTINSTR_REPLACEMENT(newinstr1, 1)                              \
+       ALTINSTR_REPLACEMENT(newinstr2, 2)                              \
        ".popsection\n"
 
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+       ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS,        \
+                     newinstr_yes, feature)
+
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
        OLDINSTR_3(oldinsn, 1, 2, 3)                                            \
        ".pushsection .altinstructions,\"a\"\n"                                 \
@@ -183,9 +192,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
        ALTINSTR_ENTRY(feat3, 3)                                                \
        ".popsection\n"                                                         \
        ".pushsection .altinstr_replacement, \"ax\"\n"                          \
-       ALTINSTR_REPLACEMENT(newinsn1, feat1, 1)                                \
-       ALTINSTR_REPLACEMENT(newinsn2, feat2, 2)                                \
-       ALTINSTR_REPLACEMENT(newinsn3, feat3, 3)                                \
+       ALTINSTR_REPLACEMENT(newinsn1, 1)                                       \
+       ALTINSTR_REPLACEMENT(newinsn2, 2)                                       \
+       ALTINSTR_REPLACEMENT(newinsn3, 3)                                       \
        ".popsection\n"
 
 /*
@@ -206,6 +215,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
        asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
 
+#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
+       asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
@@ -271,6 +283,116 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
 
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_SMP
+       .macro LOCK_PREFIX
+672:   lock
+       .pushsection .smp_locks,"a"
+       .balign 4
+       .long 672b - .
+       .popsection
+       .endm
+#else
+       .macro LOCK_PREFIX
+       .endm
+#endif
+
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+.macro ANNOTATE_IGNORE_ALTERNATIVE
+       .Lannotate_\@:
+       .pushsection .discard.ignore_alts
+       .long .Lannotate_\@ - .
+       .popsection
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+       .long \orig - .
+       .long \alt - .
+       .word \feature
+       .byte \orig_len
+       .byte \alt_len
+       .byte \pad_len
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+       \oldinstr
+141:
+       .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+       .pushsection .altinstructions,"a"
+       altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+       .popsection
+
+       .pushsection .altinstr_replacement,"ax"
+143:
+       \newinstr
+144:
+       .popsection
+.endm
+
+#define old_len                        141b-140b
+#define new_len1               144f-143f
+#define new_len2               145f-144f
+
+/*
+ * gas compatible max based on the idea from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
+ */
+#define alt_max_short(a, b)    ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+
+/*
+ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @feature2.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+       \oldinstr
+141:
+       .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+               (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+       .pushsection .altinstructions,"a"
+       altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+       altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+       .popsection
+
+       .pushsection .altinstr_replacement,"ax"
+143:
+       \newinstr1
+144:
+       \newinstr2
+145:
+       .popsection
+.endm
+
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+       ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS,        \
+       newinstr_yes, feature
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
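
For illustration only (not part of the patch): a sketch of how the new ALTERNATIVE_TERNARY()/alternative_ternary() helpers are meant to be used from C. Since X86_FEATURE_ALWAYS is set on every CPU, @newinstr_no is the default replacement; when @feature is also set, the later altinstructions entry wins and @newinstr_yes is patched in instead. The instruction strings below are placeholders and example_ternary_site() is a made-up name; the assembly-side macro added after the #else branch is what the paravirt INTERRUPT_RETURN conversion later in this diff uses.

    #include <asm/alternative.h>
    #include <asm/cpufeatures.h>

    static inline void example_ternary_site(void)
    {
            alternative_ternary("nop",                  /* oldinstr: default code  */
                                X86_FEATURE_XENPV,      /* feature bit to test     */
                                "nop",                  /* patched in when XENPV   */
                                "pause");               /* patched in otherwise    */
    }
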
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1728d4c..16a51e7 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <asm/asm.h>
 #include <linux/bitops.h>
+#include <asm/alternative.h>
 
 enum cpuid_leafs
 {
@@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline bool _static_cpu_has(u16 bit)
 {
-       asm_volatile_goto("1: jmp 6f\n"
-                "2:\n"
-                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-                        "((5f-4f) - (2b-1b)),0x90\n"
-                "3:\n"
-                ".section .altinstructions,\"a\"\n"
-                " .long 1b - .\n"              /* src offset */
-                " .long 4f - .\n"              /* repl offset */
-                " .word %P[always]\n"          /* always replace */
-                " .byte 3b - 1b\n"             /* src len */
-                " .byte 5f - 4f\n"             /* repl len */
-                " .byte 3b - 2b\n"             /* pad len */
-                ".previous\n"
-                ".section .altinstr_replacement,\"ax\"\n"
-                "4: jmp %l[t_no]\n"
-                "5:\n"
-                ".previous\n"
-                ".section .altinstructions,\"a\"\n"
-                " .long 1b - .\n"              /* src offset */
-                " .long 0\n"                   /* no replacement */
-                " .word %P[feature]\n"         /* feature bit */
-                " .byte 3b - 1b\n"             /* src len */
-                " .byte 0\n"                   /* repl len */
-                " .byte 0\n"                   /* pad len */
-                ".previous\n"
-                ".section .altinstr_aux,\"ax\"\n"
-                "6:\n"
-                " testb %[bitnum],%[cap_byte]\n"
-                " jnz %l[t_yes]\n"
-                " jmp %l[t_no]\n"
-                ".previous\n"
+       asm_volatile_goto(
+               ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+               ".section .altinstr_aux,\"ax\"\n"
+               "6:\n"
+               " testb %[bitnum],%[cap_byte]\n"
+               " jnz %l[t_yes]\n"
+               " jmp %l[t_no]\n"
+               ".previous\n"
                 : : [feature]  "i" (bit),
-                    [always]   "i" (X86_FEATURE_ALWAYS),
                     [bitnum]   "i" (1 << (bit & 7)),
                     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
                 : : t_yes, t_no);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cc96e26..b440c95 100644 (file)
 #define X86_FEATURE_EPT_AD             ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK           ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT                ( 8*32+21) /* "" PV vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 4cf2ad5..b56c574 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Written by Masami Hiramatsu <mhiramat@redhat.com>
  */
-#include <asm/inat_types.h>
+#include <asm/inat_types.h> /* __ignore_sync_check__ */
 
 /*
  * Internal bits. Don't use bitmasks directly, because these bits are
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 98b4dae..91d7182 100644 (file)
@@ -25,7 +25,7 @@ int insn_fetch_from_user(struct pt_regs *regs,
                         unsigned char buf[MAX_INSN_SIZE]);
 int insn_fetch_from_user_inatomic(struct pt_regs *regs,
                                  unsigned char buf[MAX_INSN_SIZE]);
-bool insn_decode(struct insn *insn, struct pt_regs *regs,
-                unsigned char buf[MAX_INSN_SIZE], int buf_size);
+bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
+                          unsigned char buf[MAX_INSN_SIZE], int buf_size);
 
 #endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 95a448f..05a6ab9 100644 (file)
@@ -9,7 +9,7 @@
 
 #include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
-#include <asm/inat.h>
+#include <asm/inat.h> /* __ignore_sync_check__ */
 
 #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
 
@@ -132,13 +132,25 @@ struct insn {
 #define X86_VEX_M_MAX  0x1f                    /* VEX3.M Maximum value */
 
 extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+       INSN_MODE_32,
+       INSN_MODE_64,
+       /* Mode is determined by the current kernel build. */
+       INSN_MODE_KERN,
+       INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
 
 /* Attribute will be determined after getting ModRM (for opcode groups) */
 static inline void insn_get_attribute(struct insn *insn)
@@ -149,17 +161,6 @@ static inline void insn_get_attribute(struct insn *insn)
 /* Instruction uses RIP-relative addressing */
 extern int insn_rip_relative(struct insn *insn);
 
-/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn,
-                                   const void *kaddr, int buf_len)
-{
-#ifdef CONFIG_X86_64
-       insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
-       insn_init(insn, kaddr, buf_len, 0);
-#endif
-}
-
 static inline int insn_is_avx(struct insn *insn)
 {
        if (!insn->prefixes.got)
@@ -179,13 +180,6 @@ static inline int insn_has_emulate_prefix(struct insn *insn)
        return !!insn->emulate_prefix_size;
 }
 
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
-{
-       return insn->opcode.got && insn->modrm.got && insn->sib.got &&
-               insn->displacement.got && insn->immediate.got;
-}
-
 static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
        if (insn->vex_prefix.nbytes == 2)       /* 2 bytes VEX */
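
For illustration only (not part of the patch): with the reworked decoder API a caller performs the whole decode and the error check in one step, instead of calling the individual insn_get_*() helpers and testing the ->got flags afterwards (compare the sev-es.c hunk above). example_decode_len() is a made-up name.

    #include <asm/insn.h>

    static int example_decode_len(const void *kaddr)
    {
            struct insn insn;
            int ret;

            /* Decode for the current kernel's mode; a negative return means failure. */
            ret = insn_decode(&insn, kaddr, MAX_INSN_SIZE, INSN_MODE_KERN);
            if (ret < 0)
                    return ret;

            return insn.length;     /* valid only after a successful decode */
    }
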
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70e..c5ce984 100644 (file)
@@ -109,18 +109,13 @@ static __always_inline unsigned long arch_local_irq_save(void)
 }
 #else
 
-#define ENABLE_INTERRUPTS(x)   sti
-#define DISABLE_INTERRUPTS(x)  cli
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x)          pushfq; popq %rax
+#define SAVE_FLAGS             pushfq; popq %rax
 #endif
 
 #define INTERRUPT_RETURN       jmp native_iret
 
-#else
-#define INTERRUPT_RETURN               iret
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index d20a3d6..bd7f588 100644 (file)
@@ -65,10 +65,22 @@ struct arch_specific_insn {
         * a post_handler).
         */
        unsigned boostable:1;
-       unsigned if_modifier:1;
-       unsigned is_call:1;
-       unsigned is_pushf:1;
-       unsigned is_abs_ip:1;
+       unsigned char size;     /* The size of insn */
+       union {
+               unsigned char opcode;
+               struct {
+                       unsigned char type;
+               } jcc;
+               struct {
+                       unsigned char type;
+                       unsigned char asize;
+               } loop;
+               struct {
+                       unsigned char reg;
+               } indirect;
+       };
+       s32 rel32;      /* relative offset must be s32, s16, or s8 */
+       void (*emulate_op)(struct kprobe *p, struct pt_regs *regs);
        /* Number of bytes of text poked */
        int tp_len;
 };
@@ -107,7 +119,6 @@ extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
                                    unsigned long val, void *data);
 extern int kprobe_int3_handler(struct pt_regs *regs);
-extern int kprobe_debug_handler(struct pt_regs *regs);
 
 #else
 
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ccf60a8..e7be720 100644 (file)
@@ -63,7 +63,7 @@ typedef int (*hyperv_fill_flush_list_func)(
 static __always_inline void hv_setup_sched_clock(void *sched_clock)
 {
 #ifdef CONFIG_PARAVIRT
-       pv_ops.time.sched_clock = sched_clock;
+       paravirt_set_sched_clock(sched_clock);
 #endif
 }
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cb9ad6b..529f8e9 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/objtool.h>
 
 #include <asm/alternative.h>
-#include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 #include <asm/unwind_hints.h>
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4abf110..43992e5 100644 (file)
 #include <linux/bug.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
+#include <linux/static_call_types.h>
 #include <asm/frame.h>
 
-static inline unsigned long long paravirt_sched_clock(void)
+u64 dummy_steal_clock(int cpu);
+u64 dummy_sched_clock(void);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock);
+
+void paravirt_set_sched_clock(u64 (*func)(void));
+
+static inline u64 paravirt_sched_clock(void)
 {
-       return PVOP_CALL0(unsigned long long, time.sched_clock);
+       return static_call(pv_sched_clock)();
 }
 
 struct static_key;
@@ -33,9 +42,13 @@ bool pv_is_native_vcpu_is_preempted(void);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-       return PVOP_CALL1(u64, time.steal_clock, cpu);
+       return static_call(pv_steal_clock)(cpu);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
@@ -122,7 +135,9 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-       return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+       return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+                               "mov %%cr2, %%rax;",
+                               ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr2(unsigned long x)
@@ -132,12 +147,14 @@ static inline void write_cr2(unsigned long x)
 
 static inline unsigned long __read_cr3(void)
 {
-       return PVOP_CALL0(unsigned long, mmu.read_cr3);
+       return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+                             "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr3(unsigned long x)
 {
-       PVOP_VCALL1(mmu.write_cr3, x);
+       PVOP_ALT_VCALL1(mmu.write_cr3, x,
+                       "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -157,7 +174,7 @@ static inline void halt(void)
 
 static inline void wbinvd(void)
 {
-       PVOP_VCALL0(cpu.wbinvd);
+       PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -371,22 +388,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 
 static inline pte_t __pte(pteval_t val)
 {
-       return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+       return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+                                         "mov %%rdi, %%rax",
+                                         ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
-       return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+       return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+                               "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
-       return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+       return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+                                         "mov %%rdi, %%rax",
+                                         ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
-       return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+       return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+                               "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -419,12 +442,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 
 static inline pmd_t __pmd(pmdval_t val)
 {
-       return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
+       return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+                                         "mov %%rdi, %%rax",
+                                         ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pmdval_t pmd_val(pmd_t pmd)
 {
-       return PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
+       return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
+                               "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -436,14 +462,16 @@ static inline pud_t __pud(pudval_t val)
 {
        pudval_t ret;
 
-       ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val);
+       ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
+                              "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 
        return (pud_t) { ret };
 }
 
 static inline pudval_t pud_val(pud_t pud)
 {
-       return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud);
+       return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
+                               "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void pud_clear(pud_t *pudp)
@@ -462,14 +490,17 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 
 static inline p4d_t __p4d(p4dval_t val)
 {
-       p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val);
+       p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
+                                       "mov %%rdi, %%rax",
+                                       ALT_NOT(X86_FEATURE_XENPV));
 
        return (p4d_t) { ret };
 }
 
 static inline p4dval_t p4d_val(p4d_t p4d)
 {
-       return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d);
+       return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d,
+                               "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -556,7 +587,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
 
 static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
 {
-       PVOP_VCALLEE1(lock.queued_spin_unlock, lock);
+       PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock,
+                         "movb $0, (%%" _ASM_ARG1 ");",
+                         ALT_NOT(X86_FEATURE_PVUNLOCK));
 }
 
 static __always_inline void pv_wait(u8 *ptr, u8 val)
@@ -571,7 +604,9 @@ static __always_inline void pv_kick(int cpu)
 
 static __always_inline bool pv_vcpu_is_preempted(long cpu)
 {
-       return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu);
+       return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu,
+                               "xor %%" _ASM_AX ", %%" _ASM_AX ";",
+                               ALT_NOT(X86_FEATURE_VCPUPREEMPT));
 }
 
 void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock);
@@ -645,17 +680,18 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
 #ifdef CONFIG_PARAVIRT_XXL
 static inline notrace unsigned long arch_local_save_flags(void)
 {
-       return PVOP_CALLEE0(unsigned long, irq.save_fl);
+       return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
+                               ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline notrace void arch_local_irq_disable(void)
 {
-       PVOP_VCALLEE0(irq.irq_disable);
+       PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline notrace void arch_local_irq_enable(void)
 {
-       PVOP_VCALLEE0(irq.irq_enable);
+       PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline notrace unsigned long arch_local_irq_save(void)
@@ -700,84 +736,27 @@ extern void default_banner(void);
        .popsection
 
 
-#define COND_PUSH(set, mask, reg)                      \
-       .if ((~(set)) & mask); push %reg; .endif
-#define COND_POP(set, mask, reg)                       \
-       .if ((~(set)) & mask); pop %reg; .endif
-
 #ifdef CONFIG_X86_64
-
-#define PV_SAVE_REGS(set)                      \
-       COND_PUSH(set, CLBR_RAX, rax);          \
-       COND_PUSH(set, CLBR_RCX, rcx);          \
-       COND_PUSH(set, CLBR_RDX, rdx);          \
-       COND_PUSH(set, CLBR_RSI, rsi);          \
-       COND_PUSH(set, CLBR_RDI, rdi);          \
-       COND_PUSH(set, CLBR_R8, r8);            \
-       COND_PUSH(set, CLBR_R9, r9);            \
-       COND_PUSH(set, CLBR_R10, r10);          \
-       COND_PUSH(set, CLBR_R11, r11)
-#define PV_RESTORE_REGS(set)                   \
-       COND_POP(set, CLBR_R11, r11);           \
-       COND_POP(set, CLBR_R10, r10);           \
-       COND_POP(set, CLBR_R9, r9);             \
-       COND_POP(set, CLBR_R8, r8);             \
-       COND_POP(set, CLBR_RDI, rdi);           \
-       COND_POP(set, CLBR_RSI, rsi);           \
-       COND_POP(set, CLBR_RDX, rdx);           \
-       COND_POP(set, CLBR_RCX, rcx);           \
-       COND_POP(set, CLBR_RAX, rax)
+#ifdef CONFIG_PARAVIRT_XXL
 
 #define PARA_PATCH(off)                ((off) / 8)
 #define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .quad, 8)
 #define PARA_INDIRECT(addr)    *addr(%rip)
-#else
-#define PV_SAVE_REGS(set)                      \
-       COND_PUSH(set, CLBR_EAX, eax);          \
-       COND_PUSH(set, CLBR_EDI, edi);          \
-       COND_PUSH(set, CLBR_ECX, ecx);          \
-       COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set)                   \
-       COND_POP(set, CLBR_EDX, edx);           \
-       COND_POP(set, CLBR_ECX, ecx);           \
-       COND_POP(set, CLBR_EDI, edi);           \
-       COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off)                ((off) / 4)
-#define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr)    *%cs:addr
-#endif
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define INTERRUPT_RETURN                                               \
-       PARA_SITE(PARA_PATCH(PV_CPU_iret),                              \
-                 ANNOTATE_RETPOLINE_SAFE;                              \
-                 jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
-
-#define DISABLE_INTERRUPTS(clobbers)                                   \
-       PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable),                       \
-                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
-                 ANNOTATE_RETPOLINE_SAFE;                              \
-                 call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable);        \
-                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers)                                    \
-       PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable),                        \
-                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
-                 ANNOTATE_RETPOLINE_SAFE;                              \
-                 call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable);         \
-                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#endif
+       ANNOTATE_RETPOLINE_SAFE;                                        \
+       ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);",                \
+               X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
 
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
 #ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(clobbers)                                        \
-       PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),                       \
-                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
-                 ANNOTATE_RETPOLINE_SAFE;                          \
-                 call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);        \
-                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+.macro PARA_IRQ_save_fl
+       PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
+                 ANNOTATE_RETPOLINE_SAFE;
+                 call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+.endm
+
+#define SAVE_FLAGS     ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
+                                   ALT_NOT(X86_FEATURE_XENPV)
 #endif
 #endif /* CONFIG_PARAVIRT_XXL */
 #endif /* CONFIG_X86_64 */
@@ -800,5 +779,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
 {
 }
 #endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
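
For illustration only (not part of the patch): the recurring PVOP_ALT_*() pattern above, condensed. ALT_NOT() sets ALTINSTR_FLAG_INV on the feature word, so the alternative is patched in when the feature is absent; here, on anything that is not a Xen PV guest, the indirect pv_ops call is replaced by the native instruction string. example_read_cr2() is a made-up name restating the read_cr2() conversion shown earlier in this file.

    /* Assumes <asm/paravirt_types.h> context under CONFIG_PARAVIRT_XXL. */
    static inline unsigned long example_read_cr2(void)
    {
            return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
                                    "mov %%cr2, %%rax;",            /* native code  */
                                    ALT_NOT(X86_FEATURE_XENPV));    /* unless XENPV */
    }
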
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index de87087..9d1ddb7 100644 (file)
@@ -3,7 +3,6 @@
 #define _ASM_X86_PARAVIRT_TYPES_H
 
 /* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0
 #define CLBR_EAX  (1 << 0)
 #define CLBR_ECX  (1 << 1)
 #define CLBR_EDX  (1 << 2)
@@ -15,7 +14,6 @@
 
 #define CLBR_ARG_REGS  (CLBR_EAX | CLBR_EDX | CLBR_ECX)
 #define CLBR_RET_REG   (CLBR_EAX | CLBR_EDX)
-#define CLBR_SCRATCH   (0)
 #else
 #define CLBR_RAX  CLBR_EAX
 #define CLBR_RCX  CLBR_ECX
 #define CLBR_ARG_REGS  (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
                         CLBR_RCX | CLBR_R8 | CLBR_R9)
 #define CLBR_RET_REG   (CLBR_RAX)
-#define CLBR_SCRATCH   (CLBR_R10 | CLBR_R11)
 
 #endif /* X86_64 */
 
-#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
-
 #ifndef __ASSEMBLY__
 
 #include <asm/desc_defs.h>
@@ -73,19 +68,6 @@ struct pv_info {
        const char *name;
 };
 
-struct pv_init_ops {
-       /*
-        * Patch may replace one of the defined code sequences with
-        * arbitrary code, subject to the same register constraints.
-        * This generally means the code is not free to clobber any
-        * registers other than EAX.  The patch function should return
-        * the number of bytes of code generated, as we nop pad the
-        * rest in generic code.
-        */
-       unsigned (*patch)(u8 type, void *insn_buff,
-                         unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
 #ifdef CONFIG_PARAVIRT_XXL
 struct pv_lazy_ops {
        /* Set deferred update mode, used for batching operations. */
@@ -95,11 +77,6 @@ struct pv_lazy_ops {
 } __no_randomize_layout;
 #endif
 
-struct pv_time_ops {
-       unsigned long long (*sched_clock)(void);
-       unsigned long long (*steal_clock)(int cpu);
-} __no_randomize_layout;
-
 struct pv_cpu_ops {
        /* hooks for various privileged instructions */
        void (*io_delay)(void);
@@ -156,10 +133,6 @@ struct pv_cpu_ops {
 
        u64 (*read_pmc)(int counter);
 
-       /* Normal iret.  Jump to this with the standard iret stack
-          frame set up. */
-       void (*iret)(void);
-
        void (*start_context_switch)(struct task_struct *prev);
        void (*end_context_switch)(struct task_struct *next);
 #endif
@@ -290,8 +263,6 @@ struct pv_lock_ops {
  * number for each function using the offset which we use to indicate
  * what to patch. */
 struct paravirt_patch_template {
-       struct pv_init_ops      init;
-       struct pv_time_ops      time;
        struct pv_cpu_ops       cpu;
        struct pv_irq_ops       irq;
        struct pv_mmu_ops       mmu;
@@ -300,6 +271,7 @@ struct paravirt_patch_template {
 
 extern struct pv_info pv_info;
 extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
 
 #define PARAVIRT_PATCH(x)                                      \
        (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -331,11 +303,7 @@ extern struct paravirt_patch_template pv_ops;
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len);
 
 int paravirt_disable_iospace(void);
 
@@ -414,11 +382,9 @@ int paravirt_disable_iospace(void);
  * makes sure the incoming and outgoing types are always correct.
  */
 #ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS                                                        \
+#define PVOP_CALL_ARGS                                                 \
        unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
 
-#define PVOP_CALL_ARGS                 PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)              "a" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)              "d" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)              "c" ((unsigned long)(x))
@@ -434,12 +400,10 @@ int paravirt_disable_iospace(void);
 #define VEXTRA_CLOBBERS
 #else  /* CONFIG_X86_64 */
 /* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS                                                \
+#define PVOP_CALL_ARGS                                         \
        unsigned long __edi = __edi, __esi = __esi,             \
                __edx = __edx, __ecx = __ecx, __eax = __eax;
 
-#define PVOP_CALL_ARGS         PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)              "D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)              "S" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)              "d" ((unsigned long)(x))
@@ -464,152 +428,138 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op)     ((void)pv_ops.op)
 #endif
 
-#define PVOP_RETMASK(rettype)                                          \
+#define PVOP_RETVAL(rettype)                                           \
        ({      unsigned long __mask = ~0UL;                            \
+               BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
                switch (sizeof(rettype)) {                              \
                case 1: __mask =       0xffUL; break;                   \
                case 2: __mask =     0xffffUL; break;                   \
                case 4: __mask = 0xffffffffUL; break;                   \
                default: break;                                         \
                }                                                       \
-               __mask;                                                 \
+               __mask & __eax;                                         \
        })
 
 
-#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,                \
-                     pre, post, ...)                                   \
+#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...)       \
        ({                                                              \
-               rettype __ret;                                          \
                PVOP_CALL_ARGS;                                         \
                PVOP_TEST_NULL(op);                                     \
-               /* This is 32-bit specific, but is okay in 64-bit */    \
-               /* since this condition will never hold */              \
-               if (sizeof(rettype) > sizeof(unsigned long)) {          \
-                       asm volatile(pre                                \
-                                    paravirt_alt(PARAVIRT_CALL)        \
-                                    post                               \
-                                    : call_clbr, ASM_CALL_CONSTRAINT   \
-                                    : paravirt_type(op),               \
-                                      paravirt_clobber(clbr),          \
-                                      ##__VA_ARGS__                    \
-                                    : "memory", "cc" extra_clbr);      \
-                       __ret = (rettype)((((u64)__edx) << 32) | __eax); \
-               } else {                                                \
-                       asm volatile(pre                                \
-                                    paravirt_alt(PARAVIRT_CALL)        \
-                                    post                               \
-                                    : call_clbr, ASM_CALL_CONSTRAINT   \
-                                    : paravirt_type(op),               \
-                                      paravirt_clobber(clbr),          \
-                                      ##__VA_ARGS__                    \
-                                    : "memory", "cc" extra_clbr);      \
-                       __ret = (rettype)(__eax & PVOP_RETMASK(rettype));       \
-               }                                                       \
-               __ret;                                                  \
+               asm volatile(paravirt_alt(PARAVIRT_CALL)                \
+                            : call_clbr, ASM_CALL_CONSTRAINT           \
+                            : paravirt_type(op),                       \
+                              paravirt_clobber(clbr),                  \
+                              ##__VA_ARGS__                            \
+                            : "memory", "cc" extra_clbr);              \
+               ret;                                                    \
        })
 
-#define __PVOP_CALL(rettype, op, pre, post, ...)                       \
-       ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,        \
-                     EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
-
-#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...)                 \
-       ____PVOP_CALL(rettype, op.func, CLBR_RET_REG,                   \
-                     PVOP_CALLEE_CLOBBERS, ,                           \
-                     pre, post, ##__VA_ARGS__)
-
-
-#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...)        \
+#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr,         \
+                         extra_clbr, ...)                              \
        ({                                                              \
-               PVOP_VCALL_ARGS;                                        \
+               PVOP_CALL_ARGS;                                         \
                PVOP_TEST_NULL(op);                                     \
-               asm volatile(pre                                        \
-                            paravirt_alt(PARAVIRT_CALL)                \
-                            post                                       \
+               asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),   \
+                                        alt, cond)                     \
                             : call_clbr, ASM_CALL_CONSTRAINT           \
                             : paravirt_type(op),                       \
                               paravirt_clobber(clbr),                  \
                               ##__VA_ARGS__                            \
                             : "memory", "cc" extra_clbr);              \
+               ret;                                                    \
        })
 
-#define __PVOP_VCALL(op, pre, post, ...)                               \
-       ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS,               \
-                      VEXTRA_CLOBBERS,                                 \
-                      pre, post, ##__VA_ARGS__)
+#define __PVOP_CALL(rettype, op, ...)                                  \
+       ____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,               \
+                     PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...)                   \
+       ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+                         PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS,           \
+                         ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, ...)                            \
+       ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,      \
+                     PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...)             \
+       ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond,     \
+                         CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
+#define __PVOP_VCALL(op, ...)                                          \
+       (void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,        \
+                      VEXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_VCALL(op, alt, cond, ...)                           \
+       (void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY,              \
+                               PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS,   \
+                               ##__VA_ARGS__)
 
-#define __PVOP_VCALLEESAVE(op, pre, post, ...)                         \
-       ____PVOP_VCALL(op.func, CLBR_RET_REG,                           \
-                     PVOP_VCALLEE_CLOBBERS, ,                          \
-                     pre, post, ##__VA_ARGS__)
+#define __PVOP_VCALLEESAVE(op, ...)                                    \
+       (void)____PVOP_CALL(, op.func, CLBR_RET_REG,                    \
+                           PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...)                     \
+       (void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG,     \
+                               PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
 #define PVOP_CALL0(rettype, op)                                                \
-       __PVOP_CALL(rettype, op, "", "")
+       __PVOP_CALL(rettype, op)
 #define PVOP_VCALL0(op)                                                        \
-       __PVOP_VCALL(op, "", "")
+       __PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond)                         \
+       __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond)                                 \
+       __PVOP_ALT_VCALL(op, alt, cond)
 
 #define PVOP_CALLEE0(rettype, op)                                      \
-       __PVOP_CALLEESAVE(rettype, op, "", "")
+       __PVOP_CALLEESAVE(rettype, op)
 #define PVOP_VCALLEE0(op)                                              \
-       __PVOP_VCALLEESAVE(op, "", "")
+       __PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond)                       \
+       __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond)                               \
+       __PVOP_ALT_VCALLEESAVE(op, alt, cond)
 
 
 #define PVOP_CALL1(rettype, op, arg1)                                  \
-       __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+       __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALL1(op, arg1)                                          \
-       __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+       __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond)                           \
+       __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
 
 #define PVOP_CALLEE1(rettype, op, arg1)                                        \
-       __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+       __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALLEE1(op, arg1)                                                \
-       __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+       __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond)                 \
+       __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond)                         \
+       __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1))
 
 
 #define PVOP_CALL2(rettype, op, arg1, arg2)                            \
-       __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),          \
-                   PVOP_CALL_ARG2(arg2))
+       __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
 #define PVOP_VCALL2(op, arg1, arg2)                                    \
-       __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),                  \
-                    PVOP_CALL_ARG2(arg2))
-
-#define PVOP_CALLEE2(rettype, op, arg1, arg2)                          \
-       __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1),    \
-                         PVOP_CALL_ARG2(arg2))
-#define PVOP_VCALLEE2(op, arg1, arg2)                                  \
-       __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1),            \
-                          PVOP_CALL_ARG2(arg2))
-
+       __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
 
 #define PVOP_CALL3(rettype, op, arg1, arg2, arg3)                      \
-       __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),          \
+       __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1),                  \
                    PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
 #define PVOP_VCALL3(op, arg1, arg2, arg3)                              \
-       __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),                  \
+       __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1),                          \
                     PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
 
-/* This is the only difference in x86_64. We can make it much simpler */
-#ifdef CONFIG_X86_32
 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)                        \
        __PVOP_CALL(rettype, op,                                        \
-                   "push %[_arg4];", "lea 4(%%esp),%%esp;",            \
-                   PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),         \
-                   PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
-#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)                                \
-       __PVOP_VCALL(op,                                                \
-                   "push %[_arg4];", "lea 4(%%esp),%%esp;",            \
-                   "0" ((u32)(arg1)), "1" ((u32)(arg2)),               \
-                   "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
-#else
-#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)                        \
-       __PVOP_CALL(rettype, op, "", "",                                \
                    PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),         \
                    PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)                                \
-       __PVOP_VCALL(op, "", "",                                        \
-                    PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),        \
+       __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),    \
                     PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-#endif
 
 /* Lazy mode for batching updates / context switch */
 enum paravirt_lazy_mode {
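
For reference, PVOP_RETVAL() above folds the old PVOP_RETMASK() and the separate
return-value handling into one expression that masks the raw register value down to the
width of the declared return type.  A minimal userspace sketch of that masking idea
(illustrative only: RETVAL_MASK and the sample value are made up here, and a 64-bit
build is assumed):

#include <stdio.h>

/* Mimics the size-based masking done by PVOP_RETVAL(), outside the kernel. */
#define RETVAL_MASK(rettype, raw)                                \
	({                                                       \
		unsigned long __mask = ~0UL;                     \
		switch (sizeof(rettype)) {                       \
		case 1: __mask =       0xffUL; break;            \
		case 2: __mask =     0xffffUL; break;            \
		case 4: __mask = 0xffffffffUL; break;            \
		default: break;                                  \
		}                                                \
		(rettype)(__mask & (raw));                       \
	})

int main(void)
{
	unsigned long raw = 0xdeadbeefcafef00dUL;	/* pretend this came back in %rax */

	printf("u8:  %#lx\n", (unsigned long)RETVAL_MASK(unsigned char, raw));	/* 0xd */
	printf("u32: %#lx\n", (unsigned long)RETVAL_MASK(unsigned int, raw));	/* 0xcafef00d */
	return 0;
}
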
index f1b9ed5..242e942 100644 (file)
@@ -439,6 +439,9 @@ struct fixed_percpu_data {
         * GCC hardcodes the stack canary as %gs:40.  Since the
         * irq_stack is the object at %gs:0, we reserve the bottom
         * 48 bytes of the irq stack for the canary.
+        *
+        * Once we are willing to require -mstack-protector-guard-symbol=
+        * support for x86_64 stackprotector, we can get rid of this.
         */
        char            gs_base[40];
        unsigned long   stack_canary;
@@ -460,17 +463,7 @@ extern asmlinkage void ignore_sysret(void);
 void current_save_fsgs(void);
 #else  /* X86_64 */
 #ifdef CONFIG_STACKPROTECTOR
-/*
- * Make sure stack canary segment base is cached-aligned:
- *   "For Intel Atom processors, avoid non zero segment base address
- *    that is not aligned to cache line boundary at all cost."
- * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
- */
-struct stack_canary {
-       char __pad[20];         /* canary at %gs:20 */
-       unsigned long canary;
-};
-DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
 #endif
 DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
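
As the comment added to fixed_percpu_data above notes, gcc hardcodes the x86_64 stack
canary at %gs:40, which is why gs_base is padded to 40 bytes.  A standalone check of that
layout assumption (illustrative only: fixed_percpu_data_demo is a made-up mock, not the
real structure):

#include <stddef.h>

/* Mock of the fixed_percpu_data layout; the real struct lives in asm/processor.h. */
struct fixed_percpu_data_demo {
	char		gs_base[40];
	unsigned long	stack_canary;
};

/* gcc's x86_64 stackprotector code expects the canary at %gs:40. */
_Static_assert(offsetof(struct fixed_percpu_data_demo, stack_canary) == 40,
	       "stack canary must sit at offset 40");

int main(void)
{
	return 0;
}
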
index 409f661..b94f615 100644 (file)
@@ -37,7 +37,10 @@ struct pt_regs {
        unsigned short __esh;
        unsigned short fs;
        unsigned short __fsh;
-       /* On interrupt, gs and __gsh store the vector number. */
+       /*
+        * On interrupt, gs and __gsh store the vector number.  They never
+        * store gs any more.
+        */
        unsigned short gs;
        unsigned short __gsh;
        /* On interrupt, this is the error code. */
index 7fdd4fa..7204402 100644 (file)
@@ -95,7 +95,7 @@
  *
  *  26 - ESPFIX small SS
  *  27 - per-cpu                       [ offset to per-cpu data area ]
- *  28 - stack_canary-20               [ for stack protector ]         <=== cacheline #8
+ *  28 - unused
  *  29 - unused
  *  30 - unused
  *  31 - TSS for double fault handler
 
 #define GDT_ENTRY_ESPFIX_SS            26
 #define GDT_ENTRY_PERCPU               27
-#define GDT_ENTRY_STACK_CANARY         28
 
 #define GDT_ENTRY_DOUBLEFAULT_TSS      31
 
 # define __KERNEL_PERCPU               0
 #endif
 
-#ifdef CONFIG_STACKPROTECTOR
-# define __KERNEL_STACK_CANARY         (GDT_ENTRY_STACK_CANARY*8)
-#else
-# define __KERNEL_STACK_CANARY         0
-#endif
-
 #else /* 64-bit: */
 
 #include <asm/cache.h>
@@ -364,22 +357,15 @@ static inline void __loadsegment_fs(unsigned short value)
        asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
 /*
- * x86-32 user GS accessors:
+ * x86-32 user GS accessors.  This is ugly and could do with some cleaning up.
  */
 #ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_32_LAZY_GS
-#  define get_user_gs(regs)            (u16)({ unsigned long v; savesegment(gs, v); v; })
-#  define set_user_gs(regs, v)         loadsegment(gs, (unsigned long)(v))
-#  define task_user_gs(tsk)            ((tsk)->thread.gs)
-#  define lazy_save_gs(v)              savesegment(gs, (v))
-#  define lazy_load_gs(v)              loadsegment(gs, (v))
-# else /* X86_32_LAZY_GS */
-#  define get_user_gs(regs)            (u16)((regs)->gs)
-#  define set_user_gs(regs, v)         do { (regs)->gs = (v); } while (0)
-#  define task_user_gs(tsk)            (task_pt_regs(tsk)->gs)
-#  define lazy_save_gs(v)              do { } while (0)
-#  define lazy_load_gs(v)              do { } while (0)
-# endif        /* X86_32_LAZY_GS */
+# define get_user_gs(regs)             (u16)({ unsigned long v; savesegment(gs, v); v; })
+# define set_user_gs(regs, v)          loadsegment(gs, (unsigned long)(v))
+# define task_user_gs(tsk)             ((tsk)->thread.gs)
+# define lazy_save_gs(v)               savesegment(gs, (v))
+# define lazy_load_gs(v)               loadsegment(gs, (v))
+# define load_gs_index(v)              loadsegment(gs, (v))
 #endif /* X86_32 */
 
 #endif /* !__ASSEMBLY__ */
index 0bc9b08..d17b398 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <asm/nops.h>
 #include <asm/cpufeatures.h>
+#include <asm/alternative.h>
 
 /* "Raw" instruction opcodes */
 #define __ASM_CLAC     ".byte 0x0f,0x01,0xca"
@@ -18,8 +19,6 @@
 
 #ifdef __ASSEMBLY__
 
-#include <asm/alternative-asm.h>
-
 #ifdef CONFIG_X86_SMAP
 
 #define ASM_CLAC \
@@ -37,8 +36,6 @@
 
 #else /* __ASSEMBLY__ */
 
-#include <asm/alternative.h>
-
 #ifdef CONFIG_X86_SMAP
 
 static __always_inline void clac(void)
index 7fb482f..b6ffe58 100644 (file)
@@ -5,30 +5,23 @@
  * Stack protector works by putting predefined pattern at the start of
  * the stack frame and verifying that it hasn't been overwritten when
  * returning from the function.  The pattern is called stack canary
- * and unfortunately gcc requires it to be at a fixed offset from %gs.
- * On x86_64, the offset is 40 bytes and on x86_32 20 bytes.  x86_64
- * and x86_32 use segment registers differently and thus handles this
- * requirement differently.
+ * and unfortunately gcc historically required it to be at a fixed offset
+ * from the percpu segment base.  On x86_64, the offset is 40 bytes.
  *
- * On x86_64, %gs is shared by percpu area and stack canary.  All
- * percpu symbols are zero based and %gs points to the base of percpu
- * area.  The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at offset 40.  Userland
- * %gs is always saved and restored on kernel entry and exit using
- * swapgs, so stack protector doesn't add any complexity there.
+ * The same segment is shared by percpu area and stack canary.  On
+ * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
+ * base of percpu area.  The first occupant of the percpu area is always
+ * fixed_percpu_data which contains stack_canary at the appropriate
+ * offset.  On x86_32, the stack canary is just a regular percpu
+ * variable.
  *
- * On x86_32, it's slightly more complicated.  As in x86_64, %gs is
- * used for userland TLS.  Unfortunately, some processors are much
- * slower at loading segment registers with different value when
- * entering and leaving the kernel, so the kernel uses %fs for percpu
- * area and manages %gs lazily so that %gs is switched only when
- * necessary, usually during task switch.
+ * Putting percpu data in %fs on 32-bit is a minor optimization compared to
+ * using %gs.  Since 32-bit userspace normally has %fs == 0, we are likely
+ * to load 0 into %fs on exit to usermode, whereas with percpu data in
+ * %gs, we are likely to load a non-null %gs on return to user mode.
  *
- * As gcc requires the stack canary at %gs:20, %gs can't be managed
- * lazily if stack protector is enabled, so the kernel saves and
- * restores userland %gs on kernel entry and exit.  This behavior is
- * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
- * system.h to hide the details.
+ * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
+ * support, we can remove some of this complexity.
  */
 
 #ifndef _ASM_STACKPROTECTOR_H
 #include <linux/random.h>
 #include <linux/sched.h>
 
-/*
- * 24 byte read-only segment initializer for stack canary.  Linker
- * can't handle the address bit shifting.  Address will be set in
- * head_32 for boot CPU and setup_per_cpu_areas() for others.
- */
-#define GDT_STACK_CANARY_INIT                                          \
-       [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
-
 /*
  * Initialize the stackprotector canary value.
  *
@@ -86,7 +71,7 @@ static __always_inline void boot_init_stack_canary(void)
 #ifdef CONFIG_X86_64
        this_cpu_write(fixed_percpu_data.stack_canary, canary);
 #else
-       this_cpu_write(stack_canary.canary, canary);
+       this_cpu_write(__stack_chk_guard, canary);
 #endif
 }
 
@@ -95,48 +80,16 @@ static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_64
        per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
 #else
-       per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
-#endif
-}
-
-static inline void setup_stack_canary_segment(int cpu)
-{
-#ifdef CONFIG_X86_32
-       unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-       struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
-       struct desc_struct desc;
-
-       desc = gdt_table[GDT_ENTRY_STACK_CANARY];
-       set_desc_base(&desc, canary);
-       write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
-#endif
-}
-
-static inline void load_stack_canary_segment(void)
-{
-#ifdef CONFIG_X86_32
-       asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
+       per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
 #endif
 }
 
 #else  /* STACKPROTECTOR */
 
-#define GDT_STACK_CANARY_INIT
-
 /* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
 
-static inline void setup_stack_canary_segment(int cpu)
-{ }
-
 static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
 { }
 
-static inline void load_stack_canary_segment(void)
-{
-#ifdef CONFIG_X86_32
-       asm volatile ("mov %0, %%gs" : : "r" (0));
-#endif
-}
-
 #endif /* STACKPROTECTOR */
 #endif /* _ASM_STACKPROTECTOR_H */
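
With the per-cpu GDT segment gone, __stack_chk_guard is an ordinary variable that the
compiler-generated prologue/epilogue reads directly.  A hand-written userspace analogue
of what a protected function does (illustrative only: the demo_* names are made up and
this is not the code gcc actually emits):

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the per-cpu __stack_chk_guard variable. */
static unsigned long demo_stack_chk_guard = 0x3ac5f0d1UL;

static void demo_stack_chk_fail(void)
{
	fprintf(stderr, "stack smashing detected\n");
	abort();
}

static void demo_protected_function(const char *src)
{
	unsigned long canary = demo_stack_chk_guard;	/* prologue: copy guard into the frame */
	char buf[16];

	snprintf(buf, sizeof(buf), "%s", src);		/* function body */

	if (canary != demo_stack_chk_guard)		/* epilogue: verify before returning */
		demo_stack_chk_fail();
}

int main(void)
{
	demo_protected_function("hello");
	return 0;
}
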
index fdbd9d7..7b132d0 100644 (file)
 /* image of the saved processor state */
 struct saved_context {
        /*
-        * On x86_32, all segment registers, with the possible exception of
-        * gs, are saved at kernel entry in pt_regs.
+        * On x86_32, all segment registers except gs are saved at kernel
+        * entry in pt_regs.
         */
-#ifdef CONFIG_X86_32_LAZY_GS
        u16 gs;
-#endif
        unsigned long cr0, cr2, cr3, cr4;
        u64 misc_enable;
        bool misc_enable_saved;
index 2ddf083..0704c2a 100644 (file)
@@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o                                       := n
 KCSAN_SANITIZE := n
 
 OBJECT_FILES_NON_STANDARD_test_nx.o                    := y
-OBJECT_FILES_NON_STANDARD_paravirt_patch.o             := y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o             := y
@@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB)                += amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)                += kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT)         += paravirt.o paravirt_patch.o
+obj-$(CONFIG_PARAVIRT)         += paravirt.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
index 8d778e4..f902f28 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
+#include <asm/paravirt.h>
 
 int __read_mostly alternatives_patched;
 
@@ -388,21 +389,31 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
         */
        for (a = start; a < end; a++) {
                int insn_buff_sz = 0;
+               /* Mask away "NOT" flag bit for feature to test. */
+               u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
 
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
                BUG_ON(a->instrlen > sizeof(insn_buff));
-               BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-               if (!boot_cpu_has(a->cpuid)) {
+               BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
+
+               /*
+                * Patch if either:
+                * - feature is present
+                * - feature is not present and ALTINSTR_FLAG_INV is set,
+                *   which means: patch if the feature is *NOT* present.
+                */
+               if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
                        if (a->padlen > 1)
                                optimize_nops(a, instr);
 
                        continue;
                }
 
-               DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
-                       a->cpuid >> 5,
-                       a->cpuid & 0x1f,
+               DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+                       (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+                       feature >> 5,
+                       feature & 0x1f,
                        instr, instr, a->instrlen,
                        replacement, a->replacementlen, a->padlen);
 
@@ -605,7 +616,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insn_buff, p->instr, p->len);
-               used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
+               used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
 
                BUG_ON(used > p->len);
 
@@ -723,6 +734,33 @@ void __init alternative_instructions(void)
         * patching.
         */
 
+       /*
+        * Paravirt patching and alternative patching can be combined to
+        * replace a function call with a short direct code sequence (e.g.
+        * by setting a constant return value instead of doing that in an
+        * external function).
+        * In order to make this work the following sequence is required:
+        * 1. set (artificial) features depending on used paravirt
+        *    functions which can later influence alternative patching
+        * 2. apply paravirt patching (generally replacing an indirect
+        *    function call with a direct one)
+        * 3. apply alternative patching (e.g. replacing a direct function
+        *    call with a custom code sequence)
+        * Doing paravirt patching after alternative patching would clobber
+        * the optimization of the custom code with a function call again.
+        */
+       paravirt_set_cap();
+
+       /*
+        * First patch paravirt functions, such that we overwrite the indirect
+        * call with the direct call.
+        */
+       apply_paravirt(__parainstructions, __parainstructions_end);
+
+       /*
+        * Then patch alternatives, such that those paravirt calls that are in
+        * alternatives can be overwritten by their immediate fragments.
+        */
        apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 #ifdef CONFIG_SMP
@@ -741,8 +779,6 @@ void __init alternative_instructions(void)
        }
 #endif
 
-       apply_paravirt(__parainstructions, __parainstructions_end);
-
        restart_nmi();
        alternatives_patched = 1;
 }
@@ -1274,15 +1310,15 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
                               const void *opcode, size_t len, const void *emulate)
 {
        struct insn insn;
+       int ret;
 
        memcpy((void *)tp->text, opcode, len);
        if (!emulate)
                emulate = opcode;
 
-       kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
-       insn_get_length(&insn);
+       ret = insn_decode_kernel(&insn, emulate);
 
-       BUG_ON(!insn_complete(&insn));
+       BUG_ON(ret < 0);
        BUG_ON(len != insn.length);
 
        tp->rel_addr = addr - (void *)_stext;
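
The new !boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV) test in
apply_alternatives() above decides whether a site is skipped or patched.  The same
predicate pulled out into plain C so the four cases can be printed (illustrative only:
should_skip() is a made-up helper, not a kernel function):

#include <stdbool.h>
#include <stdio.h>

/*
 * Skip patching when the two negations agree, i.e. patch when the feature
 * is present (normal case) or absent with the "inverted" flag set.
 */
static bool should_skip(bool cpu_has_feature, bool inverted)
{
	return !cpu_has_feature == !inverted;
}

int main(void)
{
	for (int has = 0; has <= 1; has++)
		for (int inv = 0; inv <= 1; inv++)
			printf("feature=%d inverted=%d -> %s\n", has, inv,
			       should_skip(has, inv) ? "skip" : "patch");
	return 0;
}
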
index 60b9f42..ecd3fd6 100644 (file)
@@ -61,13 +61,6 @@ static void __used common(void)
        OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
 #endif
 
-#ifdef CONFIG_PARAVIRT_XXL
-       BLANK();
-       OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
-       OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
-       OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
-
 #ifdef CONFIG_XEN
        BLANK();
        OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
index 6e043f2..2b411cd 100644 (file)
@@ -53,11 +53,6 @@ void foo(void)
               offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
               offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
-#ifdef CONFIG_STACKPROTECTOR
-       BLANK();
-       OFFSET(stack_canary_offset, stack_canary, canary);
-#endif
-
        BLANK();
        DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map));
 }
index ab640ab..23cb9d6 100644 (file)
@@ -161,7 +161,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 
        [GDT_ENTRY_ESPFIX_SS]           = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
        [GDT_ENTRY_PERCPU]              = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
-       GDT_STACK_CANARY_INIT
 #endif
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -599,7 +598,6 @@ void load_percpu_segment(int cpu)
        __loadsegment_simple(gs, 0);
        wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
 #endif
-       load_stack_canary_segment();
 }
 
 #ifdef CONFIG_X86_32
@@ -1796,7 +1794,8 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
 EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 
 #ifdef CONFIG_STACKPROTECTOR
-DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
 #endif
 
 #endif /* CONFIG_X86_64 */
index 83df991..abdd2e4 100644 (file)
@@ -218,15 +218,15 @@ static struct severity {
 static bool is_copy_from_user(struct pt_regs *regs)
 {
        u8 insn_buf[MAX_INSN_SIZE];
-       struct insn insn;
        unsigned long addr;
+       struct insn insn;
+       int ret;
 
        if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
                return false;
 
-       kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
-       insn_get_opcode(&insn);
-       if (!insn.opcode.got)
+       ret = insn_decode_kernel(&insn, insn_buf);
+       if (ret < 0)
                return false;
 
        switch (insn.opcode.value) {
@@ -234,10 +234,6 @@ static bool is_copy_from_user(struct pt_regs *regs)
        case 0x8A: case 0x8B:
        /* MOVZ mem,reg */
        case 0xB60F: case 0xB70F:
-               insn_get_modrm(&insn);
-               insn_get_sib(&insn);
-               if (!insn.modrm.got || !insn.sib.got)
-                       return false;
                addr = (unsigned long)insn_get_addr_ref(&insn, regs);
                break;
        /* REP MOVS */
index c6ede3b..84fb8e3 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/clocksource.h>
 #include <linux/cpu.h>
 #include <linux/reboot.h>
+#include <linux/static_call.h>
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
@@ -336,11 +337,11 @@ static void __init vmware_paravirt_ops_setup(void)
        vmware_cyc2ns_setup();
 
        if (vmw_sched_clock)
-               pv_ops.time.sched_clock = vmware_sched_clock;
+               paravirt_set_sched_clock(vmware_sched_clock);
 
        if (vmware_is_stealclock_available()) {
                has_steal_clock = true;
-               pv_ops.time.steal_clock = vmware_steal_clock;
+               static_call_update(pv_steal_clock, vmware_steal_clock);
 
                /* We use reboot notifier only to disable steal clock */
                register_reboot_notifier(&vmware_pv_reboot_nb);
index 759d392..d1d49e3 100644 (file)
@@ -100,9 +100,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
                .ss             = __KERNEL_DS,
                .ds             = __USER_DS,
                .fs             = __KERNEL_PERCPU,
-#ifndef CONFIG_X86_32_LAZY_GS
-               .gs             = __KERNEL_STACK_CANARY,
-#endif
+               .gs             = 0,
 
                .__cr3          = __pa_nodebug(swapper_pg_dir),
        },
index 7ed84c2..67f5904 100644 (file)
@@ -318,8 +318,8 @@ SYM_FUNC_START(startup_32_smp)
        movl $(__KERNEL_PERCPU), %eax
        movl %eax,%fs                   # set this cpu's percpu
 
-       movl $(__KERNEL_STACK_CANARY),%eax
-       movl %eax,%gs
+       xorl %eax,%eax
+       movl %eax,%gs                   # clear possible garbage in %gs
 
        xorl %eax,%eax                  # Clear LDT
        lldt %ax
@@ -339,20 +339,6 @@ SYM_FUNC_END(startup_32_smp)
  */
 __INIT
 setup_once:
-#ifdef CONFIG_STACKPROTECTOR
-       /*
-        * Configure the stack canary. The linker can't handle this by
-        * relocation.  Manually set base address in stack canary
-        * segment descriptor.
-        */
-       movl $gdt_page,%eax
-       movl $stack_canary,%ecx
-       movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
-       shrl $16, %ecx
-       movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
-       movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
-#endif
-
        andl $0,setup_once_ref  /* Once is enough, thanks */
        ret
 
index df776cd..1319ff4 100644 (file)
@@ -139,6 +139,8 @@ NOKPROBE_SYMBOL(synthesize_relcall);
 int can_boost(struct insn *insn, void *addr)
 {
        kprobe_opcode_t opcode;
+       insn_byte_t prefix;
+       int i;
 
        if (search_exception_tables((unsigned long)addr))
                return 0;       /* Page fault may occur on this address. */
@@ -151,35 +153,39 @@ int can_boost(struct insn *insn, void *addr)
        if (insn->opcode.nbytes != 1)
                return 0;
 
-       /* Can't boost Address-size override prefix */
-       if (unlikely(inat_is_address_size_prefix(insn->attr)))
-               return 0;
+       for_each_insn_prefix(insn, i, prefix) {
+               insn_attr_t attr;
+
+               attr = inat_get_opcode_attribute(prefix);
+               /* Can't boost Address-size override prefix and CS override prefix */
+               if (prefix == 0x2e || inat_is_address_size_prefix(attr))
+                       return 0;
+       }
 
        opcode = insn->opcode.bytes[0];
 
-       switch (opcode & 0xf0) {
-       case 0x60:
-               /* can't boost "bound" */
-               return (opcode != 0x62);
-       case 0x70:
-               return 0; /* can't boost conditional jump */
-       case 0x90:
-               return opcode != 0x9a;  /* can't boost call far */
-       case 0xc0:
-               /* can't boost software-interruptions */
-               return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
-       case 0xd0:
-               /* can boost AA* and XLAT */
-               return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
-       case 0xe0:
-               /* can boost in/out and absolute jmps */
-               return ((opcode & 0x04) || opcode == 0xea);
-       case 0xf0:
-               /* clear and set flags are boostable */
-               return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
+       switch (opcode) {
+       case 0x62:              /* bound */
+       case 0x70 ... 0x7f:     /* Conditional jumps */
+       case 0x9a:              /* Call far */
+       case 0xc0 ... 0xc1:     /* Grp2 */
+       case 0xcc ... 0xce:     /* software exceptions */
+       case 0xd0 ... 0xd3:     /* Grp2 */
+       case 0xd6:              /* (UD) */
+       case 0xd8 ... 0xdf:     /* ESC */
+       case 0xe0 ... 0xe3:     /* LOOP*, JCXZ */
+       case 0xe8 ... 0xe9:     /* near Call, JMP */
+       case 0xeb:              /* Short JMP */
+       case 0xf0 ... 0xf4:     /* LOCK/REP, HLT */
+       case 0xf6 ... 0xf7:     /* Grp3 */
+       case 0xfe:              /* Grp4 */
+               /* ... are not boostable */
+               return 0;
+       case 0xff:              /* Grp5 */
+               /* Only indirect jmp is boostable */
+               return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
        default:
-               /* CS override prefix and call are not boostable */
-               return (opcode != 0x2e && opcode != 0x9a);
+               return 1;
        }
 }
 
@@ -265,6 +271,8 @@ static int can_probe(unsigned long paddr)
        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr) {
+               int ret;
+
                /*
                 * Check if the instruction has been modified by another
                 * kprobe, in which case we replace the breakpoint by the
@@ -276,8 +284,10 @@ static int can_probe(unsigned long paddr)
                __addr = recover_probed_instruction(buf, addr);
                if (!__addr)
                        return 0;
-               kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
-               insn_get_length(&insn);
+
+               ret = insn_decode_kernel(&insn, (void *)__addr);
+               if (ret < 0)
+                       return 0;
 
                /*
                 * Another debugging subsystem might insert this breakpoint.
@@ -301,8 +311,8 @@ static int can_probe(unsigned long paddr)
 int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 {
        kprobe_opcode_t buf[MAX_INSN_SIZE];
-       unsigned long recovered_insn =
-               recover_probed_instruction(buf, (unsigned long)src);
+       unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
+       int ret;
 
        if (!recovered_insn || !insn)
                return 0;
@@ -312,8 +322,9 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
                        MAX_INSN_SIZE))
                return 0;
 
-       kernel_insn_init(insn, dest, MAX_INSN_SIZE);
-       insn_get_length(insn);
+       ret = insn_decode_kernel(insn, dest);
+       if (ret < 0)
+               return 0;
 
        /* We can not probe force emulate prefixed instruction */
        if (insn_has_emulate_prefix(insn))
@@ -357,13 +368,14 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
        return insn->length;
 }
 
-/* Prepare reljump right after instruction to boost */
-static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
-                         struct insn *insn)
+/* Prepare reljump or int3 right after instruction */
+static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
+                             struct insn *insn)
 {
        int len = insn->length;
 
-       if (can_boost(insn, p->addr) &&
+       if (!IS_ENABLED(CONFIG_PREEMPTION) &&
+           !p->post_handler && can_boost(insn, p->addr) &&
            MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
                /*
                 * These instructions can be executed directly if it
@@ -374,7 +386,12 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
                len += JMP32_INSN_SIZE;
                p->ainsn.boostable = 1;
        } else {
-               p->ainsn.boostable = 0;
+               /* Otherwise, put an int3 for trapping singlestep */
+               if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
+                       return -ENOSPC;
+
+               buf[len] = INT3_INSN_OPCODE;
+               len += INT3_INSN_SIZE;
        }
 
        return len;
@@ -411,86 +428,290 @@ void free_insn_page(void *page)
        module_memfree(page);
 }
 
-static void set_resume_flags(struct kprobe *p, struct insn *insn)
+/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
+
+static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs)
+{
+       switch (p->ainsn.opcode) {
+       case 0xfa:      /* cli */
+               regs->flags &= ~(X86_EFLAGS_IF);
+               break;
+       case 0xfb:      /* sti */
+               regs->flags |= X86_EFLAGS_IF;
+               break;
+       case 0x9c:      /* pushf */
+               int3_emulate_push(regs, regs->flags);
+               break;
+       case 0x9d:      /* popf */
+               regs->flags = int3_emulate_pop(regs);
+               break;
+       }
+       regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+}
+NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers);
+
+static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
+{
+       int3_emulate_ret(regs);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_ret);
+
+static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
+{
+       unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+
+       func += p->ainsn.rel32;
+       int3_emulate_call(regs, func);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_call);
+
+static nokprobe_inline
+void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
+{
+       unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+
+       if (cond)
+               ip += p->ainsn.rel32;
+       int3_emulate_jmp(regs, ip);
+}
+
+static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
+{
+       __kprobe_emulate_jmp(p, regs, true);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_jmp);
+
+static const unsigned long jcc_mask[6] = {
+       [0] = X86_EFLAGS_OF,
+       [1] = X86_EFLAGS_CF,
+       [2] = X86_EFLAGS_ZF,
+       [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
+       [4] = X86_EFLAGS_SF,
+       [5] = X86_EFLAGS_PF,
+};
+
+static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
+{
+       bool invert = p->ainsn.jcc.type & 1;
+       bool match;
+
+       if (p->ainsn.jcc.type < 0xc) {
+               match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
+       } else {
+               match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+                       ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+               if (p->ainsn.jcc.type >= 0xe)
+                       match = match || (regs->flags & X86_EFLAGS_ZF);
+       }
+       __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
+}
+NOKPROBE_SYMBOL(kprobe_emulate_jcc);
+
+static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
+{
+       bool match;
+
+       if (p->ainsn.loop.type != 3) {  /* LOOP* */
+               if (p->ainsn.loop.asize == 32)
+                       match = ((*(u32 *)&regs->cx)--) != 0;
+#ifdef CONFIG_X86_64
+               else if (p->ainsn.loop.asize == 64)
+                       match = ((*(u64 *)&regs->cx)--) != 0;
+#endif
+               else
+                       match = ((*(u16 *)&regs->cx)--) != 0;
+       } else {                        /* JCXZ */
+               if (p->ainsn.loop.asize == 32)
+                       match = *(u32 *)(&regs->cx) == 0;
+#ifdef CONFIG_X86_64
+               else if (p->ainsn.loop.asize == 64)
+                       match = *(u64 *)(&regs->cx) == 0;
+#endif
+               else
+                       match = *(u16 *)(&regs->cx) == 0;
+       }
+
+       if (p->ainsn.loop.type == 0)    /* LOOPNE */
+               match = match && !(regs->flags & X86_EFLAGS_ZF);
+       else if (p->ainsn.loop.type == 1)       /* LOOPE */
+               match = match && (regs->flags & X86_EFLAGS_ZF);
+
+       __kprobe_emulate_jmp(p, regs, match);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_loop);
+
+static const int addrmode_regoffs[] = {
+       offsetof(struct pt_regs, ax),
+       offsetof(struct pt_regs, cx),
+       offsetof(struct pt_regs, dx),
+       offsetof(struct pt_regs, bx),
+       offsetof(struct pt_regs, sp),
+       offsetof(struct pt_regs, bp),
+       offsetof(struct pt_regs, si),
+       offsetof(struct pt_regs, di),
+#ifdef CONFIG_X86_64
+       offsetof(struct pt_regs, r8),
+       offsetof(struct pt_regs, r9),
+       offsetof(struct pt_regs, r10),
+       offsetof(struct pt_regs, r11),
+       offsetof(struct pt_regs, r12),
+       offsetof(struct pt_regs, r13),
+       offsetof(struct pt_regs, r14),
+       offsetof(struct pt_regs, r15),
+#endif
+};
+
+static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
+{
+       unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
+
+       int3_emulate_call(regs, regs_get_register(regs, offs));
+}
+NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
+
+static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs)
+{
+       unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
+
+       int3_emulate_jmp(regs, regs_get_register(regs, offs));
+}
+NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect);
+
+static int prepare_emulation(struct kprobe *p, struct insn *insn)
 {
        insn_byte_t opcode = insn->opcode.bytes[0];
 
        switch (opcode) {
        case 0xfa:              /* cli */
        case 0xfb:              /* sti */
+       case 0x9c:              /* pushfl */
        case 0x9d:              /* popf/popfd */
-               /* Check whether the instruction modifies Interrupt Flag or not */
-               p->ainsn.if_modifier = 1;
-               break;
-       case 0x9c:      /* pushfl */
-               p->ainsn.is_pushf = 1;
+               /*
+                * IF modifiers must be emulated, since executing them out of
+                * line could enable interrupts during int3 single-stepping.
+                */
+               p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
+               p->ainsn.opcode = opcode;
                break;
-       case 0xcf:      /* iret */
-               p->ainsn.if_modifier = 1;
-               fallthrough;
        case 0xc2:      /* ret/lret */
        case 0xc3:
        case 0xca:
        case 0xcb:
-       case 0xea:      /* jmp absolute -- ip is correct */
-               /* ip is already adjusted, no more changes required */
-               p->ainsn.is_abs_ip = 1;
-               /* Without resume jump, this is boostable */
-               p->ainsn.boostable = 1;
+               p->ainsn.emulate_op = kprobe_emulate_ret;
                break;
-       case 0xe8:      /* call relative - Fix return addr */
-               p->ainsn.is_call = 1;
+       case 0x9a:      /* far call absolute -- segment is not supported */
+       case 0xea:      /* far jmp absolute -- segment is not supported */
+       case 0xcc:      /* int3 */
+       case 0xcf:      /* iret -- in-kernel IRET is not supported */
+               return -EOPNOTSUPP;
                break;
-#ifdef CONFIG_X86_32
-       case 0x9a:      /* call absolute -- same as call absolute, indirect */
-               p->ainsn.is_call = 1;
-               p->ainsn.is_abs_ip = 1;
+       case 0xe8:      /* near call relative */
+               p->ainsn.emulate_op = kprobe_emulate_call;
+               if (insn->immediate.nbytes == 2)
+                       p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
+               else
+                       p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
                break;
-#endif
-       case 0xff:
+       case 0xeb:      /* short jump relative */
+       case 0xe9:      /* near jump relative */
+               p->ainsn.emulate_op = kprobe_emulate_jmp;
+               if (insn->immediate.nbytes == 1)
+                       p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
+               else if (insn->immediate.nbytes == 2)
+                       p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
+               else
+                       p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
+               break;
+       case 0x70 ... 0x7f:
+               /* 1 byte conditional jump */
+               p->ainsn.emulate_op = kprobe_emulate_jcc;
+               p->ainsn.jcc.type = opcode & 0xf;
+               p->ainsn.rel32 = *(char *)insn->immediate.bytes;
+               break;
+       case 0x0f:
                opcode = insn->opcode.bytes[1];
+               if ((opcode & 0xf0) == 0x80) {
+                       /* 2 bytes Conditional Jump */
+                       p->ainsn.emulate_op = kprobe_emulate_jcc;
+                       p->ainsn.jcc.type = opcode & 0xf;
+                       if (insn->immediate.nbytes == 2)
+                               p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
+                       else
+                               p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
+               } else if (opcode == 0x01 &&
+                          X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
+                          X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
+                       /* VM extensions - not supported */
+                       return -EOPNOTSUPP;
+               }
+               break;
+       case 0xe0:      /* Loop NZ */
+       case 0xe1:      /* Loop */
+       case 0xe2:      /* Loop */
+       case 0xe3:      /* J*CXZ */
+               p->ainsn.emulate_op = kprobe_emulate_loop;
+               p->ainsn.loop.type = opcode & 0x3;
+               p->ainsn.loop.asize = insn->addr_bytes * 8;
+               p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
+               break;
+       case 0xff:
+               /*
+                * Since the 0xff is an extended group opcode, the instruction
+                * is determined by the MOD/RM byte.
+                */
+               opcode = insn->modrm.bytes[0];
                if ((opcode & 0x30) == 0x10) {
-                       /*
-                        * call absolute, indirect
-                        * Fix return addr; ip is correct.
-                        * But this is not boostable
-                        */
-                       p->ainsn.is_call = 1;
-                       p->ainsn.is_abs_ip = 1;
+                       if ((opcode & 0x8) == 0x8)
+                               return -EOPNOTSUPP;     /* far call */
+                       /* call absolute, indirect */
+                       p->ainsn.emulate_op = kprobe_emulate_call_indirect;
+               } else if ((opcode & 0x30) == 0x20) {
+                       if ((opcode & 0x8) == 0x8)
+                               return -EOPNOTSUPP;     /* far jmp */
+                       /* jmp near absolute indirect */
+                       p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
+               } else
                        break;
-               } else if (((opcode & 0x31) == 0x20) ||
-                          ((opcode & 0x31) == 0x21)) {
-                       /*
-                        * jmp near and far, absolute indirect
-                        * ip is correct.
-                        */
-                       p->ainsn.is_abs_ip = 1;
-                       /* Without resume jump, this is boostable */
-                       p->ainsn.boostable = 1;
-               }
+
+               if (insn->addr_bytes != sizeof(unsigned long))
+                       return -EOPNOTSUPP;     /* Don't support different size */
+               if (X86_MODRM_MOD(opcode) != 3)
+                       return -EOPNOTSUPP;     /* TODO: support memory addressing */
+
+               p->ainsn.indirect.reg = X86_MODRM_RM(opcode);
+#ifdef CONFIG_X86_64
+               if (X86_REX_B(insn->rex_prefix.value))
+                       p->ainsn.indirect.reg += 8;
+#endif
+               break;
+       default:
                break;
        }
+       p->ainsn.size = insn->length;
+
+       return 0;
 }
 
 static int arch_copy_kprobe(struct kprobe *p)
 {
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
-       int len;
+       int ret, len;
 
        /* Copy an instruction with recovering if other optprobe modifies it.*/
        len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
        if (!len)
                return -EINVAL;
 
-       /*
-        * __copy_instruction can modify the displacement of the instruction,
-        * but it doesn't affect boostable check.
-        */
-       len = prepare_boost(buf, p, &insn);
+       /* Analyze the opcode and setup emulate functions */
+       ret = prepare_emulation(p, &insn);
+       if (ret < 0)
+               return ret;
 
-       /* Analyze the opcode and set resume flags */
-       set_resume_flags(p, &insn);
+       /* Add int3 for single-step or booster jmp */
+       len = prepare_singlestep(buf, p, &insn);
+       if (len < 0)
+               return len;
 
        /* Also, displacement change doesn't affect the first byte */
        p->opcode = buf[0];
@@ -583,29 +804,7 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 {
        __this_cpu_write(current_kprobe, p);
        kcb->kprobe_saved_flags = kcb->kprobe_old_flags
-               = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
-       if (p->ainsn.if_modifier)
-               kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
-}
-
-static nokprobe_inline void clear_btf(void)
-{
-       if (test_thread_flag(TIF_BLOCKSTEP)) {
-               unsigned long debugctl = get_debugctlmsr();
-
-               debugctl &= ~DEBUGCTLMSR_BTF;
-               update_debugctlmsr(debugctl);
-       }
-}
-
-static nokprobe_inline void restore_btf(void)
-{
-       if (test_thread_flag(TIF_BLOCKSTEP)) {
-               unsigned long debugctl = get_debugctlmsr();
-
-               debugctl |= DEBUGCTLMSR_BTF;
-               update_debugctlmsr(debugctl);
-       }
+               = (regs->flags & X86_EFLAGS_IF);
 }
 
 void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
@@ -620,6 +819,22 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
+static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
+                              struct kprobe_ctlblk *kcb)
+{
+       if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+               kcb->kprobe_status = KPROBE_HIT_SSDONE;
+               cur->post_handler(cur, regs, 0);
+       }
+
+       /* Restore back the original saved kprobes variables and continue. */
+       if (kcb->kprobe_status == KPROBE_REENTER)
+               restore_previous_kprobe(kcb);
+       else
+               reset_current_kprobe();
+}
+NOKPROBE_SYMBOL(kprobe_post_process);
+
 static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
                             struct kprobe_ctlblk *kcb, int reenter)
 {
@@ -627,7 +842,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
                return;
 
 #if !defined(CONFIG_PREEMPTION)
-       if (p->ainsn.boostable && !p->post_handler) {
+       if (p->ainsn.boostable) {
                /* Boost up -- we can execute copied instructions directly */
                if (!reenter)
                        reset_current_kprobe();
@@ -646,18 +861,50 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
                kcb->kprobe_status = KPROBE_REENTER;
        } else
                kcb->kprobe_status = KPROBE_HIT_SS;
-       /* Prepare real single stepping */
-       clear_btf();
-       regs->flags |= X86_EFLAGS_TF;
+
+       if (p->ainsn.emulate_op) {
+               p->ainsn.emulate_op(p, regs);
+               kprobe_post_process(p, regs, kcb);
+               return;
+       }
+
+       /* Disable interrupt, and set ip register on trampoline */
        regs->flags &= ~X86_EFLAGS_IF;
-       /* single step inline if the instruction is an int3 */
-       if (p->opcode == INT3_INSN_OPCODE)
-               regs->ip = (unsigned long)p->addr;
-       else
-               regs->ip = (unsigned long)p->ainsn.insn;
+       regs->ip = (unsigned long)p->ainsn.insn;
 }
 NOKPROBE_SYMBOL(setup_singlestep);
 
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "int3"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.  We also don't use the trap flag, but place
+ * another "int3" right after the copied instruction.
+ * Unlike trap-based single-stepping, "int3" single-stepping cannot
+ * handle instructions which change the ip register, e.g. jmp, call,
+ * conditional jmp, nor instructions which change the IF flag, because
+ * interrupts must be disabled around the single-step.
+ * Such instructions are emulated in software; the others are
+ * single-stepped using "int3".
+ *
+ * When the 2nd "int3" handled, the regs->ip and regs->flags needs to
+ * be adjusted, so that we can resume execution on correct code.
+ */
+static void resume_singlestep(struct kprobe *p, struct pt_regs *regs,
+                             struct kprobe_ctlblk *kcb)
+{
+       unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+       unsigned long orig_ip = (unsigned long)p->addr;
+
+       /* Restore saved interrupt flag and ip register */
+       regs->flags |= kcb->kprobe_saved_flags;
+       /* Note that regs->ip points just after the executed int3, so step back */
+       regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
+}
+NOKPROBE_SYMBOL(resume_singlestep);
+
 /*
  * We have reentered the kprobe_handler(), since another probe was hit while
  * within the handler. We save the original kprobes variables and just single
@@ -693,6 +940,12 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 }
 NOKPROBE_SYMBOL(reenter_kprobe);
 
+static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb)
+{
+       return (kcb->kprobe_status == KPROBE_HIT_SS ||
+               kcb->kprobe_status == KPROBE_REENTER);
+}
+
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -737,7 +990,18 @@ int kprobe_int3_handler(struct pt_regs *regs)
                                reset_current_kprobe();
                        return 1;
                }
-       } else if (*addr != INT3_INSN_OPCODE) {
+       } else if (kprobe_is_ss(kcb)) {
+               p = kprobe_running();
+               if ((unsigned long)p->ainsn.insn < regs->ip &&
+                   (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) {
+                       /* Most probably this is the second int3 for single-stepping */
+                       resume_singlestep(p, regs, kcb);
+                       kprobe_post_process(p, regs, kcb);
+                       return 1;
+               }
+       }
+
+       if (*addr != INT3_INSN_OPCODE) {
                /*
                 * The breakpoint instruction was removed right
                 * after we hit it.  Another cpu has removed
@@ -810,91 +1074,6 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(trampoline_handler);
 
-/*
- * Called after single-stepping.  p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction.  To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction.  The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt.  We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new ip is relative to the copied instruction.  We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed flags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- */
-static void resume_execution(struct kprobe *p, struct pt_regs *regs,
-                            struct kprobe_ctlblk *kcb)
-{
-       unsigned long *tos = stack_addr(regs);
-       unsigned long copy_ip = (unsigned long)p->ainsn.insn;
-       unsigned long orig_ip = (unsigned long)p->addr;
-
-       regs->flags &= ~X86_EFLAGS_TF;
-
-       /* Fixup the contents of top of stack */
-       if (p->ainsn.is_pushf) {
-               *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
-               *tos |= kcb->kprobe_old_flags;
-       } else if (p->ainsn.is_call) {
-               *tos = orig_ip + (*tos - copy_ip);
-       }
-
-       if (!p->ainsn.is_abs_ip)
-               regs->ip += orig_ip - copy_ip;
-
-       restore_btf();
-}
-NOKPROBE_SYMBOL(resume_execution);
-
-/*
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled throughout this function.
- */
-int kprobe_debug_handler(struct pt_regs *regs)
-{
-       struct kprobe *cur = kprobe_running();
-       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-       if (!cur)
-               return 0;
-
-       resume_execution(cur, regs, kcb);
-       regs->flags |= kcb->kprobe_saved_flags;
-
-       if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
-               kcb->kprobe_status = KPROBE_HIT_SSDONE;
-               cur->post_handler(cur, regs, 0);
-       }
-
-       /* Restore back the original saved kprobes variables and continue. */
-       if (kcb->kprobe_status == KPROBE_REENTER) {
-               restore_previous_kprobe(kcb);
-               goto out;
-       }
-       reset_current_kprobe();
-out:
-       /*
-        * if somebody else is singlestepping across a probe point, flags
-        * will have TF set, in which case, continue the remaining processing
-        * of do_debug, as if this is not a probe hit.
-        */
-       if (regs->flags & X86_EFLAGS_TF)
-               return 0;
-
-       return 1;
-}
-NOKPROBE_SYMBOL(kprobe_debug_handler);
-
 int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
        struct kprobe *cur = kprobe_running();
@@ -912,20 +1091,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
                 * normal page fault.
                 */
                regs->ip = (unsigned long)cur->addr;
-               /*
-                * Trap flag (TF) has been set here because this fault
-                * happened where the single stepping will be done.
-                * So clear it by resetting the current kprobe:
-                */
-               regs->flags &= ~X86_EFLAGS_TF;
-               /*
-                * Since the single step (trap) has been cancelled,
-                * we need to restore BTF here.
-                */
-               restore_btf();
 
                /*
-                * If the TF flag was set before the kprobe hit,
+                * If the IF flag was set before the kprobe hit,
                 * don't touch it:
                 */
                regs->flags |= kcb->kprobe_old_flags;
index 08eb230..71425eb 100644 (file)
@@ -312,6 +312,8 @@ static int can_optimize(unsigned long paddr)
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
+               int ret;
+
                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code will jumps into this function,
@@ -321,8 +323,11 @@ static int can_optimize(unsigned long paddr)
                recovered_insn = recover_probed_instruction(buf, addr);
                if (!recovered_insn)
                        return 0;
-               kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
-               insn_get_length(&insn);
+
+               ret = insn_decode_kernel(&insn, (void *)recovered_insn);
+               if (ret < 0)
+                       return 0;
+
                /*
                 * In the case of detecting unknown breakpoint, this could be
                 * a padding INT3 between functions. Let's check that all the
index 78bb0fa..172c947 100644 (file)
@@ -650,7 +650,7 @@ static void __init kvm_guest_init(void)
 
        if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                has_steal_clock = 1;
-               pv_ops.time.steal_clock = kvm_steal_clock;
+               static_call_update(pv_steal_clock, kvm_steal_clock);
        }
 
        if (pv_tlb_flush_supported()) {
index 1fc0962..d37ed4e 100644 (file)
@@ -106,7 +106,7 @@ static inline void kvm_sched_clock_init(bool stable)
        if (!stable)
                clear_sched_clock_stable();
        kvm_sched_clock_offset = kvm_clock_read();
-       pv_ops.time.sched_clock = kvm_sched_clock_read;
+       paravirt_set_sched_clock(kvm_sched_clock_read);
 
        pr_info("kvm-clock: using sched offset of %llu cycles",
                kvm_sched_clock_offset);
index 4f75d0c..9e1ea99 100644 (file)
@@ -32,3 +32,12 @@ bool pv_is_native_vcpu_is_preempted(void)
        return pv_ops.lock.vcpu_is_preempted.func ==
                __raw_callee_save___native_vcpu_is_preempted;
 }
+
+void __init paravirt_set_cap(void)
+{
+       if (!pv_is_native_spin_unlock())
+               setup_force_cpu_cap(X86_FEATURE_PVUNLOCK);
+
+       if (!pv_is_native_vcpu_is_preempted())
+               setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT);
+}
index c60222a..d073026 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/highmem.h>
 #include <linux/kprobes.h>
 #include <linux/pgtable.h>
+#include <linux/static_call.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
@@ -52,7 +53,10 @@ void __init default_banner(void)
 }
 
 /* Undefined instruction for dealing with missing ops pointers. */
-static const unsigned char ud2a[] = { 0x0f, 0x0b };
+static void paravirt_BUG(void)
+{
+       BUG();
+}
 
 struct branch {
        unsigned char opcode;
@@ -85,25 +89,6 @@ u64 notrace _paravirt_ident_64(u64 x)
 {
        return x;
 }
-
-static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
-                                  unsigned long addr, unsigned len)
-{
-       struct branch *b = insn_buff;
-       unsigned long delta = (unsigned long)target - (addr+5);
-
-       if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-               WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
-#endif
-               return len;     /* call too long for patch site */
-       }
-
-       b->opcode = 0xe9;       /* jmp */
-       b->delta = delta;
-
-       return 5;
-}
 #endif
 
 DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -114,8 +99,8 @@ void __init native_pv_lock_init(void)
                static_branch_disable(&virt_spin_lock_key);
 }
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff,
-                               unsigned long addr, unsigned len)
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
+                           unsigned int len)
 {
        /*
         * Neat trick to map patch type back to the call within the
@@ -125,20 +110,10 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
        unsigned ret;
 
        if (opfunc == NULL)
-               /* If there's no function, patch it with a ud2a (BUG) */
-               ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
+               /* If there's no function, patch it with paravirt_BUG() */
+               ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len);
        else if (opfunc == _paravirt_nop)
                ret = 0;
-
-#ifdef CONFIG_PARAVIRT_XXL
-       /* identity functions just return their single argument */
-       else if (opfunc == _paravirt_ident_64)
-               ret = paravirt_patch_ident_64(insn_buff, len);
-
-       else if (type == PARAVIRT_PATCH(cpu.iret))
-               /* If operation requires a jmp, then jmp */
-               ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
-#endif
        else
                /* Otherwise call the function. */
                ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
@@ -146,19 +121,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
        return ret;
 }
 
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
-                             const char *start, const char *end)
-{
-       unsigned insn_len = end - start;
-
-       /* Alternative instruction is too large for the patch site and we cannot continue: */
-       BUG_ON(insn_len > len || start == NULL);
-
-       memcpy(insn_buff, start, insn_len);
-
-       return insn_len;
-}
-
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
@@ -167,6 +129,14 @@ static u64 native_steal_clock(int cpu)
        return 0;
 }
 
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock);
+
+void paravirt_set_sched_clock(u64 (*func)(void))
+{
+       static_call_update(pv_sched_clock, func);
+}
+
 /* These are in entry.S */
 extern void native_iret(void);
 
@@ -269,13 +239,6 @@ struct pv_info pv_info = {
 #define PTE_IDENT      __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
 
 struct paravirt_patch_template pv_ops = {
-       /* Init ops. */
-       .init.patch             = native_patch,
-
-       /* Time ops. */
-       .time.sched_clock       = native_sched_clock,
-       .time.steal_clock       = native_steal_clock,
-
        /* Cpu ops. */
        .cpu.io_delay           = native_io_delay,
 
@@ -308,8 +271,6 @@ struct paravirt_patch_template pv_ops = {
 
        .cpu.load_sp0           = native_load_sp0,
 
-       .cpu.iret               = native_iret,
-
 #ifdef CONFIG_X86_IOPL_IOPERM
        .cpu.invalidate_io_bitmap       = native_tss_invalidate_io_bitmap,
        .cpu.update_io_bitmap           = native_tss_update_io_bitmap,
@@ -414,6 +375,8 @@ struct paravirt_patch_template pv_ops = {
 NOKPROBE_SYMBOL(native_get_debugreg);
 NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
+
+void (*paravirt_iret)(void) = native_iret;
 #endif
 
 EXPORT_SYMBOL(pv_ops);
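
With steal_clock and sched_clock converted to static calls, a guest registers its clocks
the way the kvm and Xen hunks in this series do; a minimal sketch with placeholder names
(my_steal_clock, my_sched_clock and my_guest_time_init are hypothetical):

#include <linux/static_call.h>
#include <asm/paravirt.h>

static u64 my_steal_clock(int cpu)
{
	return 0;	/* placeholder: read per-cpu steal time from the hypervisor */
}

static u64 my_sched_clock(void)
{
	return 0;	/* placeholder: read the paravirt clocksource */
}

static void __init my_guest_time_init(void)
{
	/* Replaces the old pv_ops.time.{steal_clock,sched_clock} assignments. */
	static_call_update(pv_steal_clock, my_steal_clock);
	paravirt_set_sched_clock(my_sched_clock);
}
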
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
deleted file mode 100644 (file)
index abd27ec..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/stringify.h>
-
-#include <asm/paravirt.h>
-#include <asm/asm-offsets.h>
-
-#define PSTART(d, m)                                                   \
-       patch_data_##d.m
-
-#define PEND(d, m)                                                     \
-       (PSTART(d, m) + sizeof(patch_data_##d.m))
-
-#define PATCH(d, m, insn_buff, len)                                            \
-       paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m))
-
-#define PATCH_CASE(ops, m, data, insn_buff, len)                               \
-       case PARAVIRT_PATCH(ops.m):                                     \
-               return PATCH(data, ops##_##m, insn_buff, len)
-
-#ifdef CONFIG_PARAVIRT_XXL
-struct patch_xxl {
-       const unsigned char     irq_irq_disable[1];
-       const unsigned char     irq_irq_enable[1];
-       const unsigned char     irq_save_fl[2];
-       const unsigned char     mmu_read_cr2[3];
-       const unsigned char     mmu_read_cr3[3];
-       const unsigned char     mmu_write_cr3[3];
-       const unsigned char     cpu_wbinvd[2];
-       const unsigned char     mov64[3];
-};
-
-static const struct patch_xxl patch_data_xxl = {
-       .irq_irq_disable        = { 0xfa },             // cli
-       .irq_irq_enable         = { 0xfb },             // sti
-       .irq_save_fl            = { 0x9c, 0x58 },       // pushf; pop %[re]ax
-       .mmu_read_cr2           = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
-       .mmu_read_cr3           = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
-       .mmu_write_cr3          = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
-       .cpu_wbinvd             = { 0x0f, 0x09 },       // wbinvd
-       .mov64                  = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
-};
-
-unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
-{
-       return PATCH(xxl, mov64, insn_buff, len);
-}
-# endif /* CONFIG_PARAVIRT_XXL */
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-struct patch_lock {
-       unsigned char queued_spin_unlock[3];
-       unsigned char vcpu_is_preempted[2];
-};
-
-static const struct patch_lock patch_data_lock = {
-       .vcpu_is_preempted      = { 0x31, 0xc0 },       // xor %eax, %eax
-
-# ifdef CONFIG_X86_64
-       .queued_spin_unlock     = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
-# else
-       .queued_spin_unlock     = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
-# endif
-};
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
-
-unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
-                         unsigned int len)
-{
-       switch (type) {
-
-#ifdef CONFIG_PARAVIRT_XXL
-       PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
-       PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
-       PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
-
-       PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
-       PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
-       PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
-
-       PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
-#endif
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-       case PARAVIRT_PATCH(lock.queued_spin_unlock):
-               if (pv_is_native_spin_unlock())
-                       return PATCH(lock, queued_spin_unlock, insn_buff, len);
-               break;
-
-       case PARAVIRT_PATCH(lock.vcpu_is_preempted):
-               if (pv_is_native_vcpu_is_preempted())
-                       return PATCH(lock, vcpu_is_preempted, insn_buff, len);
-               break;
-#endif
-       default:
-               break;
-       }
-
-       return paravirt_patch_default(type, insn_buff, addr, len);
-}
index fd945ce..0941d2f 100644 (file)
@@ -224,7 +224,6 @@ void __init setup_per_cpu_areas(void)
                per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
                per_cpu(cpu_number, cpu) = cpu;
                setup_percpu_segment(cpu);
-               setup_stack_canary_segment(cpu);
                /*
                 * Copy data used in early init routines from the
                 * initial arrays to the per cpu data areas.  These
index 04a780a..75c7df3 100644 (file)
@@ -251,39 +251,54 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
        return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
 }
 
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
 {
        char buffer[MAX_INSN_SIZE];
-       enum es_result ret;
        int res;
 
-       if (user_mode(ctxt->regs)) {
-               res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
-               if (!res) {
-                       ctxt->fi.vector     = X86_TRAP_PF;
-                       ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
-                       ctxt->fi.cr2        = ctxt->regs->ip;
-                       return ES_EXCEPTION;
-               }
+       res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+       if (!res) {
+               ctxt->fi.vector     = X86_TRAP_PF;
+               ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+               ctxt->fi.cr2        = ctxt->regs->ip;
+               return ES_EXCEPTION;
+       }
 
-               if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
-                       return ES_DECODE_FAILED;
-       } else {
-               res = vc_fetch_insn_kernel(ctxt, buffer);
-               if (res) {
-                       ctxt->fi.vector     = X86_TRAP_PF;
-                       ctxt->fi.error_code = X86_PF_INSTR;
-                       ctxt->fi.cr2        = ctxt->regs->ip;
-                       return ES_EXCEPTION;
-               }
+       if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+               return ES_DECODE_FAILED;
+
+       if (ctxt->insn.immediate.got)
+               return ES_OK;
+       else
+               return ES_DECODE_FAILED;
+}
 
-               insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
-               insn_get_length(&ctxt->insn);
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+{
+       char buffer[MAX_INSN_SIZE];
+       int res, ret;
+
+       res = vc_fetch_insn_kernel(ctxt, buffer);
+       if (res) {
+               ctxt->fi.vector     = X86_TRAP_PF;
+               ctxt->fi.error_code = X86_PF_INSTR;
+               ctxt->fi.cr2        = ctxt->regs->ip;
+               return ES_EXCEPTION;
        }
 
-       ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+       ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+       if (ret < 0)
+               return ES_DECODE_FAILED;
+       else
+               return ES_OK;
+}
 
-       return ret;
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+       if (user_mode(ctxt->regs))
+               return __vc_decode_user_insn(ctxt);
+       else
+               return __vc_decode_kern_insn(ctxt);
 }
 
 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
index 64a496a..3c883e0 100644 (file)
@@ -164,17 +164,11 @@ int do_set_thread_area(struct task_struct *p, int idx,
                savesegment(fs, sel);
                if (sel == modified_sel)
                        loadsegment(fs, sel);
-
-               savesegment(gs, sel);
-               if (sel == modified_sel)
-                       load_gs_index(sel);
 #endif
 
-#ifdef CONFIG_X86_32_LAZY_GS
                savesegment(gs, sel);
                if (sel == modified_sel)
-                       loadsegment(gs, sel);
-#endif
+                       load_gs_index(sel);
        } else {
 #ifdef CONFIG_X86_64
                if (p->thread.fsindex == modified_sel)
index ac1874a..034f27f 100644 (file)
@@ -498,14 +498,15 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 {
        u8 insn_buf[MAX_INSN_SIZE];
        struct insn insn;
+       int ret;
 
        if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip,
                        MAX_INSN_SIZE))
                return GP_NO_HINT;
 
-       kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
-       insn_get_modrm(&insn);
-       insn_get_sib(&insn);
+       ret = insn_decode_kernel(&insn, insn_buf);
+       if (ret < 0)
+               return GP_NO_HINT;
 
        *addr = (unsigned long)insn_get_addr_ref(&insn, regs);
        if (*addr == -1UL)
@@ -889,9 +890,6 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
        if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
                dr6 &= ~DR_STEP;
 
-       if (kprobe_debug_handler(regs))
-               goto out;
-
        /*
         * The kernel doesn't use INT1
         */
index f70dffc..9f59292 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 #include <linux/timex.h>
 #include <linux/static_key.h>
+#include <linux/static_call.h>
 
 #include <asm/hpet.h>
 #include <asm/timer.h>
@@ -254,7 +255,7 @@ unsigned long long sched_clock(void)
 
 bool using_native_sched_clock(void)
 {
-       return pv_ops.time.sched_clock == native_sched_clock;
+       return static_call_query(pv_sched_clock) == native_sched_clock;
 }
 #else
 unsigned long long
index f6225bf..8032f5f 100644 (file)
@@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs *regs)
        if (!nr_copied)
                return false;
 
-       if (!insn_decode(&insn, regs, buf, nr_copied))
+       if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
                return false;
 
        umip_inst = identify_insn(&insn);
index a2b4133..b63cf8f 100644 (file)
@@ -276,12 +276,12 @@ static bool is_prefix_bad(struct insn *insn)
 
 static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
+       enum insn_mode m = x86_64 ? INSN_MODE_64 : INSN_MODE_32;
        u32 volatile *good_insns;
+       int ret;
 
-       insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
-       /* has the side-effect of processing the entire instruction */
-       insn_get_length(insn);
-       if (!insn_complete(insn))
+       ret = insn_decode(insn, auprobe->insn, sizeof(auprobe->insn), m);
+       if (ret < 0)
                return -ENOEXEC;
 
        if (is_prefix_bad(insn))
index 3b65441..16bc913 100644 (file)
@@ -6,7 +6,7 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 
 /* if you want SMP support, implement these with real spinlocks */
 .macro LOCK reg
index 1c5c81c..ce69356 100644 (file)
@@ -6,7 +6,7 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 
 .macro read64 reg
        movl %ebx, %eax
index 2402d4c..db4b4f9 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 /*
index 77b9b2a..57b79c5 100644 (file)
@@ -11,7 +11,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/export.h>
index 12539fc..b0f3b2a 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Written by Masami Hiramatsu <mhiramat@redhat.com>
  */
-#include <asm/insn.h>
+#include <asm/insn.h> /* __ignore_sync_check__ */
 
 /* Attribute tables are generated from opcode map */
 #include "inat-tables.c"
index bb0b3fe..321a157 100644 (file)
@@ -404,10 +404,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
        case INAT_SEG_REG_FS:
                return (unsigned short)(regs->fs & 0xffff);
        case INAT_SEG_REG_GS:
-               /*
-                * GS may or may not be in regs as per CONFIG_X86_32_LAZY_GS.
-                * The macro below takes care of both cases.
-                */
                return get_user_gs(regs);
        case INAT_SEG_REG_IGNORE:
        default:
@@ -928,10 +924,11 @@ static int get_seg_base_limit(struct insn *insn, struct pt_regs *regs,
 static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs,
                            int *regoff, long *eff_addr)
 {
-       insn_get_modrm(insn);
+       int ret;
 
-       if (!insn->modrm.nbytes)
-               return -EINVAL;
+       ret = insn_get_modrm(insn);
+       if (ret)
+               return ret;
 
        if (X86_MODRM_MOD(insn->modrm.value) != 3)
                return -EINVAL;
@@ -977,14 +974,14 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
                              int *regoff, long *eff_addr)
 {
        long tmp;
+       int ret;
 
        if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
                return -EINVAL;
 
-       insn_get_modrm(insn);
-
-       if (!insn->modrm.nbytes)
-               return -EINVAL;
+       ret = insn_get_modrm(insn);
+       if (ret)
+               return ret;
 
        if (X86_MODRM_MOD(insn->modrm.value) > 2)
                return -EINVAL;
@@ -1106,18 +1103,21 @@ static int get_eff_addr_modrm_16(struct insn *insn, struct pt_regs *regs,
  * @base_offset will have a register, as an offset from the base of pt_regs,
  * that can be used to resolve the associated segment.
  *
- * -EINVAL on error.
+ * Negative value on error.
  */
 static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
                            int *base_offset, long *eff_addr)
 {
        long base, indx;
        int indx_offset;
+       int ret;
 
        if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
                return -EINVAL;
 
-       insn_get_modrm(insn);
+       ret = insn_get_modrm(insn);
+       if (ret)
+               return ret;
 
        if (!insn->modrm.nbytes)
                return -EINVAL;
@@ -1125,7 +1125,9 @@ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
        if (X86_MODRM_MOD(insn->modrm.value) > 2)
                return -EINVAL;
 
-       insn_get_sib(insn);
+       ret = insn_get_sib(insn);
+       if (ret)
+               return ret;
 
        if (!insn->sib.nbytes)
                return -EINVAL;
@@ -1194,8 +1196,8 @@ static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs)
        short eff_addr;
        long tmp;
 
-       insn_get_modrm(insn);
-       insn_get_displacement(insn);
+       if (insn_get_displacement(insn))
+               goto out;
 
        if (insn->addr_bytes != 2)
                goto out;
@@ -1492,7 +1494,7 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN
 }
 
 /**
- * insn_decode() - Decode an instruction
+ * insn_decode_from_regs() - Decode an instruction
  * @insn:      Structure to store decoded instruction
  * @regs:      Structure with register values as seen when entering kernel mode
  * @buf:       Buffer containing the instruction bytes
@@ -1505,8 +1507,8 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN
  *
  * True if instruction was decoded, False otherwise.
  */
-bool insn_decode(struct insn *insn, struct pt_regs *regs,
-                unsigned char buf[MAX_INSN_SIZE], int buf_size)
+bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
+                          unsigned char buf[MAX_INSN_SIZE], int buf_size)
 {
        int seg_defs;
 
@@ -1529,7 +1531,9 @@ bool insn_decode(struct insn *insn, struct pt_regs *regs,
        insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
        insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
 
-       insn_get_length(insn);
+       if (insn_get_length(insn))
+               return false;
+
        if (buf_size < insn->length)
                return false;
 
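
The renamed insn_decode_from_regs() keeps the regs-aware behaviour for user-mode decoding;
the umip and #VC callers above follow roughly this pattern (a sketch, the surrounding fault
handler is assumed):

	unsigned char buf[MAX_INSN_SIZE];
	struct insn insn;
	int nr_copied;

	nr_copied = insn_fetch_from_user_inatomic(regs, buf);
	if (!nr_copied)
		return false;	/* faulted while fetching user text */

	/* Operand/address sizes are taken from the user segments in @regs. */
	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
		return false;	/* note: returns bool, unlike insn_decode() */
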
index 435630a..058f19b 100644 (file)
 #else
 #include <string.h>
 #endif
-#include <asm/inat.h>
-#include <asm/insn.h>
+#include <asm/inat.h> /* __ignore_sync_check__ */
+#include <asm/insn.h> /* __ignore_sync_check__ */
 
-#include <asm/emulate_prefix.h>
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+
+#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
 
 #define leXX_to_cpu(t, r)                                              \
 ({                                                                     \
@@ -51,6 +54,7 @@
  * insn_init() - initialize struct insn
  * @insn:      &struct insn to be initialized
  * @kaddr:     address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:   length of the insn buffer at @kaddr
  * @x86_64:    !0 for 64-bit kernel or 64-bit app
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
@@ -111,8 +115,12 @@ static void insn_get_emulate_prefix(struct insn *insn)
  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
  * to point to the (first) opcode.  No effect if @insn->prefixes.got
  * is already set.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_prefixes(struct insn *insn)
+int insn_get_prefixes(struct insn *insn)
 {
        struct insn_field *prefixes = &insn->prefixes;
        insn_attr_t attr;
@@ -120,7 +128,7 @@ void insn_get_prefixes(struct insn *insn)
        int i, nb;
 
        if (prefixes->got)
-               return;
+               return 0;
 
        insn_get_emulate_prefix(insn);
 
@@ -230,8 +238,10 @@ vex_end:
 
        prefixes->got = 1;
 
+       return 0;
+
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /**
@@ -243,16 +253,25 @@ err_out:
  * If necessary, first collects any preceding (prefix) bytes.
  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
  * is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_opcode(struct insn *insn)
+int insn_get_opcode(struct insn *insn)
 {
        struct insn_field *opcode = &insn->opcode;
+       int pfx_id, ret;
        insn_byte_t op;
-       int pfx_id;
+
        if (opcode->got)
-               return;
-       if (!insn->prefixes.got)
-               insn_get_prefixes(insn);
+               return 0;
+
+       if (!insn->prefixes.got) {
+               ret = insn_get_prefixes(insn);
+               if (ret)
+                       return ret;
+       }
 
        /* Get first opcode */
        op = get_next(insn_byte_t, insn);
@@ -267,9 +286,13 @@ void insn_get_opcode(struct insn *insn)
                insn->attr = inat_get_avx_attribute(op, m, p);
                if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
                    (!inat_accept_vex(insn->attr) &&
-                    !inat_is_group(insn->attr)))
-                       insn->attr = 0; /* This instruction is bad */
-               goto end;       /* VEX has only 1 byte for opcode */
+                    !inat_is_group(insn->attr))) {
+                       /* This instruction is bad */
+                       insn->attr = 0;
+                       return -EINVAL;
+               }
+               /* VEX has only 1 byte for opcode */
+               goto end;
        }
 
        insn->attr = inat_get_opcode_attribute(op);
@@ -280,13 +303,18 @@ void insn_get_opcode(struct insn *insn)
                pfx_id = insn_last_prefix_id(insn);
                insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
        }
-       if (inat_must_vex(insn->attr))
-               insn->attr = 0; /* This instruction is bad */
+
+       if (inat_must_vex(insn->attr)) {
+               /* This instruction is bad */
+               insn->attr = 0;
+               return -EINVAL;
+       }
 end:
        opcode->got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /**
@@ -296,15 +324,25 @@ err_out:
  * Populates @insn->modrm and updates @insn->next_byte to point past the
  * ModRM byte, if any.  If necessary, first collects the preceding bytes
  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_modrm(struct insn *insn)
+int insn_get_modrm(struct insn *insn)
 {
        struct insn_field *modrm = &insn->modrm;
        insn_byte_t pfx_id, mod;
+       int ret;
+
        if (modrm->got)
-               return;
-       if (!insn->opcode.got)
-               insn_get_opcode(insn);
+               return 0;
+
+       if (!insn->opcode.got) {
+               ret = insn_get_opcode(insn);
+               if (ret)
+                       return ret;
+       }
 
        if (inat_has_modrm(insn->attr)) {
                mod = get_next(insn_byte_t, insn);
@@ -313,17 +351,22 @@ void insn_get_modrm(struct insn *insn)
                        pfx_id = insn_last_prefix_id(insn);
                        insn->attr = inat_get_group_attribute(mod, pfx_id,
                                                              insn->attr);
-                       if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
-                               insn->attr = 0; /* This is bad */
+                       if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+                               /* Bad insn */
+                               insn->attr = 0;
+                               return -EINVAL;
+                       }
                }
        }
 
        if (insn->x86_64 && inat_is_force64(insn->attr))
                insn->opnd_bytes = 8;
+
        modrm->got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 
@@ -337,11 +380,16 @@ err_out:
 int insn_rip_relative(struct insn *insn)
 {
        struct insn_field *modrm = &insn->modrm;
+       int ret;
 
        if (!insn->x86_64)
                return 0;
-       if (!modrm->got)
-               insn_get_modrm(insn);
+
+       if (!modrm->got) {
+               ret = insn_get_modrm(insn);
+               if (ret)
+                       return 0;
+       }
        /*
         * For rip-relative instructions, the mod field (top 2 bits)
         * is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -355,15 +403,25 @@ int insn_rip_relative(struct insn *insn)
  *
  * If necessary, first collects the instruction up to and including the
  * ModRM byte.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
  */
-void insn_get_sib(struct insn *insn)
+int insn_get_sib(struct insn *insn)
 {
        insn_byte_t modrm;
+       int ret;
 
        if (insn->sib.got)
-               return;
-       if (!insn->modrm.got)
-               insn_get_modrm(insn);
+               return 0;
+
+       if (!insn->modrm.got) {
+               ret = insn_get_modrm(insn);
+               if (ret)
+                       return ret;
+       }
+
        if (insn->modrm.nbytes) {
                modrm = insn->modrm.bytes[0];
                if (insn->addr_bytes != 2 &&
@@ -374,8 +432,10 @@ void insn_get_sib(struct insn *insn)
        }
        insn->sib.got = 1;
 
+       return 0;
+
 err_out:
-       return;
+       return -ENODATA;
 }
 
 
@@ -386,15 +446,25 @@ err_out:
  * If necessary, first collects the instruction up to and including the
  * SIB byte.
  * Displacement value is sign-expanded.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
  */
-void insn_get_displacement(struct insn *insn)
+int insn_get_displacement(struct insn *insn)
 {
        insn_byte_t mod, rm, base;
+       int ret;
 
        if (insn->displacement.got)
-               return;
-       if (!insn->sib.got)
-               insn_get_sib(insn);
+               return 0;
+
+       if (!insn->sib.got) {
+               ret = insn_get_sib(insn);
+               if (ret)
+                       return ret;
+       }
+
        if (insn->modrm.nbytes) {
                /*
                 * Interpreting the modrm byte:
@@ -436,9 +506,10 @@ void insn_get_displacement(struct insn *insn)
        }
 out:
        insn->displacement.got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /* Decode moffset16/32/64. Return 0 if failed */
@@ -537,20 +608,30 @@ err_out:
 }
 
 /**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
  * @insn:      &struct insn containing instruction
  *
  * If necessary, first collects the instruction up to and including the
  * displacement bytes.
  * Basically, most of immediates are sign-expanded. Unsigned-value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
 {
+       int ret;
+
        if (insn->immediate.got)
-               return;
-       if (!insn->displacement.got)
-               insn_get_displacement(insn);
+               return 0;
+
+       if (!insn->displacement.got) {
+               ret = insn_get_displacement(insn);
+               if (ret)
+                       return ret;
+       }
 
        if (inat_has_moffset(insn->attr)) {
                if (!__get_moffset(insn))
@@ -597,9 +678,10 @@ void insn_get_immediate(struct insn *insn)
        }
 done:
        insn->immediate.got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /**
@@ -608,13 +690,65 @@ err_out:
  *
  * If necessary, first collects the instruction up to and including the
  * immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
+ */
+int insn_get_length(struct insn *insn)
 {
+       int ret;
+
        if (insn->length)
-               return;
-       if (!insn->immediate.got)
-               insn_get_immediate(insn);
+               return 0;
+
+       if (!insn->immediate.got) {
+               ret = insn_get_immediate(insn);
+               if (ret)
+                       return ret;
+       }
+
        insn->length = (unsigned char)((unsigned long)insn->next_byte
                                     - (unsigned long)insn->kaddr);
+
+       return 0;
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+       return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+               insn->displacement.got && insn->immediate.got;
+}
+
+/**
+ * insn_decode() - Decode an x86 instruction
+ * @insn:      &struct insn to be initialized
+ * @kaddr:     address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:   length of the insn buffer at @kaddr
+ * @m:         insn mode, see enum insn_mode
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
+ */
+int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
+{
+       int ret;
+
+/* #define INSN_MODE_KERN      -1 __ignore_sync_check__ mode is only valid in the kernel */
+
+       if (m == INSN_MODE_KERN)
+               insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
+       else
+               insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
+
+       ret = insn_get_length(insn);
+       if (ret)
+               return ret;
+
+       if (insn_complete(insn))
+               return 0;
+
+       return -EINVAL;
 }
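
Kernel callers converted in this series (traps.c, uprobes.c, the decoder tools) all follow the
same check-the-return-value pattern; a minimal sketch of decoding kernel text (the surrounding
handler is assumed):

	u8 buf[MAX_INSN_SIZE];
	struct insn insn;
	int ret;

	if (copy_from_kernel_nofault(buf, (void *)regs->ip, MAX_INSN_SIZE))
		return;

	/* INSN_MODE_KERN selects 32/64-bit mode from the running kernel. */
	ret = insn_decode_kernel(&insn, buf);
	if (ret < 0)
		return;		/* truncated or undecodable instruction */

	/* insn.length, insn.modrm, insn.sib, ... are now valid. */
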
index 1e299ac..1cc9da6 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 .pushsection .noinstr.text, "ax"
index 41902fe..6480101 100644 (file)
@@ -8,7 +8,7 @@
  */
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 #undef memmove
index 0bfd26e..9827ae2 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 /*
index f6fb1d2..6bb74b5 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
 #include <asm/unwind_hints.h>
index ae78cef..f633f9e 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/dma-mapping.h>
+#include <linux/virtio_config.h>
 
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
@@ -484,3 +485,8 @@ void __init mem_encrypt_init(void)
        print_mem_encrypt_feature_info();
 }
 
+int arch_has_restricted_virtio_memory_access(void)
+{
+       return sev_active();
+}
+EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access);
index d2ccadc..b049070 100644 (file)
 
 #define PVH_GDT_ENTRY_CS       1
 #define PVH_GDT_ENTRY_DS       2
-#define PVH_GDT_ENTRY_CANARY   3
 #define PVH_CS_SEL             (PVH_GDT_ENTRY_CS * 8)
 #define PVH_DS_SEL             (PVH_GDT_ENTRY_DS * 8)
-#define PVH_CANARY_SEL         (PVH_GDT_ENTRY_CANARY * 8)
 
 SYM_CODE_START_LOCAL(pvh_start_xen)
        cld
@@ -111,17 +109,6 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
 
 #else /* CONFIG_X86_64 */
 
-       /* Set base address in stack canary descriptor. */
-       movl $_pa(gdt_start),%eax
-       movl $_pa(canary),%ecx
-       movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax)
-       shrl $16, %ecx
-       movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax)
-       movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax)
-
-       mov $PVH_CANARY_SEL,%eax
-       mov %eax,%gs
-
        call mk_early_pgtbl_32
 
        mov $_pa(initial_page_table), %eax
@@ -165,7 +152,6 @@ SYM_DATA_START_LOCAL(gdt_start)
        .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */
 #endif
        .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */
-       .quad GDT_ENTRY(0x4090, 0, 0x18)    /* PVH_CANARY_SEL */
 SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
 
        .balign 16
index db1378c..ef4329d 100644 (file)
@@ -99,11 +99,8 @@ static void __save_processor_state(struct saved_context *ctxt)
        /*
         * segment registers
         */
-#ifdef CONFIG_X86_32_LAZY_GS
        savesegment(gs, ctxt->gs);
-#endif
 #ifdef CONFIG_X86_64
-       savesegment(gs, ctxt->gs);
        savesegment(fs, ctxt->fs);
        savesegment(ds, ctxt->ds);
        savesegment(es, ctxt->es);
@@ -232,7 +229,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
        wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
 #else
        loadsegment(fs, __KERNEL_PERCPU);
-       loadsegment(gs, __KERNEL_STACK_CANARY);
 #endif
 
        /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
@@ -255,7 +251,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
         */
        wrmsrl(MSR_FS_BASE, ctxt->fs_base);
        wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
-#elif defined(CONFIG_X86_32_LAZY_GS)
+#else
        loadsegment(gs, ctxt->gs);
 #endif
 
index 34eda63..472540a 100644 (file)
@@ -120,7 +120,7 @@ int main(int argc, char **argv)
 
        while (fgets(line, BUFSIZE, stdin)) {
                char copy[BUFSIZE], *s, *tab1, *tab2;
-               int nb = 0;
+               int nb = 0, ret;
                unsigned int b;
 
                if (line[0] == '<') {
@@ -148,10 +148,12 @@ int main(int argc, char **argv)
                        } else
                                break;
                }
+
                /* Decode an instruction */
-               insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
-               insn_get_length(&insn);
-               if (insn.length != nb) {
+               ret = insn_decode(&insn, insn_buff, sizeof(insn_buff),
+                                 x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+
+               if (ret < 0 || insn.length != nb) {
                        warnings++;
                        pr_warn("Found an x86 instruction decoder bug, "
                                "please report this.\n", sym);
index c6a0000..213f35f 100644 (file)
@@ -218,8 +218,8 @@ static void parse_args(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
+       int insns = 0, ret;
        struct insn insn;
-       int insns = 0;
        int errors = 0;
        unsigned long i;
        unsigned char insn_buff[MAX_INSN_SIZE * 2];
@@ -237,15 +237,15 @@ int main(int argc, char **argv)
                        continue;
 
                /* Decode an instruction */
-               insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
-               insn_get_length(&insn);
+               ret = insn_decode(&insn, insn_buff, sizeof(insn_buff),
+                                 x86_64 ? INSN_MODE_64 : INSN_MODE_32);
 
                if (insn.next_byte <= insn.kaddr ||
                    insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
                        /* Access out-of-range memory */
                        dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn);
                        errors++;
-               } else if (verbose && !insn_complete(&insn))
+               } else if (verbose && ret < 0)
                        dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn);
                else if (verbose >= 2)
                        dump_insn(stdout, &insn);
index dc0a337..17503fe 100644 (file)
@@ -1070,8 +1070,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 
        .read_pmc = xen_read_pmc,
 
-       .iret = xen_iret,
-
        .load_tr_desc = paravirt_nop,
        .set_ldt = xen_set_ldt,
        .load_gdt = xen_load_gdt,
@@ -1204,7 +1202,6 @@ static void __init xen_setup_gdt(int cpu)
        pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
        pv_ops.cpu.load_gdt = xen_load_gdt_boot;
 
-       setup_stack_canary_segment(cpu);
        switch_to_new_gdt(cpu);
 
        pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
@@ -1233,8 +1230,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
        /* Install Xen paravirt ops */
        pv_info = xen_info;
-       pv_ops.init.patch = paravirt_patch_default;
        pv_ops.cpu = xen_cpu_ops;
+       paravirt_iret = xen_iret;
        xen_init_irq_ops();
 
        /*
index 91f5b33..d9c945e 100644 (file)
@@ -379,11 +379,6 @@ void xen_timer_resume(void)
        }
 }
 
-static const struct pv_time_ops xen_time_ops __initconst = {
-       .sched_clock = xen_sched_clock,
-       .steal_clock = xen_steal_clock,
-};
-
 static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
 static u64 xen_clock_value_saved;
 
@@ -525,17 +520,24 @@ static void __init xen_time_init(void)
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
 
-void __init xen_init_time_ops(void)
+static void __init xen_init_time_common(void)
 {
        xen_sched_clock_offset = xen_clocksource_read();
-       pv_ops.time = xen_time_ops;
+       static_call_update(pv_steal_clock, xen_steal_clock);
+       paravirt_set_sched_clock(xen_sched_clock);
+
+       x86_platform.calibrate_tsc = xen_tsc_khz;
+       x86_platform.get_wallclock = xen_get_wallclock;
+}
+
+void __init xen_init_time_ops(void)
+{
+       xen_init_time_common();
 
        x86_init.timers.timer_init = xen_time_init;
        x86_init.timers.setup_percpu_clockev = x86_init_noop;
        x86_cpuinit.setup_percpu_clockev = x86_init_noop;
 
-       x86_platform.calibrate_tsc = xen_tsc_khz;
-       x86_platform.get_wallclock = xen_get_wallclock;
        /* Dom0 uses the native method to set the hardware RTC. */
        if (!xen_initial_domain())
                x86_platform.set_wallclock = xen_set_wallclock;
@@ -569,13 +571,11 @@ void __init xen_hvm_init_time_ops(void)
                return;
        }
 
-       xen_sched_clock_offset = xen_clocksource_read();
-       pv_ops.time = xen_time_ops;
+       xen_init_time_common();
+
        x86_init.timers.setup_percpu_clockev = xen_time_init;
        x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
 
-       x86_platform.calibrate_tsc = xen_tsc_khz;
-       x86_platform.get_wallclock = xen_get_wallclock;
        x86_platform.set_wallclock = xen_set_wallclock;
 }
 #endif
index 108edbc..152dd33 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/math64.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/static_call.h>
 
 #include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
@@ -175,7 +176,7 @@ void __init xen_time_setup_guest(void)
        xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                        VMASST_TYPE_runstate_update_flag);
 
-       pv_ops.time.steal_clock = xen_steal_clock;
+       static_call_update(pv_steal_clock, xen_steal_clock);
 
        static_key_slow_inc(&paravirt_steal_enabled);
        if (xen_runstate_remote)
index 85ecc78..e01b61a 100644 (file)
@@ -20,6 +20,7 @@
  *   static_call(name)(args...);
  *   static_call_cond(name)(args...);
  *   static_call_update(name, func);
+ *   static_call_query(name);
  *
  * Usage example:
  *
  *
  *   which will include the required value tests to avoid NULL-pointer
  *   dereferences.
+ *
+ *   To query which function is currently set to be called, use:
+ *
+ *   func = static_call_query(name);
  */
 
 #include <linux/types.h>
@@ -118,6 +123,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
                             STATIC_CALL_TRAMP_ADDR(name), func);       \
 })
 
+#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))
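
static_call_query() pairs with static_call_update(); the tsc.c hunk above uses it to check
which sched_clock implementation is live. A minimal round-trip sketch (my_call, my_func and
other_func are placeholder names):

#include <linux/static_call.h>
#include <linux/printk.h>

static int my_func(int x)
{
	return x;
}

static int other_func(int x)
{
	return x + 1;
}

DEFINE_STATIC_CALL(my_call, my_func);

static void example(void)
{
	int v;

	v = static_call(my_call)(2);		/* direct call, currently my_func */

	static_call_update(my_call, other_func);	/* retarget every call site */

	if (static_call_query(my_call) == other_func)	/* read back the target */
		v = static_call(my_call)(2);	/* now returns 3 */

	pr_info("last result: %d\n", v);
}
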
+
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 
 extern int __init static_call_init(void);
@@ -128,16 +135,6 @@ struct static_call_mod {
        struct static_call_site *sites;
 };
 
-struct static_call_key {
-       void *func;
-       union {
-               /* bit 0: 0 = mods, 1 = sites */
-               unsigned long type;
-               struct static_call_mod *mods;
-               struct static_call_site *sites;
-       };
-};
-
 /* For finding the key associated with a trampoline */
 struct static_call_tramp_key {
        s32 tramp;
@@ -187,10 +184,6 @@ extern long __static_call_return0(void);
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-       void *func;
-};
-
 #define __DEFINE_STATIC_CALL(name, _func, _func_init)                  \
        DECLARE_STATIC_CALL(name, _func);                               \
        struct static_call_key STATIC_CALL_KEY(name) = {                \
@@ -205,6 +198,7 @@ struct static_call_key {
        };                                                              \
        ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+
 #define static_call_cond(name) (void)__static_call(name)
 
 static inline
@@ -243,10 +237,6 @@ static inline long __static_call_return0(void)
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-       void *func;
-};
-
 static inline long __static_call_return0(void)
 {
        return 0;
index ae5662d..5a00b8b 100644 (file)
@@ -58,11 +58,25 @@ struct static_call_site {
        __raw_static_call(name);                                        \
 })
 
+struct static_call_key {
+       void *func;
+       union {
+               /* bit 0: 0 = mods, 1 = sites */
+               unsigned long type;
+               struct static_call_mod *mods;
+               struct static_call_site *sites;
+       };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)    __raw_static_call(name)
 
+struct static_call_key {
+       void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+       void *func;
+};
+
 #define static_call(name)                                              \
        ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
index f5c1194..825c75c 100755 (executable)
@@ -1,4 +1,8 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m32 -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+# This requires GCC 8.1 or better.  Specifically, we require
+# -mstack-protector-guard-reg, added by
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708
+
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs"
index 877827b..a610514 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Written by Masami Hiramatsu <mhiramat@redhat.com>
  */
-#include "inat_types.h"
+#include "inat_types.h" /* __ignore_sync_check__ */
 
 /*
  * Internal bits. Don't use bitmasks directly, because these bits are
index cc777c1..dc632b4 100644 (file)
@@ -9,7 +9,7 @@
 
 #include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
-#include "inat.h"
+#include "inat.h" /* __ignore_sync_check__ */
 
 #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
 
@@ -132,13 +132,25 @@ struct insn {
 #define X86_VEX_M_MAX  0x1f                    /* VEX3.M Maximum value */
 
 extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+       INSN_MODE_32,
+       INSN_MODE_64,
+       /* Mode is determined by the current kernel build. */
+       INSN_MODE_KERN,
+       INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
 
 /* Attribute will be determined after getting ModRM (for opcode groups) */
 static inline void insn_get_attribute(struct insn *insn)
@@ -149,17 +161,6 @@ static inline void insn_get_attribute(struct insn *insn)
 /* Instruction uses RIP-relative addressing */
 extern int insn_rip_relative(struct insn *insn);
 
-/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn,
-                                   const void *kaddr, int buf_len)
-{
-#ifdef CONFIG_X86_64
-       insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
-       insn_init(insn, kaddr, buf_len, 0);
-#endif
-}
-
 static inline int insn_is_avx(struct insn *insn)
 {
        if (!insn->prefixes.got)
@@ -179,13 +180,6 @@ static inline int insn_has_emulate_prefix(struct insn *insn)
        return !!insn->emulate_prefix_size;
 }
 
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
-{
-       return insn->opcode.got && insn->modrm.got && insn->sib.got &&
-               insn->displacement.got && insn->immediate.got;
-}
-
 static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
        if (insn->vex_prefix.nbytes == 2)       /* 2 bytes VEX */
index 4f5ed49..dfbcc64 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Written by Masami Hiramatsu <mhiramat@redhat.com>
  */
-#include "../include/asm/insn.h"
+#include "../include/asm/insn.h" /* __ignore_sync_check__ */
 
 /* Attribute tables are generated from opcode map */
 #include "inat-tables.c"
index 3d9355e..c41f958 100644 (file)
 #else
 #include <string.h>
 #endif
-#include "../include/asm/inat.h"
-#include "../include/asm/insn.h"
+#include "../include/asm/inat.h" /* __ignore_sync_check__ */
+#include "../include/asm/insn.h" /* __ignore_sync_check__ */
 
-#include "../include/asm/emulate_prefix.h"
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+
+#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
 
 #define leXX_to_cpu(t, r)                                              \
 ({                                                                     \
@@ -51,6 +54,7 @@
  * insn_init() - initialize struct insn
  * @insn:      &struct insn to be initialized
  * @kaddr:     address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:   length of the insn buffer at @kaddr
  * @x86_64:    !0 for 64-bit kernel or 64-bit app
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
@@ -111,8 +115,12 @@ static void insn_get_emulate_prefix(struct insn *insn)
  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
  * to point to the (first) opcode.  No effect if @insn->prefixes.got
  * is already set.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_prefixes(struct insn *insn)
+int insn_get_prefixes(struct insn *insn)
 {
        struct insn_field *prefixes = &insn->prefixes;
        insn_attr_t attr;
@@ -120,7 +128,7 @@ void insn_get_prefixes(struct insn *insn)
        int i, nb;
 
        if (prefixes->got)
-               return;
+               return 0;
 
        insn_get_emulate_prefix(insn);
 
@@ -230,8 +238,10 @@ vex_end:
 
        prefixes->got = 1;
 
+       return 0;
+
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /**
@@ -243,16 +253,25 @@ err_out:
  * If necessary, first collects any preceding (prefix) bytes.
  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
  * is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_opcode(struct insn *insn)
+int insn_get_opcode(struct insn *insn)
 {
        struct insn_field *opcode = &insn->opcode;
+       int pfx_id, ret;
        insn_byte_t op;
-       int pfx_id;
+
        if (opcode->got)
-               return;
-       if (!insn->prefixes.got)
-               insn_get_prefixes(insn);
+               return 0;
+
+       if (!insn->prefixes.got) {
+               ret = insn_get_prefixes(insn);
+               if (ret)
+                       return ret;
+       }
 
        /* Get first opcode */
        op = get_next(insn_byte_t, insn);
@@ -267,9 +286,13 @@ void insn_get_opcode(struct insn *insn)
                insn->attr = inat_get_avx_attribute(op, m, p);
                if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
                    (!inat_accept_vex(insn->attr) &&
-                    !inat_is_group(insn->attr)))
-                       insn->attr = 0; /* This instruction is bad */
-               goto end;       /* VEX has only 1 byte for opcode */
+                    !inat_is_group(insn->attr))) {
+                       /* This instruction is bad */
+                       insn->attr = 0;
+                       return -EINVAL;
+               }
+               /* VEX has only 1 byte for opcode */
+               goto end;
        }
 
        insn->attr = inat_get_opcode_attribute(op);
@@ -280,13 +303,18 @@ void insn_get_opcode(struct insn *insn)
                pfx_id = insn_last_prefix_id(insn);
                insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
        }
-       if (inat_must_vex(insn->attr))
-               insn->attr = 0; /* This instruction is bad */
+
+       if (inat_must_vex(insn->attr)) {
+               /* This instruction is bad */
+               insn->attr = 0;
+               return -EINVAL;
+       }
 end:
        opcode->got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /**
@@ -296,15 +324,25 @@ err_out:
  * Populates @insn->modrm and updates @insn->next_byte to point past the
  * ModRM byte, if any.  If necessary, first collects the preceding bytes
  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_modrm(struct insn *insn)
+int insn_get_modrm(struct insn *insn)
 {
        struct insn_field *modrm = &insn->modrm;
        insn_byte_t pfx_id, mod;
+       int ret;
+
        if (modrm->got)
-               return;
-       if (!insn->opcode.got)
-               insn_get_opcode(insn);
+               return 0;
+
+       if (!insn->opcode.got) {
+               ret = insn_get_opcode(insn);
+               if (ret)
+                       return ret;
+       }
 
        if (inat_has_modrm(insn->attr)) {
                mod = get_next(insn_byte_t, insn);
@@ -313,17 +351,22 @@ void insn_get_modrm(struct insn *insn)
                        pfx_id = insn_last_prefix_id(insn);
                        insn->attr = inat_get_group_attribute(mod, pfx_id,
                                                              insn->attr);
-                       if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
-                               insn->attr = 0; /* This is bad */
+                       if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+                               /* Bad insn */
+                               insn->attr = 0;
+                               return -EINVAL;
+                       }
                }
        }
 
        if (insn->x86_64 && inat_is_force64(insn->attr))
                insn->opnd_bytes = 8;
+
        modrm->got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 
@@ -337,11 +380,16 @@ err_out:
 int insn_rip_relative(struct insn *insn)
 {
        struct insn_field *modrm = &insn->modrm;
+       int ret;
 
        if (!insn->x86_64)
                return 0;
-       if (!modrm->got)
-               insn_get_modrm(insn);
+
+       if (!modrm->got) {
+               ret = insn_get_modrm(insn);
+               if (ret)
+                       return 0;
+       }
        /*
         * For rip-relative instructions, the mod field (top 2 bits)
         * is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -355,15 +403,25 @@ int insn_rip_relative(struct insn *insn)
  *
  * If necessary, first collects the instruction up to and including the
  * ModRM byte.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
  */
-void insn_get_sib(struct insn *insn)
+int insn_get_sib(struct insn *insn)
 {
        insn_byte_t modrm;
+       int ret;
 
        if (insn->sib.got)
-               return;
-       if (!insn->modrm.got)
-               insn_get_modrm(insn);
+               return 0;
+
+       if (!insn->modrm.got) {
+               ret = insn_get_modrm(insn);
+               if (ret)
+                       return ret;
+       }
+
        if (insn->modrm.nbytes) {
                modrm = insn->modrm.bytes[0];
                if (insn->addr_bytes != 2 &&
@@ -374,8 +432,10 @@ void insn_get_sib(struct insn *insn)
        }
        insn->sib.got = 1;
 
+       return 0;
+
 err_out:
-       return;
+       return -ENODATA;
 }
 
 
@@ -386,15 +446,25 @@ err_out:
  * If necessary, first collects the instruction up to and including the
  * SIB byte.
  * Displacement value is sign-expanded.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
  */
-void insn_get_displacement(struct insn *insn)
+int insn_get_displacement(struct insn *insn)
 {
        insn_byte_t mod, rm, base;
+       int ret;
 
        if (insn->displacement.got)
-               return;
-       if (!insn->sib.got)
-               insn_get_sib(insn);
+               return 0;
+
+       if (!insn->sib.got) {
+               ret = insn_get_sib(insn);
+               if (ret)
+                       return ret;
+       }
+
        if (insn->modrm.nbytes) {
                /*
                 * Interpreting the modrm byte:
@@ -436,9 +506,10 @@ void insn_get_displacement(struct insn *insn)
        }
 out:
        insn->displacement.got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /* Decode moffset16/32/64. Return 0 if failed */
@@ -537,20 +608,30 @@ err_out:
 }
 
 /**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
  * @insn:      &struct insn containing instruction
  *
  * If necessary, first collects the instruction up to and including the
  * displacement bytes.
  * Basically, most immediates are sign-expanded. The unsigned value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
 {
+       int ret;
+
        if (insn->immediate.got)
-               return;
-       if (!insn->displacement.got)
-               insn_get_displacement(insn);
+               return 0;
+
+       if (!insn->displacement.got) {
+               ret = insn_get_displacement(insn);
+               if (ret)
+                       return ret;
+       }
 
        if (inat_has_moffset(insn->attr)) {
                if (!__get_moffset(insn))
@@ -597,9 +678,10 @@ void insn_get_immediate(struct insn *insn)
        }
 done:
        insn->immediate.got = 1;
+       return 0;
 
 err_out:
-       return;
+       return -ENODATA;
 }
 
 /**
@@ -608,13 +690,65 @@ err_out:
  *
  * If necessary, first collects the instruction up to and including the
  * immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
+ */
+int insn_get_length(struct insn *insn)
 {
+       int ret;
+
        if (insn->length)
-               return;
-       if (!insn->immediate.got)
-               insn_get_immediate(insn);
+               return 0;
+
+       if (!insn->immediate.got) {
+               ret = insn_get_immediate(insn);
+               if (ret)
+                       return ret;
+       }
+
        insn->length = (unsigned char)((unsigned long)insn->next_byte
                                     - (unsigned long)insn->kaddr);
+
+       return 0;
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+       return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+               insn->displacement.got && insn->immediate.got;
+}
+
+/**
+ * insn_decode() - Decode an x86 instruction
+ * @insn:      &struct insn to be initialized
+ * @kaddr:     address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:   length of the insn buffer at @kaddr
+ * @m:         insn mode, see enum insn_mode
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
+ */
+int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
+{
+       int ret;
+
+#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */
+
+       if (m == INSN_MODE_KERN)
+               insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
+       else
+               insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
+
+       ret = insn_get_length(insn);
+       if (ret)
+               return ret;
+
+       if (insn_complete(insn))
+               return 0;
+
+       return -EINVAL;
 }
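
The hunks above convert the tools copy of the instruction decoder from void insn_get_*()
helpers to int-returning ones and add insn_decode() as the single entry point; the objtool
and perf hunks further below are switched over to it. A minimal caller sketch, assuming the
tools copy of asm/insn.h is reachable on the include path (the function name, buffer handling
and include path here are illustrative, not part of the patch):

  #include <stdio.h>

  #include "../include/asm/insn.h"        /* tools copy, as included by insn.c above */

  /* Decode one 64-bit instruction from buf and print what was found. */
  static int dump_one_insn(const unsigned char *buf, int buf_len)
  {
          struct insn insn;
          int ret;

          ret = insn_decode(&insn, buf, buf_len, INSN_MODE_64);
          if (ret < 0)            /* -ENODATA/-EINVAL propagated from the chain above */
                  return ret;

          /*
           * Immediates are sign-expanded; as the comment above notes, the raw
           * unsigned value can be recovered by masking with
           * ((1 << (nbytes * 8)) - 1).  Guarded so the shift stays below 32.
           */
          if (insn.immediate.nbytes && insn.immediate.nbytes < 4)
                  printf("imm: %#x\n", (unsigned int)insn.immediate.value &
                         ((1U << (insn.immediate.nbytes * 8)) - 1));

          printf("length: %d\n", insn.length);
          return 0;
  }

This is the pattern that replaces the open-coded insn_init() + insn_get_length() +
insn_complete() sequence in the conversions below.
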
diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h
new file mode 100644 (file)
index 0000000..1555a0c
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_KCONFIG_H
+#define _TOOLS_LINUX_KCONFIG_H
+
+/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#else
+#define __LITTLE_ENDIAN 1234
+#endif
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+/*
+ * The use of "&&" / "||" is limited in certain expressions.
+ * The following allow calculating "and" / "or" with macro expansion only.
+ */
+#define __and(x, y)                    ___and(x, y)
+#define ___and(x, y)                   ____and(__ARG_PLACEHOLDER_##x, y)
+#define ____and(arg1_or_junk, y)       __take_second_arg(arg1_or_junk y, 0)
+
+#define __or(x, y)                     ___or(x, y)
+#define ___or(x, y)                    ____or(__ARG_PLACEHOLDER_##x, y)
+#define ____or(arg1_or_junk, y)                __take_second_arg(arg1_or_junk 1, y)
+
+/*
+ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
+ * these only work with boolean and tristate options.
+ */
+
+/*
+ * Getting something that works in C and CPP for an arg that may or may
+ * not be defined is tricky.  Here, if we have "#define CONFIG_BOOGER 1"
+ * we match on the placeholder define, insert the "0," for arg1 and generate
+ * the triplet (0, 1, 0).  Then the last step cherry picks the 2nd arg (a one).
+ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
+ * the last step cherry picks the 2nd arg, we get a zero.
+ */
+#define __is_defined(x)                        ___is_defined(x)
+#define ___is_defined(val)             ____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk)   __take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+/*
+ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
+ * otherwise.
+ */
+#define IS_MODULE(option) __is_defined(option##_MODULE)
+
+/*
+ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
+ * code can call a function defined in code compiled based on CONFIG_FOO.
+ * This is similar to IS_ENABLED(), but returns false when invoked from
+ * built-in code when CONFIG_FOO is set to 'm'.
+ */
+#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
+                               __and(IS_MODULE(option), __is_defined(MODULE)))
+
+/*
+ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
+ * 0 otherwise.
+ */
+#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
+
+#endif /* _TOOLS_LINUX_KCONFIG_H */
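
The placeholder trick above resolves entirely in the preprocessor, so IS_ENABLED() and
friends can be used in both C and CPP expressions. A small illustration with made-up
CONFIG_FOO/CONFIG_BAR options (hypothetical; assuming tools/include is on the include path):

  #include <stdio.h>
  #include <linux/kconfig.h>      /* the copy added above */

  #define CONFIG_FOO 1            /* as if set to 'y' in .config */
  /* CONFIG_BAR deliberately left undefined, as if set to 'n' */

  int main(void)
  {
          /*
           * IS_BUILTIN(CONFIG_FOO) -> __is_defined(CONFIG_FOO)
           *   -> ____is_defined(__ARG_PLACEHOLDER_1)
           *   -> __take_second_arg(0, 1, 0) -> 1
           * For the undefined CONFIG_BAR the placeholder does not match,
           * so the "junk" path of __take_second_arg() yields 0.
           */
          printf("FOO=%d BAR=%d\n", IS_ENABLED(CONFIG_FOO), IS_ENABLED(CONFIG_BAR));
          return 0;
  }

Built with something like "gcc -Itools/include demo.c", this prints "FOO=1 BAR=0".
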
index ae5662d..5a00b8b 100644 (file)
@@ -58,11 +58,25 @@ struct static_call_site {
        __raw_static_call(name);                                        \
 })
 
+struct static_call_key {
+       void *func;
+       union {
+               /* bit 0: 0 = mods, 1 = sites */
+               unsigned long type;
+               struct static_call_mod *mods;
+               struct static_call_site *sites;
+       };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)    __raw_static_call(name)
 
+struct static_call_key {
+       void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+       void *func;
+};
+
 #define static_call(name)                                              \
        ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
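
The struct static_call_key added above overlays its second word: under
CONFIG_HAVE_STATIC_CALL_INLINE, bit 0 of ->type records whether the pointer currently refers
to a mods list or a sites list. A short sketch of how that tag can be decoded (the accessor
names are made up for illustration; the kernel keeps its own helpers in kernel/static_call.c):

  #include <stdbool.h>

  struct static_call_mod;
  struct static_call_site;

  struct static_call_key {
          void *func;
          union {
                  /* bit 0: 0 = mods, 1 = sites */
                  unsigned long type;
                  struct static_call_mod *mods;
                  struct static_call_site *sites;
          };
  };

  /* Hypothetical accessors showing the bit-0 tag convention. */
  static bool key_points_to_sites(const struct static_call_key *key)
  {
          return key->type & 1;
  }

  static struct static_call_site *key_sites(const struct static_call_key *key)
  {
          if (!key_points_to_sites(key))
                  return NULL;
          /* Mask off the tag bit to recover the untagged pointer. */
          return (struct static_call_site *)(key->type & ~1UL);
  }

The tag is what lets a single pointer-sized field serve both lists without growing the key.
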
 
index 549813c..8380d0b 100644 (file)
@@ -90,7 +90,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
                            struct list_head *ops_list)
 {
        struct insn insn;
-       int x86_64, sign;
+       int x86_64, sign, ret;
        unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
                      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
                      modrm_reg = 0, sib = 0;
@@ -101,10 +101,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
        if (x86_64 == -1)
                return -1;
 
-       insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
-       insn_get_length(&insn);
-
-       if (!insn_complete(&insn)) {
+       ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+                         x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+       if (ret < 0) {
                WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
                return -1;
        }
index 606a4b5..4bbabae 100755 (executable)
@@ -16,11 +16,14 @@ arch/x86/include/asm/emulate_prefix.h
 arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
 include/linux/static_call_types.h
-arch/x86/include/asm/inat.h     -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
-arch/x86/include/asm/insn.h     -I '^#include [\"<]\(asm/\)*inat.h[\">]'
-arch/x86/lib/inat.c             -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
-arch/x86/lib/insn.c             -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
 "
+
+SYNC_CHECK_FILES='
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/insn.h
+arch/x86/lib/inat.c
+arch/x86/lib/insn.c
+'
 fi
 
 check_2 () {
@@ -63,3 +66,9 @@ while read -r file_entry; do
 done <<EOF
 $FILES
 EOF
+
+if [ "$SRCARCH" = "x86" ]; then
+       for i in $SYNC_CHECK_FILES; do
+               check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+       done
+fi
index 4f75ae9..0262b0d 100644 (file)
@@ -96,13 +96,12 @@ static int get_branch(const char *branch_str)
 static int test_data_item(struct test_data *dat, int x86_64)
 {
        struct intel_pt_insn intel_pt_insn;
+       int op, branch, ret;
        struct insn insn;
-       int op, branch;
 
-       insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64);
-       insn_get_length(&insn);
-
-       if (!insn_complete(&insn)) {
+       ret = insn_decode(&insn, dat->data, MAX_INSN_SIZE,
+                         x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+       if (ret < 0) {
                pr_debug("Failed to decode: %s\n", dat->asm_rep);
                return -1;
        }
index 34d600c..546feda 100644 (file)
@@ -11,7 +11,7 @@ void arch_fetch_insn(struct perf_sample *sample,
                     struct machine *machine)
 {
        struct insn insn;
-       int len;
+       int len, ret;
        bool is64bit = false;
 
        if (!sample->ip)
@@ -19,8 +19,9 @@ void arch_fetch_insn(struct perf_sample *sample,
        len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit);
        if (len <= 0)
                return;
-       insn_init(&insn, sample->insn, len, is64bit);
-       insn_get_length(&insn);
-       if (insn_complete(&insn) && insn.length <= len)
+
+       ret = insn_decode(&insn, sample->insn, len,
+                         is64bit ? INSN_MODE_64 : INSN_MODE_32);
+       if (ret >= 0 && insn.length <= len)
                sample->insn_len = insn.length;
 }
index dded93a..07857df 100755 (executable)
@@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h
 include/uapi/asm-generic/unistd.h
 '
 
+SYNC_CHECK_FILES='
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/insn.h
+arch/x86/lib/inat.c
+arch/x86/lib/insn.c
+'
+
 # These copies are under tools/perf/trace/beauty/ as they are not used in
 # building object files, only by scripts in tools/perf/trace/beauty/ to generate
 # tables that then get included in .c files for things like id->string syscall
@@ -129,6 +136,10 @@ for i in $FILES; do
   check $i -B
 done
 
+for i in $SYNC_CHECK_FILES; do
+  check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+done
+
 # diff with extra ignore lines
 check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
 check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
@@ -137,10 +148,6 @@ check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
 check include/linux/build_bug.h       '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
 check include/linux/ctype.h          '-I "isdigit("'
 check lib/ctype.c                    '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
-check arch/x86/include/asm/inat.h     '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h     '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c            '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
 
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
index 2f6cc7e..593f20e 100644 (file)
@@ -169,11 +169,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
                      struct intel_pt_insn *intel_pt_insn)
 {
        struct insn insn;
+       int ret;
 
-       insn_init(&insn, buf, len, x86_64);
-       insn_get_length(&insn);
-       if (!insn_complete(&insn) || insn.length > len)
+       ret = insn_decode(&insn, buf, len,
+                         x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+       if (ret < 0 || insn.length > len)
                return -1;
+
        intel_pt_insn_decoder(&insn, intel_pt_insn);
        if (insn.length < INTEL_PT_INSN_BUF_SZ)
                memcpy(intel_pt_insn->buf, buf, insn.length);
@@ -194,12 +196,13 @@ const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
                      u8 *inbuf, int inlen, int *lenp)
 {
        struct insn insn;
-       int n, i;
+       int n, i, ret;
        int left;
 
-       insn_init(&insn, inbuf, inlen, x->is64bit);
-       insn_get_length(&insn);
-       if (!insn_complete(&insn) || insn.length > inlen)
+       ret = insn_decode(&insn, inbuf, inlen,
+                         x->is64bit ? INSN_MODE_64 : INSN_MODE_32);
+
+       if (ret < 0 || insn.length > inlen)
                return "<bad>";
        if (lenp)
                *lenp = insn.length;