Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sun, 26 Aug 2018 17:13:21 +0000 (10:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sun, 26 Aug 2018 17:13:21 +0000 (10:13 -0700)
Pull x86 fixes from Thomas Gleixner:

 - Correct the L1TF fallout on 32bit and the off-by-one in the 'too much
   RAM for protection' calculation (see the sketch after the shortlog
   below).

 - Add a helpful kernel message for the 'too much RAM' case

 - Unbreak the VDSO in case the compiler decides to use indirect
   jumps/calls and emits retpolines which cannot be resolved because the
   kernel uses its own thunks, which do not work for the VDSO. Make it
   use the builtin thunks.

 - Re-export start_thread() which was unexported when the 32/64bit
   implementation was unified. start_thread() is required by modular
   binfmt handlers.

 - Trivial cleanups

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/speculation/l1tf: Suggest what to do on systems with too much RAM
  x86/speculation/l1tf: Fix off-by-one error when warning that system has too much RAM
  x86/kvm/vmx: Remove duplicate l1d flush definitions
  x86/speculation/l1tf: Fix overflow in l1tf_pfn_limit() on 32bit
  x86/process: Re-export start_thread()
  x86/mce: Add notifier_block forward declaration
  x86/vdso: Fix vDSO build if a retpoline is emitted
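
A minimal sketch of the 32bit overflow class addressed by the
l1tf_pfn_limit() fix above (illustrative only, not the upstream patch;
'cache_bits' stands in for boot_cpu_data.x86_cache_bits):

/*
 * Illustrative sketch, not the upstream patch.  BIT(n) is computed in
 * unsigned long, which is 32 bits wide on a 32bit build, so a shift
 * count derived from the CPU's cache address bits can exceed the type
 * width.  BIT_ULL(n) keeps the arithmetic in 64 bits.  Build with -m32
 * to reproduce the narrow unsigned long.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define BIT(n)		(1UL << (n))	/* width of unsigned long */
#define BIT_ULL(n)	(1ULL << (n))	/* always 64 bits wide */

static unsigned long long pfn_limit_bad(unsigned int cache_bits)
{
	/* shift count is cache_bits - 13; >= 32 overflows on 32bit */
	return BIT(cache_bits - 1 - PAGE_SHIFT);
}

static unsigned long long pfn_limit_good(unsigned int cache_bits)
{
	return BIT_ULL(cache_bits - 1 - PAGE_SHIFT);
}

int main(void)
{
	unsigned int cache_bits = 46;	/* typical on recent CPUs */

	printf("bad:  %#llx\n", pfn_limit_bad(cache_bits));
	printf("good: %#llx\n", pfn_limit_good(cache_bits));
	return 0;
}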

Makefile
arch/x86/kvm/vmx.c
arch/x86/mm/init.c

diff --combined Makefile
+++ b/Makefile
@@@ -440,7 -440,7 +440,7 @@@ KBUILD_CFLAGS_KERNEL :
  KBUILD_AFLAGS_MODULE  := -DMODULE
  KBUILD_CFLAGS_MODULE  := -DMODULE
  KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
 -LDFLAGS :=
 +KBUILD_LDFLAGS :=
  GCC_PLUGINS_CFLAGS :=
  
  export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
@@@ -448,7 -448,7 +448,7 @@@ export CPP AR NM STRIP OBJCOPY OBJDUMP 
  export MAKE LEX YACC AWK GENKSYMS INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE
  export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
  
 -export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
 +export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
  export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
  export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN
  export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
@@@ -507,9 -507,13 +507,13 @@@ KBUILD_AFLAGS += $(call cc-option, -no-
  endif
  
  RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+ RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register
  RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline
  RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG)))
  export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
  
  KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
  KBUILD_AFLAGS += $(call cc-option,-fno-PIE)
@@@ -754,28 -758,12 +758,28 @@@ ifdef CONFIG_FUNCTION_TRACE
  ifndef CC_FLAGS_FTRACE
  CC_FLAGS_FTRACE := -pg
  endif
 -export CC_FLAGS_FTRACE
 +ifdef CONFIG_FTRACE_MCOUNT_RECORD
 +  # gcc 5 supports generating the mcount tables directly
 +  ifeq ($(call cc-option-yn,-mrecord-mcount),y)
 +    CC_FLAGS_FTRACE   += -mrecord-mcount
 +    export CC_USING_RECORD_MCOUNT := 1
 +  endif
 +  ifdef CONFIG_HAVE_NOP_MCOUNT
 +    ifeq ($(call cc-option-yn, -mnop-mcount),y)
 +      CC_FLAGS_FTRACE += -mnop-mcount
 +      CC_FLAGS_USING  += -DCC_USING_NOP_MCOUNT
 +    endif
 +  endif
 +endif
  ifdef CONFIG_HAVE_FENTRY
 -CC_USING_FENTRY       := $(call cc-option, -mfentry -DCC_USING_FENTRY)
 +  ifeq ($(call cc-option-yn, -mfentry),y)
 +    CC_FLAGS_FTRACE   += -mfentry
 +    CC_FLAGS_USING    += -DCC_USING_FENTRY
 +  endif
  endif
 -KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_USING_FENTRY)
 -KBUILD_AFLAGS += $(CC_USING_FENTRY)
 +export CC_FLAGS_FTRACE
 +KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_FLAGS_USING)
 +KBUILD_AFLAGS += $(CC_FLAGS_USING)
  ifdef CONFIG_DYNAMIC_FTRACE
        ifdef CONFIG_HAVE_C_RECORDMCOUNT
                BUILD_C_RECORDMCOUNT := y
@@@ -790,8 -778,8 +794,8 @@@ KBUILD_CFLAGS += $(call cc-option, -fno
  endif
  
  ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
 -KBUILD_CFLAGS_KERNEL  += $(call cc-option,-ffunction-sections,)
 -KBUILD_CFLAGS_KERNEL  += $(call cc-option,-fdata-sections,)
 +KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections
 +LDFLAGS_vmlinux += --gc-sections
  endif
  
  # arch Makefile may override CC so keep this after arch Makefile is included
@@@ -857,6 -845,10 +861,6 @@@ LDFLAGS_BUILD_ID := $(call ld-option, -
  KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
  LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
  
 -ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
 -LDFLAGS_vmlinux       += $(call ld-option, --gc-sections,)
 -endif
 -
  ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
  LDFLAGS_vmlinux       += $(call ld-option, -X,)
  endif
@@@ -1020,7 -1012,7 +1024,7 @@@ ARCH_POSTLINK := $(wildcard $(srctree)/
  
  # Final link of vmlinux with optional arch pass after final link
  cmd_link-vmlinux =                                                 \
 -      $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) ;    \
 +      $(CONFIG_SHELL) $< $(LD) $(KBUILD_LDFLAGS) $(LDFLAGS_vmlinux) ;    \
        $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
  
  vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) FORCE
@@@ -1350,12 -1342,16 +1354,12 @@@ distclean: mrprope
  
  # Packaging of the kernel to various formats
  # ---------------------------------------------------------------------------
 -# rpm target kept for backward compatibility
  package-dir   := scripts/package
  
  %src-pkg: FORCE
        $(Q)$(MAKE) $(build)=$(package-dir) $@
  %pkg: include/config/kernel.release FORCE
        $(Q)$(MAKE) $(build)=$(package-dir) $@
 -rpm: rpm-pkg
 -      @echo "  WARNING: \"rpm\" target will be removed after Linux 4.18"
 -      @echo "           Please use \"rpm-pkg\" instead."
  
  
  # Brief documentation of the typical targets used
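
The RETPOLINE_VDSO_CFLAGS hunk above is what the vDSO fix in the
shortlog relies on: with -mindirect-branch=thunk-extern every indirect
branch becomes a call to an out-of-line __x86_indirect_thunk_* symbol
that only the kernel image provides, which the separately linked vDSO
cannot resolve.  A hypothetical sketch (vdso_dispatch and its function
pointer type are made up for illustration):

/*
 * Hypothetical vDSO-style code, for illustration only.
 *
 * Compiled with -mindirect-branch=thunk-extern, the indirect call below
 * becomes "call __x86_indirect_thunk_rax" (or another register variant).
 * That thunk lives in the kernel image, not in the vDSO, so the vDSO
 * link cannot resolve it.  The thunk-inline / -mretpoline variants from
 * the hunk above keep the retpoline self-contained in the object,
 * needing no symbol from the kernel image.
 */
typedef long (*vdso_clock_fn)(long clkid, void *ts);

long vdso_dispatch(vdso_clock_fn fn, long clkid, void *ts)
{
	return fn(clkid, ts);	/* indirect call -> retpoline */
}
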
diff --combined arch/x86/kvm/vmx.c
@@@ -38,7 -38,6 +38,7 @@@
  #include "kvm_cache_regs.h"
  #include "x86.h"
  
 +#include <asm/asm.h>
  #include <asm/cpu.h>
  #include <asm/io.h>
  #include <asm/desc.h>
@@@ -198,14 -197,12 +198,14 @@@ static enum vmx_l1d_flush_state __read_
  
  static const struct {
        const char *option;
 -      enum vmx_l1d_flush_state cmd;
 +      bool for_parse;
  } vmentry_l1d_param[] = {
 -      {"auto",        VMENTER_L1D_FLUSH_AUTO},
 -      {"never",       VMENTER_L1D_FLUSH_NEVER},
 -      {"cond",        VMENTER_L1D_FLUSH_COND},
 -      {"always",      VMENTER_L1D_FLUSH_ALWAYS},
 +      [VMENTER_L1D_FLUSH_AUTO]         = {"auto", true},
 +      [VMENTER_L1D_FLUSH_NEVER]        = {"never", true},
 +      [VMENTER_L1D_FLUSH_COND]         = {"cond", true},
 +      [VMENTER_L1D_FLUSH_ALWAYS]       = {"always", true},
 +      [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
 +      [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
  };
  
  #define L1D_CACHE_ORDER 4
@@@ -221,15 -218,15 +221,15 @@@ static int vmx_setup_l1d_flush(enum vmx
                return 0;
        }
  
 -       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
 -             u64 msr;
 +      if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
 +              u64 msr;
  
 -             rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
 -             if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
 -                     l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
 -                     return 0;
 -             }
 -       }
 +              rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
 +              if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
 +                      l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
 +                      return 0;
 +              }
 +      }
  
        /* If set to auto use the default l1tf mitigation method */
        if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
@@@ -289,9 -286,8 +289,9 @@@ static int vmentry_l1d_flush_parse(cons
  
        if (s) {
                for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
 -                      if (sysfs_streq(s, vmentry_l1d_param[i].option))
 -                              return vmentry_l1d_param[i].cmd;
 +                      if (vmentry_l1d_param[i].for_parse &&
 +                          sysfs_streq(s, vmentry_l1d_param[i].option))
 +                              return i;
                }
        }
        return -EINVAL;
@@@ -301,13 -297,13 +301,13 @@@ static int vmentry_l1d_flush_set(const 
  {
        int l1tf, ret;
  
 -      if (!boot_cpu_has(X86_BUG_L1TF))
 -              return 0;
 -
        l1tf = vmentry_l1d_flush_parse(s);
        if (l1tf < 0)
                return l1tf;
  
 +      if (!boot_cpu_has(X86_BUG_L1TF))
 +              return 0;
 +
        /*
         * Has vmx_init() run already? If not then this is the pre init
         * parameter parsing. In that case just store the value and let
  
  static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
  {
 +      if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
 +              return sprintf(s, "???\n");
 +
        return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
  }
  
@@@ -339,53 -332,22 +339,53 @@@ static const struct kernel_param_ops vm
  };
  module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
  
 +enum ept_pointers_status {
 +      EPT_POINTERS_CHECK = 0,
 +      EPT_POINTERS_MATCH = 1,
 +      EPT_POINTERS_MISMATCH = 2
 +};
 +
  struct kvm_vmx {
        struct kvm kvm;
  
        unsigned int tss_addr;
        bool ept_identity_pagetable_done;
        gpa_t ept_identity_map_addr;
 +
 +      enum ept_pointers_status ept_pointers_match;
 +      spinlock_t ept_pointer_lock;
  };
  
  #define NR_AUTOLOAD_MSRS 8
  
 +struct vmcs_hdr {
 +      u32 revision_id:31;
 +      u32 shadow_vmcs:1;
 +};
 +
  struct vmcs {
 -      u32 revision_id;
 +      struct vmcs_hdr hdr;
        u32 abort;
        char data[0];
  };
  
 +/*
 + * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
 + * and whose values change infrequently, but are not constant.  I.e. this is
 + * used as a write-through cache of the corresponding VMCS fields.
 + */
 +struct vmcs_host_state {
 +      unsigned long cr3;      /* May not match real cr3 */
 +      unsigned long cr4;      /* May not match real cr4 */
 +      unsigned long gs_base;
 +      unsigned long fs_base;
 +
 +      u16           fs_sel, gs_sel, ldt_sel;
 +#ifdef CONFIG_X86_64
 +      u16           ds_sel, es_sel;
 +#endif
 +};
 +
  /*
   * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
   * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
@@@ -397,13 -359,14 +397,13 @@@ struct loaded_vmcs 
        int cpu;
        bool launched;
        bool nmi_known_unmasked;
 -      unsigned long vmcs_host_cr3;    /* May not match real cr3 */
 -      unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        /* Support for vnmi-less CPUs */
        int soft_vnmi_blocked;
        ktime_t entry_time;
        s64 vnmi_blocked_time;
        unsigned long *msr_bitmap;
        struct list_head loaded_vmcss_on_cpu_link;
 +      struct vmcs_host_state host_state;
  };
  
  struct shared_msr_entry {
@@@ -434,7 -397,7 +434,7 @@@ struct __packed vmcs12 
        /* According to the Intel spec, a VMCS region must start with the
         * following two fields. Then follow implementation-specific data.
         */
 -      u32 revision_id;
 +      struct vmcs_hdr hdr;
        u32 abort;
  
        u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
                "Offset of " #field " in struct vmcs12 has changed.")
  
  static inline void vmx_check_vmcs12_offsets(void) {
 -      CHECK_OFFSET(revision_id, 0);
 +      CHECK_OFFSET(hdr, 0);
        CHECK_OFFSET(abort, 4);
        CHECK_OFFSET(launch_state, 8);
        CHECK_OFFSET(io_bitmap_a, 40);
@@@ -820,12 -783,6 +820,12 @@@ struct nested_vmx 
         * memory during VMCLEAR and VMPTRLD.
         */
        struct vmcs12 *cached_vmcs12;
 +      /*
 +       * Cache of the guest's shadow VMCS, existing outside of guest
 +       * memory. Loaded from guest memory during VM entry. Flushed
 +       * to guest memory during VM exit.
 +       */
 +      struct vmcs12 *cached_shadow_vmcs12;
        /*
         * Indicates if the shadow vmcs must be updated with the
         * data hold by vmcs12
@@@ -976,20 -933,25 +976,20 @@@ struct vcpu_vmx 
        /*
         * loaded_vmcs points to the VMCS currently used in this vcpu. For a
         * non-nested (L1) guest, it always points to vmcs01. For a nested
 -       * guest (L2), it points to a different VMCS.
 +       * guest (L2), it points to a different VMCS.  loaded_cpu_state points
 +       * to the VMCS whose state is loaded into the CPU registers that only
 +       * need to be switched when transitioning to/from the kernel; a NULL
 +       * value indicates that host state is loaded.
         */
        struct loaded_vmcs    vmcs01;
        struct loaded_vmcs   *loaded_vmcs;
 +      struct loaded_vmcs   *loaded_cpu_state;
        bool                  __launched; /* temporary, used in vmx_vcpu_run */
        struct msr_autoload {
                struct vmx_msrs guest;
                struct vmx_msrs host;
        } msr_autoload;
 -      struct {
 -              int           loaded;
 -              u16           fs_sel, gs_sel, ldt_sel;
 -#ifdef CONFIG_X86_64
 -              u16           ds_sel, es_sel;
 -#endif
 -              int           gs_ldt_reload_needed;
 -              int           fs_reload_needed;
 -              u64           msr_host_bndcfgs;
 -      } host_state;
 +
        struct {
                int vm86_active;
                ulong save_rflags;
         */
        u64 msr_ia32_feature_control;
        u64 msr_ia32_feature_control_valid_bits;
 +      u64 ept_pointer;
  };
  
  enum segment_cache_field {
@@@ -1259,11 -1220,6 +1259,11 @@@ static inline struct vmcs12 *get_vmcs12
        return to_vmx(vcpu)->nested.cached_vmcs12;
  }
  
 +static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
 +{
 +      return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
 +}
 +
  static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
  static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
  static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
@@@ -1534,48 -1490,6 +1534,48 @@@ static void evmcs_sanitize_exec_ctrls(s
         *      GUEST_IA32_RTIT_CTL             = 0x00002814,
         */
  }
 +
 +/* check_ept_pointer() should be under protection of ept_pointer_lock. */
 +static void check_ept_pointer_match(struct kvm *kvm)
 +{
 +      struct kvm_vcpu *vcpu;
 +      u64 tmp_eptp = INVALID_PAGE;
 +      int i;
 +
 +      kvm_for_each_vcpu(i, vcpu, kvm) {
 +              if (!VALID_PAGE(tmp_eptp)) {
 +                      tmp_eptp = to_vmx(vcpu)->ept_pointer;
 +              } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
 +                      to_kvm_vmx(kvm)->ept_pointers_match
 +                              = EPT_POINTERS_MISMATCH;
 +                      return;
 +              }
 +      }
 +
 +      to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
 +}
 +
 +static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
 +{
 +      int ret;
 +
 +      spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 +
 +      if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
 +              check_ept_pointer_match(kvm);
 +
 +      if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
 +              ret = -ENOTSUPP;
 +              goto out;
 +      }
 +
 +      ret = hyperv_flush_guest_mapping(
 +                      to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
 +
 +out:
 +      spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 +      return ret;
 +}
  #else /* !IS_ENABLED(CONFIG_HYPERV) */
  static inline void evmcs_write64(unsigned long field, u64 value) {}
  static inline void evmcs_write32(unsigned long field, u32 value) {}
@@@ -1690,12 -1604,6 +1690,12 @@@ static inline bool cpu_has_vmx_virtual_
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
  }
  
 +static inline bool cpu_has_vmx_encls_vmexit(void)
 +{
 +      return vmcs_config.cpu_based_2nd_exec_ctrl &
 +              SECONDARY_EXEC_ENCLS_EXITING;
 +}
 +
  /*
   * Comment's format: document - errata name - stepping - processor name.
   * Refer from
@@@ -1956,12 -1864,6 +1956,12 @@@ static inline bool nested_cpu_supports_
                        CPU_BASED_MONITOR_TRAP_FLAG;
  }
  
 +static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
 +{
 +      return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
 +              SECONDARY_EXEC_SHADOW_VMCS;
 +}
 +
  static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
  {
        return vmcs12->cpu_based_vm_exec_control & bit;
@@@ -2042,11 -1944,6 +2042,11 @@@ static inline bool nested_cpu_has_eptp_
                 VMX_VMFUNC_EPTP_SWITCHING);
  }
  
 +static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
 +{
 +      return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
 +}
 +
  static inline bool is_nmi(u32 intr_info)
  {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@@ -2077,12 -1974,11 +2077,12 @@@ static inline void __invvpid(int ext, u
        u64 rsvd : 48;
        u64 gva;
      } operand = { vpid, 0, gva };
 +    bool error;
  
 -    asm volatile (__ex(ASM_VMX_INVVPID)
 -                /* CF==1 or ZF==1 --> rc = -1 */
 -                "; ja 1f ; ud2 ; 1:"
 -                : : "a"(&operand), "c"(ext) : "cc", "memory");
 +    asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
 +                : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
 +                : "memory");
 +    BUG_ON(error);
  }
  
  static inline void __invept(int ext, u64 eptp, gpa_t gpa)
        struct {
                u64 eptp, gpa;
        } operand = {eptp, gpa};
 +      bool error;
  
 -      asm volatile (__ex(ASM_VMX_INVEPT)
 -                      /* CF==1 or ZF==1 --> rc = -1 */
 -                      "; ja 1f ; ud2 ; 1:\n"
 -                      : : "a" (&operand), "c" (ext) : "cc", "memory");
 +      asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
 +                    : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
 +                    : "memory");
 +      BUG_ON(error);
  }
  
  static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
  static void vmcs_clear(struct vmcs *vmcs)
  {
        u64 phys_addr = __pa(vmcs);
 -      u8 error;
 +      bool error;
  
 -      asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
 -                    : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 -                    : "cc", "memory");
 -      if (error)
 +      asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
 +                    : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
 +                    : "memory");
 +      if (unlikely(error))
                printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
                       vmcs, phys_addr);
  }
@@@ -2133,15 -2028,15 +2133,15 @@@ static inline void loaded_vmcs_init(str
  static void vmcs_load(struct vmcs *vmcs)
  {
        u64 phys_addr = __pa(vmcs);
 -      u8 error;
 +      bool error;
  
        if (static_branch_unlikely(&enable_evmcs))
                return evmcs_load(phys_addr);
  
 -      asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 -                      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 -                      : "cc", "memory");
 -      if (error)
 +      asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
 +                    : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
 +                    : "memory");
 +      if (unlikely(error))
                printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
                       vmcs, phys_addr);
  }
@@@ -2219,19 -2114,6 +2219,19 @@@ static void loaded_vmcs_clear(struct lo
                         __loaded_vmcs_clear, loaded_vmcs, 1);
  }
  
 +static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
 +{
 +      if (vpid == 0)
 +              return true;
 +
 +      if (cpu_has_vmx_invvpid_individual_addr()) {
 +              __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
 +              return true;
 +      }
 +
 +      return false;
 +}
 +
  static inline void vpid_sync_vcpu_single(int vpid)
  {
        if (vpid == 0)
@@@ -2366,10 -2248,10 +2366,10 @@@ static noinline void vmwrite_error(unsi
  
  static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
  {
 -      u8 error;
 +      bool error;
  
 -      asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
 -                     : "=q"(error) : "a"(value), "d"(field) : "cc");
 +      asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
 +                    : CC_OUT(na) (error) : "a"(value), "d"(field));
        if (unlikely(error))
                vmwrite_error(field, value);
  }
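
The CC_SET()/CC_OUT() conversions in the hunks above and below replace
the old '"; setna %0"' pattern plus "cc" clobber with the compiler's
flag-output operands where available.  A standalone sketch of the
underlying constraint, independent of the kernel's asm.h macros:

/*
 * Standalone sketch of the asm flag-output feature that CC_SET()/CC_OUT()
 * build on (GCC 6+ and clang on x86).  "=@ccna" lets the compiler read
 * the "not above" condition (CF=1 || ZF=1) directly from EFLAGS after
 * the asm, instead of an explicit SETNA instruction plus a "cc" clobber.
 */
#include <stdbool.h>
#include <stdio.h>

static bool below_or_equal(unsigned long a, unsigned long b)
{
	bool na;

	asm ("cmp %2, %1"		/* sets flags from a - b */
	     : "=@ccna" (na)
	     : "r" (a), "r" (b));
	return na;			/* true when a <= b (unsigned) */
}

int main(void)
{
	printf("%d %d\n", below_or_equal(1, 2), below_or_equal(3, 2));
	return 0;
}
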
@@@ -2853,150 -2735,121 +2853,150 @@@ static unsigned long segment_base(u16 s
  }
  #endif
  
 -static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 +static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 +      struct vmcs_host_state *host_state;
  #ifdef CONFIG_X86_64
        int cpu = raw_smp_processor_id();
 -      unsigned long fs_base, kernel_gs_base;
  #endif
 +      unsigned long fs_base, gs_base;
 +      u16 fs_sel, gs_sel;
        int i;
  
 -      if (vmx->host_state.loaded)
 +      if (vmx->loaded_cpu_state)
                return;
  
 -      vmx->host_state.loaded = 1;
 +      vmx->loaded_cpu_state = vmx->loaded_vmcs;
 +      host_state = &vmx->loaded_cpu_state->host_state;
 +
        /*
         * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
         * allow segment selectors with cpl > 0 or ti == 1.
         */
 -      vmx->host_state.ldt_sel = kvm_read_ldt();
 -      vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
 +      host_state->ldt_sel = kvm_read_ldt();
  
  #ifdef CONFIG_X86_64
 +      savesegment(ds, host_state->ds_sel);
 +      savesegment(es, host_state->es_sel);
 +
 +      gs_base = cpu_kernelmode_gs_base(cpu);
        if (likely(is_64bit_mm(current->mm))) {
                save_fsgs_for_kvm();
 -              vmx->host_state.fs_sel = current->thread.fsindex;
 -              vmx->host_state.gs_sel = current->thread.gsindex;
 +              fs_sel = current->thread.fsindex;
 +              gs_sel = current->thread.gsindex;
                fs_base = current->thread.fsbase;
 -              kernel_gs_base = current->thread.gsbase;
 +              vmx->msr_host_kernel_gs_base = current->thread.gsbase;
        } else {
 -#endif
 -              savesegment(fs, vmx->host_state.fs_sel);
 -              savesegment(gs, vmx->host_state.gs_sel);
 -#ifdef CONFIG_X86_64
 +              savesegment(fs, fs_sel);
 +              savesegment(gs, gs_sel);
                fs_base = read_msr(MSR_FS_BASE);
 -              kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
 -      }
 -#endif
 -      if (!(vmx->host_state.fs_sel & 7)) {
 -              vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
 -              vmx->host_state.fs_reload_needed = 0;
 -      } else {
 -              vmcs_write16(HOST_FS_SELECTOR, 0);
 -              vmx->host_state.fs_reload_needed = 1;
 -      }
 -      if (!(vmx->host_state.gs_sel & 7))
 -              vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
 -      else {
 -              vmcs_write16(HOST_GS_SELECTOR, 0);
 -              vmx->host_state.gs_ldt_reload_needed = 1;
 +              vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
        }
  
 -#ifdef CONFIG_X86_64
 -      savesegment(ds, vmx->host_state.ds_sel);
 -      savesegment(es, vmx->host_state.es_sel);
 -
 -      vmcs_writel(HOST_FS_BASE, fs_base);
 -      vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
 -
 -      vmx->msr_host_kernel_gs_base = kernel_gs_base;
        if (is_long_mode(&vmx->vcpu))
                wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
  #else
 -      vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
 -      vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
 +      savesegment(fs, fs_sel);
 +      savesegment(gs, gs_sel);
 +      fs_base = segment_base(fs_sel);
 +      gs_base = segment_base(gs_sel);
  #endif
 -      if (boot_cpu_has(X86_FEATURE_MPX))
 -              rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
 +
 +      if (unlikely(fs_sel != host_state->fs_sel)) {
 +              if (!(fs_sel & 7))
 +                      vmcs_write16(HOST_FS_SELECTOR, fs_sel);
 +              else
 +                      vmcs_write16(HOST_FS_SELECTOR, 0);
 +              host_state->fs_sel = fs_sel;
 +      }
 +      if (unlikely(gs_sel != host_state->gs_sel)) {
 +              if (!(gs_sel & 7))
 +                      vmcs_write16(HOST_GS_SELECTOR, gs_sel);
 +              else
 +                      vmcs_write16(HOST_GS_SELECTOR, 0);
 +              host_state->gs_sel = gs_sel;
 +      }
 +      if (unlikely(fs_base != host_state->fs_base)) {
 +              vmcs_writel(HOST_FS_BASE, fs_base);
 +              host_state->fs_base = fs_base;
 +      }
 +      if (unlikely(gs_base != host_state->gs_base)) {
 +              vmcs_writel(HOST_GS_BASE, gs_base);
 +              host_state->gs_base = gs_base;
 +      }
 +
        for (i = 0; i < vmx->save_nmsrs; ++i)
                kvm_set_shared_msr(vmx->guest_msrs[i].index,
                                   vmx->guest_msrs[i].data,
                                   vmx->guest_msrs[i].mask);
  }
  
 -static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 +static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
  {
 -      if (!vmx->host_state.loaded)
 +      struct vmcs_host_state *host_state;
 +
 +      if (!vmx->loaded_cpu_state)
                return;
  
 +      WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
 +      host_state = &vmx->loaded_cpu_state->host_state;
 +
        ++vmx->vcpu.stat.host_state_reload;
 -      vmx->host_state.loaded = 0;
 +      vmx->loaded_cpu_state = NULL;
 +
  #ifdef CONFIG_X86_64
        if (is_long_mode(&vmx->vcpu))
                rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
  #endif
 -      if (vmx->host_state.gs_ldt_reload_needed) {
 -              kvm_load_ldt(vmx->host_state.ldt_sel);
 +      if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
 +              kvm_load_ldt(host_state->ldt_sel);
  #ifdef CONFIG_X86_64
 -              load_gs_index(vmx->host_state.gs_sel);
 +              load_gs_index(host_state->gs_sel);
  #else
 -              loadsegment(gs, vmx->host_state.gs_sel);
 +              loadsegment(gs, host_state->gs_sel);
  #endif
        }
 -      if (vmx->host_state.fs_reload_needed)
 -              loadsegment(fs, vmx->host_state.fs_sel);
 +      if (host_state->fs_sel & 7)
 +              loadsegment(fs, host_state->fs_sel);
  #ifdef CONFIG_X86_64
 -      if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
 -              loadsegment(ds, vmx->host_state.ds_sel);
 -              loadsegment(es, vmx->host_state.es_sel);
 +      if (unlikely(host_state->ds_sel | host_state->es_sel)) {
 +              loadsegment(ds, host_state->ds_sel);
 +              loadsegment(es, host_state->es_sel);
        }
  #endif
        invalidate_tss_limit();
  #ifdef CONFIG_X86_64
        wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
  #endif
 -      if (vmx->host_state.msr_host_bndcfgs)
 -              wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
        load_fixmap_gdt(raw_smp_processor_id());
  }
  
 -static void vmx_load_host_state(struct vcpu_vmx *vmx)
 +#ifdef CONFIG_X86_64
 +static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
  {
 -      preempt_disable();
 -      __vmx_load_host_state(vmx);
 -      preempt_enable();
 +      if (is_long_mode(&vmx->vcpu)) {
 +              preempt_disable();
 +              if (vmx->loaded_cpu_state)
 +                      rdmsrl(MSR_KERNEL_GS_BASE,
 +                             vmx->msr_guest_kernel_gs_base);
 +              preempt_enable();
 +      }
 +      return vmx->msr_guest_kernel_gs_base;
  }
  
 +static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 +{
 +      if (is_long_mode(&vmx->vcpu)) {
 +              preempt_disable();
 +              if (vmx->loaded_cpu_state)
 +                      wrmsrl(MSR_KERNEL_GS_BASE, data);
 +              preempt_enable();
 +      }
 +      vmx->msr_guest_kernel_gs_base = data;
 +}
 +#endif
 +
  static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
  {
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@@ -3138,7 -2991,7 +3138,7 @@@ static void vmx_vcpu_put(struct kvm_vcp
  {
        vmx_vcpu_pi_put(vcpu);
  
 -      __vmx_load_host_state(to_vmx(vcpu));
 +      vmx_prepare_switch_to_host(to_vmx(vcpu));
  }
  
  static bool emulation_required(struct kvm_vcpu *vcpu)
@@@ -3359,7 -3212,7 +3359,7 @@@ static bool vmx_rdtscp_supported(void
  
  static bool vmx_invpcid_supported(void)
  {
 -      return cpu_has_vmx_invpcid() && enable_ept;
 +      return cpu_has_vmx_invpcid();
  }
  
  /*
@@@ -3602,12 -3455,6 +3602,12 @@@ static void nested_vmx_setup_ctls_msrs(
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                SECONDARY_EXEC_WBINVD_EXITING;
 +      /*
 +       * We can emulate "VMCS shadowing," even if the hardware
 +       * doesn't support it.
 +       */
 +      msrs->secondary_ctls_high |=
 +              SECONDARY_EXEC_SHADOW_VMCS;
  
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
@@@ -4075,7 -3922,8 +4075,7 @@@ static int vmx_get_msr(struct kvm_vcpu 
                msr_info->data = vmcs_readl(GUEST_GS_BASE);
                break;
        case MSR_KERNEL_GS_BASE:
 -              vmx_load_host_state(vmx);
 -              msr_info->data = vmx->msr_guest_kernel_gs_base;
 +              msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
                break;
  #endif
        case MSR_EFER:
@@@ -4175,7 -4023,8 +4175,7 @@@ static int vmx_set_msr(struct kvm_vcpu 
                vmcs_writel(GUEST_GS_BASE, data);
                break;
        case MSR_KERNEL_GS_BASE:
 -              vmx_load_host_state(vmx);
 -              vmx->msr_guest_kernel_gs_base = data;
 +              vmx_write_guest_kernel_gs_base(vmx, data);
                break;
  #endif
        case MSR_IA32_SYSENTER_CS:
@@@ -4563,8 -4412,7 +4563,8 @@@ static __init int setup_vmcs_config(str
                        SECONDARY_EXEC_RDRAND_EXITING |
                        SECONDARY_EXEC_ENABLE_PML |
                        SECONDARY_EXEC_TSC_SCALING |
 -                      SECONDARY_EXEC_ENABLE_VMFUNC;
 +                      SECONDARY_EXEC_ENABLE_VMFUNC |
 +                      SECONDARY_EXEC_ENCLS_EXITING;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
                                        &_cpu_based_2nd_exec_control) < 0)
        return 0;
  }
  
 -static struct vmcs *alloc_vmcs_cpu(int cpu)
 +static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
  {
        int node = cpu_to_node(cpu);
        struct page *pages;
  
        /* KVM supports Enlightened VMCS v1 only */
        if (static_branch_unlikely(&enable_evmcs))
 -              vmcs->revision_id = KVM_EVMCS_VERSION;
 +              vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
        else
 -              vmcs->revision_id = vmcs_config.revision_id;
 +              vmcs->hdr.revision_id = vmcs_config.revision_id;
  
 +      if (shadow)
 +              vmcs->hdr.shadow_vmcs = 1;
        return vmcs;
  }
  
@@@ -4754,14 -4600,14 +4754,14 @@@ static void free_loaded_vmcs(struct loa
        WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
  }
  
 -static struct vmcs *alloc_vmcs(void)
 +static struct vmcs *alloc_vmcs(bool shadow)
  {
 -      return alloc_vmcs_cpu(raw_smp_processor_id());
 +      return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
  }
  
  static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
  {
 -      loaded_vmcs->vmcs = alloc_vmcs();
 +      loaded_vmcs->vmcs = alloc_vmcs(false);
        if (!loaded_vmcs->vmcs)
                return -ENOMEM;
  
                        evmcs->hv_enlightenments_control.msr_bitmap = 1;
                }
        }
 +
 +      memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
 +
        return 0;
  
  out_vmcs:
@@@ -4895,7 -4738,7 +4895,7 @@@ static __init int alloc_kvm_area(void
        for_each_possible_cpu(cpu) {
                struct vmcs *vmcs;
  
 -              vmcs = alloc_vmcs_cpu(cpu);
 +              vmcs = alloc_vmcs_cpu(false, cpu);
                if (!vmcs) {
                        free_kvm_area();
                        return -ENOMEM;
                 * physical CPU.
                 */
                if (static_branch_unlikely(&enable_evmcs))
 -                      vmcs->revision_id = vmcs_config.revision_id;
 +                      vmcs->hdr.revision_id = vmcs_config.revision_id;
  
                per_cpu(vmxarea, cpu) = vmcs;
        }
@@@ -5069,18 -4912,10 +5069,18 @@@ static void vmx_set_efer(struct kvm_vcp
                return;
  
        /*
 -       * Force kernel_gs_base reloading before EFER changes, as control
 -       * of this msr depends on is_long_mode().
 +       * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
 +       * 64-bit mode as a 64-bit kernel may frequently access the
 +       * MSR.  This means we need to manually save/restore the MSR
 +       * when switching between guest and host state, but only if
 +       * the guest is in 64-bit mode.  Sync our cached value if the
 +       * guest is transitioning to 32-bit mode and the CPU contains
 +       * guest state, i.e. the cache is stale.
         */
 -      vmx_load_host_state(to_vmx(vcpu));
 +#ifdef CONFIG_X86_64
 +      if (!(efer & EFER_LMA))
 +              (void)vmx_read_guest_kernel_gs_base(vmx);
 +#endif
        vcpu->arch.efer = efer;
        if (efer & EFER_LMA) {
                vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@@ -5137,20 -4972,6 +5137,20 @@@ static void vmx_flush_tlb(struct kvm_vc
        __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
  }
  
 +static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 +{
 +      int vpid = to_vmx(vcpu)->vpid;
 +
 +      if (!vpid_sync_vcpu_addr(vpid, addr))
 +              vpid_sync_context(vpid);
 +
 +      /*
 +       * If VPIDs are not supported or enabled, then the above is a no-op.
 +       * But we don't really need a TLB flush in that case anyway, because
 +       * each VM entry/exit includes an implicit flush when VPID is 0.
 +       */
 +}
 +
  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
  {
        ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@@ -5332,7 -5153,6 +5332,7 @@@ static u64 construct_eptp(struct kvm_vc
  
  static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  {
 +      struct kvm *kvm = vcpu->kvm;
        unsigned long guest_cr3;
        u64 eptp;
  
        if (enable_ept) {
                eptp = construct_eptp(vcpu, cr3);
                vmcs_write64(EPT_POINTER, eptp);
 +
 +              if (kvm_x86_ops->tlb_remote_flush) {
 +                      spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 +                      to_vmx(vcpu)->ept_pointer = eptp;
 +                      to_kvm_vmx(kvm)->ept_pointers_match
 +                              = EPT_POINTERS_CHECK;
 +                      spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 +              }
 +
                if (enable_unrestricted_guest || is_paging(vcpu) ||
                    is_guest_mode(vcpu))
                        guest_cr3 = kvm_read_cr3(vcpu);
                else
 -                      guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
 +                      guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
                ept_load_pdptrs(vcpu);
        }
  
 -      vmx_flush_tlb(vcpu, true);
        vmcs_writel(GUEST_CR3, guest_cr3);
  }
  
@@@ -6292,19 -6104,19 +6292,19 @@@ static void vmx_set_constant_host_state
         */
        cr3 = __read_cr3();
        vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
 -      vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 +      vmx->loaded_vmcs->host_state.cr3 = cr3;
  
        /* Save the most likely value for this task's CR4 in the VMCS. */
        cr4 = cr4_read_shadow();
        vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
 -      vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 +      vmx->loaded_vmcs->host_state.cr4 = cr4;
  
        vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
  #ifdef CONFIG_X86_64
        /*
         * Load null selectors, so we can avoid reloading them in
 -       * __vmx_load_host_state(), in case userspace uses the null selectors
 -       * too (the expected case).
 +       * vmx_prepare_switch_to_host(), in case userspace uses
 +       * the null selectors too (the expected case).
         */
        vmcs_write16(HOST_DS_SELECTOR, 0);
        vmcs_write16(HOST_ES_SELECTOR, 0);
@@@ -6429,6 -6241,8 +6429,6 @@@ static void vmx_compute_secondary_exec_
        if (!enable_ept) {
                exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
                enable_unrestricted_guest = 0;
 -              /* Enable INVPCID for non-ept guests may cause performance regression. */
 -              exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
        }
        if (!enable_unrestricted_guest)
                exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@@ -6557,6 -6371,9 +6557,6 @@@ static void ept_set_mmio_spte_mask(void
   */
  static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
  {
 -#ifdef CONFIG_X86_64
 -      unsigned long a;
 -#endif
        int i;
  
        if (enable_shadow_vmcs) {
        vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
        vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
        vmx_set_constant_host_state(vmx);
 -#ifdef CONFIG_X86_64
 -      rdmsrl(MSR_FS_BASE, a);
 -      vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
 -      rdmsrl(MSR_GS_BASE, a);
 -      vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
 -#else
        vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
        vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
 -#endif
  
        if (cpu_has_vmx_vmfunc())
                vmcs_write64(VM_FUNCTION_CONTROL, 0);
                vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
                vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
        }
 +
 +      if (cpu_has_vmx_encls_vmexit())
 +              vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
  }
  
  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@@ -7849,7 -7670,6 +7849,7 @@@ static void vmx_enable_tdp(void
  
  static __init int hardware_setup(void)
  {
 +      unsigned long host_bndcfgs;
        int r = -ENOMEM, i;
  
        rdmsrl_safe(MSR_EFER, &host_efer);
        if (boot_cpu_has(X86_FEATURE_NX))
                kvm_enable_efer_bits(EFER_NX);
  
 +      if (boot_cpu_has(X86_FEATURE_MPX)) {
 +              rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
 +              WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
 +      }
 +
        if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
                !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
                enable_vpid = 0;
        if (enable_ept && !cpu_has_vmx_ept_2m_page())
                kvm_disable_largepages();
  
 +#if IS_ENABLED(CONFIG_HYPERV)
 +      if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
 +          && enable_ept)
 +              kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb;
 +#endif
 +
        if (!cpu_has_vmx_ple()) {
                ple_gap = 0;
                ple_window = 0;
        else
                kvm_disable_tdp();
  
 +      if (!nested) {
 +              kvm_x86_ops->get_nested_state = NULL;
 +              kvm_x86_ops->set_nested_state = NULL;
 +      }
 +
        /*
         * Only enable PML when hardware supports PML feature, and both EPT
         * and EPT A/D bit features are enabled -- PML depends on them to work.
@@@ -8228,35 -8032,10 +8228,35 @@@ static int nested_vmx_get_vmptr(struct 
        return 0;
  }
  
 +/*
 + * Allocate a shadow VMCS and associate it with the currently loaded
 + * VMCS, unless such a shadow VMCS already exists. The newly allocated
 + * VMCS is also VMCLEARed, so that it is ready for use.
 + */
 +static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
 +{
 +      struct vcpu_vmx *vmx = to_vmx(vcpu);
 +      struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
 +
 +      /*
 +       * We should allocate a shadow vmcs for vmcs01 only when L1
 +       * executes VMXON and free it when L1 executes VMXOFF.
 +       * As it is invalid to execute VMXON twice, we shouldn't reach
 +       * here when vmcs01 already have an allocated shadow vmcs.
 +       */
 +      WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
 +
 +      if (!loaded_vmcs->shadow_vmcs) {
 +              loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
 +              if (loaded_vmcs->shadow_vmcs)
 +                      vmcs_clear(loaded_vmcs->shadow_vmcs);
 +      }
 +      return loaded_vmcs->shadow_vmcs;
 +}
 +
  static int enter_vmx_operation(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 -      struct vmcs *shadow_vmcs;
        int r;
  
        r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
        if (!vmx->nested.cached_vmcs12)
                goto out_cached_vmcs12;
  
 -      if (enable_shadow_vmcs) {
 -              shadow_vmcs = alloc_vmcs();
 -              if (!shadow_vmcs)
 -                      goto out_shadow_vmcs;
 -              /* mark vmcs as shadow */
 -              shadow_vmcs->revision_id |= (1u << 31);
 -              /* init shadow vmcs */
 -              vmcs_clear(shadow_vmcs);
 -              vmx->vmcs01.shadow_vmcs = shadow_vmcs;
 -      }
 +      vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
 +      if (!vmx->nested.cached_shadow_vmcs12)
 +              goto out_cached_shadow_vmcs12;
 +
 +      if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
 +              goto out_shadow_vmcs;
  
        hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
                     HRTIMER_MODE_REL_PINNED);
        return 0;
  
  out_shadow_vmcs:
 +      kfree(vmx->nested.cached_shadow_vmcs12);
 +
 +out_cached_shadow_vmcs12:
        kfree(vmx->nested.cached_vmcs12);
  
  out_cached_vmcs12:
@@@ -8329,7 -8109,7 +8329,7 @@@ static int handle_vmon(struct kvm_vcpu 
  
        /* CPL=0 must be checked manually. */
        if (vmx_get_cpl(vcpu)) {
 -              kvm_queue_exception(vcpu, UD_VECTOR);
 +              kvm_inject_gp(vcpu, 0);
                return 1;
        }
  
   */
  static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
  {
 -      if (vmx_get_cpl(vcpu)) {
 +      if (!to_vmx(vcpu)->nested.vmxon) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 0;
        }
  
 -      if (!to_vmx(vcpu)->nested.vmxon) {
 -              kvm_queue_exception(vcpu, UD_VECTOR);
 +      if (vmx_get_cpl(vcpu)) {
 +              kvm_inject_gp(vcpu, 0);
                return 0;
        }
 +
        return 1;
  }
  
@@@ -8454,7 -8233,6 +8454,7 @@@ static void free_nested(struct vcpu_vm
                vmx->vmcs01.shadow_vmcs = NULL;
        }
        kfree(vmx->nested.cached_vmcs12);
 +      kfree(vmx->nested.cached_shadow_vmcs12);
        /* Unpin physical memory we referred to in the vmcs02 */
        if (vmx->nested.apic_access_page) {
                kvm_release_page_dirty(vmx->nested.apic_access_page);
@@@ -8540,7 -8318,7 +8540,7 @@@ static int handle_vmresume(struct kvm_v
   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
   * 64-bit fields are to be returned).
   */
 -static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
 +static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
                                  unsigned long field, u64 *ret)
  {
        short offset = vmcs_field_to_offset(field);
        if (offset < 0)
                return offset;
  
 -      p = ((char *)(get_vmcs12(vcpu))) + offset;
 +      p = (char *)vmcs12 + offset;
  
        switch (vmcs_field_width(field)) {
        case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
  }
  
  
 -static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
 +static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
                                   unsigned long field, u64 field_value){
        short offset = vmcs_field_to_offset(field);
 -      char *p = ((char *) get_vmcs12(vcpu)) + offset;
 +      char *p = (char *)vmcs12 + offset;
        if (offset < 0)
                return offset;
  
@@@ -8627,7 -8405,7 +8627,7 @@@ static void copy_shadow_to_vmcs12(struc
                for (i = 0; i < max_fields[q]; i++) {
                        field = fields[q][i];
                        field_value = __vmcs_readl(field);
 -                      vmcs12_write_any(&vmx->vcpu, field, field_value);
 +                      vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
                }
                /*
                 * Skip the VM-exit information fields if they are read-only.
@@@ -8662,7 -8440,7 +8662,7 @@@ static void copy_vmcs12_to_shadow(struc
        for (q = 0; q < ARRAY_SIZE(fields); q++) {
                for (i = 0; i < max_fields[q]; i++) {
                        field = fields[q][i];
 -                      vmcs12_read_any(&vmx->vcpu, field, &field_value);
 +                      vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
                        __vmcs_writel(field, field_value);
                }
        }
@@@ -8692,7 -8470,6 +8692,7 @@@ static int handle_vmread(struct kvm_vcp
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        gva_t gva = 0;
 +      struct vmcs12 *vmcs12;
  
        if (!nested_vmx_check_permission(vcpu))
                return 1;
        if (!nested_vmx_check_vmcs12(vcpu))
                return kvm_skip_emulated_instruction(vcpu);
  
 +      if (!is_guest_mode(vcpu))
 +              vmcs12 = get_vmcs12(vcpu);
 +      else {
 +              /*
 +               * When vmcs->vmcs_link_pointer is -1ull, any VMREAD
 +               * to shadowed-field sets the ALU flags for VMfailInvalid.
 +               */
 +              if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
 +                      nested_vmx_failInvalid(vcpu);
 +                      return kvm_skip_emulated_instruction(vcpu);
 +              }
 +              vmcs12 = get_shadow_vmcs12(vcpu);
 +      }
 +
        /* Decode instruction info and find the field to read */
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
        /* Read the field, zero-extended to a u64 field_value */
 -      if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 +      if (vmcs12_read_any(vmcs12, field, &field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                return kvm_skip_emulated_instruction(vcpu);
        }
@@@ -8759,7 -8522,6 +8759,7 @@@ static int handle_vmwrite(struct kvm_vc
         */
        u64 field_value = 0;
        struct x86_exception e;
 +      struct vmcs12 *vmcs12;
  
        if (!nested_vmx_check_permission(vcpu))
                return 1;
                return kvm_skip_emulated_instruction(vcpu);
        }
  
 -      if (vmcs12_write_any(vcpu, field, field_value) < 0) {
 +      if (!is_guest_mode(vcpu))
 +              vmcs12 = get_vmcs12(vcpu);
 +      else {
 +              /*
 +               * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
 +               * to shadowed-field sets the ALU flags for VMfailInvalid.
 +               */
 +              if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
 +                      nested_vmx_failInvalid(vcpu);
 +                      return kvm_skip_emulated_instruction(vcpu);
 +              }
 +              vmcs12 = get_shadow_vmcs12(vcpu);
 +
 +      }
 +
 +      if (vmcs12_write_any(vmcs12, field, field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                return kvm_skip_emulated_instruction(vcpu);
        }
  
 -      switch (field) {
 +      /*
 +       * Do not track vmcs12 dirty-state if in guest-mode
 +       * as we actually dirty shadow vmcs12 instead of vmcs12.
 +       */
 +      if (!is_guest_mode(vcpu)) {
 +              switch (field) {
  #define SHADOW_FIELD_RW(x) case x:
  #include "vmx_shadow_fields.h"
 -              /*
 -               * The fields that can be updated by L1 without a vmexit are
 -               * always updated in the vmcs02, the others go down the slow
 -               * path of prepare_vmcs02.
 -               */
 -              break;
 -      default:
 -              vmx->nested.dirty_vmcs12 = true;
 -              break;
 +                      /*
 +                       * The fields that can be updated by L1 without a vmexit are
 +                       * always updated in the vmcs02, the others go down the slow
 +                       * path of prepare_vmcs02.
 +                       */
 +                      break;
 +              default:
 +                      vmx->nested.dirty_vmcs12 = true;
 +                      break;
 +              }
        }
  
        nested_vmx_succeed(vcpu);
@@@ -8882,9 -8623,7 +8882,9 @@@ static int handle_vmptrld(struct kvm_vc
                        return kvm_skip_emulated_instruction(vcpu);
                }
                new_vmcs12 = kmap(page);
 -              if (new_vmcs12->revision_id != VMCS12_REVISION) {
 +              if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
 +                  (new_vmcs12->hdr.shadow_vmcs &&
 +                   !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
                        kunmap(page);
                        kvm_release_page_clean(page);
                        nested_vmx_failValid(vcpu,
@@@ -9082,105 -8821,6 +9082,105 @@@ static int handle_invvpid(struct kvm_vc
        return kvm_skip_emulated_instruction(vcpu);
  }
  
 +static int handle_invpcid(struct kvm_vcpu *vcpu)
 +{
 +      u32 vmx_instruction_info;
 +      unsigned long type;
 +      bool pcid_enabled;
 +      gva_t gva;
 +      struct x86_exception e;
 +      unsigned i;
 +      unsigned long roots_to_free = 0;
 +      struct {
 +              u64 pcid;
 +              u64 gla;
 +      } operand;
 +
 +      if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
 +              kvm_queue_exception(vcpu, UD_VECTOR);
 +              return 1;
 +      }
 +
 +      vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 +      type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 +
 +      if (type > 3) {
 +              kvm_inject_gp(vcpu, 0);
 +              return 1;
 +      }
 +
 +      /* According to the Intel instruction reference, the memory operand
 +       * is read even if it isn't needed (e.g., for type==all)
 +       */
 +      if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 +                              vmx_instruction_info, false, &gva))
 +              return 1;
 +
 +      if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 +              kvm_inject_page_fault(vcpu, &e);
 +              return 1;
 +      }
 +
 +      if (operand.pcid >> 12 != 0) {
 +              kvm_inject_gp(vcpu, 0);
 +              return 1;
 +      }
 +
 +      pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 +
 +      switch (type) {
 +      case INVPCID_TYPE_INDIV_ADDR:
 +              if ((!pcid_enabled && (operand.pcid != 0)) ||
 +                  is_noncanonical_address(operand.gla, vcpu)) {
 +                      kvm_inject_gp(vcpu, 0);
 +                      return 1;
 +              }
 +              kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
 +              return kvm_skip_emulated_instruction(vcpu);
 +
 +      case INVPCID_TYPE_SINGLE_CTXT:
 +              if (!pcid_enabled && (operand.pcid != 0)) {
 +                      kvm_inject_gp(vcpu, 0);
 +                      return 1;
 +              }
 +
 +              if (kvm_get_active_pcid(vcpu) == operand.pcid) {
 +                      kvm_mmu_sync_roots(vcpu);
 +                      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 +              }
 +
 +              for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
 +                      if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
 +                          == operand.pcid)
 +                              roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 +
 +              kvm_mmu_free_roots(vcpu, roots_to_free);
 +              /*
 +               * If neither the current cr3 nor any of the prev_roots use the
 +               * given PCID, then nothing needs to be done here because a
 +               * resync will happen anyway before switching to any other CR3.
 +               */
 +
 +              return kvm_skip_emulated_instruction(vcpu);
 +
 +      case INVPCID_TYPE_ALL_NON_GLOBAL:
 +              /*
 +               * Currently, KVM doesn't mark global entries in the shadow
 +               * page tables, so a non-global flush just degenerates to a
 +               * global flush. If needed, we could optimize this later by
 +               * keeping track of global entries in shadow page tables.
 +               */
 +
 +              /* fall-through */
 +      case INVPCID_TYPE_ALL_INCL_GLOBAL:
 +              kvm_mmu_unload(vcpu);
 +              return kvm_skip_emulated_instruction(vcpu);
 +
 +      default:
 +              BUG(); /* We have already checked above that type <= 3 */
 +      }
 +}
 +
  static int handle_pml_full(struct kvm_vcpu *vcpu)
  {
        unsigned long exit_qualification;
@@@ -9330,17 -8970,6 +9330,17 @@@ fail
        return 1;
  }
  
 +static int handle_encls(struct kvm_vcpu *vcpu)
 +{
 +      /*
 +       * SGX virtualization is not yet supported.  There is no software
 +       * enable bit for SGX, so we have to trap ENCLS and inject a #UD
 +       * to prevent the guest from executing ENCLS.
 +       */
 +      kvm_queue_exception(vcpu, UD_VECTOR);
 +      return 1;
 +}
 +
  /*
   * The exit handlers return 1 if the exit was handled fully and guest execution
   * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@@ -9395,10 -9024,8 +9395,10 @@@ static int (*const kvm_vmx_exit_handler
        [EXIT_REASON_XSAVES]                  = handle_xsaves,
        [EXIT_REASON_XRSTORS]                 = handle_xrstors,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
 +      [EXIT_REASON_INVPCID]                 = handle_invpcid,
        [EXIT_REASON_VMFUNC]                  = handle_vmfunc,
        [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
 +      [EXIT_REASON_ENCLS]                   = handle_encls,
  };
  
  static const int kvm_vmx_max_exit_handlers =
@@@ -9569,30 -9196,6 +9569,30 @@@ static bool nested_vmx_exit_handled_cr(
        return false;
  }
  
 +static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
 +      struct vmcs12 *vmcs12, gpa_t bitmap)
 +{
 +      u32 vmx_instruction_info;
 +      unsigned long field;
 +      u8 b;
 +
 +      if (!nested_cpu_has_shadow_vmcs(vmcs12))
 +              return true;
 +
 +      /* Decode instruction info and find the field to access */
 +      vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 +      field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 +
 +      /* Out-of-range fields always cause a VM exit from L2 to L1 */
 +      if (field >> 15)
 +              return true;
 +
 +      if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
 +              return true;
 +
 +      return 1 & (b >> (field & 7));
 +}
 +
  /*
   * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
   * should handle it ourselves in L0 (and then continue L2). Only call this
@@@ -9677,15 -9280,10 +9677,15 @@@ static bool nested_vmx_exit_reflected(s
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
        case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
                return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
 +      case EXIT_REASON_VMREAD:
 +              return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
 +                      vmcs12->vmread_bitmap);
 +      case EXIT_REASON_VMWRITE:
 +              return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
 +                      vmcs12->vmwrite_bitmap);
        case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
        case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
 -      case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 -      case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 +      case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
        case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                /*
        case EXIT_REASON_VMFUNC:
                /* VM functions are emulated through L2->L0 vmexits. */
                return false;
 +      case EXIT_REASON_ENCLS:
 +              /* SGX is never exposed to L1 */
 +              return false;
        default:
                return true;
        }
@@@ -10131,9 -9726,6 +10131,6 @@@ static int vmx_handle_exit(struct kvm_v
   * information but as all relevant affected CPUs have 32KiB L1D cache size
   * there is no point in doing so.
   */
- #define L1D_CACHE_ORDER 4
- static void *vmx_l1d_flush_pages;
  static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
  {
        int size = PAGE_SIZE << L1D_CACHE_ORDER;
@@@ -10649,15 -10241,15 +10646,15 @@@ static void __noclone vmx_vcpu_run(stru
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
  
        cr3 = __get_current_cr3_fast();
 -      if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
 +      if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
                vmcs_writel(HOST_CR3, cr3);
 -              vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 +              vmx->loaded_vmcs->host_state.cr3 = cr3;
        }
  
        cr4 = cr4_read_shadow();
 -      if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
 +      if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
                vmcs_writel(HOST_CR4, cr4);
 -              vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 +              vmx->loaded_vmcs->host_state.cr4 = cr4;
        }
  
        /* When single-stepping over STI and MOV SS, we must clear the
         * The sysexit path does not restore ds/es, so we must set them to
         * a reasonable value ourselves.
         *
 -       * We can't defer this to vmx_load_host_state() since that function
 -       * may be executed in interrupt context, which saves and restore segments
 -       * around it, nullifying its effect.
 +       * We can't defer this to vmx_prepare_switch_to_host() since that
 +       * function may be executed in interrupt context, which saves and
 +       * restores segments around it, nullifying its effect.
         */
        loadsegment(ds, __USER_DS);
        loadsegment(es, __USER_DS);
@@@ -10916,8 -10508,8 +10913,8 @@@ static void vmx_switch_vmcs(struct kvm_
                return;
  
        cpu = get_cpu();
 -      vmx->loaded_vmcs = vmcs;
        vmx_vcpu_put(vcpu);
 +      vmx->loaded_vmcs = vmcs;
        vmx_vcpu_load(vcpu, cpu);
        put_cpu();
  }
@@@ -11057,8 -10649,6 +11054,8 @@@ free_vcpu
  
  static int vmx_vm_init(struct kvm *kvm)
  {
 +      spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
 +
        if (!ple_gap)
                kvm->arch.pause_in_guest = true;
  
@@@ -11283,11 -10873,11 +11280,11 @@@ static int nested_ept_init_mmu_context(
        if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
                return 1;
  
 -      kvm_mmu_unload(vcpu);
        kvm_init_shadow_ept_mmu(vcpu,
                        to_vmx(vcpu)->nested.msrs.ept_caps &
                        VMX_EPT_EXECUTE_ONLY_BIT,
 -                      nested_ept_ad_enabled(vcpu));
 +                      nested_ept_ad_enabled(vcpu),
 +                      nested_ept_get_cr3(vcpu));
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
        vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@@ -11335,9 -10925,9 +11332,9 @@@ static void vmx_inject_page_fault_neste
  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
                                                 struct vmcs12 *vmcs12);
  
 -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 -                                      struct vmcs12 *vmcs12)
 +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
  {
 +      struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct page *page;
        u64 hpa;
@@@ -11578,38 -11168,6 +11575,38 @@@ static inline bool nested_vmx_prepare_m
        return true;
  }
  
 +static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
 +                                     struct vmcs12 *vmcs12)
 +{
 +      struct vmcs12 *shadow;
 +      struct page *page;
 +
 +      if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
 +          vmcs12->vmcs_link_pointer == -1ull)
 +              return;
 +
 +      shadow = get_shadow_vmcs12(vcpu);
 +      page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
 +
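 +      /* Copy the vmcs12 referenced by the link pointer into the local cache. */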
 +      memcpy(shadow, kmap(page), VMCS12_SIZE);
 +
 +      kunmap(page);
 +      kvm_release_page_clean(page);
 +}
 +
 +static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
 +                                            struct vmcs12 *vmcs12)
 +{
 +      struct vcpu_vmx *vmx = to_vmx(vcpu);
 +
 +      if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
 +          vmcs12->vmcs_link_pointer == -1ull)
 +              return;
 +
 +      kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
 +                      get_shadow_vmcs12(vcpu), VMCS12_SIZE);
 +}
 +
  static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
                                          struct vmcs12 *vmcs12)
  {
@@@ -11667,12 -11225,11 +11664,12 @@@ static int nested_vmx_check_msr_switch(
                                       unsigned long count_field,
                                       unsigned long addr_field)
  {
 +      struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        int maxphyaddr;
        u64 count, addr;
  
 -      if (vmcs12_read_any(vcpu, count_field, &count) ||
 -          vmcs12_read_any(vcpu, addr_field, &addr)) {
 +      if (vmcs12_read_any(vmcs12, count_field, &count) ||
 +          vmcs12_read_any(vmcs12, addr_field, &addr)) {
                WARN_ON(1);
                return -EINVAL;
        }
@@@ -11722,19 -11279,6 +11719,19 @@@ static int nested_vmx_check_pml_control
        return 0;
  }
  
 +static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
 +                                               struct vmcs12 *vmcs12)
 +{
 +      if (!nested_cpu_has_shadow_vmcs(vmcs12))
 +              return 0;
 +
 +      if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
 +          !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
 +              return -EINVAL;
 +
 +      return 0;
 +}
 +
  static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
                                       struct vmx_msr_entry *e)
  {
@@@ -11884,16 -11428,12 +11881,16 @@@ static int nested_vmx_load_cr3(struct k
                                return 1;
                        }
                }
 -
 -              vcpu->arch.cr3 = cr3;
 -              __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
        }
  
 -      kvm_mmu_reset_context(vcpu);
 +      if (!nested_ept)
 +              kvm_mmu_new_cr3(vcpu, cr3, false);
 +
 +      vcpu->arch.cr3 = cr3;
 +      __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 +
 +      kvm_init_mmu(vcpu, false);
 +
        return 0;
  }
  
@@@ -11980,8 -11520,7 +11977,8 @@@ static void prepare_vmcs02_full(struct 
         * Set host-state according to L0's settings (vmcs12 is irrelevant here)
         * Some constant fields are set here by vmx_set_constant_host_state().
         * Other fields are different per CPU, and will be set later when
 -       * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
 +       * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest()
 +       * is called.
         */
        vmx_set_constant_host_state(vmx);
  
@@@ -12053,6 -11592,11 +12050,6 @@@ static int prepare_vmcs02(struct kvm_vc
        vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
        vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
  
 -      /*
 -       * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
 -       * HOST_FS_BASE, HOST_GS_BASE.
 -       */
 -
        if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
                kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
                        exec_control |= vmcs12_exec_ctrl;
                }
  
 +              /* VMCS shadowing for L2 is emulated for now */
 +              exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 +
                if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
                        vmcs_write16(GUEST_INTR_STATUS,
                                vmcs12->guest_intr_status);
                if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
                        vmcs_write64(APIC_ACCESS_ADDR, -1ull);
  
 +              if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
 +                      vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
 +
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
        }
  
@@@ -12342,9 -11880,6 +12339,9 @@@ static int check_vmentry_prereqs(struc
        if (nested_vmx_check_pml_controls(vcpu, vmcs12))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
  
 +      if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12))
 +              return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 +
        if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
                                vmx->nested.msrs.procbased_ctls_low,
                                vmx->nested.msrs.procbased_ctls_high) ||
        return 0;
  }
  
 +static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
 +                                        struct vmcs12 *vmcs12)
 +{
 +      int r;
 +      struct page *page;
 +      struct vmcs12 *shadow;
 +
 +      if (vmcs12->vmcs_link_pointer == -1ull)
 +              return 0;
 +
 +      if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
 +              return -EINVAL;
 +
 +      page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
 +      if (is_error_page(page))
 +              return -EINVAL;
 +
 +      r = 0;
 +      shadow = kmap(page);
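 +      /* The linked VMCS must have the right revision and shadow-VMCS setting. */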
 +      if (shadow->hdr.revision_id != VMCS12_REVISION ||
 +          shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
 +              r = -EINVAL;
 +      kunmap(page);
 +      kvm_release_page_clean(page);
 +      return r;
 +}
 +
  static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                                  u32 *exit_qual)
  {
            !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
                return 1;
  
 -      if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) &&
 -          vmcs12->vmcs_link_pointer != -1ull) {
 +      if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
                *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
                return 1;
        }
        return 0;
  }
  
 -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
 +/*
 + * If exit_qual is NULL, this is being called from state restore (either RSM
 + * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
 + */
 +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 -      u32 exit_qual;
 -      int r;
 +      bool from_vmentry = !!exit_qual;
 +      u32 dummy_exit_qual;
 +      int r = 0;
  
        enter_guest_mode(vcpu);
  
                vcpu->arch.tsc_offset += vmcs12->tsc_offset;
  
        r = EXIT_REASON_INVALID_STATE;
 -      if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
 +      if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual))
                goto fail;
  
 -      nested_get_vmcs12_pages(vcpu, vmcs12);
 +      if (from_vmentry) {
 +              nested_get_vmcs12_pages(vcpu);
  
 -      r = EXIT_REASON_MSR_LOAD_FAIL;
 -      exit_qual = nested_vmx_load_msr(vcpu,
 -                                      vmcs12->vm_entry_msr_load_addr,
 -                                      vmcs12->vm_entry_msr_load_count);
 -      if (exit_qual)
 -              goto fail;
 +              r = EXIT_REASON_MSR_LOAD_FAIL;
 +              *exit_qual = nested_vmx_load_msr(vcpu,
 +                                               vmcs12->vm_entry_msr_load_addr,
 +                                               vmcs12->vm_entry_msr_load_count);
 +              if (*exit_qual)
 +                      goto fail;
 +      } else {
 +              /*
 +               * The MMU is not initialized to point at the right entities yet and
 +               * "get pages" would need to read data from the guest (i.e. we will
 +               * need to perform gpa to hpa translation). Request a call
 +               * to nested_get_vmcs12_pages before the next VM-entry.  The MSRs
 +               * have already been set at vmentry time and should not be reset.
 +               */
 +              kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
 +      }
  
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@@ -12590,7 -12083,8 +12587,7 @@@ fail
                vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
        leave_guest_mode(vcpu);
        vmx_switch_vmcs(vcpu, &vmx->vmcs01);
 -      nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
 -      return 1;
 +      return r;
  }
  
  /*
@@@ -12613,17 -12107,6 +12610,17 @@@ static int nested_vmx_run(struct kvm_vc
  
        vmcs12 = get_vmcs12(vcpu);
  
 +      /*
 +       * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
 +       * that there *is* a valid VMCS pointer, RFLAGS.CF is set
 +       * rather than RFLAGS.ZF, and no error number is stored to the
 +       * VM-instruction error field.
 +       */
 +      if (vmcs12->hdr.shadow_vmcs) {
 +              nested_vmx_failInvalid(vcpu);
 +              goto out;
 +      }
 +
        if (enable_shadow_vmcs)
                copy_shadow_to_vmcs12(vmx);
  
         */
  
        vmx->nested.nested_run_pending = 1;
 -      ret = enter_vmx_non_root_mode(vcpu);
 +      ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
        if (ret) {
 +              nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
                vmx->nested.nested_run_pending = 0;
 -              return ret;
 +              return 1;
        }
  
        /* Hide L1D cache contents from the nested guest.  */
        vmx->vcpu.arch.l1tf_flush_l1d = true;
  
 +      /*
 +       * Must happen outside of enter_vmx_non_root_mode() as it will
 +       * also be used as part of restoring nVMX state for
 +       * snapshot restore (migration).
 +       *
 +       * In this flow, it is assumed that the vmcs12 cache was
 +       * transferred as part of the captured nVMX state and should
 +       * therefore not be read from guest memory (which may not
 +       * exist on destination host yet).
 +       */
 +      nested_cache_shadow_vmcs12(vcpu, vmcs12);
 +
        /*
         * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
         * by event injection, halt vcpu.
@@@ -13209,17 -12679,6 +13206,17 @@@ static void nested_vmx_vmexit(struct kv
                        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
                                       exit_qualification);
  
 +              /*
 +               * Must happen outside of sync_vmcs12() as it will
 +               * also be used to capture the vmcs12 cache as part of
 +               * capturing nVMX state for snapshot (migration).
 +               *
 +               * Otherwise, this flush will dirty guest memory at a
 +               * point where it is already assumed by user-space to be
 +               * immutable.
 +               */
 +              nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
 +
                if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
                                         vmcs12->vm_exit_msr_store_count))
                        nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
@@@ -13794,7 -13253,7 +13791,7 @@@ static int vmx_pre_leave_smm(struct kvm
  
        if (vmx->nested.smm.guest_mode) {
                vcpu->arch.hflags &= ~HF_SMM_MASK;
 -              ret = enter_vmx_non_root_mode(vcpu);
 +              ret = enter_vmx_non_root_mode(vcpu, NULL);
                vcpu->arch.hflags |= HF_SMM_MASK;
                if (ret)
                        return ret;
@@@ -13809,199 -13268,6 +13806,199 @@@ static int enable_smi_window(struct kvm
        return 0;
  }
  
 +static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 +                              struct kvm_nested_state __user *user_kvm_nested_state,
 +                              u32 user_data_size)
 +{
 +      struct vcpu_vmx *vmx;
 +      struct vmcs12 *vmcs12;
 +      struct kvm_nested_state kvm_state = {
 +              .flags = 0,
 +              .format = 0,
 +              .size = sizeof(kvm_state),
 +              .vmx.vmxon_pa = -1ull,
 +              .vmx.vmcs_pa = -1ull,
 +      };
 +
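 +      /* A NULL vcpu means the caller only wants the maximum state size. */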
 +      if (!vcpu)
 +              return kvm_state.size + 2 * VMCS12_SIZE;
 +
 +      vmx = to_vmx(vcpu);
 +      vmcs12 = get_vmcs12(vcpu);
 +      if (nested_vmx_allowed(vcpu) &&
 +          (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
 +              kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
 +              kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
 +
 +              if (vmx->nested.current_vmptr != -1ull) {
 +                      kvm_state.size += VMCS12_SIZE;
 +
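 +                      /* Room for the cached shadow vmcs12, copied out below. */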
 +                      if (is_guest_mode(vcpu) &&
 +                          nested_cpu_has_shadow_vmcs(vmcs12) &&
 +                          vmcs12->vmcs_link_pointer != -1ull)
 +                              kvm_state.size += VMCS12_SIZE;
 +              }
 +
 +              if (vmx->nested.smm.vmxon)
 +                      kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
 +
 +              if (vmx->nested.smm.guest_mode)
 +                      kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
 +
 +              if (is_guest_mode(vcpu)) {
 +                      kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
 +
 +                      if (vmx->nested.nested_run_pending)
 +                              kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
 +              }
 +      }
 +
 +      if (user_data_size < kvm_state.size)
 +              goto out;
 +
 +      if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
 +              return -EFAULT;
 +
 +      if (vmx->nested.current_vmptr == -1ull)
 +              goto out;
 +
 +      /*
 +       * When running L2, the authoritative vmcs12 state is in the
 +       * vmcs02. When running L1, the authoritative vmcs12 state is
 +       * in the shadow vmcs linked to vmcs01, unless
 +       * sync_shadow_vmcs is set, in which case the authoritative
 +       * vmcs12 state is in the vmcs12 already.
 +       */
 +      if (is_guest_mode(vcpu))
 +              sync_vmcs12(vcpu, vmcs12);
 +      else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
 +              copy_shadow_to_vmcs12(vmx);
 +
 +      if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
 +              return -EFAULT;
 +
 +      if (nested_cpu_has_shadow_vmcs(vmcs12) &&
 +          vmcs12->vmcs_link_pointer != -1ull) {
 +              if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
 +                               get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
 +                      return -EFAULT;
 +      }
 +
 +out:
 +      return kvm_state.size;
 +}
 +
 +static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 +                              struct kvm_nested_state __user *user_kvm_nested_state,
 +                              struct kvm_nested_state *kvm_state)
 +{
 +      struct vcpu_vmx *vmx = to_vmx(vcpu);
 +      struct vmcs12 *vmcs12;
 +      u32 exit_qual;
 +      int ret;
 +
 +      if (kvm_state->format != 0)
 +              return -EINVAL;
 +
 +      if (!nested_vmx_allowed(vcpu))
 +              return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
 +
 +      if (kvm_state->vmx.vmxon_pa == -1ull) {
 +              if (kvm_state->vmx.smm.flags)
 +                      return -EINVAL;
 +
 +              if (kvm_state->vmx.vmcs_pa != -1ull)
 +                      return -EINVAL;
 +
 +              vmx_leave_nested(vcpu);
 +              return 0;
 +      }
 +
 +      if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
 +              return -EINVAL;
 +
 +      if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
 +              return -EINVAL;
 +
 +      if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
 +          !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
 +              return -EINVAL;
 +
 +      if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
 +          (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
 +              return -EINVAL;
 +
 +      if (kvm_state->vmx.smm.flags &
 +          ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
 +              return -EINVAL;
 +
 +      if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
 +          !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
 +              return -EINVAL;
 +
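 +      /* Drop any existing nested state before loading the new one. */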
 +      vmx_leave_nested(vcpu);
 +      if (kvm_state->vmx.vmxon_pa == -1ull)
 +              return 0;
 +
 +      vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
 +      ret = enter_vmx_operation(vcpu);
 +      if (ret)
 +              return ret;
 +
 +      set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
 +
 +      if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
 +              vmx->nested.smm.vmxon = true;
 +              vmx->nested.vmxon = false;
 +
 +              if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
 +                      vmx->nested.smm.guest_mode = true;
 +      }
 +
 +      vmcs12 = get_vmcs12(vcpu);
 +      if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
 +              return -EFAULT;
 +
 +      if (vmcs12->hdr.revision_id != VMCS12_REVISION)
 +              return -EINVAL;
 +
 +      if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
 +              return 0;
 +
 +      vmx->nested.nested_run_pending =
 +              !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 +
 +      if (nested_cpu_has_shadow_vmcs(vmcs12) &&
 +          vmcs12->vmcs_link_pointer != -1ull) {
 +              struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
 +              if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
 +                      return -EINVAL;
 +
 +              if (copy_from_user(shadow_vmcs12,
 +                                 user_kvm_nested_state->data + VMCS12_SIZE,
 +                                 sizeof(*vmcs12)))
 +                      return -EFAULT;
 +
 +              if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
 +                  !shadow_vmcs12->hdr.shadow_vmcs)
 +                      return -EINVAL;
 +      }
 +
 +      if (check_vmentry_prereqs(vcpu, vmcs12) ||
 +          check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
 +              return -EINVAL;
 +
 +      if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
 +              vmx->nested.nested_run_pending = 1;
 +
 +      vmx->nested.dirty_vmcs12 = true;
 +      ret = enter_vmx_non_root_mode(vcpu, NULL);
 +      if (ret)
 +              return -EINVAL;
 +
 +      return 0;
 +}
 +
  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
        .vcpu_free = vmx_free_vcpu,
        .vcpu_reset = vmx_vcpu_reset,
  
 -      .prepare_guest_switch = vmx_save_host_state,
 +      .prepare_guest_switch = vmx_prepare_switch_to_guest,
        .vcpu_load = vmx_vcpu_load,
        .vcpu_put = vmx_vcpu_put,
  
        .set_rflags = vmx_set_rflags,
  
        .tlb_flush = vmx_flush_tlb,
 +      .tlb_flush_gva = vmx_flush_tlb_gva,
  
        .run = vmx_vcpu_run,
        .handle_exit = vmx_handle_exit,
  
        .setup_mce = vmx_setup_mce,
  
 +      .get_nested_state = vmx_get_nested_state,
 +      .set_nested_state = vmx_set_nested_state,
 +      .get_vmcs12_pages = nested_get_vmcs12_pages,
 +
        .smi_allowed = vmx_smi_allowed,
        .pre_enter_smm = vmx_pre_enter_smm,
        .pre_leave_smm = vmx_pre_leave_smm,
diff --combined arch/x86/mm/init.c
@@@ -99,22 -99,15 +99,22 @@@ __ref void *alloc_low_pages(unsigned in
        }
  
        if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
 -              unsigned long ret;
 -              if (min_pfn_mapped >= max_pfn_mapped)
 -                      panic("alloc_low_pages: ran out of memory");
 -              ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
 +              unsigned long ret = 0;
 +
 +              if (min_pfn_mapped < max_pfn_mapped) {
 +                      ret = memblock_find_in_range(
 +                                      min_pfn_mapped << PAGE_SHIFT,
                                        max_pfn_mapped << PAGE_SHIFT,
                                        PAGE_SIZE * num , PAGE_SIZE);
 +              }
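 +              /* Reserve what memblock found, or fall back to the brk area. */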
 +              if (ret)
 +                      memblock_reserve(ret, PAGE_SIZE * num);
 +              else if (can_use_brk_pgt)
 +                      ret = __pa(extend_brk(PAGE_SIZE * num, PAGE_SIZE));
 +
                if (!ret)
                        panic("alloc_low_pages: can not alloc memory");
 -              memblock_reserve(ret, PAGE_SIZE * num);
 +
                pfn = ret >> PAGE_SHIFT;
        } else {
                pfn = pgt_buf_end;
@@@ -930,7 -923,7 +930,7 @@@ unsigned long max_swapfile_size(void
  
        if (boot_cpu_has_bug(X86_BUG_L1TF)) {
                /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
-               unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
+               unsigned long long l1tf_limit = l1tf_pfn_limit();
                /*
                 * We encode swap offsets also with 3 bits below those for pfn
                 * which makes the usable limit higher.
  #if CONFIG_PGTABLE_LEVELS > 2
                l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
  #endif
-               pages = min_t(unsigned long, l1tf_limit, pages);
+               pages = min_t(unsigned long long, l1tf_limit, pages);
        }
        return pages;
  }