KVM: SVM: Add support for CR0 write traps for an SEV-ES guest
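Unlike the existing CR0 write intercept (SVM_EXIT_WRITE_CR0), the CR0 write trap
(SVM_EXIT_CR0_WRITE_TRAP) is taken only after the guest's MOV-to-CR0 has completed,
with the new CR0 value reported in EXITINFO1. The handler therefore only has to fold
that value back into KVM's tracking state; there is no instruction decode, register
write-back or host-side RIP adjustment (for SEV-ES the RIP update is left to the
guest's #VC handler, see the skip_emulated_instruction() change below). A condensed
sketch of that flow, using only helpers visible in this diff; the function name
handle_cr0_write_trap is illustrative, the patch itself folds this into cr_trap():

    static int handle_cr0_write_trap(struct vcpu_svm *svm)
    {
            struct kvm_vcpu *vcpu = &svm->vcpu;
            unsigned long old_value = kvm_read_cr0(vcpu);
            unsigned long new_value = (unsigned long)svm->vmcb->control.exit_info_1;

            /* The write has already retired in hardware; just sync KVM's view. */
            svm_set_cr0(vcpu, new_value);
            kvm_post_set_cr0(vcpu, old_value, new_value);

            /* For an SEV-ES guest the RIP skip inside this helper is a no-op. */
            return kvm_complete_insn_gp(vcpu, 0);
    }

The same trap-style handling is wired up for EFER via SVM_EXIT_EFER_WRITE_TRAP and
efer_trap() further down in the diff.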
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9709c98..ddcb739 100644
@@ -33,9 +33,9 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
-#include <asm/mce.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/traps.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -91,7 +91,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 static const struct svm_direct_access_msrs {
        u32 index;   /* Index of the MSR */
        bool always; /* True if intercept is always on */
-} direct_access_msrs[] = {
+} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
        { .index = MSR_STAR,                            .always = true  },
        { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
 #ifdef CONFIG_X86_64
@@ -187,10 +187,14 @@ static int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
-static bool __read_mostly dump_invalid_vmcb = 0;
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
+bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -263,9 +267,10 @@ static int get_max_npt_level(void)
 #endif
 }
 
-void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       u64 old_efer = vcpu->arch.efer;
        vcpu->arch.efer = efer;
 
        if (!npt_enabled) {
@@ -276,13 +281,32 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
                        efer &= ~EFER_LME;
        }
 
-       if (!(efer & EFER_SVME)) {
-               svm_leave_nested(svm);
-               svm_set_gif(svm, true);
+       if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+               if (!(efer & EFER_SVME)) {
+                       svm_leave_nested(svm);
+                       svm_set_gif(svm, true);
+
+                       /*
+                        * Free the nested guest state, unless we are in SMM.
+                        * In this case we will return to the nested guest
+                        * as soon as we leave SMM.
+                        */
+                       if (!is_smm(&svm->vcpu))
+                               svm_free_nested(svm);
+
+               } else {
+                       int ret = svm_allocate_nested(svm);
+
+                       if (ret) {
+                               vcpu->arch.efer = old_efer;
+                               return ret;
+                       }
+               }
        }
 
        svm->vmcb->save.efer = efer | EFER_SVME;
        vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+       return 0;
 }
 
 static int is_external_interrupt(u32 info)
@@ -316,6 +340,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES does not expose the next RIP. The RIP update is controlled by
+        * the type of exit and the #VC handler in the guest.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               goto done;
+
        if (nrips && svm->vmcb->control.next_rip != 0) {
                WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
                svm->next_rip = svm->vmcb->control.next_rip;
@@ -327,6 +358,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
        } else {
                kvm_rip_write(vcpu, svm->next_rip);
        }
+
+done:
        svm_set_interrupt_shadow(vcpu, 0);
 
        return 1;
@@ -553,18 +586,44 @@ free_cpu_data:
 
 }
 
-static bool valid_msr_intercept(u32 index)
+static int direct_access_msr_slot(u32 msr)
 {
-       int i;
+       u32 i;
 
        for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
-               if (direct_access_msrs[i].index == index)
-                       return true;
+               if (direct_access_msrs[i].index == msr)
+                       return i;
 
-       return false;
+       return -ENOENT;
+}
+
+static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
+                                    int write)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       int slot = direct_access_msr_slot(msr);
+
+       if (slot == -ENOENT)
+               return;
+
+       /* Set the shadow bitmaps to the desired intercept states */
+       if (read)
+               set_bit(slot, svm->shadow_msr_intercept.read);
+       else
+               clear_bit(slot, svm->shadow_msr_intercept.read);
+
+       if (write)
+               set_bit(slot, svm->shadow_msr_intercept.write);
+       else
+               clear_bit(slot, svm->shadow_msr_intercept.write);
+}
+
+static bool valid_msr_intercept(u32 index)
+{
+       return direct_access_msr_slot(index) != -ENOENT;
 }
 
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 {
        u8 bit_write;
        unsigned long tmp;
@@ -583,8 +642,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
        return !!test_bit(bit_write,  &tmp);
 }
 
-static void set_msr_interception(u32 *msrpm, unsigned msr,
-                                int read, int write)
+static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
+                                       u32 msr, int read, int write)
 {
        u8 bit_read, bit_write;
        unsigned long tmp;
@@ -596,6 +655,13 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
         */
        WARN_ON(!valid_msr_intercept(msr));
 
+       /* Enforce interception of MSRs not allowed by the userspace filter */
+       if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
+               read = 0;
+
+       if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
+               write = 0;
+
        offset    = svm_msrpm_offset(msr);
        bit_read  = 2 * (msr & 0x0f);
        bit_write = 2 * (msr & 0x0f) + 1;
@@ -609,17 +675,60 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
        msrpm[offset] = tmp;
 }
 
-static void svm_vcpu_init_msrpm(u32 *msrpm)
+static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+                                int read, int write)
 {
-       int i;
+       set_shadow_msr_intercept(vcpu, msr, read, write);
+       set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
+}
 
+u32 *svm_vcpu_alloc_msrpm(void)
+{
+       struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
+       u32 *msrpm;
+
+       if (!pages)
+               return NULL;
+
+       msrpm = page_address(pages);
        memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
 
+       return msrpm;
+}
+
+void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
+{
+       int i;
+
        for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
                if (!direct_access_msrs[i].always)
                        continue;
+               set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
+       }
+}
+
+
+void svm_vcpu_free_msrpm(u32 *msrpm)
+{
+       __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER);
+}
 
-               set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
+static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u32 i;
+
+       /*
+        * Set intercept permissions for all direct access MSRs again. They
+        * will automatically get filtered through the MSR filter, so we are
+        * back in sync after this.
+        */
+       for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
+               u32 msr = direct_access_msrs[i].index;
+               u32 read = test_bit(i, svm->shadow_msr_intercept.read);
+               u32 write = test_bit(i, svm->shadow_msr_intercept.write);
+
+               set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write);
        }
 }
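How the shadow bitmaps above fit together (a summary of the surrounding code, not additional
patch content): set_msr_interception() first records KVM's desired read/write passthrough
state in svm->shadow_msr_intercept via set_shadow_msr_intercept(), and
set_msr_interception_bitmap() then applies the userspace MSR filter (kvm_msr_allowed()) on
top of that state before updating the real MSR permission map. When the filter changes,
svm_msr_filter_changed() replays the recorded state for every direct-access MSR, so
passthroughs that KVM itself established (for example MSR_IA32_SPEC_CTRL or the LBR MSRs
toggled later in this file) are re-applied and re-filtered rather than lost.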
 
@@ -666,26 +775,26 @@ static void init_msrpm_offsets(void)
        }
 }
 
-static void svm_enable_lbrv(struct vcpu_svm *svm)
+static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 {
-       u32 *msrpm = svm->msrpm;
+       struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 }
 
-static void svm_disable_lbrv(struct vcpu_svm *svm)
+static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
 {
-       u32 *msrpm = svm->msrpm;
+       struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
 void disable_nmi_singlestep(struct vcpu_svm *svm)
@@ -813,6 +922,9 @@ static __init void svm_set_cpu_caps(void)
        if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
            boot_cpu_has(X86_FEATURE_AMD_SSBD))
                kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+       /* Enable INVPCID feature */
+       kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
 }
 
 static __init int svm_hardware_setup(void)
@@ -860,15 +972,11 @@ static __init int svm_hardware_setup(void)
                kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
        }
 
-       if (sev) {
-               if (boot_cpu_has(X86_FEATURE_SEV) &&
-                   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
-                       r = sev_hardware_setup();
-                       if (r)
-                               sev = false;
-               } else {
-                       sev = false;
-               }
+       if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+               sev_hardware_setup();
+       } else {
+               sev = false;
+               sev_es = false;
        }
 
        svm_adjust_mmio_mask();
@@ -985,6 +1093,21 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        return svm->vmcb->control.tsc_offset;
 }
 
+static void svm_check_invpcid(struct vcpu_svm *svm)
+{
+       /*
+        * Intercept the INVPCID instruction only if shadow page tables are
+        * enabled. Interception is not required when nested paging is
+        * enabled.
+        */
+       if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
+               if (!npt_enabled)
+                       svm_set_intercept(svm, INTERCEPT_INVPCID);
+               else
+                       svm_clr_intercept(svm, INTERCEPT_INVPCID);
+       }
+}
+
 static void init_vmcb(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
@@ -992,14 +1115,14 @@ static void init_vmcb(struct vcpu_svm *svm)
 
        svm->vcpu.arch.hflags = 0;
 
-       set_cr_intercept(svm, INTERCEPT_CR0_READ);
-       set_cr_intercept(svm, INTERCEPT_CR3_READ);
-       set_cr_intercept(svm, INTERCEPT_CR4_READ);
-       set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
-       set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
-       set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
+       svm_set_intercept(svm, INTERCEPT_CR0_READ);
+       svm_set_intercept(svm, INTERCEPT_CR3_READ);
+       svm_set_intercept(svm, INTERCEPT_CR4_READ);
+       svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
+       svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
+       svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
        if (!kvm_vcpu_apicv_active(&svm->vcpu))
-               set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+               svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
 
        set_dr_intercepts(svm);
 
@@ -1094,15 +1217,16 @@ static void init_vmcb(struct vcpu_svm *svm)
                control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
                svm_clr_intercept(svm, INTERCEPT_INVLPG);
                clr_exception_intercept(svm, PF_VECTOR);
-               clr_cr_intercept(svm, INTERCEPT_CR3_READ);
-               clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
+               svm_clr_intercept(svm, INTERCEPT_CR3_READ);
+               svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
                save->g_pat = svm->vcpu.arch.pat;
                save->cr3 = 0;
                save->cr4 = 0;
        }
        svm->asid_generation = 0;
+       svm->asid = 0;
 
-       svm->nested.vmcb = 0;
+       svm->nested.vmcb12_gpa = 0;
        svm->vcpu.arch.hflags = 0;
 
        if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
@@ -1114,6 +1238,8 @@ static void init_vmcb(struct vcpu_svm *svm)
                svm_clr_intercept(svm, INTERCEPT_PAUSE);
        }
 
+       svm_check_invpcid(svm);
+
        if (kvm_vcpu_apicv_active(&svm->vcpu))
                avic_init_vmcb(svm);
 
@@ -1171,35 +1297,31 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm;
-       struct page *page;
-       struct page *msrpm_pages;
-       struct page *hsave_page;
-       struct page *nested_msrpm_pages;
+       struct page *vmcb_page;
+       struct page *vmsa_page = NULL;
        int err;
 
        BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
        svm = to_svm(vcpu);
 
        err = -ENOMEM;
-       page = alloc_page(GFP_KERNEL_ACCOUNT);
-       if (!page)
+       vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+       if (!vmcb_page)
                goto out;
 
-       msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
-       if (!msrpm_pages)
-               goto free_page1;
-
-       nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
-       if (!nested_msrpm_pages)
-               goto free_page2;
-
-       hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
-       if (!hsave_page)
-               goto free_page3;
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests require a separate VMSA page used to contain
+                * the encrypted register state of the guest.
+                */
+               vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+               if (!vmsa_page)
+                       goto error_free_vmcb_page;
+       }
 
        err = avic_init_vcpu(svm);
        if (err)
-               goto free_page4;
+               goto error_free_vmsa_page;
 
        /* We initialize this flag to true to make sure that the is_running
         * bit would be set the first time the vcpu is loaded.
@@ -1207,18 +1329,18 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
                svm->avic_is_running = true;
 
-       svm->nested.hsave = page_address(hsave_page);
-       clear_page(svm->nested.hsave);
+       svm->msrpm = svm_vcpu_alloc_msrpm();
+       if (!svm->msrpm)
+               goto error_free_vmsa_page;
 
-       svm->msrpm = page_address(msrpm_pages);
-       svm_vcpu_init_msrpm(svm->msrpm);
+       svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
-       svm->nested.msrpm = page_address(nested_msrpm_pages);
-       svm_vcpu_init_msrpm(svm->nested.msrpm);
+       svm->vmcb = page_address(vmcb_page);
+       svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+
+       if (vmsa_page)
+               svm->vmsa = page_address(vmsa_page);
 
-       svm->vmcb = page_address(page);
-       clear_page(svm->vmcb);
-       svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
        svm->asid_generation = 0;
        init_vmcb(svm);
 
@@ -1227,14 +1349,11 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
        return 0;
 
-free_page4:
-       __free_page(hsave_page);
-free_page3:
-       __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
-free_page2:
-       __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
-free_page1:
-       __free_page(page);
+error_free_vmsa_page:
+       if (vmsa_page)
+               __free_page(vmsa_page);
+error_free_vmcb_page:
+       __free_page(vmcb_page);
 out:
        return err;
 }
@@ -1258,10 +1377,12 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
         */
        svm_clear_current_vmcb(svm->vmcb);
 
+       svm_free_nested(svm);
+
+       sev_free_vcpu(vcpu);
+
        __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
-       __free_page(virt_to_page(svm->nested.hsave));
-       __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1540,20 +1661,29 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-       ulong gcr0 = svm->vcpu.arch.cr0;
-       u64 *hcr0 = &svm->vmcb->save.cr0;
+       ulong gcr0;
+       u64 *hcr0;
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return;
+
+       gcr0 = svm->vcpu.arch.cr0;
+       hcr0 = &svm->vmcb->save.cr0;
        *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
                | (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
        vmcb_mark_dirty(svm->vmcb, VMCB_CR);
 
        if (gcr0 == *hcr0) {
-               clr_cr_intercept(svm, INTERCEPT_CR0_READ);
-               clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+               svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+               svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
        } else {
-               set_cr_intercept(svm, INTERCEPT_CR0_READ);
-               set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+               svm_set_intercept(svm, INTERCEPT_CR0_READ);
+               svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
        }
 }
 
@@ -1562,7 +1692,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-       if (vcpu->arch.efer & EFER_LME) {
+       if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
                        vcpu->arch.efer |= EFER_LMA;
                        svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -1591,13 +1721,15 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        update_cr0_intercept(svm);
 }
 
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
-       unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+       return true;
+}
 
-       if (cr4 & X86_CR4_VMXE)
-               return 1;
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
+       unsigned long old_cr4 = vcpu->arch.cr4;
 
        if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
                svm_flush_tlb(vcpu);
@@ -1608,7 +1740,9 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        cr4 |= host_cr4_mce;
        to_svm(vcpu)->vmcb->save.cr4 = cr4;
        vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
-       return 0;
+
+       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+               kvm_update_cpuid_runtime(vcpu);
 }
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -1660,18 +1794,20 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
                ++sd->asid_generation;
                sd->next_asid = sd->min_asid;
                svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+               vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
        }
 
        svm->asid_generation = sd->asid_generation;
-       svm->vmcb->control.asid = sd->next_asid++;
-
-       vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+       svm->asid = sd->next_asid++;
 }
 
 static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
 {
        struct vmcb *vmcb = svm->vmcb;
 
+       if (svm->vcpu.arch.guest_state_protected)
+               return;
+
        if (unlikely(value != vmcb->save.dr6)) {
                vmcb->save.dr6 = value;
                vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1682,6 +1818,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        get_debugreg(vcpu->arch.db[0], 0);
        get_debugreg(vcpu->arch.db[1], 1);
        get_debugreg(vcpu->arch.db[2], 2);
@@ -1700,6 +1839,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        svm->vmcb->save.dr7 = value;
        vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
@@ -1838,25 +1980,6 @@ static bool is_erratum_383(void)
        return true;
 }
 
-/*
- * Trigger machine check on the host. We assume all the MSRs are already set up
- * by the CPU and that we still run on the same CPU as the MCE occurred on.
- * We pass a fake environment to the machine check handler because we want
- * the guest to be always treated like user space, no matter what context
- * it used internally.
- */
-static void kvm_machine_check(void)
-{
-#if defined(CONFIG_X86_MCE)
-       struct pt_regs regs = {
-               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
-               .flags = X86_EFLAGS_IF,
-       };
-
-       do_machine_check(&regs);
-#endif
-}
-
 static void svm_handle_mce(struct vcpu_svm *svm)
 {
        if (is_erratum_383()) {
@@ -1887,6 +2010,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
 {
        struct kvm_run *kvm_run = svm->vcpu.run;
 
+       /*
+        * The VM save area has already been encrypted so it
+        * cannot be reinitialized - just terminate.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return -EINVAL;
+
        /*
         * VMCB is undefined after a SHUTDOWN intercept
         * so reinitialize it.
@@ -1908,11 +2038,16 @@ static int io_interception(struct vcpu_svm *svm)
        ++svm->vcpu.stat.io_exits;
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-       if (string)
-               return kvm_emulate_instruction(vcpu, 0);
-
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+       if (string) {
+               if (sev_es_guest(vcpu->kvm))
+                       return sev_es_string_io(svm, size, port, in);
+               else
+                       return kvm_emulate_instruction(vcpu, 0);
+       }
+
        svm->next_rip = svm->vmcb->control.exit_info_2;
 
        return kvm_fast_pio(&svm->vcpu, size, port, in);
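For the string I/O path above: the register and memory state of an SEV-ES guest is
encrypted, so KVM cannot decode and emulate INS/OUTS itself. sev_es_string_io() instead
moves the data through the unencrypted GHCB shared buffer that the guest's #VC handler
supplies with the VMGEXIT.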
@@ -2224,12 +2359,9 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
 {
        unsigned long cr0 = svm->vcpu.arch.cr0;
        bool ret = false;
-       u64 intercept;
-
-       intercept = svm->nested.ctl.intercept;
 
        if (!is_guest_mode(&svm->vcpu) ||
-           (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
+           (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
                return false;
 
        cr0 &= ~SVM_CR0_SELECTIVE_MASK;
@@ -2267,6 +2399,7 @@ static int cr_interception(struct vcpu_svm *svm)
        if (cr >= 16) { /* mov to cr */
                cr -= 16;
                val = kvm_register_read(&svm->vcpu, reg);
+               trace_kvm_cr_write(cr, val);
                switch (cr) {
                case 0:
                        if (!check_selective_cr0_intercepted(svm, val))
@@ -2312,10 +2445,36 @@ static int cr_interception(struct vcpu_svm *svm)
                        return 1;
                }
                kvm_register_write(&svm->vcpu, reg, val);
+               trace_kvm_cr_read(cr, val);
        }
        return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       unsigned long old_value, new_value;
+       unsigned int cr;
+
+       new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+       cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+       switch (cr) {
+       case 0:
+               old_value = kvm_read_cr0(vcpu);
+               svm_set_cr0(vcpu, new_value);
+
+               kvm_post_set_cr0(vcpu, old_value, new_value);
+               break;
+       default:
+               WARN(1, "unhandled CR%d write trap", cr);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       return kvm_complete_insn_gp(vcpu, 0);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
        int reg, dr;
@@ -2369,6 +2528,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
+static int efer_trap(struct vcpu_svm *svm)
+{
+       struct msr_data msr_info;
+       int ret;
+
+       /*
+        * Clear the EFER_SVME bit from EFER. The SVM code always sets this
+        * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+        * whether the guest has X86_FEATURE_SVM - this avoids a failure if
+        * the guest doesn't have X86_FEATURE_SVM.
+        */
+       msr_info.host_initiated = false;
+       msr_info.index = MSR_EFER;
+       msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+       ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+       return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
        msr->data = 0;
@@ -2451,10 +2629,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                msr_info->data = svm->spec_ctrl;
@@ -2492,6 +2667,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        return 0;
 }
 
+static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm) || !err)
+               return kvm_complete_insn_gp(&svm->vcpu, err);
+
+       ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+       ghcb_set_sw_exit_info_2(svm->ghcb,
+                               X86_TRAP_GP |
+                               SVM_EVTINJ_TYPE_EXEPT |
+                               SVM_EVTINJ_VALID);
+       return 1;
+}
+
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
        return kvm_emulate_rdmsr(&svm->vcpu);
@@ -2538,10 +2727,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                if (kvm_spec_ctrl_test_value(data))
@@ -2562,22 +2748,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 * We update the L1 MSR bit as well since it will end up
                 * touching the MSR anyway now.
                 */
-               set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
+                   !guest_has_pred_cmd_msr(vcpu))
                        return 1;
 
                if (data & ~PRED_CMD_IBPB)
                        return 1;
-               if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
+               if (!boot_cpu_has(X86_FEATURE_IBPB))
                        return 1;
                if (!data)
                        break;
 
                wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-               set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
                break;
        case MSR_AMD64_VIRT_SPEC_CTRL:
                if (!msr->host_initiated &&
@@ -2641,9 +2827,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                svm->vmcb->save.dbgctl = data;
                vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
                if (data & (1ULL<<0))
-                       svm_enable_lbrv(svm);
+                       svm_enable_lbrv(vcpu);
                else
-                       svm_disable_lbrv(svm);
+                       svm_disable_lbrv(vcpu);
                break;
        case MSR_VM_HSAVE_PA:
                svm->nested.hsave_msr = data;
@@ -2713,7 +2899,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
 static int pause_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       bool in_kernel = (svm_get_cpl(vcpu) == 0);
+       bool in_kernel;
+
+       /*
+        * CPL is not made available for an SEV-ES guest, therefore
+        * vcpu->arch.preempted_in_kernel can never be true.  Just
+        * set in_kernel to false as well.
+        */
+       in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
 
        if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
@@ -2739,6 +2932,33 @@ static int mwait_interception(struct vcpu_svm *svm)
        return nop_interception(svm);
 }
 
+static int invpcid_interception(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       unsigned long type;
+       gva_t gva;
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       /*
+        * For an INVPCID intercept:
+        * EXITINFO1 provides the linear address of the memory operand.
+        * EXITINFO2 provides the contents of the register operand.
+        */
+       type = svm->vmcb->control.exit_info_2;
+       gva = svm->vmcb->control.exit_info_1;
+
+       if (type > 3) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+
+       return kvm_handle_invpcid(vcpu, type, gva);
+}
+
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR0]                     = cr_interception,
        [SVM_EXIT_READ_CR3]                     = cr_interception,
@@ -2801,10 +3021,14 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_RDPRU]                        = rdpru_interception,
+       [SVM_EXIT_EFER_WRITE_TRAP]              = efer_trap,
+       [SVM_EXIT_CR0_WRITE_TRAP]               = cr_trap,
+       [SVM_EXIT_INVPCID]                      = invpcid_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
        [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
+       [SVM_EXIT_VMGEXIT]                      = sev_handle_vmgexit,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -2819,12 +3043,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        }
 
        pr_err("VMCB Control Area:\n");
-       pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
-       pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
-       pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
-       pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
-       pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
-       pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
+       pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
+       pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
+       pr_err("%-20s%04x\n", "dr_read:", control->intercepts[INTERCEPT_DR] & 0xffff);
+       pr_err("%-20s%04x\n", "dr_write:", control->intercepts[INTERCEPT_DR] >> 16);
+       pr_err("%-20s%08x\n", "exceptions:", control->intercepts[INTERCEPT_EXCEPTION]);
+       pr_err("%-20s%08x %08x\n", "intercepts:",
+              control->intercepts[INTERCEPT_WORD3],
+              control->intercepts[INTERCEPT_WORD4]);
        pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
        pr_err("%-20s%d\n", "pause filter threshold:",
               control->pause_filter_thresh);
@@ -2844,6 +3070,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
        pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
        pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
+       pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
        pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
        pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
        pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
@@ -2923,12 +3150,56 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "excp_to:", save->last_excp_to);
 }
 
-static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+       if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+           svm_exit_handlers[exit_code])
+               return 0;
+
+       vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+       dump_vmcb(vcpu);
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+       if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+               return 0;
+
+#ifdef CONFIG_RETPOLINE
+       if (exit_code == SVM_EXIT_MSR)
+               return msr_interception(svm);
+       else if (exit_code == SVM_EXIT_VINTR)
+               return interrupt_window_interception(svm);
+       else if (exit_code == SVM_EXIT_INTR)
+               return intr_interception(svm);
+       else if (exit_code == SVM_EXIT_HLT)
+               return halt_interception(svm);
+       else if (exit_code == SVM_EXIT_NPF)
+               return npf_interception(svm);
+#endif
+       return svm_exit_handlers[exit_code](svm);
+}
+
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+                             u32 *intr_info, u32 *error_code)
 {
        struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
 
        *info1 = control->exit_info_1;
        *info2 = control->exit_info_2;
+       *intr_info = control->exit_int_info;
+       if ((*intr_info & SVM_EXITINTINFO_VALID) &&
+           (*intr_info & SVM_EXITINTINFO_VALID_ERR))
+               *error_code = control->exit_int_info_err;
+       else
+               *error_code = 0;
 }
 
 static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
@@ -2939,20 +3210,18 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
        trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-       if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
-               vcpu->arch.cr0 = svm->vmcb->save.cr0;
-       if (npt_enabled)
-               vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       /* SEV-ES guests must use the CR write traps to track CR registers. */
+       if (!sev_es_guest(vcpu->kvm)) {
+               if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+                       vcpu->arch.cr0 = svm->vmcb->save.cr0;
+               if (npt_enabled)
+                       vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       }
 
        if (is_guest_mode(vcpu)) {
                int vmexit;
 
-               trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
-                                       svm->vmcb->control.exit_info_1,
-                                       svm->vmcb->control.exit_info_2,
-                                       svm->vmcb->control.exit_int_info,
-                                       svm->vmcb->control.exit_int_info_err,
-                                       KVM_ISA_SVM);
+               trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM);
 
                vmexit = nested_svm_exit_special(svm);
 
@@ -2984,32 +3253,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        if (exit_fastpath != EXIT_FASTPATH_NONE)
                return 1;
 
-       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-           || !svm_exit_handlers[exit_code]) {
-               vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
-               dump_vmcb(vcpu);
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror =
-                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-               vcpu->run->internal.ndata = 2;
-               vcpu->run->internal.data[0] = exit_code;
-               vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-               return 0;
-       }
-
-#ifdef CONFIG_RETPOLINE
-       if (exit_code == SVM_EXIT_MSR)
-               return msr_interception(svm);
-       else if (exit_code == SVM_EXIT_VINTR)
-               return interrupt_window_interception(svm);
-       else if (exit_code == SVM_EXIT_INTR)
-               return intr_interception(svm);
-       else if (exit_code == SVM_EXIT_HLT)
-               return halt_interception(svm);
-       else if (exit_code == SVM_EXIT_NPF)
-               return npf_interception(svm);
-#endif
-       return svm_exit_handlers[exit_code](svm);
+       return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3059,16 +3303,23 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return;
+
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
-       clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+       svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
 
        if (irr == -1)
                return;
 
        if (tpr >= irr)
-               set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+               svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
 bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
@@ -3130,7 +3381,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        if (!gif_set(svm))
                return true;
 
-       if (is_guest_mode(vcpu)) {
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+                * bit to determine the state of the IF flag.
+                */
+               if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+                       return true;
+       } else if (is_guest_mode(vcpu)) {
                /* As long as interrupts are being delivered...  */
                if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3256,7 +3514,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
-       if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
+       if (!svm_is_intercept(svm, INTERCEPT_CR8_WRITE)) {
                int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
                kvm_set_cr8(vcpu, cr8);
        }
@@ -3312,6 +3570,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                svm->vcpu.arch.nmi_injected = true;
                break;
        case SVM_EXITINTINFO_TYPE_EXEPT:
+               /*
+                * Never re-inject a #VC exception.
+                */
+               if (vector == X86_TRAP_VC)
+                       break;
+
                /*
                 * In case of software exceptions, do not reinject the vector,
                 * but re-execute the instruction instead. Rewind RIP first
@@ -3353,8 +3617,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
-       if (!is_guest_mode(vcpu) &&
-           to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+       if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
            to_svm(vcpu)->vmcb->control.exit_info_1)
                return handle_fastpath_set_msr_irqoff(vcpu);
 
@@ -3419,7 +3682,6 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
-       fastpath_t exit_fastpath;
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
@@ -3446,6 +3708,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        sync_lapic_to_cr8(vcpu);
 
+       if (unlikely(svm->asid != svm->vmcb->control.asid)) {
+               svm->vmcb->control.asid = svm->asid;
+               vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+       }
        svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
        /*
@@ -3460,9 +3726,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        clgi();
        kvm_load_guest_xsave_state(vcpu);
 
-       if (lapic_in_kernel(vcpu) &&
-               vcpu->arch.apic->lapic_timer.timer_advance_ns)
-               kvm_wait_lapic_expire(vcpu);
+       kvm_wait_lapic_expire(vcpu);
 
        /*
         * If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -3542,8 +3806,11 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                svm_handle_mce(svm);
 
        svm_complete_interrupts(svm);
-       exit_fastpath = svm_exit_handlers_fastpath(vcpu);
-       return exit_fastpath;
+
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
+       return svm_exit_handlers_fastpath(vcpu);
 }
 
 static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
@@ -3620,6 +3887,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_cpuid_entry2 *best;
 
        vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
                                    boot_cpu_has(X86_FEATURE_XSAVE) &&
@@ -3629,6 +3897,16 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
        svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
                             guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
 
+       /* Check again whether INVPCID interception is required */
+       svm_check_invpcid(svm);
+
+       /* For sev guests, the memory encryption bit is not reserved in CR3.  */
+       if (sev_guest(vcpu->kvm)) {
+               best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
+               if (best)
+                       vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f));
+       }
+
        if (!kvm_vcpu_apicv_active(vcpu))
                return;
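Regarding the CPUID 0x8000001F handling a few lines up: EBX bits 5:0 report the position of
the memory encryption bit (the C-bit) in page-table entries. As a purely illustrative
example, a CPU reporting EBX[5:0] = 47 would have bit 47 cleared from
vcpu->arch.cr3_lm_rsvd_bits, so a guest CR3 with the C-bit set is no longer rejected for
having reserved bits set.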
 
@@ -3743,7 +4021,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                break;
        case SVM_EXIT_WRITE_CR0: {
                unsigned long cr0, val;
-               u64 intercept;
 
                if (info->intercept == x86_intercept_cr_write)
                        icpt_info.exit_code += info->modrm_reg;
@@ -3752,9 +4029,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                    info->intercept == x86_intercept_clts)
                        break;
 
-               intercept = svm->nested.ctl.intercept;
-
-               if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
+               if (!(vmcb_is_intercept(&svm->nested.ctl,
+                                       INTERCEPT_SELECTIVE_CR0)))
                        break;
 
                cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
@@ -3889,7 +4165,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
                /* FED8h - SVM Guest */
                put_smstate(u64, smstate, 0x7ed8, 1);
                /* FEE0h - SVM Guest VMCB Physical Address */
-               put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
+               put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
 
                svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
                svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -3911,7 +4187,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
                u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
-               u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
+               u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
 
                if (guest) {
                        if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -3921,10 +4197,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
                                return 1;
 
                        if (kvm_vcpu_map(&svm->vcpu,
-                                        gpa_to_gfn(vmcb), &map) == -EINVAL)
+                                        gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
                                return 1;
 
-                       ret = enter_svm_guest_mode(svm, vmcb, map.hva);
+                       if (svm_allocate_nested(svm))
+                               return 1;
+
+                       ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva);
                        kvm_vcpu_unmap(&svm->vcpu, &map, true);
                }
        }
@@ -3945,19 +4224,16 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
        }
 }
 
-static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
 {
-       unsigned long cr4 = kvm_read_cr4(vcpu);
-       bool smep = cr4 & X86_CR4_SMEP;
-       bool smap = cr4 & X86_CR4_SMAP;
-       bool is_user = svm_get_cpl(vcpu) == 3;
+       bool smep, smap, is_user;
+       unsigned long cr4;
 
        /*
-        * If RIP is invalid, go ahead with emulation which will cause an
-        * internal error exit.
+        * When the guest is an SEV-ES guest, emulation is not possible.
         */
-       if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
-               return true;
+       if (sev_es_guest(vcpu->kvm))
+               return false;
 
        /*
         * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
@@ -3999,6 +4275,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
         * instruction pointer so we will not able to workaround it. Lets
         * print the error and request to kill the guest.
         */
+       if (likely(!insn || insn_len))
+               return true;
+
+       /*
+        * If RIP is invalid, go ahead with emulation which will cause an
+        * internal error exit.
+        */
+       if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+               return true;
+
+       cr4 = kvm_read_cr4(vcpu);
+       smep = cr4 & X86_CR4_SMEP;
+       smap = cr4 & X86_CR4_SMAP;
+       is_user = svm_get_cpl(vcpu) == 3;
        if (smap && (!smep || is_user)) {
                if (!sev_guest(vcpu->kvm))
                        return true;
@@ -4022,7 +4312,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
         * if an INIT signal is pending.
         */
        return !gif_set(svm) ||
-                  (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
+                  (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
 static void svm_vm_destroy(struct kvm *kvm)
@@ -4077,6 +4367,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .get_cpl = svm_get_cpl,
        .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
        .set_cr0 = svm_set_cr0,
+       .is_valid_cr4 = svm_is_valid_cr4,
        .set_cr4 = svm_set_cr4,
        .set_efer = svm_set_efer,
        .get_idt = svm_get_idt,
@@ -4160,9 +4451,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .mem_enc_reg_region = svm_register_enc_region,
        .mem_enc_unreg_region = svm_unregister_enc_region,
 
-       .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
+       .can_emulate_instruction = svm_can_emulate_instruction,
 
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+       .msr_filter_changed = svm_msr_filter_changed,
+       .complete_emulated_msr = svm_complete_emulated_msr,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {