KVM: SVM: Add support for CR0 write traps for an SEV-ES guest
[linux-2.6-microblaze.git] / arch / x86 / kvm / svm / svm.c
index 801e0a6..ddcb739 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/irq_remapping.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/traps.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -193,7 +194,7 @@ module_param(sev, int, 0444);
 int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev_es, int, 0444);
 
-static bool __read_mostly dump_invalid_vmcb = 0;
+bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -339,6 +340,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES does not expose the next RIP. The RIP update is controlled by
+        * the type of exit and the #VC handler in the guest.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               goto done;
+
        if (nrips && svm->vmcb->control.next_rip != 0) {
                WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
                svm->next_rip = svm->vmcb->control.next_rip;
@@ -350,6 +358,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
        } else {
                kvm_rip_write(vcpu, svm->next_rip);
        }
+
+done:
        svm_set_interrupt_shadow(vcpu, 0);
 
        return 1;
@@ -1651,9 +1661,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-       ulong gcr0 = svm->vcpu.arch.cr0;
-       u64 *hcr0 = &svm->vmcb->save.cr0;
+       ulong gcr0;
+       u64 *hcr0;
+
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return;
 
+       gcr0 = svm->vcpu.arch.cr0;
+       hcr0 = &svm->vmcb->save.cr0;
        *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
                | (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
@@ -1673,7 +1692,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-       if (vcpu->arch.efer & EFER_LME) {
+       if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
                        vcpu->arch.efer |= EFER_LMA;
                        svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -1786,6 +1805,9 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
 {
        struct vmcb *vmcb = svm->vmcb;
 
+       if (svm->vcpu.arch.guest_state_protected)
+               return;
+
        if (unlikely(value != vmcb->save.dr6)) {
                vmcb->save.dr6 = value;
                vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1796,6 +1818,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        get_debugreg(vcpu->arch.db[0], 0);
        get_debugreg(vcpu->arch.db[1], 1);
        get_debugreg(vcpu->arch.db[2], 2);
@@ -1814,6 +1839,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        svm->vmcb->save.dr7 = value;
        vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
@@ -1982,6 +2010,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
 {
        struct kvm_run *kvm_run = svm->vcpu.run;
 
+       /*
+        * The VM save area has already been encrypted so it
+        * cannot be reinitialized - just terminate.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return -EINVAL;
+
        /*
         * VMCB is undefined after a SHUTDOWN intercept
         * so reinitialize it.
@@ -2003,11 +2038,16 @@ static int io_interception(struct vcpu_svm *svm)
        ++svm->vcpu.stat.io_exits;
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-       if (string)
-               return kvm_emulate_instruction(vcpu, 0);
-
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+       if (string) {
+               if (sev_es_guest(vcpu->kvm))
+                       return sev_es_string_io(svm, size, port, in);
+               else
+                       return kvm_emulate_instruction(vcpu, 0);
+       }
+
        svm->next_rip = svm->vmcb->control.exit_info_2;
 
        return kvm_fast_pio(&svm->vcpu, size, port, in);
@@ -2410,6 +2450,31 @@ static int cr_interception(struct vcpu_svm *svm)
        return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       unsigned long old_value, new_value;
+       unsigned int cr;
+
+       new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+       cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+       switch (cr) {
+       case 0:
+               old_value = kvm_read_cr0(vcpu);
+               svm_set_cr0(vcpu, new_value);
+
+               kvm_post_set_cr0(vcpu, old_value, new_value);
+               break;
+       default:
+               WARN(1, "unhandled CR%d write trap", cr);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       return kvm_complete_insn_gp(vcpu, 0);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
        int reg, dr;
@@ -2463,6 +2528,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
+static int efer_trap(struct vcpu_svm *svm)
+{
+       struct msr_data msr_info;
+       int ret;
+
+       /*
+        * Clear the EFER_SVME bit from EFER. The SVM code always sets this
+        * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+        * whether the guest has X86_FEATURE_SVM - this avoids a failure if
+        * the guest doesn't have X86_FEATURE_SVM.
+        */
+       msr_info.host_initiated = false;
+       msr_info.index = MSR_EFER;
+       msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+       ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+       return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
        msr->data = 0;
@@ -2583,6 +2667,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        return 0;
 }
 
+static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm) || !err)
+               return kvm_complete_insn_gp(&svm->vcpu, err);
+
+       ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+       ghcb_set_sw_exit_info_2(svm->ghcb,
+                               X86_TRAP_GP |
+                               SVM_EVTINJ_TYPE_EXEPT |
+                               SVM_EVTINJ_VALID);
+       return 1;
+}
+
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
        return kvm_emulate_rdmsr(&svm->vcpu);
@@ -2801,7 +2899,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
 static int pause_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       bool in_kernel = (svm_get_cpl(vcpu) == 0);
+       bool in_kernel;
+
+       /*
+        * CPL is not made available for an SEV-ES guest, therefore
+        * vcpu->arch.preempted_in_kernel can never be true.  Just
+        * set in_kernel to false as well.
+        */
+       in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
 
        if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
@@ -2916,11 +3021,14 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_RDPRU]                        = rdpru_interception,
+       [SVM_EXIT_EFER_WRITE_TRAP]              = efer_trap,
+       [SVM_EXIT_CR0_WRITE_TRAP]               = cr_trap,
        [SVM_EXIT_INVPCID]                      = invpcid_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
        [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
+       [SVM_EXIT_VMGEXIT]                      = sev_handle_vmgexit,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -2962,6 +3070,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
        pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
        pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
+       pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
        pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
        pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
        pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
@@ -3041,6 +3150,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "excp_to:", save->last_excp_to);
 }
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+       if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+           svm_exit_handlers[exit_code])
+               return 0;
+
+       vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+       dump_vmcb(vcpu);
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+       if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+               return 0;
+
+#ifdef CONFIG_RETPOLINE
+       if (exit_code == SVM_EXIT_MSR)
+               return msr_interception(svm);
+       else if (exit_code == SVM_EXIT_VINTR)
+               return interrupt_window_interception(svm);
+       else if (exit_code == SVM_EXIT_INTR)
+               return intr_interception(svm);
+       else if (exit_code == SVM_EXIT_HLT)
+               return halt_interception(svm);
+       else if (exit_code == SVM_EXIT_NPF)
+               return npf_interception(svm);
+#endif
+       return svm_exit_handlers[exit_code](svm);
+}
+
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
                              u32 *intr_info, u32 *error_code)
 {
@@ -3064,10 +3210,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
        trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-       if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
-               vcpu->arch.cr0 = svm->vmcb->save.cr0;
-       if (npt_enabled)
-               vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       /* SEV-ES guests must use the CR write traps to track CR registers. */
+       if (!sev_es_guest(vcpu->kvm)) {
+               if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+                       vcpu->arch.cr0 = svm->vmcb->save.cr0;
+               if (npt_enabled)
+                       vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       }
 
        if (is_guest_mode(vcpu)) {
                int vmexit;
@@ -3104,32 +3253,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        if (exit_fastpath != EXIT_FASTPATH_NONE)
                return 1;
 
-       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-           || !svm_exit_handlers[exit_code]) {
-               vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
-               dump_vmcb(vcpu);
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror =
-                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-               vcpu->run->internal.ndata = 2;
-               vcpu->run->internal.data[0] = exit_code;
-               vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-               return 0;
-       }
-
-#ifdef CONFIG_RETPOLINE
-       if (exit_code == SVM_EXIT_MSR)
-               return msr_interception(svm);
-       else if (exit_code == SVM_EXIT_VINTR)
-               return interrupt_window_interception(svm);
-       else if (exit_code == SVM_EXIT_INTR)
-               return intr_interception(svm);
-       else if (exit_code == SVM_EXIT_HLT)
-               return halt_interception(svm);
-       else if (exit_code == SVM_EXIT_NPF)
-               return npf_interception(svm);
-#endif
-       return svm_exit_handlers[exit_code](svm);
+       return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3179,6 +3303,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return;
+
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
@@ -3250,7 +3381,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        if (!gif_set(svm))
                return true;
 
-       if (is_guest_mode(vcpu)) {
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+                * bit to determine the state of the IF flag.
+                */
+               if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+                       return true;
+       } else if (is_guest_mode(vcpu)) {
                /* As long as interrupts are being delivered...  */
                if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3432,6 +3570,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                svm->vcpu.arch.nmi_injected = true;
                break;
        case SVM_EXITINTINFO_TYPE_EXEPT:
+               /*
+                * Never re-inject a #VC exception.
+                */
+               if (vector == X86_TRAP_VC)
+                       break;
+
                /*
                 * In case of software exceptions, do not reinject the vector,
                 * but re-execute the instruction instead. Rewind RIP first
@@ -4085,6 +4229,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
        bool smep, smap, is_user;
        unsigned long cr4;
 
+       /*
+        * When the guest is an SEV-ES guest, emulation is not possible.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return false;
+
        /*
         * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
         *
@@ -4306,6 +4456,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
        .msr_filter_changed = svm_msr_filter_changed,
+       .complete_emulated_msr = svm_complete_emulated_msr,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {