diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 544b6e362cf7..ddcb7390bb0e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -35,6 +35,7 @@
 #include <asm/irq_remapping.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/traps.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -186,10 +187,14 @@ static int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
-static bool __read_mostly dump_invalid_vmcb = 0;
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
+bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -335,6 +340,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
 
+        /*
+         * SEV-ES does not expose the next RIP. The RIP update is controlled by
+         * the type of exit and the #VC handler in the guest.
+         */
+        if (sev_es_guest(vcpu->kvm))
+                goto done;
+
         if (nrips && svm->vmcb->control.next_rip != 0) {
                 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
                 svm->next_rip = svm->vmcb->control.next_rip;
@@ -346,6 +358,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
         } else {
                 kvm_rip_write(vcpu, svm->next_rip);
         }
+
+done:
         svm_set_interrupt_shadow(vcpu, 0);
 
         return 1;
@@ -958,15 +972,11 @@ static __init int svm_hardware_setup(void)
                 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
         }
 
-        if (sev) {
-                if (boot_cpu_has(X86_FEATURE_SEV) &&
-                    IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
-                        r = sev_hardware_setup();
-                        if (r)
-                                sev = false;
-                } else {
-                        sev = false;
-                }
+        if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+                sev_hardware_setup();
+        } else {
+                sev = false;
+                sev_es = false;
         }
 
         svm_adjust_mmio_mask();
@@ -1288,6 +1298,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm;
         struct page *vmcb_page;
+        struct page *vmsa_page = NULL;
         int err;
 
         BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1298,9 +1309,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
         if (!vmcb_page)
                 goto out;
 
+        if (sev_es_guest(svm->vcpu.kvm)) {
+                /*
+                 * SEV-ES guests require a separate VMSA page used to contain
+                 * the encrypted register state of the guest.
+                 */
+                vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+                if (!vmsa_page)
+                        goto error_free_vmcb_page;
+        }
+
         err = avic_init_vcpu(svm);
         if (err)
-                goto error_free_vmcb_page;
+                goto error_free_vmsa_page;
 
         /* We initialize this flag to true to make sure that the is_running
          * bit would be set the first time the vcpu is loaded.
@@ -1310,12 +1331,16 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
         svm->msrpm = svm_vcpu_alloc_msrpm();
         if (!svm->msrpm)
-                goto error_free_vmcb_page;
+                goto error_free_vmsa_page;
 
         svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
         svm->vmcb = page_address(vmcb_page);
         svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+
+        if (vmsa_page)
+                svm->vmsa = page_address(vmsa_page);
+
         svm->asid_generation = 0;
         init_vmcb(svm);
 
@@ -1324,6 +1349,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
         return 0;
 
+error_free_vmsa_page:
+        if (vmsa_page)
+                __free_page(vmsa_page);
 error_free_vmcb_page:
         __free_page(vmcb_page);
 out:
@@ -1351,6 +1379,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
         svm_free_nested(svm);
 
+        sev_free_vcpu(vcpu);
+
         __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 }
@@ -1631,9 +1661,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-        ulong gcr0 = svm->vcpu.arch.cr0;
-        u64 *hcr0 = &svm->vmcb->save.cr0;
+        ulong gcr0;
+        u64 *hcr0;
 
+        /*
+         * SEV-ES guests must always keep the CR intercepts cleared. CR
+         * tracking is done using the CR write traps.
+         */
+        if (sev_es_guest(svm->vcpu.kvm))
+                return;
+
+        gcr0 = svm->vcpu.arch.cr0;
+        hcr0 = &svm->vmcb->save.cr0;
 
         *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
                 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
@@ -1653,7 +1692,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
         struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-        if (vcpu->arch.efer & EFER_LME) {
+        if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
                         vcpu->arch.efer |= EFER_LMA;
                         svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -1766,6 +1805,9 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
 {
         struct vmcb *vmcb = svm->vmcb;
 
+        if (svm->vcpu.arch.guest_state_protected)
+                return;
+
         if (unlikely(value != vmcb->save.dr6)) {
                 vmcb->save.dr6 = value;
                 vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1776,6 +1818,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
 
+        if (vcpu->arch.guest_state_protected)
+                return;
+
         get_debugreg(vcpu->arch.db[0], 0);
         get_debugreg(vcpu->arch.db[1], 1);
         get_debugreg(vcpu->arch.db[2], 2);
@@ -1794,6 +1839,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
 
+        if (vcpu->arch.guest_state_protected)
+                return;
+
         svm->vmcb->save.dr7 = value;
         vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
@@ -1962,6 +2010,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
 {
         struct kvm_run *kvm_run = svm->vcpu.run;
 
+        /*
+         * The VM save area has already been encrypted so it
+         * cannot be reinitialized - just terminate.
+         */
+        if (sev_es_guest(svm->vcpu.kvm))
+                return -EINVAL;
+
         /*
          * VMCB is undefined after a SHUTDOWN intercept
          * so reinitialize it.
@@ -1983,11 +2038,16 @@ static int io_interception(struct vcpu_svm *svm)
         ++svm->vcpu.stat.io_exits;
         string = (io_info & SVM_IOIO_STR_MASK) != 0;
         in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-        if (string)
-                return kvm_emulate_instruction(vcpu, 0);
-
         port = io_info >> 16;
         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+        if (string) {
+                if (sev_es_guest(vcpu->kvm))
+                        return sev_es_string_io(svm, size, port, in);
+                else
+                        return kvm_emulate_instruction(vcpu, 0);
+        }
+
         svm->next_rip = svm->vmcb->control.exit_info_2;
 
         return kvm_fast_pio(&svm->vcpu, size, port, in);
@@ -2390,6 +2450,31 @@ static int cr_interception(struct vcpu_svm *svm)
         return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+        struct kvm_vcpu *vcpu = &svm->vcpu;
+        unsigned long old_value, new_value;
+        unsigned int cr;
+
+        new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+        cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+        switch (cr) {
+        case 0:
+                old_value = kvm_read_cr0(vcpu);
+                svm_set_cr0(vcpu, new_value);
+
+                kvm_post_set_cr0(vcpu, old_value, new_value);
+                break;
+        default:
+                WARN(1, "unhandled CR%d write trap", cr);
+                kvm_queue_exception(vcpu, UD_VECTOR);
+                return 1;
+        }
+
+        return kvm_complete_insn_gp(vcpu, 0);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
         int reg, dr;
@@ -2443,6 +2528,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
         return 0;
 }
 
+static int efer_trap(struct vcpu_svm *svm)
+{
+        struct msr_data msr_info;
+        int ret;
+
+        /*
+         * Clear the EFER_SVME bit from EFER. The SVM code always sets this
+         * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+         * whether the guest has X86_FEATURE_SVM - this avoids a failure if
+         * the guest doesn't have X86_FEATURE_SVM.
+         */
+        msr_info.host_initiated = false;
+        msr_info.index = MSR_EFER;
+        msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+        ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+        return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
         msr->data = 0;
@@ -2563,6 +2667,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         return 0;
 }
 
+static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
+{
+        struct vcpu_svm *svm = to_svm(vcpu);
+        if (!sev_es_guest(svm->vcpu.kvm) || !err)
+                return kvm_complete_insn_gp(&svm->vcpu, err);
+
+        ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+        ghcb_set_sw_exit_info_2(svm->ghcb,
+                                X86_TRAP_GP |
+                                SVM_EVTINJ_TYPE_EXEPT |
+                                SVM_EVTINJ_VALID);
+        return 1;
+}
+
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
         return kvm_emulate_rdmsr(&svm->vcpu);
@@ -2781,7 +2899,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
 static int pause_interception(struct vcpu_svm *svm)
 {
         struct kvm_vcpu *vcpu = &svm->vcpu;
-        bool in_kernel = (svm_get_cpl(vcpu) == 0);
+        bool in_kernel;
+
+        /*
+         * CPL is not made available for an SEV-ES guest, therefore
+         * vcpu->arch.preempted_in_kernel can never be true. Just
+         * set in_kernel to false as well.
+         */
+        in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
 
         if (!kvm_pause_in_guest(vcpu->kvm))
                 grow_ple_window(vcpu);
@@ -2896,11 +3021,14 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
         [SVM_EXIT_MWAIT]                        = mwait_interception,
         [SVM_EXIT_XSETBV]                       = xsetbv_interception,
         [SVM_EXIT_RDPRU]                        = rdpru_interception,
+        [SVM_EXIT_EFER_WRITE_TRAP]              = efer_trap,
+        [SVM_EXIT_CR0_WRITE_TRAP]               = cr_trap,
         [SVM_EXIT_INVPCID]                      = invpcid_interception,
         [SVM_EXIT_NPF]                          = npf_interception,
         [SVM_EXIT_RSM]                          = rsm_interception,
         [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
         [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
+        [SVM_EXIT_VMGEXIT]                      = sev_handle_vmgexit,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -2942,6 +3070,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
         pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
         pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
         pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
+        pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
         pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
         pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
         pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
@@ -3021,6 +3150,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
                "excp_to:", save->last_excp_to);
 }
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+        if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+            svm_exit_handlers[exit_code])
+                return 0;
+
+        vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+        dump_vmcb(vcpu);
+        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+        vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+        vcpu->run->internal.ndata = 2;
+        vcpu->run->internal.data[0] = exit_code;
+        vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+        return -EINVAL;
+}
+
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+        if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+                return 0;
+
+#ifdef CONFIG_RETPOLINE
+        if (exit_code == SVM_EXIT_MSR)
+                return msr_interception(svm);
+        else if (exit_code == SVM_EXIT_VINTR)
+                return interrupt_window_interception(svm);
+        else if (exit_code == SVM_EXIT_INTR)
+                return intr_interception(svm);
+        else if (exit_code == SVM_EXIT_HLT)
+                return halt_interception(svm);
+        else if (exit_code == SVM_EXIT_NPF)
+                return npf_interception(svm);
+#endif
+        return svm_exit_handlers[exit_code](svm);
+}
+
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
                               u32 *intr_info, u32 *error_code)
 {
@@ -3044,10 +3210,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
         trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-        if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
-                vcpu->arch.cr0 = svm->vmcb->save.cr0;
-        if (npt_enabled)
-                vcpu->arch.cr3 = svm->vmcb->save.cr3;
+        /* SEV-ES guests must use the CR write traps to track CR registers. */
+        if (!sev_es_guest(vcpu->kvm)) {
+                if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+                        vcpu->arch.cr0 = svm->vmcb->save.cr0;
+                if (npt_enabled)
+                        vcpu->arch.cr3 = svm->vmcb->save.cr3;
+        }
 
         if (is_guest_mode(vcpu)) {
                 int vmexit;
@@ -3084,32 +3253,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
         if (exit_fastpath != EXIT_FASTPATH_NONE)
                 return 1;
 
-        if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-            || !svm_exit_handlers[exit_code]) {
-                vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
-                dump_vmcb(vcpu);
-                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-                vcpu->run->internal.suberror =
-                        KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-                vcpu->run->internal.ndata = 2;
-                vcpu->run->internal.data[0] = exit_code;
-                vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-                return 0;
-        }
-
-#ifdef CONFIG_RETPOLINE
-        if (exit_code == SVM_EXIT_MSR)
-                return msr_interception(svm);
-        else if (exit_code == SVM_EXIT_VINTR)
-                return interrupt_window_interception(svm);
-        else if (exit_code == SVM_EXIT_INTR)
-                return intr_interception(svm);
-        else if (exit_code == SVM_EXIT_HLT)
-                return halt_interception(svm);
-        else if (exit_code == SVM_EXIT_NPF)
-                return npf_interception(svm);
-#endif
-        return svm_exit_handlers[exit_code](svm);
+        return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3159,6 +3303,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
 
+        /*
+         * SEV-ES guests must always keep the CR intercepts cleared. CR
+         * tracking is done using the CR write traps.
+         */
+        if (sev_es_guest(vcpu->kvm))
+                return;
+
         if (nested_svm_virtualize_tpr(vcpu))
                 return;
 
@@ -3230,7 +3381,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
         if (!gif_set(svm))
                 return true;
 
-        if (is_guest_mode(vcpu)) {
+        if (sev_es_guest(svm->vcpu.kvm)) {
+                /*
+                 * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+                 * bit to determine the state of the IF flag.
+                 */
+                if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+                        return true;
+        } else if (is_guest_mode(vcpu)) {
                 /* As long as interrupts are being delivered... */
                 if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                     ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3412,6 +3570,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                 svm->vcpu.arch.nmi_injected = true;
                 break;
         case SVM_EXITINTINFO_TYPE_EXEPT:
+                /*
+                 * Never re-inject a #VC exception.
+                 */
+                if (vector == X86_TRAP_VC)
+                        break;
+
                 /*
                  * In case of software exceptions, do not reinject the vector,
                  * but re-execute the instruction instead. Rewind RIP first
@@ -4065,6 +4229,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
         bool smep, smap, is_user;
         unsigned long cr4;
 
+        /*
+         * When the guest is an SEV-ES guest, emulation is not possible.
+         */
+        if (sev_es_guest(vcpu->kvm))
+                return false;
+
         /*
          * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
          *
@@ -4286,6 +4456,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
         .apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
         .msr_filter_changed = svm_msr_filter_changed,
+        .complete_emulated_msr = svm_complete_emulated_msr,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
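
Two notes on the sev/sev_es plumbing above. The module parameters lose their static qualifier because the sev.c side of the series also needs to clear them when sev_hardware_setup() finds no usable support, and nearly every behavioral change in this patch keys off sev_es_guest(). That helper lives in svm.h rather than in this file; at this point in the series it amounts to roughly the following (a sketch for orientation, not a hunk of this patch):

        /* Approximate shape of the svm.h helper used throughout this patch. */
        static inline bool sev_es_guest(struct kvm *kvm)
        {
                /* es_active is set when the guest is created via KVM_SEV_ES_INIT. */
                return sev_guest(kvm) && to_kvm_svm(kvm)->sev_info.es_active;
        }

In practice both knobs must be on for SEV-ES guests, e.g. loading kvm_amd with sev=1 sev_es=1, and sev_hardware_setup() can still downgrade them at runtime if CPUID or the SEV firmware does not cooperate.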
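
cr_trap() relies on the SVM_EXIT_CR0_WRITE_TRAP..SVM_EXIT_CR15_WRITE_TRAP exit codes forming a contiguous block (0x090-0x09f in the uapi svm.h), so subtracting the base yields the CR number directly. Only CR0 is wired up here; anything else warns and injects #UD, and follow-on patches in the series fill in further registers. Assuming the matching kvm_read_cr4()/kvm_post_set_cr4() helpers on the x86.c side, a CR4 arm would plausibly slot into the switch like this (illustrative sketch, not part of this diff):

        case 4:
                /* Mirror the CR0 arm: read old, set new, run the post-set hook. */
                old_value = kvm_read_cr4(vcpu);
                svm_set_cr4(vcpu, new_value);
                kvm_post_set_cr4(vcpu, old_value, new_value);
                break;

The read-old/set-new/post-set ordering matters: the post-set hook is where KVM reacts to changes in tracked bits, for example resetting the MMU context when paging-related bits flip.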
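
svm_complete_emulated_msr(), wired into svm_x86_ops at the end of the patch, is the completion hook for emulated MSR accesses. For non-SEV-ES guests, or on success, it reduces to kvm_complete_insn_gp(). For a failed access in an SEV-ES guest KVM cannot simply inject #GP, because the guest is inside its own #VC handler around the VMGEXIT; instead sw_exit_info_1 = 1 marks the request as failed and sw_exit_info_2 carries a #GP event in SVM_EVTINJ_* encoding. The guest-side decode is approximately the following (a sketch: ghcb_msr_error() is a made-up name, while the struct ghcb fields and SVM_EVTINJ_* masks are the existing asm/svm.h definitions):

        /* Sketch of what a guest #VC handler checks after the VMGEXIT returns. */
        static bool ghcb_msr_error(struct ghcb *ghcb, unsigned int *vector)
        {
                u64 info2 = ghcb->save.sw_exit_info_2;

                if (lower_32_bits(ghcb->save.sw_exit_info_1) != 1)
                        return false;   /* hypervisor reported success */

                if ((info2 & SVM_EVTINJ_VALID) &&
                    (info2 & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)
                        *vector = info2 & SVM_EVTINJ_VEC_MASK;  /* X86_TRAP_GP here */

                return true;
        }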
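
The sev_free_vcpu() call added to svm_free_vcpu() is the teardown half of the VMSA page allocated in svm_create_vcpu(). Once the guest has launched, that page holds the vCPU register state encrypted with the guest's key and is written by hardware, so it cannot simply be handed back to the page allocator while encrypted cache lines may still be outstanding. The sev.c counterpart from the same series is approximately:

        /* Approximate sev.c counterpart, paired with the VMSA allocation above. */
        void sev_free_vcpu(struct kvm_vcpu *vcpu)
        {
                struct vcpu_svm *svm;

                if (!sev_es_guest(vcpu->kvm))
                        return;

                svm = to_svm(vcpu);

                /* Flush the encrypted VMSA cache lines before freeing the page. */
                if (vcpu->arch.guest_state_protected)
                        sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);

                __free_page(virt_to_page(svm->vmsa));
        }

This is also why vmsa_page is allocated with __GFP_ZERO: the page's initial contents end up in the launch measurement when LAUNCH_UPDATE_VMSA encrypts it, so no stale kernel data may leak into it.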