KVM: nSVM: extract preparation of VMCB for nested run
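The patch splits VMRUN preparation out of enter_svm_guest_mode() into load_nested_vmcb_control(), nested_prepare_vmcb_save() and nested_prepare_vmcb_control(), which are then called in sequence. Below is a minimal, self-contained C sketch of that call order; the structures and bodies are stand-ins for illustration only, not the kernel definitions (see the hunks that follow for the real code).

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel's struct vmcb_control_area / struct vmcb / struct vcpu_svm. */
struct vmcb_control_area { uint64_t nested_cr3, msrpm_base_pa, iopm_base_pa, tsc_offset; };
struct vmcb_save_area    { uint64_t rip, rsp, rax, cr0, cr3, cr4, efer; };
struct vmcb              { struct vmcb_control_area control; struct vmcb_save_area save; };
struct vcpu_svm          { struct vmcb *vmcb; uint64_t nested_vmcb_gpa; };

/* Cache L1's control fields (nested CR3, MSR/IO permission maps, intercepts, TSC offset). */
static void load_nested_vmcb_control(struct vcpu_svm *svm, struct vmcb_control_area *control)
{
	(void)svm; (void)control;
	puts("load control area");
}

/* Copy the L2 save state (segments, CRs, RIP/RSP/RAX, DR6/DR7, CPL) into the active VMCB. */
static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
{
	(void)svm; (void)nested_vmcb;
	puts("prepare save area");
}

/* Merge control fields (int_ctl, TSC offset, pause filter) and recalculate intercepts. */
static void nested_prepare_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
{
	(void)svm; (void)nested_vmcb;
	puts("prepare control area");
}

/* After the patch, enter_svm_guest_mode() reduces to this sequence plus GIF/event handling. */
static void enter_svm_guest_mode(struct vcpu_svm *svm, uint64_t vmcb_gpa, struct vmcb *nested_vmcb)
{
	svm->nested_vmcb_gpa = vmcb_gpa;
	load_nested_vmcb_control(svm, &nested_vmcb->control);
	nested_prepare_vmcb_save(svm, nested_vmcb);
	nested_prepare_vmcb_control(svm, nested_vmcb);
}

int main(void)
{
	struct vmcb vmcb = {0}, nested_vmcb = {0};
	struct vcpu_svm svm = { .vmcb = &vmcb };

	enter_svm_guest_mode(&svm, 0x1000, &nested_vmcb);
	return 0;
}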
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 90a1ca9..73be7af 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
 #include <linux/kernel.h>
 
 #include <asm/msr-index.h>
+#include <asm/debugreg.h>
 
 #include "kvm_emulate.h"
 #include "trace.h"
 #include "mmu.h"
 #include "x86.h"
+#include "lapic.h"
 #include "svm.h"
 
 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -85,7 +87,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
        vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
        vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
-       vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
+       vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level;
        reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
@@ -110,6 +112,8 @@ void recalc_intercepts(struct vcpu_svm *svm)
        h = &svm->nested.hsave->control;
        g = &svm->nested;
 
+       svm->nested.host_intercept_exceptions = h->intercept_exceptions;
+
        c->intercept_cr = h->intercept_cr;
        c->intercept_dr = h->intercept_dr;
        c->intercept_exceptions = h->intercept_exceptions;
@@ -149,7 +153,7 @@ static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb
        dst->iopm_base_pa         = from->iopm_base_pa;
        dst->msrpm_base_pa        = from->msrpm_base_pa;
        dst->tsc_offset           = from->tsc_offset;
-       dst->asid                 = from->asid;
+       /* asid not copied, it is handled manually for svm->vmcb.  */
        dst->tlb_ctl              = from->tlb_ctl;
        dst->int_ctl              = from->int_ctl;
        dst->int_vector           = from->int_vector;
@@ -207,6 +211,10 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
        if ((vmcb->save.efer & EFER_SVME) == 0)
                return false;
 
+       if (((vmcb->save.cr0 & X86_CR0_CD) == 0) &&
+           (vmcb->save.cr0 & X86_CR0_NW))
+               return false;
+
        if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
                return false;
 
@@ -220,22 +228,27 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
        return true;
 }
 
-void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
-                         struct vmcb *nested_vmcb, struct kvm_host_map *map)
+static void load_nested_vmcb_control(struct vcpu_svm *svm,
+                                    struct vmcb_control_area *control)
 {
-       bool evaluate_pending_interrupts =
-               is_intercept(svm, INTERCEPT_VINTR) ||
-               is_intercept(svm, INTERCEPT_IRET);
+       svm->nested.nested_cr3 = control->nested_cr3;
 
-       if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
-               svm->vcpu.arch.hflags |= HF_HIF_MASK;
-       else
-               svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+       svm->nested.vmcb_msrpm = control->msrpm_base_pa & ~0x0fffULL;
+       svm->nested.vmcb_iopm  = control->iopm_base_pa  & ~0x0fffULL;
 
-       if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
-               svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+       /* cache intercepts */
+       svm->nested.intercept_cr         = control->intercept_cr;
+       svm->nested.intercept_dr         = control->intercept_dr;
+       svm->nested.intercept_exceptions = control->intercept_exceptions;
+       svm->nested.intercept            = control->intercept;
+
+       svm->vcpu.arch.tsc_offset += control->tsc_offset;
+}
+
+static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+{
+       if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
                nested_svm_init_mmu_context(&svm->vcpu);
-       }
 
        /* Load the nested guest state */
        svm->vmcb->save.es = nested_vmcb->save.es;
@@ -248,11 +261,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
        svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
        svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
        svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
-       if (npt_enabled) {
-               svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
-               svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
-       } else
-               (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+       (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
 
        /* Guest paging mode is active - reset mmu */
        kvm_mmu_reset_context(&svm->vcpu);
@@ -267,28 +276,21 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
        svm->vmcb->save.rsp = nested_vmcb->save.rsp;
        svm->vmcb->save.rip = nested_vmcb->save.rip;
        svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
-       svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+       svm->vcpu.arch.dr6  = nested_vmcb->save.dr6;
        svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+}
 
-       svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
-       svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
-
-       /* cache intercepts */
-       svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
-       svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
-       svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
-       svm->nested.intercept            = nested_vmcb->control.intercept;
-
-       svm_flush_tlb(&svm->vcpu, true);
-       svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+static void nested_prepare_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+{
+       svm_flush_tlb(&svm->vcpu);
        if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                svm->vcpu.arch.hflags |= HF_VINTR_MASK;
        else
                svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
-       svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
        svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
 
+       svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
        svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
        svm->vmcb->control.int_state = nested_vmcb->control.int_state;
@@ -300,8 +302,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
        svm->vmcb->control.pause_filter_thresh =
                nested_vmcb->control.pause_filter_thresh;
 
-       kvm_vcpu_unmap(&svm->vcpu, map, true);
-
        /* Enter Guest-Mode */
        enter_guest_mode(&svm->vcpu);
 
@@ -311,7 +311,25 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
         */
        recalc_intercepts(svm);
 
+       mark_all_dirty(svm->vmcb);
+}
+
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+                         struct vmcb *nested_vmcb)
+{
+       bool evaluate_pending_interrupts =
+               is_intercept(svm, INTERCEPT_VINTR) ||
+               is_intercept(svm, INTERCEPT_IRET);
+
        svm->nested.vmcb = vmcb_gpa;
+       if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
+               svm->vcpu.arch.hflags |= HF_HIF_MASK;
+       else
+               svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+       load_nested_vmcb_control(svm, &nested_vmcb->control);
+       nested_prepare_vmcb_save(svm, nested_vmcb);
+       nested_prepare_vmcb_control(svm, nested_vmcb);
 
        /*
         * If L1 had a pending IRQ/NMI before executing VMRUN,
@@ -328,8 +346,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
        enable_gif(svm);
        if (unlikely(evaluate_pending_interrupts))
                kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
-
-       mark_all_dirty(svm->vmcb);
 }
 
 int nested_svm_vmrun(struct vcpu_svm *svm)
@@ -341,8 +357,12 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
        struct kvm_host_map map;
        u64 vmcb_gpa;
 
-       vmcb_gpa = svm->vmcb->save.rax;
+       if (is_smm(&svm->vcpu)) {
+               kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+               return 1;
+       }
 
+       vmcb_gpa = svm->vmcb->save.rax;
        ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
        if (ret == -EINVAL) {
                kvm_inject_gp(&svm->vcpu, 0);
@@ -360,10 +380,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
                nested_vmcb->control.exit_code_hi = 0;
                nested_vmcb->control.exit_info_1  = 0;
                nested_vmcb->control.exit_info_2  = 0;
-
-               kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
-               return ret;
+               goto out;
        }
 
        trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
@@ -405,7 +422,8 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
 
        copy_vmcb_control_area(hsave, vmcb);
 
-       enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
+       svm->nested.nested_run_pending = 1;
+       enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
 
        if (!nested_svm_vmrun_msrpm(svm)) {
                svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
@@ -416,6 +434,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
                nested_svm_vmexit(svm);
        }
 
+out:
+       kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
        return ret;
 }
 
@@ -463,6 +484,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        leave_guest_mode(&svm->vcpu);
        svm->nested.vmcb = 0;
 
+       /* in case we halted in L2 */
+       svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
        /* Give the current vmcb to the guest */
        disable_gif(svm);
 
@@ -478,11 +502,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        nested_vmcb->save.cr2    = vmcb->save.cr2;
        nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
        nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
-       nested_vmcb->save.rip    = vmcb->save.rip;
-       nested_vmcb->save.rsp    = vmcb->save.rsp;
-       nested_vmcb->save.rax    = vmcb->save.rax;
+       nested_vmcb->save.rip    = kvm_rip_read(&svm->vcpu);
+       nested_vmcb->save.rsp    = kvm_rsp_read(&svm->vcpu);
+       nested_vmcb->save.rax    = kvm_rax_read(&svm->vcpu);
        nested_vmcb->save.dr7    = vmcb->save.dr7;
-       nested_vmcb->save.dr6    = vmcb->save.dr6;
+       nested_vmcb->save.dr6    = svm->vcpu.arch.dr6;
        nested_vmcb->save.cpl    = vmcb->save.cpl;
 
        nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
@@ -603,31 +627,6 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
        return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
 }
 
-/* DB exceptions for our internal use must not cause vmexit */
-static int nested_svm_intercept_db(struct vcpu_svm *svm)
-{
-       unsigned long dr6;
-
-       /* if we're not singlestepping, it's not ours */
-       if (!svm->nmi_singlestep)
-               return NESTED_EXIT_DONE;
-
-       /* if it's not a singlestep exception, it's not ours */
-       if (kvm_get_dr(&svm->vcpu, 6, &dr6))
-               return NESTED_EXIT_DONE;
-       if (!(dr6 & DR6_BS))
-               return NESTED_EXIT_DONE;
-
-       /* if the guest is singlestepping, it should get the vmexit */
-       if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
-               disable_nmi_singlestep(svm);
-               return NESTED_EXIT_DONE;
-       }
-
-       /* it's ours, the nested hypervisor must not see this one */
-       return NESTED_EXIT_HOST;
-}
-
 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
 {
        unsigned port, size, iopm_len;
@@ -678,17 +677,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
                break;
        }
        case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
-               u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
-               if (svm->nested.intercept_exceptions & excp_bits) {
-                       if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
-                               vmexit = nested_svm_intercept_db(svm);
-                       else
-                               vmexit = NESTED_EXIT_DONE;
-               }
-               /* async page fault always cause vmexit */
-               else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-                        svm->vcpu.arch.exception.nested_apf != 0)
-                       vmexit = NESTED_EXIT_DONE;
+               /*
+                * Host-intercepted exceptions have been checked already in
+                * nested_svm_exit_special.  There is nothing to do here,
+                * the vmexit is injected by svm_check_nested_events.
+                */
+               vmexit = NESTED_EXIT_DONE;
                break;
        }
        case SVM_EXIT_ERR: {
@@ -733,62 +727,140 @@ int nested_svm_check_permissions(struct vcpu_svm *svm)
        return 0;
 }
 
-int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
-                              bool has_error_code, u32 error_code)
+static bool nested_exit_on_exception(struct vcpu_svm *svm)
 {
-       int vmexit;
+       unsigned int nr = svm->vcpu.arch.exception.nr;
 
-       if (!is_guest_mode(&svm->vcpu))
-               return 0;
+       return (svm->nested.intercept_exceptions & (1 << nr));
+}
 
-       vmexit = nested_svm_intercept(svm);
-       if (vmexit != NESTED_EXIT_DONE)
-               return 0;
+static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
+{
+       unsigned int nr = svm->vcpu.arch.exception.nr;
 
        svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
        svm->vmcb->control.exit_code_hi = 0;
-       svm->vmcb->control.exit_info_1 = error_code;
+
+       if (svm->vcpu.arch.exception.has_error_code)
+               svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
 
        /*
         * EXITINFO2 is undefined for all exception intercepts other
         * than #PF.
         */
-       if (svm->vcpu.arch.exception.nested_apf)
-               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
-       else if (svm->vcpu.arch.exception.has_payload)
-               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
-       else
-               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+       if (nr == PF_VECTOR) {
+               if (svm->vcpu.arch.exception.nested_apf)
+                       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+               else if (svm->vcpu.arch.exception.has_payload)
+                       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
+               else
+                       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+       } else if (nr == DB_VECTOR) {
+               /* See inject_pending_event.  */
+               kvm_deliver_exception_payload(&svm->vcpu);
+               if (svm->vcpu.arch.dr7 & DR7_GD) {
+                       svm->vcpu.arch.dr7 &= ~DR7_GD;
+                       kvm_update_dr7(&svm->vcpu);
+               }
+       } else
+               WARN_ON(svm->vcpu.arch.exception.has_payload);
 
-       svm->nested.exit_required = true;
-       return vmexit;
+       nested_svm_vmexit(svm);
+}
+
+static void nested_svm_smi(struct vcpu_svm *svm)
+{
+       svm->vmcb->control.exit_code = SVM_EXIT_SMI;
+       svm->vmcb->control.exit_info_1 = 0;
+       svm->vmcb->control.exit_info_2 = 0;
+
+       nested_svm_vmexit(svm);
+}
+
+static void nested_svm_nmi(struct vcpu_svm *svm)
+{
+       svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+       svm->vmcb->control.exit_info_1 = 0;
+       svm->vmcb->control.exit_info_2 = 0;
+
+       nested_svm_vmexit(svm);
 }
 
 static void nested_svm_intr(struct vcpu_svm *svm)
 {
+       trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+
        svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
        svm->vmcb->control.exit_info_1 = 0;
        svm->vmcb->control.exit_info_2 = 0;
 
-       /* nested_svm_vmexit this gets called afterwards from handle_exit */
-       svm->nested.exit_required = true;
-       trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+       nested_svm_vmexit(svm);
+}
+
+static inline bool nested_exit_on_init(struct vcpu_svm *svm)
+{
+       return (svm->nested.intercept & (1ULL << INTERCEPT_INIT));
 }
 
-static bool nested_exit_on_intr(struct vcpu_svm *svm)
+static void nested_svm_init(struct vcpu_svm *svm)
 {
-       return (svm->nested.intercept & 1ULL);
+       svm->vmcb->control.exit_code   = SVM_EXIT_INIT;
+       svm->vmcb->control.exit_info_1 = 0;
+       svm->vmcb->control.exit_info_2 = 0;
+
+       nested_svm_vmexit(svm);
 }
 
-int svm_check_nested_events(struct kvm_vcpu *vcpu)
+
+static int svm_check_nested_events(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        bool block_nested_events =
-               kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;
+               kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
+       struct kvm_lapic *apic = vcpu->arch.apic;
+
+       if (lapic_in_kernel(vcpu) &&
+           test_bit(KVM_APIC_INIT, &apic->pending_events)) {
+               if (block_nested_events)
+                       return -EBUSY;
+               if (!nested_exit_on_init(svm))
+                       return 0;
+               nested_svm_init(svm);
+               return 0;
+       }
+
+       if (vcpu->arch.exception.pending) {
+               if (block_nested_events)
+                        return -EBUSY;
+               if (!nested_exit_on_exception(svm))
+                       return 0;
+               nested_svm_inject_exception_vmexit(svm);
+               return 0;
+       }
+
+       if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
+               if (block_nested_events)
+                       return -EBUSY;
+               if (!nested_exit_on_smi(svm))
+                       return 0;
+               nested_svm_smi(svm);
+               return 0;
+       }
+
+       if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
+               if (block_nested_events)
+                       return -EBUSY;
+               if (!nested_exit_on_nmi(svm))
+                       return 0;
+               nested_svm_nmi(svm);
+               return 0;
+       }
 
-       if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
+       if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
                if (block_nested_events)
                        return -EBUSY;
+               if (!nested_exit_on_intr(svm))
+                       return 0;
                nested_svm_intr(svm);
                return 0;
        }
@@ -803,21 +875,26 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
        switch (exit_code) {
        case SVM_EXIT_INTR:
        case SVM_EXIT_NMI:
-       case SVM_EXIT_EXCP_BASE + MC_VECTOR:
-               return NESTED_EXIT_HOST;
        case SVM_EXIT_NPF:
-               /* For now we are always handling NPFs when using them */
-               if (npt_enabled)
+               return NESTED_EXIT_HOST;
+       case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+               u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+
+               if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits)
                        return NESTED_EXIT_HOST;
-               break;
-       case SVM_EXIT_EXCP_BASE + PF_VECTOR:
-               /* When we're shadowing, trap PFs, but not async PF */
-               if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
+               else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+                        svm->vcpu.arch.apf.host_apf_reason)
+                       /* Trap async PF even if not shadowing */
                        return NESTED_EXIT_HOST;
                break;
+       }
        default:
                break;
        }
 
        return NESTED_EXIT_CONTINUE;
 }
+
+struct kvm_x86_nested_ops svm_nested_ops = {
+       .check_events = svm_check_nested_events,
+};