KVM: VMX: Improve error reporting during invalid guest state emulation
[linux-2.6-microblaze.git] / arch / x86 / kvm / vmx.c
index 32eb588..c61eb34 100644 (file)
@@ -71,6 +71,9 @@ static bool __read_mostly enable_unrestricted_guest = 1;
 module_param_named(unrestricted_guest,
                        enable_unrestricted_guest, bool, S_IRUGO);
 
+static bool __read_mostly enable_ept_ad_bits = 1;
+module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
+
 static bool __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
@@ -615,6 +618,10 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
+static void vmx_set_segment(struct kvm_vcpu *vcpu,
+                           struct kvm_segment *var, int seg);
+static void vmx_get_segment(struct kvm_vcpu *vcpu,
+                           struct kvm_segment *var, int seg);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -789,6 +796,11 @@ static inline bool cpu_has_vmx_ept_4levels(void)
        return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
 }
 
+static inline bool cpu_has_vmx_ept_ad_bits(void)
+{
+       return vmx_capability.ept & VMX_EPT_AD_BIT;
+}
+
 static inline bool cpu_has_vmx_invept_individual_addr(void)
 {
        return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
@@ -2645,8 +2657,12 @@ static __init int hardware_setup(void)
            !cpu_has_vmx_ept_4levels()) {
                enable_ept = 0;
                enable_unrestricted_guest = 0;
+               enable_ept_ad_bits = 0;
        }
 
+       if (!cpu_has_vmx_ept_ad_bits())
+               enable_ept_ad_bits = 0;
+
        if (!cpu_has_vmx_unrestricted_guest())
                enable_unrestricted_guest = 0;
 
@@ -2770,6 +2786,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 {
        unsigned long flags;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct kvm_segment var;
 
        if (enable_unrestricted_guest)
                return;
@@ -2813,20 +2830,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
        if (emulate_invalid_guest_state)
                goto continue_rmode;
 
-       vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
-       vmcs_write32(GUEST_SS_LIMIT, 0xffff);
-       vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+       vmx_get_segment(vcpu, &var, VCPU_SREG_SS);
+       vmx_set_segment(vcpu, &var, VCPU_SREG_SS);
+
+       vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
+       vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
+
+       vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
+       vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
 
-       vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
-       vmcs_write32(GUEST_CS_LIMIT, 0xffff);
-       if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
-               vmcs_writel(GUEST_CS_BASE, 0xf0000);
-       vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
+       vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
+       vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
 
-       fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-       fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-       fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
-       fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+       vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
+       vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
+
+       vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
+       vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
 
 continue_rmode:
        kvm_mmu_reset_context(vcpu);
@@ -3027,6 +3047,8 @@ static u64 construct_eptp(unsigned long root_hpa)
        /* TODO write the value reading from MSR */
        eptp = VMX_EPT_DEFAULT_MT |
                VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
+       if (enable_ept_ad_bits)
+               eptp |= VMX_EPT_AD_ENABLE_BIT;
        eptp |= (root_hpa & PAGE_MASK);
 
        return eptp;
@@ -3153,11 +3175,22 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 
 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       /*
+        * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
+        * fail; use the cache instead.
+        */
+       if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
+               return vmx->cpl;
+       }
+
        if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
                __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
-               to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+               vmx->cpl = __vmx_get_cpl(vcpu);
        }
-       return to_vmx(vcpu)->cpl;
+
+       return vmx->cpl;
 }
 
 
@@ -3165,7 +3198,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
        u32 ar;
 
-       if (var->unusable)
+       if (var->unusable || !var->present)
                ar = 1 << 16;
        else {
                ar = var->type & 15;
@@ -3177,8 +3210,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
                ar |= (var->db & 1) << 14;
                ar |= (var->g & 1) << 15;
        }
-       if (ar == 0) /* a 0 value means unusable */
-               ar = AR_UNUSABLE_MASK;
 
        return ar;
 }
@@ -3229,6 +3260,44 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 
        vmcs_write32(sf->ar_bytes, ar);
        __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+
+       /*
+        * Fix segments for real mode guest in hosts that don't have
+        * "unrestricted_mode" or it was disabled.
+        * This is done to allow migration of the guests from hosts with
+        * unrestricted guest like Westmere to older host that don't have
+        * unrestricted guest like Nehelem.
+        */
+       if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
+               switch (seg) {
+               case VCPU_SREG_CS:
+                       vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
+                       vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+                       if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
+                               vmcs_writel(GUEST_CS_BASE, 0xf0000);
+                       vmcs_write16(GUEST_CS_SELECTOR,
+                                    vmcs_readl(GUEST_CS_BASE) >> 4);
+                       break;
+               case VCPU_SREG_ES:
+                       fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+                       break;
+               case VCPU_SREG_DS:
+                       fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+                       break;
+               case VCPU_SREG_GS:
+                       fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+                       break;
+               case VCPU_SREG_FS:
+                       fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+                       break;
+               case VCPU_SREG_SS:
+                       vmcs_write16(GUEST_SS_SELECTOR,
+                                    vmcs_readl(GUEST_SS_BASE) >> 4);
+                       vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+                       vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+                       break;
+               }
+       }
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -4489,7 +4558,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
                break;
        }
        vcpu->run->exit_reason = 0;
-       pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
+       vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
               (int)(exit_qualification >> 4) & 3, cr);
        return 0;
 }
@@ -4908,15 +4977,18 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
        int ret = 1;
        u32 cpu_exec_ctrl;
        bool intr_window_requested;
+       unsigned count = 130;
 
        cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
        intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
 
-       while (!guest_state_valid(vcpu)) {
-               if (intr_window_requested
-                   && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
+       while (!guest_state_valid(vcpu) && count-- != 0) {
+               if (intr_window_requested && vmx_interrupt_allowed(vcpu))
                        return handle_interrupt_window(&vmx->vcpu);
 
+               if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
+                       return 1;
+
                err = emulate_instruction(vcpu, 0);
 
                if (err == EMULATE_DO_MMIO) {
@@ -4924,8 +4996,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                        goto out;
                }
 
-               if (err != EMULATE_DONE)
+               if (err != EMULATE_DONE) {
+                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+                       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+                       vcpu->run->internal.ndata = 0;
                        return 0;
+               }
 
                if (signal_pending(current))
                        goto out;
@@ -4933,7 +5009,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                        schedule();
        }
 
-       vmx->emulation_required = 0;
+       vmx->emulation_required = !guest_state_valid(vcpu);
 out:
        return ret;
 }
@@ -7230,23 +7306,21 @@ static int __init vmx_init(void)
        if (!vmx_io_bitmap_a)
                return -ENOMEM;
 
+       r = -ENOMEM;
+
        vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_b) {
-               r = -ENOMEM;
+       if (!vmx_io_bitmap_b)
                goto out;
-       }
 
        vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy) {
-               r = -ENOMEM;
+       if (!vmx_msr_bitmap_legacy)
                goto out1;
-       }
+
 
        vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode) {
-               r = -ENOMEM;
+       if (!vmx_msr_bitmap_longmode)
                goto out2;
-       }
+
 
        /*
         * Allow direct access to the PC debug port (it is often used for I/O
@@ -7275,8 +7349,10 @@ static int __init vmx_init(void)
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
        if (enable_ept) {
-               kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-                               VMX_EPT_EXECUTABLE_MASK);
+               kvm_mmu_set_mask_ptes(0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+                       0ull, VMX_EPT_EXECUTABLE_MASK);
                ept_set_mmio_spte_mask();
                kvm_enable_tdp();
        } else