KVM: VMX: Add a helper and macros to reduce boilerplate for sec exec ctls
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f002d34..b0ba1cc 100644
@@ -146,9 +146,6 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
        RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
        RTIT_STATUS_BYTECNT))
 
-#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
-       (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -1037,6 +1034,12 @@ static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
               !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
 }
 
+static inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base)
+{
+       /* The base must be 128-byte aligned and a legal physical address. */
+       return !kvm_vcpu_is_illegal_gpa(vcpu, base) && !(base & 0x7f);
+}
+
 static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
 {
        u32 i;
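
The helper above replaces the MSR_IA32_RTIT_OUTPUT_BASE_MASK macro removed in the first hunk, which silently relied on a "vcpu" variable being in scope at each expansion site. The requirement is unchanged: the Intel PT output base must be 128-byte aligned (low 7 bits clear) and must be a legal guest physical address. Below is a minimal standalone sketch of the equivalence, assuming kvm_vcpu_is_illegal_gpa() reduces to a maxphyaddr bounds check as in KVM of this era; pt_output_base_valid_sketch() and its maxphyaddr parameter are hypothetical stand-ins for the real vcpu plumbing.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* maxphyaddr stands in for cpuid_query_maxphyaddr(vcpu); assumed < 64. */
static bool pt_output_base_valid_sketch(uint64_t base, unsigned int maxphyaddr)
{
        /* Old form: reject the base if any mask bit is set. */
        uint64_t old_mask = ~((1ULL << maxphyaddr) - 1) | 0x7f;
        bool old_valid = !(base & old_mask);

        /* New form: below 2^maxphyaddr (legal GPA) and 128-byte aligned. */
        bool new_valid = !(base >> maxphyaddr) && !(base & 0x7f);

        /* The two forms accept exactly the same set of addresses. */
        assert(old_valid == new_valid);
        return new_valid;
}
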
@@ -2172,7 +2175,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                    !intel_pt_validate_cap(vmx->pt_desc.caps,
                                           PT_CAP_single_range_output))
                        return 1;
-               if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)
+               if (!pt_output_base_valid(vcpu, data))
                        return 1;
                vmx->pt_desc.guest.output_base = data;
                break;
@@ -2427,7 +2430,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
                        SECONDARY_EXEC_UNRESTRICTED_GUEST |
                        SECONDARY_EXEC_PAUSE_LOOP_EXITING |
                        SECONDARY_EXEC_DESC |
-                       SECONDARY_EXEC_RDTSCP |
+                       SECONDARY_EXEC_ENABLE_RDTSCP |
                        SECONDARY_EXEC_ENABLE_INVPCID |
                        SECONDARY_EXEC_APIC_REGISTER_VIRT |
                        SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -4078,6 +4081,61 @@ u32 vmx_exec_control(struct vcpu_vmx *vmx)
        return exec_control;
 }
 
+/*
+ * Adjust a single secondary execution control bit to intercept/allow an
+ * instruction in the guest.  This is usually done based on whether or not a
+ * feature has been exposed to the guest in order to correctly emulate faults.
+ */
+static inline void
+vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
+                                 u32 control, bool enabled, bool exiting)
+{
+       /*
+        * If the control is for an opt-in feature, clear the control if the
+        * feature is not exposed to the guest, i.e. not enabled.  If the
+        * control is opt-out, i.e. an exiting control, clear the control if
+        * the feature _is_ exposed to the guest, i.e. exiting/interception is
+        * disabled for the associated instruction.  Note, the caller is
+        * responsible for presetting exec_control to set all supported bits.
+        */
+       if (enabled == exiting)
+               *exec_control &= ~control;
+
+       /*
+        * Update the nested MSR settings so that a nested VMM can/can't set
+        * controls for features that are/aren't exposed to the guest.
+        */
+       if (nested) {
+               if (enabled)
+                       vmx->nested.msrs.secondary_ctls_high |= control;
+               else
+                       vmx->nested.msrs.secondary_ctls_high &= ~control;
+       }
+}
+
+/*
+ * Wrapper macro for the common case of adjusting a secondary execution control
+ * based on a single guest CPUID bit, with a dedicated feature bit.  This also
+ * verifies that the control is actually supported by KVM and hardware.
+ */
+#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
+({                                                                      \
+       bool __enabled;                                                  \
+                                                                        \
+       if (cpu_has_vmx_##name()) {                                      \
+               __enabled = guest_cpuid_has(&(vmx)->vcpu,                \
+                                           X86_FEATURE_##feat_name);    \
+               vmx_adjust_secondary_exec_control(vmx, exec_control,     \
+                       SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
+       }                                                                \
+})
+
+/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
+#define vmx_adjust_sec_exec_feature(vmx, exec_control, lname, uname) \
+       vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, ENABLE_##uname, false)
+
+#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \
+       vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true)
 
 static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 {
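
Note that the SECONDARY_EXEC_RDTSCP -> SECONDARY_EXEC_ENABLE_RDTSCP rename earlier in this patch is what lets the ENABLE_##uname token-pasting in vmx_adjust_sec_exec_feature() resolve to a real control name. Hand-expanding one call from the next hunk (for illustration only, not part of the patch), vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP) becomes:

({
        bool __enabled;

        if (cpu_has_vmx_rdtscp()) {
                __enabled = guest_cpuid_has(&(vmx)->vcpu, X86_FEATURE_RDTSCP);
                vmx_adjust_secondary_exec_control(vmx, &exec_control,
                        SECONDARY_EXEC_ENABLE_RDTSCP, __enabled,
                        false /* opt-in feature, not an exiting control */);
        }
});
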
@@ -4118,7 +4176,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
        if (!enable_pml)
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-       if (vmx_xsaves_supported()) {
+       if (cpu_has_vmx_xsaves()) {
                /* Exposing XSAVES only when XSAVE is exposed */
                bool xsaves_enabled =
                        boot_cpu_has(X86_FEATURE_XSAVE) &&
@@ -4127,101 +4185,29 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
                vcpu->arch.xsaves_enabled = xsaves_enabled;
 
-               if (!xsaves_enabled)
-                       exec_control &= ~SECONDARY_EXEC_XSAVES;
-
-               if (nested) {
-                       if (xsaves_enabled)
-                               vmx->nested.msrs.secondary_ctls_high |=
-                                       SECONDARY_EXEC_XSAVES;
-                       else
-                               vmx->nested.msrs.secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_XSAVES;
-               }
-       }
-
-       if (cpu_has_vmx_rdtscp()) {
-               bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
-               if (!rdtscp_enabled)
-                       exec_control &= ~SECONDARY_EXEC_RDTSCP;
-
-               if (nested) {
-                       if (rdtscp_enabled)
-                               vmx->nested.msrs.secondary_ctls_high |=
-                                       SECONDARY_EXEC_RDTSCP;
-                       else
-                               vmx->nested.msrs.secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_RDTSCP;
-               }
-       }
-
-       if (cpu_has_vmx_invpcid()) {
-               /* Exposing INVPCID only when PCID is exposed */
-               bool invpcid_enabled =
-                       guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PCID);
-
-               if (!invpcid_enabled) {
-                       exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
-                       guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
-               }
-
-               if (nested) {
-                       if (invpcid_enabled)
-                               vmx->nested.msrs.secondary_ctls_high |=
-                                       SECONDARY_EXEC_ENABLE_INVPCID;
-                       else
-                               vmx->nested.msrs.secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_ENABLE_INVPCID;
-               }
-       }
-
-       if (vmx_rdrand_supported()) {
-               bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
-               if (rdrand_enabled)
-                       exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
-
-               if (nested) {
-                       if (rdrand_enabled)
-                               vmx->nested.msrs.secondary_ctls_high |=
-                                       SECONDARY_EXEC_RDRAND_EXITING;
-                       else
-                               vmx->nested.msrs.secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_RDRAND_EXITING;
-               }
+               vmx_adjust_secondary_exec_control(vmx, &exec_control,
+                                                 SECONDARY_EXEC_XSAVES,
+                                                 xsaves_enabled, false);
        }
 
-       if (vmx_rdseed_supported()) {
-               bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
-               if (rdseed_enabled)
-                       exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
+       vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
 
-               if (nested) {
-                       if (rdseed_enabled)
-                               vmx->nested.msrs.secondary_ctls_high |=
-                                       SECONDARY_EXEC_RDSEED_EXITING;
-                       else
-                               vmx->nested.msrs.secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_RDSEED_EXITING;
-               }
-       }
+       /*
+        * Expose INVPCID if and only if PCID is also exposed to the guest.
+        * INVPCID takes a #UD when it's disabled in the VMCS, but a #GP or #PF
+        * if CR4.PCIDE=0.  Enumerating CPUID.INVPCID=1 would lead to incorrect
+        * behavior from the guest perspective (it would expect #GP or #PF).
+        */
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
+               guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
+       vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
-       if (vmx_waitpkg_supported()) {
-               bool waitpkg_enabled =
-                       guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
 
-               if (!waitpkg_enabled)
-                       exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+       vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
+       vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
 
-               if (nested) {
-                       if (waitpkg_enabled)
-                               vmx->nested.msrs.secondary_ctls_high |=
-                                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
-                       else
-                               vmx->nested.msrs.secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
-               }
-       }
+       vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG,
+                                   ENABLE_USR_WAIT_PAUSE, false);
 
        vmx->secondary_exec_control = exec_control;
 }
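
The conversion above is intended to be behavior-preserving: every removed open-coded block is an instance of the same enabled/exiting matrix. Restated as a standalone sketch (adjust_ctl_sketch() is a hypothetical name; the body mirrors vmx_adjust_secondary_exec_control() minus the nested MSR update):

/*
 * opt-in control  (exiting == false): clear the control when the feature
 *                                     is NOT exposed to the guest.
 * opt-out control (exiting == true):  clear the control when the feature
 *                                     IS exposed, e.g. so guest RDRAND
 *                                     runs natively instead of exiting.
 */
static inline u32 adjust_ctl_sketch(u32 exec_control, u32 control,
                                    bool enabled, bool exiting)
{
        if (enabled == exiting)
                exec_control &= ~control;
        return exec_control;
}
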
@@ -4314,7 +4300,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
        if (vmx->vpid != 0)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
-       if (vmx_xsaves_supported())
+       if (cpu_has_vmx_xsaves())
                vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
        if (enable_pml) {
@@ -5127,7 +5113,8 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-       return kvm_emulate_instruction(vcpu, 0);
+       /* Treat an INVD instruction as a NOP and just skip it. */
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -5310,7 +5297,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
         * would also use advanced VM-exit information for EPT violations to
         * reconstruct the page fault error code.
         */
-       if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa)))
+       if (unlikely(kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
                return kvm_emulate_instruction(vcpu, 0);
 
        return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -6323,70 +6310,43 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
        memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
+void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
+
+static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
+{
+       unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+       gate_desc *desc = (gate_desc *)host_idt_base + vector;
+
+       kvm_before_interrupt(vcpu);
+       vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
+       kvm_after_interrupt(vcpu);
+}
+
 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 {
        u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
 
        /* if exit due to PF check for async PF */
-       if (is_page_fault(intr_info)) {
+       if (is_page_fault(intr_info))
                vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
        /* Handle machine checks before interrupts are enabled */
-       } else if (is_machine_check(intr_info)) {
+       else if (is_machine_check(intr_info))
                kvm_machine_check();
        /* We need to handle NMIs before interrupts are enabled */
-       } else if (is_nmi(intr_info)) {
-               kvm_before_interrupt(&vmx->vcpu);
-               asm("int $2");
-               kvm_after_interrupt(&vmx->vcpu);
-       }
+       else if (is_nmi(intr_info))
+               handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
 }
 
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
-       unsigned int vector;
-       unsigned long entry;
-#ifdef CONFIG_X86_64
-       unsigned long tmp;
-#endif
-       gate_desc *desc;
        u32 intr_info = vmx_get_intr_info(vcpu);
 
        if (WARN_ONCE(!is_external_intr(intr_info),
            "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
                return;
 
-       vector = intr_info & INTR_INFO_VECTOR_MASK;
-       desc = (gate_desc *)host_idt_base + vector;
-       entry = gate_offset(desc);
-
-       kvm_before_interrupt(vcpu);
-
-       asm volatile(
-#ifdef CONFIG_X86_64
-               "mov %%rsp, %[sp]\n\t"
-               "and $-16, %%rsp\n\t"
-               "push %[ss]\n\t"
-               "push %[sp]\n\t"
-#endif
-               "pushf\n\t"
-               "push %[cs]\n\t"
-               CALL_NOSPEC
-               :
-#ifdef CONFIG_X86_64
-               [sp]"=&r"(tmp),
-#endif
-               ASM_CALL_CONSTRAINT
-               :
-               [thunk_target]"r"(entry),
-#ifdef CONFIG_X86_64
-               [ss]"i"(__KERNEL_DS),
-#endif
-               [cs]"i"(__KERNEL_CS)
-       );
-
-       kvm_after_interrupt(vcpu);
+       handle_interrupt_nmi_irqoff(vcpu, intr_info);
 }
-STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
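
handle_interrupt_nmi_irqoff() now funnels both NMIs and external interrupts through vmx_do_interrupt_nmi_irqoff(), an assembly stub (the declaration here implies the body lives in assembly; upstream it lands in vmx/vmenter.S) that builds an interrupt frame and calls the host handler. This replaces both the open-coded "int $2" for NMIs and the inline-asm IDT dispatch for external interrupts, along with its STACK_FRAME_NON_STANDARD annotation. The handler address is recovered from the host IDT via gate_offset(); for reference, a sketch of how the split offset fields of a 64-bit gate descriptor are reassembled, mirroring the kernel's desc_defs.h (illustration only):

/* 64-bit IDT gate: the handler offset is split across three fields. */
static inline unsigned long gate_offset_sketch(const gate_desc *g)
{
        return g->offset_low |
               ((unsigned long)g->offset_middle << 16) |
               ((unsigned long)g->offset_high << 32);
}
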
@@ -7277,14 +7237,14 @@ static __init void vmx_set_cpu_caps(void)
 
        /* CPUID 0xD.1 */
        supported_xss = 0;
-       if (!vmx_xsaves_supported())
+       if (!cpu_has_vmx_xsaves())
                kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
 
        /* CPUID 0x80000001 */
        if (!cpu_has_vmx_rdtscp())
                kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
 
-       if (vmx_waitpkg_supported())
+       if (cpu_has_vmx_waitpkg())
                kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
 }
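
cpu_has_vmx_waitpkg() only reports that the VMX execution control exists; kvm_cpu_cap_check_and_set() additionally gates the advertised feature on host CPUID support. A sketch of that pattern, assuming the helper matches arch/x86/kvm/cpuid.h of this era:

static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature)
{
        /* Advertise the feature only if the host CPU actually has it. */
        if (boot_cpu_has(x86_feature))
                kvm_cpu_cap_set(x86_feature);
}
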
 
@@ -7340,7 +7300,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
         * Because it is marked as EmulateOnUD, we need to intercept it here.
         */
        case x86_intercept_rdtscp:
-               if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+               if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
                        exception->vector = UD_VECTOR;
                        exception->error_code_valid = false;
                        return X86EMUL_PROPAGATE_FAULT;