Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[linux-2.6-microblaze.git] / arch/x86/kvm/vmx/vmx.c
index 305237d..c37a89e 100644
@@ -50,6 +50,8 @@
 #include <asm/spec-ctrl.h>
 #include <asm/vmx.h>
 
+#include <trace/events/ipi.h>
+
 #include "capabilities.h"
 #include "cpuid.h"
 #include "hyperv.h"
@@ -160,7 +162,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 
 /*
  * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic and PT MSRs are handled specially.
+ * In addition to these, the x2apic, PT and LBR MSRs are handled specially.
  */
 static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
        MSR_IA32_SPEC_CTRL,
@@ -668,25 +670,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
        return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static int possible_passthrough_msr_slot(u32 msr)
-{
-       u32 i;
-
-       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
-               if (vmx_possible_passthrough_msrs[i] == msr)
-                       return i;
-
-       return -ENOENT;
-}
-
-static bool is_valid_passthrough_msr(u32 msr)
+static int vmx_get_passthrough_msr_slot(u32 msr)
 {
-       bool r;
+       int i;
 
        switch (msr) {
        case 0x800 ... 0x8ff:
                /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-               return true;
+               return -ENOENT;
        case MSR_IA32_RTIT_STATUS:
        case MSR_IA32_RTIT_OUTPUT_BASE:
        case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -701,14 +692,16 @@ static bool is_valid_passthrough_msr(u32 msr)
        case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
        case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
                /* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-               return true;
+               return -ENOENT;
        }
 
-       r = possible_passthrough_msr_slot(msr) != -ENOENT;
-
-       WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+               if (vmx_possible_passthrough_msrs[i] == msr)
+                       return i;
+       }
 
-       return r;
+       WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+       return -ENOENT;
 }
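
The merged helper above collapses the old is_valid_passthrough_msr()/possible_passthrough_msr_slot() pair into one lookup: the specially handled ranges (x2APIC, PT, LBR) report -ENOENT without complaint, table hits return a slot index, and anything else trips the WARN. Below is a minimal standalone C sketch of that return convention; the demo_ names and table contents are hypothetical, not the kernel's.

        #include <errno.h>
        #include <stdio.h>

        #define DEMO_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

        /* Toy stand-in for vmx_possible_passthrough_msrs[]; values are arbitrary. */
        static const unsigned int demo_passthrough_msrs[] = { 0x48, 0xd9, 0x600 };

        static int demo_get_passthrough_slot(unsigned int msr)
        {
                unsigned int i;

                /* Ranges handled elsewhere: no shadow slot, but not an error. */
                if (msr >= 0x800 && msr <= 0x8ff)
                        return -ENOENT;

                for (i = 0; i < DEMO_ARRAY_SIZE(demo_passthrough_msrs); i++)
                        if (demo_passthrough_msrs[i] == msr)
                                return (int)i;

                /* Anything else means the table needs updating. */
                fprintf(stderr, "unexpected MSR %#x\n", msr);
                return -ENOENT;
        }

        int main(void)
        {
                printf("%d %d %d\n", demo_get_passthrough_slot(0xd9),
                       demo_get_passthrough_slot(0x800),
                       demo_get_passthrough_slot(0x1234));
                return 0;
        }
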
 
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -1291,8 +1284,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
        u16 fs_sel, gs_sel;
        int i;
 
-       vmx->req_immediate_exit = false;
-
        /*
         * Note that guest MSRs to be saved/restored can also be changed
         * when guest state is loaded. This happens when guest transitions
@@ -3964,6 +3955,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       int idx;
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
@@ -3973,16 +3965,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
        /*
         * Mark the desired intercept state in shadow bitmap, this is needed
         * for resync when the MSR filters change.
-       */
-       if (is_valid_passthrough_msr(msr)) {
-               int idx = possible_passthrough_msr_slot(msr);
-
-               if (idx != -ENOENT) {
-                       if (type & MSR_TYPE_R)
-                               clear_bit(idx, vmx->shadow_msr_intercept.read);
-                       if (type & MSR_TYPE_W)
-                               clear_bit(idx, vmx->shadow_msr_intercept.write);
-               }
+        */
+       idx = vmx_get_passthrough_msr_slot(msr);
+       if (idx >= 0) {
+               if (type & MSR_TYPE_R)
+                       clear_bit(idx, vmx->shadow_msr_intercept.read);
+               if (type & MSR_TYPE_W)
+                       clear_bit(idx, vmx->shadow_msr_intercept.write);
        }
 
        if ((type & MSR_TYPE_R) &&
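
With the helper returning a slot index directly, the caller's shadow bookkeeping reduces to a single idx >= 0 check, as in the hunk above. A compact sketch of that pattern under the same hypothetical lookup (bit helpers and names are illustrative, not the kernel's); the enable path later in the diff is symmetric, setting bits instead of clearing them.

        #include <errno.h>
        #include <stdio.h>

        #define DEMO_TYPE_R 1
        #define DEMO_TYPE_W 2

        static unsigned long demo_shadow_read;
        static unsigned long demo_shadow_write;

        /* One-entry toy table: MSR 0x48 lives in slot 0. */
        static int demo_get_passthrough_slot(unsigned int msr)
        {
                return msr == 0x48 ? 0 : -ENOENT;
        }

        static void demo_disable_intercept(unsigned int msr, int type)
        {
                int idx = demo_get_passthrough_slot(msr);

                /* Only tracked MSRs get shadow state; -ENOENT is silently skipped. */
                if (idx >= 0) {
                        if (type & DEMO_TYPE_R)
                                demo_shadow_read &= ~(1UL << idx);
                        if (type & DEMO_TYPE_W)
                                demo_shadow_write &= ~(1UL << idx);
                }
                /* The real code then clears the hardware MSR bitmap as well. */
        }

        int main(void)
        {
                demo_shadow_read = demo_shadow_write = ~0UL;
                demo_disable_intercept(0x48, DEMO_TYPE_R | DEMO_TYPE_W);
                printf("%lx %lx\n", demo_shadow_read, demo_shadow_write);
                return 0;
        }
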
@@ -4008,6 +3997,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       int idx;
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
@@ -4017,16 +4007,13 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
        /*
         * Mark the desired intercept state in shadow bitmap, this is needed
         * for resync when the MSR filter changes.
-       */
-       if (is_valid_passthrough_msr(msr)) {
-               int idx = possible_passthrough_msr_slot(msr);
-
-               if (idx != -ENOENT) {
-                       if (type & MSR_TYPE_R)
-                               set_bit(idx, vmx->shadow_msr_intercept.read);
-                       if (type & MSR_TYPE_W)
-                               set_bit(idx, vmx->shadow_msr_intercept.write);
-               }
+        */
+       idx = vmx_get_passthrough_msr_slot(msr);
+       if (idx >= 0) {
+               if (type & MSR_TYPE_R)
+                       set_bit(idx, vmx->shadow_msr_intercept.read);
+               if (type & MSR_TYPE_W)
+                       set_bit(idx, vmx->shadow_msr_intercept.write);
        }
 
        if (type & MSR_TYPE_R)
@@ -4137,6 +4124,9 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 i;
 
+       if (!cpu_has_vmx_msr_bitmap())
+               return;
+
        /*
         * Redo intercept permissions for MSRs that KVM is passing through to
         * the guest.  Disabling interception will check the new MSR filter and
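
The early return added above mirrors the guard already present in the enable/disable helpers: without MSR-bitmap support every MSR access exits anyway, so there is nothing to resync when the filter changes. A tiny sketch of that guard-then-resync shape (all names hypothetical):

        #include <stdbool.h>
        #include <stdio.h>

        static bool demo_has_msr_bitmap;        /* stand-in for cpu_has_vmx_msr_bitmap() */

        static void demo_resync_one(unsigned int slot)
        {
                printf("resyncing slot %u\n", slot);
        }

        static void demo_msr_filter_changed(void)
        {
                unsigned int i;

                /* Without a bitmap every access exits already; nothing to redo. */
                if (!demo_has_msr_bitmap)
                        return;

                for (i = 0; i < 3; i++)
                        demo_resync_one(i);
        }

        int main(void)
        {
                demo_msr_filter_changed();              /* no output */
                demo_has_msr_bitmap = true;
                demo_msr_filter_changed();              /* resyncs slots 0..2 */
                return 0;
        }
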
@@ -5576,10 +5566,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 
        reg = DEBUG_REG_ACCESS_REG(exit_qualification);
        if (exit_qualification & TYPE_MOV_FROM_DR) {
-               unsigned long val;
-
-               kvm_get_dr(vcpu, dr, &val);
-               kvm_register_write(vcpu, reg, val);
+               kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
                err = 0;
        } else {
                err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
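
The hunk above relies on kvm_get_dr() now returning the debug-register value instead of filling an out-parameter, which lets the MOV-from-DR path collapse into a single expression. A sketch of the same out-parameter-to-return-value refactor on a hypothetical accessor:

        #include <stdio.h>

        static unsigned long demo_dr[8] = { [6] = 0xffff0ff0 };

        /* Old style: value via out-parameter, return is a status code. */
        static int demo_get_dr_old(int dr, unsigned long *val)
        {
                *val = demo_dr[dr];
                return 0;
        }

        /* New style: the value is the return, so callers compose it directly. */
        static unsigned long demo_get_dr(int dr)
        {
                return demo_dr[dr];
        }

        static void demo_register_write(int reg, unsigned long val)
        {
                printf("reg %d <- %#lx\n", reg, val);
        }

        int main(void)
        {
                unsigned long val;

                demo_get_dr_old(6, &val);               /* two statements before... */
                demo_register_write(0, val);

                demo_register_write(0, demo_get_dr(6)); /* ...one expression after */
                return 0;
        }
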
@@ -6001,22 +5988,46 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
+                                                  bool force_immediate_exit)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       if (!vmx->req_immediate_exit &&
-           !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
-               kvm_lapic_expired_hv_timer(vcpu);
+       /*
+        * In the *extremely* unlikely scenario that this is a spurious VM-Exit
+        * due to the timer expiring while it was "soft" disabled, just eat the
+        * exit and re-enter the guest.
+        */
+       if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
                return EXIT_FASTPATH_REENTER_GUEST;
-       }
 
-       return EXIT_FASTPATH_NONE;
+       /*
+        * If the timer expired because KVM used it to force an immediate exit,
+        * then mission accomplished.
+        */
+       if (force_immediate_exit)
+               return EXIT_FASTPATH_EXIT_HANDLED;
+
+       /*
+        * If L2 is active, go down the slow path as emulating the guest timer
+        * expiration likely requires synthesizing a nested VM-Exit.
+        */
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
+       kvm_lapic_expired_hv_timer(vcpu);
+       return EXIT_FASTPATH_REENTER_GUEST;
 }
 
 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
-       handle_fastpath_preemption_timer(vcpu);
+       /*
+        * This non-fastpath handler is reached if and only if the preemption
+        * timer was being used to emulate a guest timer while L2 is active.
+        * All other scenarios are supposed to be handled in the fastpath.
+        */
+       WARN_ON_ONCE(!is_guest_mode(vcpu));
+       kvm_lapic_expired_hv_timer(vcpu);
        return 1;
 }
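
The rewritten fastpath above distinguishes three outcomes in order: a spurious exit while the timer was soft-disabled (re-enter), an exit that KVM itself forced via force_immediate_exit (handled), and a genuine guest timer expiry, which re-enters after expiring the LAPIC timer unless L2 is active and the slow path must synthesize a nested VM-Exit. A condensed decision-table sketch of that ordering, using hypothetical enums and flags:

        #include <stdbool.h>
        #include <stdio.h>

        enum demo_fastpath {
                DEMO_FASTPATH_NONE,
                DEMO_FASTPATH_REENTER_GUEST,
                DEMO_FASTPATH_EXIT_HANDLED,
        };

        static enum demo_fastpath demo_preemption_timer(bool soft_disabled,
                                                        bool force_immediate_exit,
                                                        bool guest_mode)
        {
                if (soft_disabled)                      /* spurious: timer was "off" */
                        return DEMO_FASTPATH_REENTER_GUEST;
                if (force_immediate_exit)               /* KVM wanted this exit */
                        return DEMO_FASTPATH_EXIT_HANDLED;
                if (guest_mode)                         /* may need a nested VM-Exit */
                        return DEMO_FASTPATH_NONE;
                /* else: expire the emulated guest timer, then re-enter */
                return DEMO_FASTPATH_REENTER_GUEST;
        }

        int main(void)
        {
                printf("%d %d %d %d\n",
                       demo_preemption_timer(true, false, false),
                       demo_preemption_timer(false, true, false),
                       demo_preemption_timer(false, false, true),
                       demo_preemption_timer(false, false, false));
                return 0;
        }
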
 
@@ -6519,7 +6530,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
                vcpu->run->internal.data[0] = vectoring_info;
                vcpu->run->internal.data[1] = exit_reason.full;
-               vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+               vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
                if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
                        vcpu->run->internal.data[ndata++] =
                                vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -7158,13 +7169,13 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                                        msrs[i].host, false);
 }
 
-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 tscl;
        u32 delta_tsc;
 
-       if (vmx->req_immediate_exit) {
+       if (force_immediate_exit) {
                vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
                vmx->loaded_vmcs->hv_timer_soft_disabled = false;
        } else if (vmx->hv_deadline_tsc != -1) {
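
Programming the VMX preemption timer to zero, as the hunk above does when force_immediate_exit is set, makes the very next VM-Entry exit immediately; this replaces the old req_immediate_exit flag that the rest of the diff removes. A simplified sketch of the three-way choice (forced exit, armed deadline, or soft-disabled), with placeholder field and function names:

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        /* Placeholder for the per-VM state the real code reads. */
        struct demo_timer_state {
                int64_t hv_deadline_tsc;        /* -1 means "no deadline armed" */
                uint64_t now_tsc;
                bool soft_disabled;
        };

        static uint32_t demo_pick_timer_value(struct demo_timer_state *s,
                                              bool force_immediate_exit)
        {
                if (force_immediate_exit) {
                        s->soft_disabled = false;
                        return 0;               /* expire at the next VM-Entry */
                }
                if (s->hv_deadline_tsc != -1) {
                        uint64_t delta = (uint64_t)s->hv_deadline_tsc - s->now_tsc;

                        s->soft_disabled = false;
                        return (uint32_t)delta; /* real code also scales by a shift */
                }
                s->soft_disabled = true;        /* ignore any stale expiry */
                return UINT32_MAX;
        }

        int main(void)
        {
                struct demo_timer_state s = { .hv_deadline_tsc = 1500, .now_tsc = 1000 };

                printf("%u\n", (unsigned)demo_pick_timer_value(&s, true));   /* 0 */
                printf("%u\n", (unsigned)demo_pick_timer_value(&s, false));  /* 500 */
                s.hv_deadline_tsc = -1;
                printf("%u\n", (unsigned)demo_pick_timer_value(&s, false));  /* sentinel */
                return 0;
        }
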
@@ -7217,13 +7228,22 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
        barrier_nospec();
 }
 
-static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
+                                            bool force_immediate_exit)
 {
+       /*
+        * If L2 is active, some VMX preemption timer exits can still be handled
+        * in the fastpath; all other exits must use the slow path.
+        */
+       if (is_guest_mode(vcpu) &&
+           to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER)
+               return EXIT_FASTPATH_NONE;
+
        switch (to_vmx(vcpu)->exit_reason.basic) {
        case EXIT_REASON_MSR_WRITE:
                return handle_fastpath_set_msr_irqoff(vcpu);
        case EXIT_REASON_PREEMPTION_TIMER:
-               return handle_fastpath_preemption_timer(vcpu);
+               return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
        default:
                return EXIT_FASTPATH_NONE;
        }
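
Hoisting the is_guest_mode() check into vmx_exit_handlers_fastpath(), as above, lets preemption-timer exits stay on the fastpath even for L2 while every other L2 exit still falls back to the slow path. A sketch of that dispatch shape with hypothetical reason codes:

        #include <stdbool.h>
        #include <stdio.h>

        enum demo_exit_reason {
                DEMO_EXIT_MSR_WRITE,
                DEMO_EXIT_PREEMPTION_TIMER,
                DEMO_EXIT_OTHER,
        };

        enum demo_fastpath {
                DEMO_FASTPATH_NONE,
                DEMO_FASTPATH_HANDLED,
        };

        static enum demo_fastpath demo_fastpath_dispatch(enum demo_exit_reason reason,
                                                         bool guest_mode)
        {
                /* For L2, only the preemption timer is eligible for the fastpath. */
                if (guest_mode && reason != DEMO_EXIT_PREEMPTION_TIMER)
                        return DEMO_FASTPATH_NONE;

                switch (reason) {
                case DEMO_EXIT_MSR_WRITE:
                case DEMO_EXIT_PREEMPTION_TIMER:
                        return DEMO_FASTPATH_HANDLED;
                default:
                        return DEMO_FASTPATH_NONE;
                }
        }

        int main(void)
        {
                printf("%d %d %d\n",
                       demo_fastpath_dispatch(DEMO_EXIT_MSR_WRITE, true),        /* none */
                       demo_fastpath_dispatch(DEMO_EXIT_PREEMPTION_TIMER, true),
                       demo_fastpath_dispatch(DEMO_EXIT_MSR_WRITE, false));
                return 0;
        }
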
@@ -7286,7 +7306,7 @@ out:
        guest_state_exit_irqoff();
 }
 
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long cr3, cr4;
@@ -7313,7 +7333,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                return EXIT_FASTPATH_NONE;
        }
 
-       trace_kvm_entry(vcpu);
+       trace_kvm_entry(vcpu, force_immediate_exit);
 
        if (vmx->ple_window_dirty) {
                vmx->ple_window_dirty = false;
@@ -7372,7 +7392,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmx_passthrough_lbr_msrs(vcpu);
 
        if (enable_preemption_timer)
-               vmx_update_hv_timer(vcpu);
+               vmx_update_hv_timer(vcpu, force_immediate_exit);
+       else if (force_immediate_exit)
+               smp_send_reschedule(vcpu->cpu);
 
        kvm_wait_lapic_expire(vcpu);
 
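When the preemption timer is unavailable, the hunk above falls back to a self-IPI (smp_send_reschedule() targeting the vCPU's own CPU) so a forced exit still happens right after VM-Entry; that is presumably also why the diff adds the trace/events/ipi.h include near the top. A sketch of the either/or arming step, with stub functions standing in for the real primitives:

        #include <stdbool.h>
        #include <stdio.h>

        /* Stubs standing in for the real timer/IPI primitives. */
        static void demo_arm_preemption_timer(bool force_immediate_exit)
        {
                printf("timer armed%s\n",
                       force_immediate_exit ? " (zero = exit now)" : "");
        }

        static void demo_send_self_ipi(int cpu)
        {
                printf("self-IPI to cpu %d\n", cpu);
        }

        static void demo_prepare_entry(bool timer_supported,
                                       bool force_immediate_exit, int cpu)
        {
                if (timer_supported)
                        demo_arm_preemption_timer(force_immediate_exit);
                else if (force_immediate_exit)
                        demo_send_self_ipi(cpu);        /* pending IRQ forces the exit */
        }

        int main(void)
        {
                demo_prepare_entry(true, true, 0);
                demo_prepare_entry(false, true, 0);
                demo_prepare_entry(false, false, 0);    /* nothing to do */
                return 0;
        }
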
@@ -7436,10 +7458,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
 
-       if (is_guest_mode(vcpu))
-               return EXIT_FASTPATH_NONE;
-
-       return vmx_exit_handlers_fastpath(vcpu);
+       return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
 }
 
 static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -7919,11 +7938,6 @@ static __init void vmx_set_cpu_caps(void)
                kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
 }
 
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
-       to_vmx(vcpu)->req_immediate_exit = true;
-}
-
 static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
                                  struct x86_instruction_info *info)
 {
@@ -8376,8 +8390,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .check_intercept = vmx_check_intercept,
        .handle_exit_irqoff = vmx_handle_exit_irqoff,
 
-       .request_immediate_exit = vmx_request_immediate_exit,
-
        .sched_in = vmx_sched_in,
 
        .cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8637,7 +8649,6 @@ static __init int hardware_setup(void)
        if (!enable_preemption_timer) {
                vmx_x86_ops.set_hv_timer = NULL;
                vmx_x86_ops.cancel_hv_timer = NULL;
-               vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
        }
 
        kvm_caps.supported_mce_cap |= MCG_LMCE_P;