Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[linux-2.6-microblaze.git] / arch / x86 / kvm / vmx.c
index 6970249..a2b804e 100644 (file)
@@ -200,6 +200,8 @@ struct loaded_vmcs {
        int cpu;
        bool launched;
        bool nmi_known_unmasked;
+       unsigned long vmcs_host_cr3;    /* May not match real cr3 */
+       unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -600,8 +602,6 @@ struct vcpu_vmx {
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
                u64           msr_host_bndcfgs;
-               unsigned long vmcs_host_cr3;    /* May not match real cr3 */
-               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        } host_state;
        struct {
                int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        struct pi_desc old, new;
        unsigned int dest;
 
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       /*
+        * In case of hot-plug or hot-unplug, we may have to undo
+        * vmx_vcpu_pi_put even if there is no assigned device.  And we
+        * always keep PI.NDST up to date for simplicity: it makes the
+        * code easier, and CPU migration is not a fast path.
+        */
+       if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
                return;
 
+       /*
+        * First handle the simple case where no cmpxchg is necessary; just
+        * allow posting non-urgent interrupts.
+        *
+        * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+        * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+        * expects the VCPU to be on the blocked_vcpu_list that matches
+        * PI.NDST.
+        */
+       if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+           vcpu->cpu == cpu) {
+               pi_clear_sn(pi_desc);
+               return;
+       }
+
+       /* The full case.  */
        do {
                old.control = new.control = pi_desc->control;
 
-               /*
-                * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
-                * are two possible cases:
-                * 1. After running 'pre_block', context switch
-                *    happened. For this case, 'sn' was set in
-                *    vmx_vcpu_put(), so we need to clear it here.
-                * 2. After running 'pre_block', we were blocked,
-                *    and woken up by some other guy. For this case,
-                *    we don't need to do anything, 'pi_post_block'
-                *    will do everything for us. However, we cannot
-                *    check whether it is case #1 or case #2 here
-                *    (maybe, not needed), so we also clear sn here,
-                *    I think it is not a big deal.
-                */
-               if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
-                       if (vcpu->cpu != cpu) {
-                               dest = cpu_physical_id(cpu);
-
-                               if (x2apic_enabled())
-                                       new.ndst = dest;
-                               else
-                                       new.ndst = (dest << 8) & 0xFF00;
-                       }
+               dest = cpu_physical_id(cpu);
 
-                       /* set 'NV' to 'notification vector' */
-                       new.nv = POSTED_INTR_VECTOR;
-               }
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
 
-               /* Allow posting non-urgent interrupts */
                new.sn = 0;
-       } while (cmpxchg(&pi_desc->control, old.control,
-                       new.control) != old.control);
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
 }
 
 static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
         */
        cr3 = __read_cr3();
        vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
-       vmx->host_state.vmcs_host_cr3 = cr3;
+       vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 
        /* Save the most likely value for this task's CR4 in the VMCS. */
        cr4 = cr4_read_shadow();
        vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
-       vmx->host_state.vmcs_host_cr4 = cr4;
+       vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 
        vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
        cr3 = __get_current_cr3_fast();
-       if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+       if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
                vmcs_writel(HOST_CR3, cr3);
-               vmx->host_state.vmcs_host_cr3 = cr3;
+               vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
        }
 
        cr4 = cr4_read_shadow();
-       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+       if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
                vmcs_writel(HOST_CR4, cr4);
-               vmx->host_state.vmcs_host_cr4 = cr4;
+               vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
        }
 
        /* When single-stepping over STI and MOV SS, we must clear the
@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
        vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
+       /*
+        * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+        * or POSTED_INTR_WAKEUP_VECTOR.
+        */
+       vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+       vmx->pi_desc.sn = 1;
+
        return &vmx->vcpu;
 
 free_vmcs:
@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
        WARN_ON(!is_guest_mode(vcpu));
 
-       if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+       if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+               !to_vmx(vcpu)->nested.nested_run_pending) {
                vmcs12->vm_exit_intr_error_code = fault->error_code;
                nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                                  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
        kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+       struct pi_desc old, new;
+       unsigned int dest;
+
+       do {
+               old.control = new.control = pi_desc->control;
+               WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+                    "Wakeup handler not enabled while the VCPU is blocked\n");
+
+               dest = cpu_physical_id(vcpu->cpu);
+
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
+
+               /* set 'NV' to 'notification vector' */
+               new.nv = POSTED_INTR_VECTOR;
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
+
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_del(&vcpu->blocked_vcpu_list);
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               vcpu->pre_pcpu = -1;
+       }
+}
+
 /*
  * This routine does the following things for vCPU which is going
  * to be blocked if VT-d PI is enabled.
@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-       unsigned long flags;
        unsigned int dest;
        struct pi_desc old, new;
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
                !kvm_vcpu_apicv_active(vcpu))
                return 0;
 
-       vcpu->pre_pcpu = vcpu->cpu;
-       spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-                         vcpu->pre_pcpu), flags);
-       list_add_tail(&vcpu->blocked_vcpu_list,
-                     &per_cpu(blocked_vcpu_on_cpu,
-                     vcpu->pre_pcpu));
-       spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-                              vcpu->pre_pcpu), flags);
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+               vcpu->pre_pcpu = vcpu->cpu;
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_add_tail(&vcpu->blocked_vcpu_list,
+                             &per_cpu(blocked_vcpu_on_cpu,
+                                      vcpu->pre_pcpu));
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+       }
 
        do {
                old.control = new.control = pi_desc->control;
 
-               /*
-                * We should not block the vCPU if
-                * an interrupt is posted for it.
-                */
-               if (pi_test_on(pi_desc) == 1) {
-                       spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-                                         vcpu->pre_pcpu), flags);
-                       list_del(&vcpu->blocked_vcpu_list);
-                       spin_unlock_irqrestore(
-                                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                                       vcpu->pre_pcpu), flags);
-                       vcpu->pre_pcpu = -1;
-
-                       return 1;
-               }
-
                WARN((pi_desc->sn == 1),
                     "Warning: SN field of posted-interrupts "
                     "is set before blocking\n");
@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 
                /* set 'NV' to 'wakeup vector' */
                new.nv = POSTED_INTR_WAKEUP_VECTOR;
-       } while (cmpxchg(&pi_desc->control, old.control,
-                       new.control) != old.control);
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
 
-       return 0;
+       /* We should not block the vCPU if an interrupt is posted for it.  */
+       if (pi_test_on(pi_desc) == 1)
+               __pi_post_block(vcpu);
+
+       local_irq_enable();
+       return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-       struct pi_desc old, new;
-       unsigned int dest;
-       unsigned long flags;
-
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       if (vcpu->pre_pcpu == -1)
                return;
 
-       do {
-               old.control = new.control = pi_desc->control;
-
-               dest = cpu_physical_id(vcpu->cpu);
-
-               if (x2apic_enabled())
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
-
-               /* Allow posting non-urgent interrupts */
-               new.sn = 0;
-
-               /* set 'NV' to 'notification vector' */
-               new.nv = POSTED_INTR_VECTOR;
-       } while (cmpxchg(&pi_desc->control, old.control,
-                       new.control) != old.control);
-
-       if(vcpu->pre_pcpu != -1) {
-               spin_lock_irqsave(
-                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                       vcpu->pre_pcpu), flags);
-               list_del(&vcpu->blocked_vcpu_list);
-               spin_unlock_irqrestore(
-                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                       vcpu->pre_pcpu), flags);
-               vcpu->pre_pcpu = -1;
-       }
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       __pi_post_block(vcpu);
+       local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)