kvm: x86: Defer setting of CR2 until #PF delivery
author Jim Mattson <jmattson@google.com>
Tue, 16 Oct 2018 21:29:22 +0000 (14:29 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 17 Oct 2018 17:07:43 +0000 (19:07 +0200)
When exception payloads are enabled by userspace (which is not yet
possible) and a #PF is raised in L2, defer the setting of CR2 until
the #PF is delivered. This allows the L1 hypervisor to intercept the
fault before CR2 is modified.

For backwards compatibility, when exception payloads are not enabled
by userspace, kvm_multiple_exception modifies CR2 when the #PF
exception is raised.
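
In outline, injection is split into a queue step and a delivery step. A
simplified sketch of the resulting flow (condensed from the patch below,
not literal kernel code; the names are those introduced by the patch):

	/* Queue time (kvm_multiple_exception): stash CR2 as a payload. */
	vcpu->arch.exception.has_payload = true;
	vcpu->arch.exception.payload = fault->address;
	if (!vcpu->kvm->arch.exception_payload_enabled || !is_guest_mode(vcpu))
		kvm_deliver_exception_payload(vcpu);	/* legacy: write CR2 immediately */

	/* Delivery time (svm_queue_exception / vmx_queue_exception). */
	kvm_deliver_exception_payload(vcpu);		/* for #PF, writes vcpu->arch.cr2 */

If L1 intercepts the #PF instead, the payload is forwarded to L1 via
exit_info_2 (SVM) or the exit qualification (VMX), and L2's CR2 is left
unmodified.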

Reported-by: Jim Mattson <jmattson@google.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 47b0721..1086beb 100644
@@ -805,6 +805,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
            nested_svm_check_exception(svm, nr, has_error_code, error_code))
                return;
 
+       kvm_deliver_exception_payload(&svm->vcpu);
+
        if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
                unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
 
@@ -2965,16 +2967,13 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
        svm->vmcb->control.exit_info_1 = error_code;
 
        /*
-        * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
-        * The fix is to add the ancillary datum (CR2 or DR6) to structs
-        * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
-        * written only when inject_pending_event runs (DR6 would written here
-        * too).  This should be conditional on a new capability---if the
-        * capability is disabled, kvm_multiple_exception would write the
-        * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+        * EXITINFO2 is undefined for all exception intercepts other
+        * than #PF.
         */
        if (svm->vcpu.arch.exception.nested_apf)
                svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+       else if (svm->vcpu.arch.exception.has_payload)
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
        else
                svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8d7c60f..0c7efb1 100644
@@ -3292,27 +3292,24 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        unsigned int nr = vcpu->arch.exception.nr;
+       bool has_payload = vcpu->arch.exception.has_payload;
+       unsigned long payload = vcpu->arch.exception.payload;
 
        if (nr == PF_VECTOR) {
                if (vcpu->arch.exception.nested_apf) {
                        *exit_qual = vcpu->arch.apf.nested_apf_token;
                        return 1;
                }
-               /*
-                * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
-                * The fix is to add the ancillary datum (CR2 or DR6) to structs
-                * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
-                * can be written only when inject_pending_event runs.  This should be
-                * conditional on a new capability---if the capability is disabled,
-                * kvm_multiple_exception would write the ancillary information to
-                * CR2 or DR6, for backwards ABI-compatibility.
-                */
                if (nested_vmx_is_page_fault_vmexit(vmcs12,
                                                    vcpu->arch.exception.error_code)) {
-                       *exit_qual = vcpu->arch.cr2;
+                       *exit_qual = has_payload ? payload : vcpu->arch.cr2;
                        return 1;
                }
        } else {
+               /*
+                * FIXME: we must not write DR6 when L1 intercepts an
+                * L2 #DB exception.
+                */
                if (vmcs12->exception_bitmap & (1u << nr)) {
                        if (nr == DB_VECTOR) {
                                *exit_qual = vcpu->arch.dr6;
@@ -3349,6 +3346,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
        u32 error_code = vcpu->arch.exception.error_code;
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
+       kvm_deliver_exception_payload(vcpu);
+
        if (has_error_code) {
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
                intr_info |= INTR_INFO_DELIVER_CODE_MASK;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 25a2bac..4e9536c 100644
@@ -400,6 +400,26 @@ static int exception_type(int vector)
        return EXCPT_FAULT;
 }
 
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
+{
+       unsigned nr = vcpu->arch.exception.nr;
+       bool has_payload = vcpu->arch.exception.has_payload;
+       unsigned long payload = vcpu->arch.exception.payload;
+
+       if (!has_payload)
+               return;
+
+       switch (nr) {
+       case PF_VECTOR:
+               vcpu->arch.cr2 = payload;
+               break;
+       }
+
+       vcpu->arch.exception.has_payload = false;
+       vcpu->arch.exception.payload = 0;
+}
+EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+
 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
                unsigned nr, bool has_error, u32 error_code,
                bool has_payload, unsigned long payload, bool reinject)
@@ -441,6 +461,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
                vcpu->arch.exception.error_code = error_code;
                vcpu->arch.exception.has_payload = has_payload;
                vcpu->arch.exception.payload = payload;
+               /*
+                * In guest mode, payload delivery should be deferred,
+                * so that the L1 hypervisor can intercept #PF before
+                * CR2 is modified.  However, for ABI compatibility
+                * with KVM_GET_VCPU_EVENTS and KVM_SET_VCPU_EVENTS,
+                * we can't delay payload delivery unless userspace
+                * has enabled this functionality via the per-VM
+                * capability, KVM_CAP_EXCEPTION_PAYLOAD.
+                */
+               if (!vcpu->kvm->arch.exception_payload_enabled ||
+                   !is_guest_mode(vcpu))
+                       kvm_deliver_exception_payload(vcpu);
                return;
        }
 
@@ -486,6 +518,13 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
 
+static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
+                                   u32 error_code, unsigned long payload)
+{
+       kvm_multiple_exception(vcpu, nr, true, error_code,
+                              true, payload, false);
+}
+
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 {
        if (err)
@@ -502,11 +541,13 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
        ++vcpu->stat.pf_guest;
        vcpu->arch.exception.nested_apf =
                is_guest_mode(vcpu) && fault->async_page_fault;
-       if (vcpu->arch.exception.nested_apf)
+       if (vcpu->arch.exception.nested_apf) {
                vcpu->arch.apf.nested_apf_token = fault->address;
-       else
-               vcpu->arch.cr2 = fault->address;
-       kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
+               kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
+       } else {
+               kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
+                                       fault->address);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 67b9568..224cd0a 100644
@@ -266,6 +266,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
 
 int handle_ud(struct kvm_vcpu *vcpu);
 
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
+
 void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
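
For context, once KVM_CAP_EXCEPTION_PAYLOAD becomes enableable from
userspace (the commit message notes this is not yet possible at this
point in the series), a VMM would be expected to opt in per VM roughly
as follows; a hypothetical userspace snippet, assuming the capability is
wired up through the standard per-VM KVM_ENABLE_CAP ioctl:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int enable_exception_payload(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_EXCEPTION_PAYLOAD,  /* assumed; not yet exposed here */
		};

		/* 0 on success, -1 with errno set on failure or if unsupported. */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

Without this opt-in, kvm_multiple_exception keeps the old behavior and
writes CR2 at queue time.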