KVM: x86: interrupt based APF 'page ready' event delivery

author Vitaly Kuznetsov <vkuznets@redhat.com>

Mon, 25 May 2020 14:41:20 +0000 (16:41 +0200)

committer Paolo Bonzini <pbonzini@redhat.com>

Mon, 1 Jun 2020 08:26:07 +0000 (04:26 -0400)
author Vitaly Kuznetsov <vkuznets@redhat.com>
Mon, 25 May 2020 14:41:20 +0000 (16:41 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 1 Jun 2020 08:26:07 +0000 (04:26 -0400)
diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst

index 3389203..be08df1 100644 (file)
--- a/Documentation/virt/kvm/msr.rst
+++ b/Documentation/virt/kvm/msr.rst
@@ -190,41 +190,68 @@ MSR_KVM_ASYNC_PF_EN:
         0x4b564d02
  
  data:
-       Bits 63-6 hold 64-byte aligned physical address of a
-       64 byte memory area which must be in guest RAM and must be
-       zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
-       when asynchronous page faults are enabled on the vcpu 0 when
-       disabled. Bit 1 is 1 if asynchronous page faults can be injected
-       when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
-       are delivered to L1 as #PF vmexits.  Bit 2 can be set only if
-       KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
-
-       First 4 byte of 64 byte memory location will be written to by
-       the hypervisor at the time of asynchronous page fault (APF)
-       injection to indicate type of asynchronous page fault. Value
-       of 1 means that the page referred to by the page fault is not
-       present. Value 2 means that the page is now available. Disabling
-       interrupt inhibits APFs. Guest must not enable interrupt
-       before the reason is read, or it may be overwritten by another
-       APF. Since APF uses the same exception vector as regular page
-       fault guest must reset the reason to 0 before it does
-       something that can generate normal page fault.  If during page
-       fault APF reason is 0 it means that this is regular page
-       fault.
-
-       During delivery of type 1 APF cr2 contains a token that will
-       be used to notify a guest when missing page becomes
-       available. When page becomes available type 2 APF is sent with
-       cr2 set to the token associated with the page. There is special
-       kind of token 0xffffffff which tells vcpu that it should wake
-       up all processes waiting for APFs and no individual type 2 APFs
-       will be sent.
+       Asynchronous page fault (APF) control MSR.
+
+       Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
+       which must be in guest RAM and must be zeroed. This memory is expected
+       to hold a copy of the following structure::
+
+         struct kvm_vcpu_pv_apf_data {
+               /* Used for 'page not present' events delivered via #PF */
+               __u32 flags;
+
+               /* Used for 'page ready' events delivered via interrupt notification */
+               __u32 token;
+
+               __u8 pad[56];
+               __u32 enabled;
+         };
+
+       Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
+       when asynchronous page faults are enabled on the vcpu, 0 when disabled.
+       Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in
+       cpl == 0. Bit 2 is 1 if asynchronous page faults are delivered to L1 as
+       #PF vmexits.  Bit 2 can be set only if KVM_FEATURE_ASYNC_PF_VMEXIT is
+       present in CPUID. Bit 3 enables interrupt based delivery of 'page ready'
+       events.
+
+       'Page not present' events are currently always delivered as synthetic
+       #PF exception. During delivery of these events APF CR2 register contains
+       a token that will be used to notify the guest when missing page becomes
+       available. Also, to make it possible to distinguish between real #PF and
+       APF, first 4 bytes of 64 byte memory location ('flags') will be written
+       to by the hypervisor at the time of injection. Only first bit of 'flags'
+       is currently supported; when set, it indicates that the guest is dealing
+       with asynchronous 'page not present' event. If during a page fault APF
+       'flags' is '0' it means that this is regular page fault. Guest is
+       supposed to clear 'flags' when it is done handling #PF exception so the
+       next event can be delivered.
+
+       Note, since APF 'page not present' events use the same exception vector
+       as regular page fault, guest must reset 'flags' to '0' before it does
+       something that can generate normal page fault.
+
+       Bytes 4-7 of 64 byte memory location ('token') will be written to by the
+       hypervisor at the time of APF 'page ready' event injection. The content
+       of these bytes is a token which was previously delivered as 'page not
+       present' event. The event indicates the page is now available. Guest is
+       supposed to write '0' to 'token' when it is done handling 'page ready'
+       event so the next one can be delivered.
+
+       Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
+       ready' APF delivery needs to be written to before enabling APF mechanism
+       in MSR_KVM_ASYNC_PF_EN or interrupt #0 can get injected.
+
+       Note, previously, 'page ready' events were delivered via the same #PF
+       exception as 'page not present' events but this is now deprecated. If
+       bit 3 (interrupt based delivery) is not set, APF events are not delivered.
  
         If APF is disabled while there are outstanding APFs, they will
         not be delivered.
  
-       Currently type 2 APF will be always delivered on the same vcpu as
-       type 1 was, but guest should not rely on that.
+       Currently 'page ready' APF events will be always delivered on the
+       same vcpu as 'page not present' event was, but guest should not rely on
+       that.
  
  MSR_KVM_STEAL_TIME:
         0x4b564d03
@@ -319,3 +346,16 @@ data:
  
         KVM guests can request the host not to poll on HLT, for example if
         they are performing polling themselves.
+
+MSR_KVM_ASYNC_PF_INT:
+       0x4b564d06
+
+data:
+       Second asynchronous page fault (APF) control MSR.
+
+       Bits 0-7: APIC vector for delivery of 'page ready' APF events.
+       Bits 8-63: Reserved
+
+       Interrupt vector for asynchronous 'page ready' notifications delivery.
+       The vector has to be set up before asynchronous page fault mechanism
+       is enabled in MSR_KVM_ASYNC_PF_EN.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index f3897e4..2d39571 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -767,7 +767,9 @@ struct kvm_vcpu_arch {
                 bool halted;
                 gfn_t gfns[ASYNC_PF_PER_VCPU];
                 struct gfn_to_hva_cache data;
-               u64 msr_val;
+               u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
+               u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
+               u16 vec;
                 u32 id;
                 bool send_user_only;
                 u32 host_apf_flags;
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h

index d1cd5c0..1d37d61 100644 (file)
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -50,6 +50,7 @@
  #define MSR_KVM_STEAL_TIME  0x4b564d03
  #define MSR_KVM_PV_EOI_EN      0x4b564d04
  #define MSR_KVM_POLL_CONTROL   0x4b564d05
+#define MSR_KVM_ASYNC_PF_INT   0x4b564d06
  
  struct kvm_steal_time {
         __u64 steal;
@@ -81,6 +82,11 @@ struct kvm_clock_pairing {
  #define KVM_ASYNC_PF_ENABLED                   (1 << 0)
  #define KVM_ASYNC_PF_SEND_ALWAYS               (1 << 1)
  #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT     (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT           (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK                  GENMASK(7, 0)
+
  
  /* Operations for KVM_HC_MMU_OP */
  #define KVM_MMU_OP_WRITE_PTE            1
@@ -112,8 +118,12 @@ struct kvm_mmu_op_release_pt {
  #define KVM_PV_REASON_PAGE_READY 2
  
  struct kvm_vcpu_pv_apf_data {
+       /* Used for 'page not present' events delivered via #PF */
         __u32 flags;
-       __u32 token; /* Used for page ready notification only */
+
+       /* Used for 'page ready' events delivered via interrupt notification */
+       __u32 token;
+
         __u8 pad[56];
         __u32 enabled;
  };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 0e79b37..e6f3ec5 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1248,7 +1248,7 @@ static const u32 emulated_msrs_all[] = {
         HV_X64_MSR_TSC_EMULATION_STATUS,
  
         MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-       MSR_KVM_PV_EOI_EN,
+       MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT,
  
         MSR_IA32_TSC_ADJUST,
         MSR_IA32_TSCDEADLINE,
@@ -2673,17 +2673,24 @@ out:
         return r;
  }
  
+static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
+{
+       u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
+
+       return (vcpu->arch.apf.msr_en_val & mask) == mask;
+}
+
  static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
  {
         gpa_t gpa = data & ~0x3f;
  
-       /* Bits 3:5 are reserved, Should be zero */
-       if (data & 0x38)
+       /* Bits 4:5 are reserved, Should be zero */
+       if (data & 0x30)
                 return 1;
  
-       vcpu->arch.apf.msr_val = data;
+       vcpu->arch.apf.msr_en_val = data;
  
-       if (!(data & KVM_ASYNC_PF_ENABLED)) {
+       if (!kvm_pv_async_pf_enabled(vcpu)) {
                 kvm_clear_async_pf_completion_queue(vcpu);
                 kvm_async_pf_hash_reset(vcpu);
                 return 0;
@@ -2695,7 +2702,25 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
  
         vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
         vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
         kvm_async_pf_wakeup_all(vcpu);
+
+       return 0;
+}
+
+static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
+{
+       /* Bits 8-63 are reserved */
+       if (data >> 8)
+               return 1;
+
+       if (!lapic_in_kernel(vcpu))
+               return 1;
+
+       vcpu->arch.apf.msr_int_val = data;
+
+       vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
+
         return 0;
  }
  
@@ -2917,6 +2942,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 if (kvm_pv_enable_async_pf(vcpu, data))
                         return 1;
                 break;
+       case MSR_KVM_ASYNC_PF_INT:
+               if (kvm_pv_enable_async_pf_int(vcpu, data))
+                       return 1;
+               break;
         case MSR_KVM_STEAL_TIME:
  
                 if (unlikely(!sched_info_on()))
@@ -3191,7 +3220,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 msr_info->data = vcpu->arch.time;
                 break;
         case MSR_KVM_ASYNC_PF_EN:
-               msr_info->data = vcpu->arch.apf.msr_val;
+               msr_info->data = vcpu->arch.apf.msr_en_val;
+               break;
+       case MSR_KVM_ASYNC_PF_INT:
+               msr_info->data = vcpu->arch.apf.msr_int_val;
                 break;
         case MSR_KVM_STEAL_TIME:
                 msr_info->data = vcpu->arch.st.msr_val;
@@ -9553,7 +9585,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
         vcpu->arch.cr2 = 0;
  
         kvm_make_request(KVM_REQ_EVENT, vcpu);
-       vcpu->arch.apf.msr_val = 0;
+       vcpu->arch.apf.msr_en_val = 0;
+       vcpu->arch.apf.msr_int_val = 0;
         vcpu->arch.st.msr_val = 0;
  
         kvmclock_reset(vcpu);
@@ -10430,10 +10463,22 @@ static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
  
  static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
  {
-       u64 val = (u64)token << 32 | KVM_PV_REASON_PAGE_READY;
+       unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
  
-       return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
-                                     sizeof(val));
+       return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+                                            &token, offset, sizeof(token));
+}
+
+static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
+{
+       unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
+       u32 val;
+
+       if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+                                        &val, offset, sizeof(val)))
+               return false;
+
+       return !val;
  }
  
  static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
@@ -10441,9 +10486,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
         if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
                 return false;
  
-       if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
-           (vcpu->arch.apf.send_user_only &&
-            kvm_x86_ops.get_cpl(vcpu) == 0))
+       if (!kvm_pv_async_pf_enabled(vcpu) ||
+           (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
                 return false;
  
         return true;
@@ -10499,7 +10543,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
  void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                  struct kvm_async_pf *work)
  {
-       struct x86_exception fault;
+       struct kvm_lapic_irq irq = {
+               .delivery_mode = APIC_DM_FIXED,
+               .vector = vcpu->arch.apf.vec
+       };
  
         if (work->wakeup_all)
                 work->arch.token = ~0; /* broadcast wakeup */
@@ -10507,26 +10554,20 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
         trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
  
-       if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
-           !apf_put_user_ready(vcpu, work->arch.token)) {
-                       fault.vector = PF_VECTOR;
-                       fault.error_code_valid = true;
-                       fault.error_code = 0;
-                       fault.nested_page_fault = false;
-                       fault.address = work->arch.token;
-                       fault.async_page_fault = true;
-                       kvm_inject_page_fault(vcpu, &fault);
-       }
+       if (kvm_pv_async_pf_enabled(vcpu) &&
+           !apf_put_user_ready(vcpu, work->arch.token))
+               kvm_apic_set_irq(vcpu, &irq, NULL);
+
         vcpu->arch.apf.halted = false;
         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
  }
  
  bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
  {
-       if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
+       if (!kvm_pv_async_pf_enabled(vcpu))
                 return true;
         else
-               return kvm_can_do_async_pf(vcpu);
+               return apf_pageready_slot_free(vcpu);
  }
  
  void kvm_arch_start_assignment(struct kvm *kvm)
author	Vitaly Kuznetsov <vkuznets@redhat.com>
	Mon, 25 May 2020 14:41:20 +0000 (16:41 +0200)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Mon, 1 Jun 2020 08:26:07 +0000 (04:26 -0400)
Documentation/virt/kvm/msr.rst		patch \| blob \| history
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/include/uapi/asm/kvm_para.h		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history