Merge branch 'kvm-older-features' into HEAD
author     Paolo Bonzini <pbonzini@redhat.com>
           Fri, 8 Apr 2022 16:43:40 +0000 (12:43 -0400)
committer  Paolo Bonzini <pbonzini@redhat.com>
           Wed, 13 Apr 2022 17:37:17 +0000 (13:37 -0400)
Merge branch for features that did not make it into 5.18:

* New ioctls to get/set TSC frequency for a whole VM

* Allow userspace to opt out of hypercall patching

Nested virtualization improvements for AMD:

* Support for "nested nested" optimizations (nested vVMLOAD/VMSAVE,
  nested vGIF)

* Allow AVIC to co-exist with a nested guest running

* Fixes for LBR virtualization when a nested guest is running,
  and nested LBR virtualization support

* PAUSE filtering for nested hypervisors

Guest support:

* Decoupling of vcpu_is_preempted from PV spinlocks

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/kvm.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/x86.c
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile

@@@ -982,12 -982,22 +982,22 @@@ memory
        __u8 pad2[30];
    };
  
- If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the
- KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl.
- This requests KVM to generate the contents of the hypercall page
- automatically; hypercalls will be intercepted and passed to userspace
- through KVM_EXIT_XEN.  In this case, all of the blob size and address
- fields must be zero.
+ If certain flags are returned from the KVM_CAP_XEN_HVM check, they may
+ be set in the flags field of this ioctl:
+ The KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag requests KVM to generate
+ the contents of the hypercall page automatically; hypercalls will be
+ intercepted and passed to userspace through KVM_EXIT_XEN.  In this
+ case, all of the blob size and address fields must be zero.
+ The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates to KVM that userspace
+ will always use the KVM_XEN_HVM_EVTCHN_SEND ioctl to deliver event
+ channel interrupts rather than manipulating the guest's shared_info
+ structures directly. This, in turn, may allow KVM to enable features
+ such as intercepting the SCHEDOP_poll hypercall to accelerate PV
+ spinlock operation for the guest. Userspace may still use the ioctl
+ to deliver events if it was advertised, even if userspace does not
+ send this indication that it will always do so.
  
  No other flags are currently valid in the struct kvm_xen_hvm_config.
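
A minimal userspace sketch (editorial illustration, not part of this merge) of
how these flags might be enabled, assuming a VM file descriptor ``vm_fd`` from
KVM_CREATE_VM and the usual <linux/kvm.h>, <sys/ioctl.h> and <stdio.h>
includes::

   struct kvm_xen_hvm_config cfg = { 0 };
   int caps = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XEN_HVM);

   if (caps > 0 && (caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)) {
           /* Hypercalls will exit to userspace via KVM_EXIT_XEN,
            * so the blob size/address fields stay zero. */
           cfg.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;
           if (caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)
                   /* Promise to deliver events via KVM_XEN_HVM_EVTCHN_SEND. */
                   cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
           if (ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg) < 0)
                   perror("KVM_XEN_HVM_CONFIG");
   }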
  
@@@ -1887,22 -1897,25 +1897,25 @@@ the future
  4.55 KVM_SET_TSC_KHZ
  --------------------
  
- :Capability: KVM_CAP_TSC_CONTROL
+ :Capability: KVM_CAP_TSC_CONTROL / KVM_CAP_VM_TSC_CONTROL
  :Architectures: x86
- :Type: vcpu ioctl
+ :Type: vcpu ioctl / vm ioctl
  :Parameters: virtual tsc_khz
  :Returns: 0 on success, -1 on error
  
  Specifies the tsc frequency for the virtual machine. The unit of the
  frequency is KHz.
  
+ If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also
+ be used as a vm ioctl to set the initial tsc frequency of subsequently
+ created vCPUs.
  
  4.56 KVM_GET_TSC_KHZ
  --------------------
  
- :Capability: KVM_CAP_GET_TSC_KHZ
+ :Capability: KVM_CAP_GET_TSC_KHZ / KVM_CAP_VM_TSC_CONTROL
  :Architectures: x86
- :Type: vcpu ioctl
+ :Type: vcpu ioctl / vm ioctl
  :Parameters: none
  :Returns: virtual tsc-khz on success, negative value on error
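
A hypothetical sketch (not from this patch) of the VM-scoped usage, assuming
``vm_fd`` is a VM file descriptor and a default frequency of 2.5 GHz is wanted
for all vCPUs created afterwards::

   if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TSC_CONTROL) > 0) {
           if (ioctl(vm_fd, KVM_SET_TSC_KHZ, 2500000) < 0)
                   perror("KVM_SET_TSC_KHZ");

           /* Reads back the value just set (2500000) on success. */
           int tsc_khz = ioctl(vm_fd, KVM_GET_TSC_KHZ, 0);
   }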
  
@@@ -5216,7 -5229,25 +5229,25 @@@ have deterministic behavior
                struct {
                        __u64 gfn;
                } shared_info;
-               __u64 pad[4];
+               struct {
+                       __u32 send_port;
+                       __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+                       __u32 flags;
+                       union {
+                               struct {
+                                       __u32 port;
+                                       __u32 vcpu;
+                                       __u32 priority;
+                               } port;
+                               struct {
+                                       __u32 port; /* Zero for eventfd */
+                                       __s32 fd;
+                               } eventfd;
+                               __u32 padding[4];
+                       } deliver;
+               } evtchn;
+               __u32 xen_version;
+               __u64 pad[8];
        } u;
    };
  
@@@ -5247,6 -5278,30 +5278,30 @@@ KVM_XEN_ATTR_TYPE_SHARED_INF
  
  KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
    Sets the exception vector used to deliver Xen event channel upcalls.
+   This is the HVM-wide vector injected directly by the hypervisor
+   (not through the local APIC), typically configured by a guest via
+   HVM_PARAM_CALLBACK_IRQ.

+ KVM_XEN_ATTR_TYPE_EVTCHN
+   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
+   an outbound port number for interception of EVTCHNOP_send requests
+   from the guest. A given sending port number may be directed back
+   to a specified vCPU (by APIC ID) / port / priority on the guest,
+   or to trigger events on an eventfd. The vCPU and priority can be
+   changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call,
+   but other fields cannot change for a given sending port. A port
+   mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags
+   field.

+ KVM_XEN_ATTR_TYPE_XEN_VERSION
+   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
+   the 32-bit version code returned to the guest when it invokes the
+   XENVER_version call; typically (XEN_MAJOR << 16 | XEN_MINOR). PV
+ Xen guests will often use this as a dummy hypercall to trigger
+   event channel delivery, so responding within the kernel without
+   exiting to userspace is beneficial.
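
As an illustration only (not part of this merge), a VMM might intercept a
guest's EVTCHNOP_send on a port and route it to an eventfd. The port number
and the eventfd ``efd`` below are hypothetical, and EVTCHNSTAT_interdomain
comes from the Xen interface headers::

   struct kvm_xen_hvm_attr ha = {
           .type = KVM_XEN_ATTR_TYPE_EVTCHN,
           .u.evtchn.send_port = 3,
           .u.evtchn.type = EVTCHNSTAT_interdomain,
           .u.evtchn.deliver.eventfd.port = 0,   /* zero selects eventfd delivery */
           .u.evtchn.deliver.eventfd.fd = efd,   /* previously created eventfd */
   };

   if (ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &ha) < 0)
           perror("KVM_XEN_HVM_SET_ATTR");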
  
  4.127 KVM_XEN_HVM_GET_ATTR
  --------------------------
  :Returns: 0 on success, < 0 on error
  
  Allows Xen VM attributes to be read. For the structure and types,
- see KVM_XEN_HVM_SET_ATTR above.
+ see KVM_XEN_HVM_SET_ATTR above. The KVM_XEN_ATTR_TYPE_EVTCHN
+ attribute cannot be read.
  
  4.128 KVM_XEN_VCPU_SET_ATTR
  ---------------------------
                        __u64 time_blocked;
                        __u64 time_offline;
                } runstate;
+               __u32 vcpu_id;
+               struct {
+                       __u32 port;
+                       __u32 priority;
+                       __u64 expires_ns;
+               } timer;
+               __u8 vector;
        } u;
    };
  
@@@ -5326,6 -5389,27 +5389,27 @@@ KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUS
    or RUNSTATE_offline) to set the current accounted state as of the
    adjusted state_entry_time.
  
+ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID
+   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the Xen
+   vCPU ID of the given vCPU, to allow timer-related VCPU operations to
+   be intercepted by KVM.

+ KVM_XEN_VCPU_ATTR_TYPE_TIMER
+   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
+   event channel port/priority for the VIRQ_TIMER of the vCPU, as well
+   as allowing a pending timer to be saved/restored.

+ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
+   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
+   per-vCPU local APIC upcall vector, configured by a Xen guest with
+   the HVMOP_set_evtchn_upcall_vector hypercall. This is typically
+   used by Windows guests, and is distinct from the HVM-wide upcall
+   vector configured with HVM_PARAM_CALLBACK_IRQ.
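
A hedged sketch (not from this patch) of how the new per-vCPU attributes might
be used on a vCPU file descriptor ``vcpu_fd``; the Xen vCPU ID and the
VIRQ_TIMER port ``timer_port`` are placeholders::

   struct kvm_xen_vcpu_attr va = {
           .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
           .u.vcpu_id = 0,               /* Xen / ACPI vCPU ID of this vCPU */
   };
   ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &va);

   va.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
   va.u.timer.port = timer_port;         /* event channel bound to VIRQ_TIMER */
   va.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
   va.u.timer.expires_ns = 0;            /* no pending timer to restore */
   ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &va);
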
  4.129 KVM_XEN_VCPU_GET_ATTR
  ---------------------------
  
@@@ -5645,6 -5729,25 +5729,25 @@@ enabled with ``arch_prctl()``, but thi
  The offsets of the state save areas in struct kvm_xsave follow the contents
  of CPUID leaf 0xD on the host.
  
+ 4.135 KVM_XEN_HVM_EVTCHN_SEND
+ -----------------------------
+ :Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND
+ :Architectures: x86
+ :Type: vm ioctl
+ :Parameters: struct kvm_irq_routing_xen_evtchn
+ :Returns: 0 on success, < 0 on error
+ ::

+    struct kvm_irq_routing_xen_evtchn {
+       __u32 port;
+       __u32 vcpu;
+       __u32 priority;
+    };

+ This ioctl injects an event channel interrupt directly to the guest vCPU.
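
For illustration (not part of the patch), delivering a 2-level event channel
on a hypothetical port 3 to vCPU 0 might look like::

   struct kvm_irq_routing_xen_evtchn evt = {
           .port = 3,                    /* guest event channel port */
           .vcpu = 0,                    /* destination vCPU */
           .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
   };

   if (ioctl(vm_fd, KVM_XEN_HVM_EVTCHN_SEND, &evt) < 0)
           perror("KVM_XEN_HVM_EVTCHN_SEND");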
  
  5. The kvm_run structure
  ========================
@@@ -6190,7 -6293,6 +6293,7 @@@ Valid values for 'type' are
                        unsigned long args[6];
                        unsigned long ret[2];
                } riscv_sbi;
 +
  If exit reason is KVM_EXIT_RISCV_SBI then it indicates that the VCPU has
  done a SBI call which is not handled by KVM RISC-V kernel module. The details
  of the SBI call are available in 'riscv_sbi' member of kvm_run structure. The
@@@ -7135,6 -7237,15 +7238,15 @@@ The valid bits in cap.args[0] are
                                      Additionally, when this quirk is disabled,
                                      KVM clears CPUID.01H:ECX[bit 3] if
                                      IA32_MISC_ENABLE[bit 18] is cleared.
+  KVM_X86_QUIRK_FIX_HYPERCALL_INSN   By default, KVM rewrites guest
+                                     VMMCALL/VMCALL instructions to match the
+                                     vendor's hypercall instruction for the
+                                     system. When this quirk is disabled, KVM
+                                     will no longer rewrite invalid guest
+                                     hypercall instructions. Executing the
+                                     incorrect hypercall instruction will
+                                     generate a #UD within the guest.
  =================================== ============================================
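
As a non-authoritative sketch, userspace could disable this quirk via
KVM_ENABLE_CAP with KVM_CAP_DISABLE_QUIRKS2 on the VM file descriptor, so that
mismatched VMCALL/VMMCALL instructions raise #UD in the guest rather than
being patched::

   struct kvm_enable_cap cap = {
           .cap = KVM_CAP_DISABLE_QUIRKS2,
           .args[0] = KVM_X86_QUIRK_FIX_HYPERCALL_INSN,
   };

   if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
           perror("KVM_ENABLE_CAP");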
  
  8. Other capabilities.
@@@ -7612,8 -7723,9 +7724,9 @@@ PVHVM guests. Valid flags are:
    #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR    (1 << 0)
    #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL  (1 << 1)
    #define KVM_XEN_HVM_CONFIG_SHARED_INFO      (1 << 2)
-   #define KVM_XEN_HVM_CONFIG_RUNSTATE         (1 << 2)
-   #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL    (1 << 3)
+   #define KVM_XEN_HVM_CONFIG_RUNSTATE         (1 << 3)
+   #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL    (1 << 4)
+   #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND      (1 << 5)
  
  The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
  ioctl is available, for the guest to set its hypercall page.
@@@ -7637,6 -7749,14 +7750,14 @@@ The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL fl
  of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
  field set to indicate 2 level event channel delivery.
  
+ The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates that KVM supports
+ injecting event channel events directly into the guest with the
+ KVM_XEN_HVM_EVTCHN_SEND ioctl. It also indicates support for the
+ KVM_XEN_ATTR_TYPE_EVTCHN/XEN_VERSION HVM attributes and the
+ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes
+ related to event channel delivery, timers, and the XENVER_version
+ interception.

  8.31 KVM_CAP_PPC_MULTITCE
  -------------------------
  
@@@ -502,7 -502,6 +502,7 @@@ struct kvm_pmc 
        bool intr;
  };
  
 +#define KVM_PMC_MAX_FIXED     3
  struct kvm_pmu {
        unsigned nr_arch_gp_counters;
        unsigned nr_arch_fixed_counters;
        u64 raw_event_mask;
        u8 version;
        struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
 -      struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
 +      struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
        struct irq_work irq_work;
        DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
        DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
@@@ -607,16 -606,21 +607,21 @@@ struct kvm_vcpu_hv 
  struct kvm_vcpu_xen {
        u64 hypercall_rip;
        u32 current_runstate;
-       bool vcpu_info_set;
-       bool vcpu_time_info_set;
-       bool runstate_set;
-       struct gfn_to_hva_cache vcpu_info_cache;
-       struct gfn_to_hva_cache vcpu_time_info_cache;
-       struct gfn_to_hva_cache runstate_cache;
+       u8 upcall_vector;
+       struct gfn_to_pfn_cache vcpu_info_cache;
+       struct gfn_to_pfn_cache vcpu_time_info_cache;
+       struct gfn_to_pfn_cache runstate_cache;
        u64 last_steal;
        u64 runstate_entry_time;
        u64 runstate_times[4];
        unsigned long evtchn_pending_sel;
+       u32 vcpu_id; /* The Xen / ACPI vCPU ID */
+       u32 timer_virq;
+       u64 timer_expires; /* In guest epoch */
+       atomic_t timer_pending;
+       struct hrtimer timer;
+       int poll_evtchn;
+       struct timer_list poll_timer;
  };
  
  struct kvm_vcpu_arch {
        gpa_t time;
        struct pvclock_vcpu_time_info hv_clock;
        unsigned int hw_tsc_khz;
-       struct gfn_to_hva_cache pv_time;
-       bool pv_time_enabled;
+       struct gfn_to_pfn_cache pv_time;
        /* set guest stopped flag in pvclock flags field */
        bool pvclock_set_guest_stopped_request;
  
@@@ -974,10 -977,12 +978,10 @@@ enum hv_tsc_page_status 
        HV_TSC_PAGE_UNSET = 0,
        /* TSC page MSR was written by the guest, update pending */
        HV_TSC_PAGE_GUEST_CHANGED,
 -      /* TSC page MSR was written by KVM userspace, update pending */
 +      /* TSC page update was triggered from the host side */
        HV_TSC_PAGE_HOST_CHANGED,
        /* TSC page was properly set up and is currently active  */
        HV_TSC_PAGE_SET,
 -      /* TSC page is currently being updated and therefore is inactive */
 -      HV_TSC_PAGE_UPDATING,
        /* TSC page was set up with an inaccessible GPA */
        HV_TSC_PAGE_BROKEN,
  };
@@@ -1024,9 -1029,12 +1028,12 @@@ struct msr_bitmap_range 
  
  /* Xen emulation context */
  struct kvm_xen {
+       u32 xen_version;
        bool long_mode;
        u8 upcall_vector;
        struct gfn_to_pfn_cache shinfo_cache;
+       struct idr evtchn_ports;
+       unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
  };
  
  enum kvm_irqchip_mode {
@@@ -1050,7 -1058,6 +1057,7 @@@ enum kvm_apicv_inhibit 
        APICV_INHIBIT_REASON_X2APIC,
        APICV_INHIBIT_REASON_BLOCKIRQ,
        APICV_INHIBIT_REASON_ABSENT,
 +      APICV_INHIBIT_REASON_SEV,
  };
  
  struct kvm_arch {
        u64 cur_tsc_generation;
        int nr_vcpus_matched_tsc;
  
+       u32 default_tsc_khz;
        seqcount_raw_spinlock_t pvclock_sc;
        bool use_master_clock;
        u64 master_kernel_ns;
@@@ -1498,6 -1507,11 +1507,11 @@@ struct kvm_x86_ops 
        int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
  
        void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+       /*
+        * Returns vCPU specific APICv inhibit reasons
+        */
+       unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
  };
  
  struct kvm_x86_nested_ops {
@@@ -1584,9 -1598,8 +1598,9 @@@ static inline int kvm_arch_flush_remote
  #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
  
 -int kvm_mmu_module_init(void);
 -void kvm_mmu_module_exit(void);
 +void kvm_mmu_x86_module_init(void);
 +int kvm_mmu_vendor_module_init(void);
 +void kvm_mmu_vendor_module_exit(void);
  
  void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
  int kvm_mmu_create(struct kvm_vcpu *vcpu);
@@@ -1799,6 -1812,7 +1813,7 @@@ gpa_t kvm_mmu_gva_to_gpa_system(struct 
                                struct x86_exception *exception);
  
  bool kvm_apicv_activated(struct kvm *kvm);
+ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
  void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
  void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
                                      enum kvm_apicv_inhibit reason, bool set);
@@@ -1988,6 -2002,7 +2003,7 @@@ int memslot_rmap_alloc(struct kvm_memor
         KVM_X86_QUIRK_CD_NW_CLEARED |          \
         KVM_X86_QUIRK_LAPIC_MMIO_HOLE |        \
         KVM_X86_QUIRK_OUT_7E_INC_RIP |         \
-        KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)
+        KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |   \
+        KVM_X86_QUIRK_FIX_HYPERCALL_INSN)
  
  #endif /* _ASM_X86_KVM_HOST_H */
diff --combined arch/x86/kernel/kvm.c
@@@ -752,6 -752,41 +752,42 @@@ static void kvm_crash_shutdown(struct p
  }
  #endif
  
 -"ret;"
+ #if defined(CONFIG_X86_32) || !defined(CONFIG_SMP)
+ bool __kvm_vcpu_is_preempted(long cpu);
+ __visible bool __kvm_vcpu_is_preempted(long cpu)
+ {
+       struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
+       return !!(src->preempted & KVM_VCPU_PREEMPTED);
+ }
+ PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
+ #else
+ #include <asm/asm-offsets.h>
+ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
+ /*
+  * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
+  * restoring to/from the stack.
+  */
+ asm(
+ ".pushsection .text;"
+ ".global __raw_callee_save___kvm_vcpu_is_preempted;"
+ ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
+ "__raw_callee_save___kvm_vcpu_is_preempted:"
++ASM_ENDBR
+ "movq __per_cpu_offset(,%rdi,8), %rax;"
+ "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+ "setne        %al;"
++ASM_RET
+ ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
+ ".popsection");
+ #endif
  static void __init kvm_guest_init(void)
  {
        int i;
        if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                has_steal_clock = 1;
                static_call_update(pv_steal_clock, kvm_steal_clock);
+               pv_ops.lock.vcpu_is_preempted =
+                       PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
        }
  
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@@ -1005,40 -1043,6 +1044,6 @@@ static void kvm_wait(u8 *ptr, u8 val
        }
  }
  
- #ifdef CONFIG_X86_32
- __visible bool __kvm_vcpu_is_preempted(long cpu)
- {
-       struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
-       return !!(src->preempted & KVM_VCPU_PREEMPTED);
- }
- PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
- #else
- #include <asm/asm-offsets.h>
- extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
- /*
-  * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
-  * restoring to/from the stack.
-  */
- asm(
- ".pushsection .text;"
- ".global __raw_callee_save___kvm_vcpu_is_preempted;"
- ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
- "__raw_callee_save___kvm_vcpu_is_preempted:"
- ASM_ENDBR
- "movq __per_cpu_offset(,%rdi,8), %rax;"
- "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
- "setne        %al;"
- ASM_RET
- ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
- ".popsection");
- #endif
  /*
   * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
   */
@@@ -1082,10 -1086,6 +1087,6 @@@ void __init kvm_spinlock_init(void
        pv_ops.lock.wait = kvm_wait;
        pv_ops.lock.kick = kvm_kick_cpu;
  
-       if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
-               pv_ops.lock.vcpu_is_preempted =
-                       PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
-       }
        /*
         * When PV spinlock is enabled which is preferred over
         * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
diff --combined arch/x86/kvm/mmu/mmu.c
@@@ -1866,17 -1866,14 +1866,14 @@@ static void kvm_mmu_commit_zap_page(str
          &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])     \
                if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
  
- static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                         struct list_head *invalid_list)
  {
        int ret = vcpu->arch.mmu->sync_page(vcpu, sp);
  
-       if (ret < 0) {
+       if (ret < 0)
                kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
-               return false;
-       }
-       return !!ret;
+       return ret;
  }
  
  static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
@@@ -1998,7 -1995,7 +1995,7 @@@ static int mmu_sync_children(struct kvm
  
                for_each_sp(pages, sp, parents, i) {
                        kvm_unlink_unsync_page(vcpu->kvm, sp);
-                       flush |= kvm_sync_page(vcpu, sp, &invalid_list);
+                       flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0;
                        mmu_pages_clear_parents(&parents);
                }
                if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
@@@ -2039,6 -2036,7 +2036,7 @@@ static struct kvm_mmu_page *kvm_mmu_get
        struct hlist_head *sp_list;
        unsigned quadrant;
        struct kvm_mmu_page *sp;
+       int ret;
        int collisions = 0;
        LIST_HEAD(invalid_list);
  
                         * If the sync fails, the page is zapped.  If so, break
                         * in order to rebuild it.
                         */
-                       if (!kvm_sync_page(vcpu, sp, &invalid_list))
+                       ret = kvm_sync_page(vcpu, sp, &invalid_list);
+                       if (ret < 0)
                                break;
  
                        WARN_ON(!list_empty(&invalid_list));
-                       kvm_flush_remote_tlbs(vcpu->kvm);
+                       if (ret > 0)
+                               kvm_flush_remote_tlbs(vcpu->kvm);
                }
  
                __clear_sp_write_flooding_count(sp);
@@@ -6237,24 -6237,12 +6237,24 @@@ static int set_nx_huge_pages(const cha
        return 0;
  }
  
 -int kvm_mmu_module_init(void)
 +/*
 + * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
 + * its default value of -1 is technically undefined behavior for a boolean.
 + */
 +void kvm_mmu_x86_module_init(void)
  {
 -      int ret = -ENOMEM;
 -
        if (nx_huge_pages == -1)
                __set_nx_huge_pages(get_nx_auto_mode());
 +}
 +
 +/*
 + * The bulk of the MMU initialization is deferred until the vendor module is
 + * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need
 + * to be reset when a potentially different vendor module is loaded.
 + */
 +int kvm_mmu_vendor_module_init(void)
 +{
 +      int ret = -ENOMEM;
  
        /*
         * MMU roles use union aliasing which is, generally speaking, an
@@@ -6302,7 -6290,7 +6302,7 @@@ void kvm_mmu_destroy(struct kvm_vcpu *v
        mmu_free_memory_caches(vcpu);
  }
  
 -void kvm_mmu_module_exit(void)
 +void kvm_mmu_vendor_module_exit(void)
  {
        mmu_destroy_caches();
        percpu_counter_destroy(&kvm_total_used_mmu_pages);
diff --combined arch/x86/kvm/svm/avic.c
@@@ -165,9 -165,8 +165,8 @@@ free_avic
        return err;
  }
  
- void avic_init_vmcb(struct vcpu_svm *svm)
+ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
  {
-       struct vmcb *vmcb = svm->vmcb;
        struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
        phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
        phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
@@@ -357,6 -356,13 +356,13 @@@ int avic_incomplete_ipi_interception(st
        return 1;
  }
  
+ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
+ {
+       if (is_guest_mode(vcpu))
+               return APICV_INHIBIT_REASON_NESTED;
+       return 0;
+ }
  static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
  {
        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
@@@ -837,8 -843,7 +843,8 @@@ bool avic_check_apicv_inhibit_reasons(e
                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
                          BIT(APICV_INHIBIT_REASON_X2APIC) |
 -                        BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 +                        BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
 +                        BIT(APICV_INHIBIT_REASON_SEV);
  
        return supported & BIT(reason);
  }
diff --combined arch/x86/kvm/x86.c
@@@ -961,11 -961,13 +961,13 @@@ void kvm_load_guest_xsave_state(struct 
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
  
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (static_cpu_has(X86_FEATURE_PKU) &&
-           (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
-            (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
-           vcpu->arch.pkru != vcpu->arch.host_pkru)
+           vcpu->arch.pkru != vcpu->arch.host_pkru &&
+           ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
+            kvm_read_cr4_bits(vcpu, X86_CR4_PKE)))
                write_pkru(vcpu->arch.pkru);
+ #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
  }
  EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
  
@@@ -974,13 -976,15 +976,15 @@@ void kvm_load_host_xsave_state(struct k
        if (vcpu->arch.guest_state_protected)
                return;
  
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (static_cpu_has(X86_FEATURE_PKU) &&
-           (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
-            (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
+           ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
+            kvm_read_cr4_bits(vcpu, X86_CR4_PKE))) {
                vcpu->arch.pkru = rdpkru();
                if (vcpu->arch.pkru != vcpu->arch.host_pkru)
                        write_pkru(vcpu->arch.host_pkru);
        }
+ #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
  
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
  
@@@ -1771,7 -1775,7 +1775,7 @@@ static int __kvm_set_msr(struct kvm_vcp
                 * value, and that something deterministic happens if the guest
                 * invokes 64-bit SYSENTER.
                 */
 -              data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
 +              data = __canonical_address(data, vcpu_virt_addr_bits(vcpu));
                break;
        case MSR_TSC_AUX:
                if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
@@@ -2249,14 -2253,13 +2253,13 @@@ static void kvm_write_system_time(struc
        kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
  
        /* we verify if the enable bit is set... */
-       vcpu->arch.pv_time_enabled = false;
-       if (!(system_time & 1))
-               return;
-       if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
-                                      &vcpu->arch.pv_time, system_time & ~1ULL,
-                                      sizeof(struct pvclock_vcpu_time_info)))
-               vcpu->arch.pv_time_enabled = true;
+       if (system_time & 1) {
+               kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+                                         KVM_HOST_USES_PFN, system_time & ~1ULL,
+                                         sizeof(struct pvclock_vcpu_time_info));
+       } else {
+               kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+       }
  
        return;
  }
@@@ -2901,7 -2904,7 +2904,7 @@@ static void kvm_end_pvclock_update(stru
  
  static void kvm_update_masterclock(struct kvm *kvm)
  {
 -      kvm_hv_invalidate_tsc_page(kvm);
 +      kvm_hv_request_tsc_page_update(kvm);
        kvm_start_pvclock_update(kvm);
        pvclock_update_vm_gtod_copy(kvm);
        kvm_end_pvclock_update(kvm);
@@@ -2961,63 -2964,55 +2964,55 @@@ u64 get_kvmclock_ns(struct kvm *kvm
        return data.clock;
  }
  
- static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
-                                  struct gfn_to_hva_cache *cache,
-                                  unsigned int offset)
+ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
+                                   struct gfn_to_pfn_cache *gpc,
+                                   unsigned int offset)
  {
        struct kvm_vcpu_arch *vcpu = &v->arch;
-       struct pvclock_vcpu_time_info guest_hv_clock;
+       struct pvclock_vcpu_time_info *guest_hv_clock;
+       unsigned long flags;
  
-       if (unlikely(kvm_read_guest_offset_cached(v->kvm, cache,
-               &guest_hv_clock, offset, sizeof(guest_hv_clock))))
-               return;
+       read_lock_irqsave(&gpc->lock, flags);
+       while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+                                          offset + sizeof(*guest_hv_clock))) {
+               read_unlock_irqrestore(&gpc->lock, flags);
+               if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+                                                offset + sizeof(*guest_hv_clock)))
+                       return;
  
-       /* This VCPU is paused, but it's legal for a guest to read another
+               read_lock_irqsave(&gpc->lock, flags);
+       }
+       guest_hv_clock = (void *)(gpc->khva + offset);
+       /*
+        * This VCPU is paused, but it's legal for a guest to read another
         * VCPU's kvmclock, so we really have to follow the specification where
         * it says that version is odd if data is being modified, and even after
         * it is consistent.
-        *
-        * Version field updates must be kept separate.  This is because
-        * kvm_write_guest_cached might use a "rep movs" instruction, and
-        * writes within a string instruction are weakly ordered.  So there
-        * are three writes overall.
-        *
-        * As a small optimization, only write the version field in the first
-        * and third write.  The vcpu->pv_time cache is still valid, because the
-        * version field is the first in the struct.
         */
-       BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-       if (guest_hv_clock.version & 1)
-               ++guest_hv_clock.version;  /* first time write, random junk */
-       vcpu->hv_clock.version = guest_hv_clock.version + 1;
-       kvm_write_guest_offset_cached(v->kvm, cache,
-                                     &vcpu->hv_clock, offset,
-                                     sizeof(vcpu->hv_clock.version));
  
+       guest_hv_clock->version = vcpu->hv_clock.version = (guest_hv_clock->version + 1) | 1;
        smp_wmb();
  
        /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
-       vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+       vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
  
        if (vcpu->pvclock_set_guest_stopped_request) {
                vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
                vcpu->pvclock_set_guest_stopped_request = false;
        }
  
-       trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+       memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+       smp_wmb();
  
-       kvm_write_guest_offset_cached(v->kvm, cache,
-                                     &vcpu->hv_clock, offset,
-                                     sizeof(vcpu->hv_clock));
+       guest_hv_clock->version = ++vcpu->hv_clock.version;
  
-       smp_wmb();
+       mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+       read_unlock_irqrestore(&gpc->lock, flags);
  
-       vcpu->hv_clock.version++;
-       kvm_write_guest_offset_cached(v->kvm, cache,
-                                    &vcpu->hv_clock, offset,
-                                    sizeof(vcpu->hv_clock.version));
+       trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
  }
  
  static int kvm_guest_time_update(struct kvm_vcpu *v)
  
        vcpu->hv_clock.flags = pvclock_flags;
  
-       if (vcpu->pv_time_enabled)
-               kvm_setup_pvclock_page(v, &vcpu->pv_time, 0);
-       if (vcpu->xen.vcpu_info_set)
-               kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
-                                      offsetof(struct compat_vcpu_info, time));
-       if (vcpu->xen.vcpu_time_info_set)
-               kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
+       if (vcpu->pv_time.active)
+               kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
+       if (vcpu->xen.vcpu_info_cache.active)
+               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
+                                       offsetof(struct compat_vcpu_info, time));
+       if (vcpu->xen.vcpu_time_info_cache.active)
+               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
 -      if (!v->vcpu_idx)
 -              kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
 +      kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
        return 0;
  }
  
@@@ -3300,7 -3296,7 +3295,7 @@@ static int kvm_pv_enable_async_pf_int(s
  
  static void kvmclock_reset(struct kvm_vcpu *vcpu)
  {
-       vcpu->arch.pv_time_enabled = false;
+       kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
        vcpu->arch.time = 0;
  }
  
@@@ -4284,7 -4280,8 +4279,8 @@@ int kvm_vm_ioctl_check_extension(struc
                r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
                    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
                    KVM_XEN_HVM_CONFIG_SHARED_INFO |
-                   KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL;
+                   KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
+                   KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
                if (sched_info_on())
                        r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
                break;
                r = boot_cpu_has(X86_FEATURE_XSAVE);
                break;
        case KVM_CAP_TSC_CONTROL:
+       case KVM_CAP_VM_TSC_CONTROL:
                r = kvm_has_tsc_control;
                break;
        case KVM_CAP_X2APIC_API:
@@@ -5102,7 -5100,7 +5099,7 @@@ static int kvm_vcpu_ioctl_x86_set_xcrs(
   */
  static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
  {
-       if (!vcpu->arch.pv_time_enabled)
+       if (!vcpu->arch.pv_time.active)
                return -EINVAL;
        vcpu->arch.pvclock_set_guest_stopped_request = true;
        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@@ -6186,7 -6184,7 +6183,7 @@@ static int kvm_arch_suspend_notifier(st
  
        mutex_lock(&kvm->lock);
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               if (!vcpu->arch.pv_time_enabled)
+               if (!vcpu->arch.pv_time.active)
                        continue;
  
                ret = kvm_set_guest_paused(vcpu);
@@@ -6240,7 -6238,7 +6237,7 @@@ static int kvm_vm_ioctl_set_clock(struc
        if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
                return -EINVAL;
  
 -      kvm_hv_invalidate_tsc_page(kvm);
 +      kvm_hv_request_tsc_page_update(kvm);
        kvm_start_pvclock_update(kvm);
        pvclock_update_vm_gtod_copy(kvm);
  
@@@ -6513,6 -6511,15 +6510,15 @@@ set_pit2_out
                r = kvm_xen_hvm_set_attr(kvm, &xha);
                break;
        }
+       case KVM_XEN_HVM_EVTCHN_SEND: {
+               struct kvm_irq_routing_xen_evtchn uxe;
+               r = -EFAULT;
+               if (copy_from_user(&uxe, argp, sizeof(uxe)))
+                       goto out;
+               r = kvm_xen_hvm_evtchn_send(kvm, &uxe);
+               break;
+       }
  #endif
        case KVM_SET_CLOCK:
                r = kvm_vm_ioctl_set_clock(kvm, argp);
        case KVM_GET_CLOCK:
                r = kvm_vm_ioctl_get_clock(kvm, argp);
                break;
+       case KVM_SET_TSC_KHZ: {
+               u32 user_tsc_khz;
+               r = -EINVAL;
+               user_tsc_khz = (u32)arg;
+               if (kvm_has_tsc_control &&
+                   user_tsc_khz >= kvm_max_guest_tsc_khz)
+                       goto out;
+               if (user_tsc_khz == 0)
+                       user_tsc_khz = tsc_khz;
+               WRITE_ONCE(kvm->arch.default_tsc_khz, user_tsc_khz);
+               r = 0;
+               goto out;
+       }
+       case KVM_GET_TSC_KHZ: {
+               r = READ_ONCE(kvm->arch.default_tsc_khz);
+               goto out;
+       }
        case KVM_MEMORY_ENCRYPT_OP: {
                r = -ENOTTY;
                if (!kvm_x86_ops.mem_enc_ioctl)
@@@ -6584,7 -6613,7 +6612,7 @@@ static void kvm_init_msr_list(void
        u32 dummy[2];
        unsigned i;
  
 -      BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
 +      BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3,
                         "Please update the fixed PMCs in msrs_to_saved_all[]");
  
        perf_get_x86_pmu_capability(&x86_pmu);
@@@ -8789,22 -8818,22 +8817,22 @@@ static int kvmclock_cpu_online(unsigne
  
  static void kvm_timer_init(void)
  {
-       max_tsc_khz = tsc_khz;
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- #ifdef CONFIG_CPU_FREQ
-               struct cpufreq_policy *policy;
-               int cpu;
-               cpu = get_cpu();
-               policy = cpufreq_cpu_get(cpu);
-               if (policy) {
-                       if (policy->cpuinfo.max_freq)
-                               max_tsc_khz = policy->cpuinfo.max_freq;
-                       cpufreq_cpu_put(policy);
+               max_tsc_khz = tsc_khz;
+               if (IS_ENABLED(CONFIG_CPU_FREQ)) {
+                       struct cpufreq_policy *policy;
+                       int cpu;
+                       cpu = get_cpu();
+                       policy = cpufreq_cpu_get(cpu);
+                       if (policy) {
+                               if (policy->cpuinfo.max_freq)
+                                       max_tsc_khz = policy->cpuinfo.max_freq;
+                               cpufreq_cpu_put(policy);
+                       }
+                       put_cpu();
                }
-               put_cpu();
- #endif
                cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                          CPUFREQ_TRANSITION_NOTIFIER);
        }
@@@ -8925,7 -8954,7 +8953,7 @@@ int kvm_arch_init(void *opaque
        }
        kvm_nr_uret_msrs = 0;
  
 -      r = kvm_mmu_module_init();
 +      r = kvm_mmu_vendor_module_init();
        if (r)
                goto out_free_percpu;
  
        }
  
        if (pi_inject_timer == -1)
 -              pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
 +              pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
  #ifdef CONFIG_X86_64
        pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
  
@@@ -8973,7 -9002,7 +9001,7 @@@ void kvm_arch_exit(void
        cancel_work_sync(&pvclock_gtod_work);
  #endif
        kvm_x86_ops.hardware_enable = NULL;
 -      kvm_mmu_module_exit();
 +      kvm_mmu_vendor_module_exit();
        free_percpu(user_return_msrs);
        kmem_cache_destroy(x86_emulator_cache);
  #ifdef CONFIG_KVM_XEN
@@@ -9089,6 -9118,14 +9117,14 @@@ bool kvm_apicv_activated(struct kvm *kv
  }
  EXPORT_SYMBOL_GPL(kvm_apicv_activated);
  
+ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
+ {
+       ulong vm_reasons = READ_ONCE(vcpu->kvm->arch.apicv_inhibit_reasons);
+       ulong vcpu_reasons = static_call(kvm_x86_vcpu_get_apicv_inhibit_reasons)(vcpu);
+       return (vm_reasons | vcpu_reasons) == 0;
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
  
  static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
                                       enum kvm_apicv_inhibit reason, bool set)
@@@ -9266,6 -9303,17 +9302,17 @@@ static int emulator_fix_hypercall(struc
        char instruction[3];
        unsigned long rip = kvm_rip_read(vcpu);
  
+       /*
+        * If the quirk is disabled, synthesize a #UD and let the guest pick up
+        * the pieces.
+        */
+       if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) {
+               ctxt->exception.error_code_valid = false;
+               ctxt->exception.vector = UD_VECTOR;
+               ctxt->have_exception = true;
+               return X86EMUL_PROPAGATE_FAULT;
+       }
        static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
  
        return emulator_write_emulated(ctxt, rip, instruction, 3,
@@@ -9763,7 -9811,8 +9810,8 @@@ void kvm_vcpu_update_apicv(struct kvm_v
  
        down_read(&vcpu->kvm->arch.apicv_update_lock);
  
-       activate = kvm_apicv_activated(vcpu->kvm);
+       activate = kvm_vcpu_apicv_activated(vcpu);
        if (vcpu->arch.apicv_active == activate)
                goto out;
  
@@@ -10164,7 -10213,7 +10212,7 @@@ static int vcpu_enter_guest(struct kvm_
                 * per-VM state, and responding vCPUs must wait for the update
                 * to complete before servicing KVM_REQ_APICV_UPDATE.
                 */
-               WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
+               WARN_ON_ONCE(kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu));
  
                exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
                if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
@@@ -10362,6 -10411,9 +10410,9 @@@ static int vcpu_run(struct kvm_vcpu *vc
                        break;
  
                kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
+               if (kvm_xen_has_pending_events(vcpu))
+                       kvm_xen_inject_pending_events(vcpu);
                if (kvm_cpu_has_pending_timer(vcpu))
                        kvm_inject_pending_timer_irqs(vcpu);
  
@@@ -11247,9 -11299,10 +11298,10 @@@ int kvm_arch_vcpu_create(struct kvm_vcp
  
        vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
        vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+       kvm_xen_init_vcpu(vcpu);
        kvm_vcpu_mtrr_init(vcpu);
        vcpu_load(vcpu);
-       kvm_set_tsc_khz(vcpu, max_tsc_khz);
+       kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
        kvm_vcpu_reset(vcpu, false);
        kvm_init_mmu(vcpu);
        vcpu_put(vcpu);
@@@ -11304,6 -11357,7 +11356,7 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
  
+       kvm_xen_destroy_vcpu(vcpu);
        kvm_hv_vcpu_uninit(vcpu);
        kvm_pmu_destroy(vcpu);
        kfree(vcpu->arch.mce_banks);
@@@ -11696,6 -11750,7 +11749,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
        pvclock_update_vm_gtod_copy(kvm);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
  
+       kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
        kvm->arch.guest_can_read_msr_platform_info = true;
        kvm->arch.enable_pmu = enable_pmu;
  
@@@ -12173,6 -12228,9 +12227,9 @@@ static inline bool kvm_vcpu_has_events(
            kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
                return true;
  
+       if (kvm_xen_has_pending_events(vcpu))
+               return true;
        return false;
  }
  
@@@ -12985,19 -13043,3 +13042,19 @@@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexi
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
 +
 +static int __init kvm_x86_init(void)
 +{
 +      kvm_mmu_x86_module_init();
 +      return 0;
 +}
 +module_init(kvm_x86_init);
 +
 +static void __exit kvm_x86_exit(void)
 +{
 +      /*
 +       * If module_init() is implemented, module_exit() must also be
 +       * implemented to allow module unload.
 +       */
 +}
 +module_exit(kvm_x86_exit);
@@@ -3,7 -3,6 +3,7 @@@
  /aarch64/debug-exceptions
  /aarch64/get-reg-list
  /aarch64/psci_cpu_on_test
 +/aarch64/vcpu_width_config
  /aarch64/vgic_init
  /aarch64/vgic_irq
  /s390x/memop
  /x86_64/debug_regs
  /x86_64/evmcs_test
  /x86_64/emulator_error_test
+ /x86_64/fix_hypercall_test
  /x86_64/get_msr_index_features
  /x86_64/kvm_clock_test
  /x86_64/kvm_pv_test
  /x86_64/hyperv_clock
  /x86_64/hyperv_cpuid
  /x86_64/hyperv_features
 +/x86_64/hyperv_svm_test
  /x86_64/mmio_warning_test
  /x86_64/mmu_role_test
  /x86_64/platform_info_test
@@@ -34,7 -33,6 +35,7 @@@
  /x86_64/state_test
  /x86_64/svm_vmcall_test
  /x86_64/svm_int_ctl_test
 +/x86_64/tsc_scaling_sync
  /x86_64/sync_regs_test
  /x86_64/tsc_msrs_test
  /x86_64/userspace_io_test
@@@ -48,6 -48,7 +48,7 @@@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpu
  TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
  TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
  TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
+ TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
@@@ -65,6 -66,7 +66,7 @@@ TEST_GEN_PROGS_x86_64 += x86_64/state_t
  TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
  TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
  TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
+ TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
  TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
  TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
  TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
@@@ -106,7 -108,6 +108,7 @@@ TEST_GEN_PROGS_aarch64 += aarch64/arch_
  TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
  TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
  TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
 +TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
  TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
  TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
  TEST_GEN_PROGS_aarch64 += demand_paging_test
@@@ -154,7 -155,7 +156,7 @@@ endi
  CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
 -      -I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS)
 +      -I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
  
  no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
          $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)