Merge tag 'kvm-x86-vmx-6.5' of https://github.com/kvm-x86/linux into HEAD

author Paolo Bonzini <pbonzini@redhat.com>

Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)

committer Paolo Bonzini <pbonzini@redhat.com>

Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
author Paolo Bonzini <pbonzini@redhat.com>
Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index 03ff06c..ec169f5 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1603,6 +1603,10 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
         if (tdp_mmu_enabled)
                 flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
  
+       if (kvm_x86_ops.set_apic_access_page_addr &&
+           range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT)
+               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
         return flush;
  }
  
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h

index 45162c1..d0abee3 100644 (file)
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -152,8 +152,8 @@ static inline bool cpu_has_vmx_ept(void)
  
  static inline bool vmx_umip_emulated(void)
  {
-       return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_DESC;
+       return !boot_cpu_has(X86_FEATURE_UMIP) &&
+              (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_DESC);
  }
  
  static inline bool cpu_has_vmx_rdtscp(void)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c

index ba2ed6d..516391c 100644 (file)
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2328,8 +2328,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
                  * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
                  * will not have to rewrite the controls just for this bit.
                  */
-               if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
-                   (vmcs12->guest_cr4 & X86_CR4_UMIP))
+               if (vmx_umip_emulated() && (vmcs12->guest_cr4 & X86_CR4_UMIP))
                         exec_control |= SECONDARY_EXEC_DESC;
  
                 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c

index 30ec9cc..80c769c 100644 (file)
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -385,8 +385,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 }
                 break;
         case MSR_IA32_DS_AREA:
-               if (msr_info->host_initiated && data && !guest_cpuid_has(vcpu, X86_FEATURE_DS))
-                       return 1;
                 if (is_noncanonical_address(data, vcpu))
                         return 1;
  
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c

index 2261b68..3e822e5 100644 (file)
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -357,11 +357,12 @@ static int handle_encls_einit(struct kvm_vcpu *vcpu)
  
  static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
  {
-       if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
-               return false;
-
+       /*
+        * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
+        * be reached if and only if the SGX1 leafs are enabled.
+        */
         if (leaf >= ECREATE && leaf <= ETRACK)
-               return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
+               return true;
  
         if (leaf >= EAUG && leaf <= EMODT)
                 return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
@@ -380,9 +381,11 @@ int handle_encls(struct kvm_vcpu *vcpu)
  {
         u32 leaf = (u32)kvm_rax_read(vcpu);
  
-       if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
+       if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
+           !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
                 kvm_queue_exception(vcpu, UD_VECTOR);
-       } else if (!sgx_enabled_in_guest_bios(vcpu)) {
+       } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
+                  !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
                 kvm_inject_gp(vcpu, 0);
         } else {
                 if (leaf == ECREATE)
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S

index 631fd7d..07e927d 100644 (file)
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -187,7 +187,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
         _ASM_EXTABLE(.Lvmresume, .Lfixup)
         _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
  
-SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
  
         /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
         UNWIND_HINT_RESTORE
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index 2d9d155..0ecf4be 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3384,15 +3384,15 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  
  void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
-       unsigned long old_cr4 = vcpu->arch.cr4;
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
         struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long hw_cr4;
+
         /*
          * Pass through host's Machine Check Enable value to hw_cr4, which
          * is in force while we are in guest mode.  Do not let guests control
          * this bit, even if host CR4.MCE == 0.
          */
-       unsigned long hw_cr4;
-
         hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
         if (is_unrestricted_guest(vcpu))
                 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
@@ -3401,7 +3401,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         else
                 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
  
-       if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+       if (vmx_umip_emulated()) {
                 if (cr4 & X86_CR4_UMIP) {
                         secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
                         hw_cr4 &= ~X86_CR4_UMIP;
@@ -5399,7 +5399,13 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
  
  static int handle_desc(struct kvm_vcpu *vcpu)
  {
-       WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+       /*
+        * UMIP emulation relies on intercepting writes to CR4.UMIP, i.e. this
+        * and other code needs to be updated if UMIP can be guest owned.
+        */
+       BUILD_BUG_ON(KVM_POSSIBLE_CR4_GUEST_BITS & X86_CR4_UMIP);
+
+       WARN_ON_ONCE(!kvm_is_cr4_bit_set(vcpu, X86_CR4_UMIP));
         return kvm_emulate_instruction(vcpu, 0);
  }
  
@@ -6705,7 +6711,12 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
  
  static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
  {
-       struct page *page;
+       const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT;
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_memslots *slots = kvm_memslots(kvm);
+       struct kvm_memory_slot *slot;
+       unsigned long mmu_seq;
+       kvm_pfn_t pfn;
  
         /* Defer reload until vmcs01 is the current VMCS. */
         if (is_guest_mode(vcpu)) {
@@ -6717,18 +6728,53 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
             SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
                 return;
  
-       page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-       if (is_error_page(page))
+       /*
+        * Grab the memslot so that the hva lookup for the mmu_notifier retry
+        * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
+        * on the pfn lookup's validation of the memslot to ensure a valid hva
+        * is used for the retry check.
+        */
+       slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
+       if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                 return;
  
-       vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+       /*
+        * Ensure that the mmu_notifier sequence count is read before KVM
+        * retrieves the pfn from the primary MMU.  Note, the memslot is
+        * protected by SRCU, not the mmu_notifier.  Pairs with the smp_wmb()
+        * in kvm_mmu_invalidate_end().
+        */
+       mmu_seq = kvm->mmu_invalidate_seq;
+       smp_rmb();
+
+       /*
+        * No need to retry if the memslot does not exist or is invalid.  KVM
+        * controls the APIC-access page memslot, and only deletes the memslot
+        * if APICv is permanently inhibited, i.e. the memslot won't reappear.
+        */
+       pfn = gfn_to_pfn_memslot(slot, gfn);
+       if (is_error_noslot_pfn(pfn))
+               return;
+
+       read_lock(&vcpu->kvm->mmu_lock);
+       if (mmu_invalidate_retry_hva(kvm, mmu_seq,
+                                    gfn_to_hva_memslot(slot, gfn))) {
+               kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+               read_unlock(&vcpu->kvm->mmu_lock);
+               goto out;
+       }
+
+       vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
+       read_unlock(&vcpu->kvm->mmu_lock);
+
         vmx_flush_tlb_current(vcpu);
  
+out:
         /*
          * Do not pin apic access page in memory, the MMU notifier
          * will call us again if it is migrated or swapped out.
          */
-       put_page(page);
+       kvm_release_pfn_clean(pfn);
  }
  
  static void vmx_hwapic_isr_update(int max_isr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 7d6e044..8bca4d2 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10449,20 +10449,6 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
                 vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
  }
  
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                           unsigned long start, unsigned long end)
-{
-       unsigned long apic_address;
-
-       /*
-        * The physical address of apic access page is stored in the VMCS.
-        * Update it when it becomes invalid.
-        */
-       apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-       if (start <= apic_address && apic_address < end)
-               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
-}
-
  void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
  {
         static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index 84ba21c..9d3ac77 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2239,9 +2239,6 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
  }
  #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
  
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                           unsigned long start, unsigned long end);
-
  void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
  
  #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index ab8c8eb..b838c8f 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -154,11 +154,6 @@ static unsigned long long kvm_active_vms;
  
  static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
  
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                                  unsigned long start, unsigned long end)
-{
-}
-
  __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
  {
  }
@@ -521,18 +516,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
         return container_of(mn, struct kvm, mmu_notifier);
  }
  
-static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
-                                             struct mm_struct *mm,
-                                             unsigned long start, unsigned long end)
-{
-       struct kvm *kvm = mmu_notifier_to_kvm(mn);
-       int idx;
-
-       idx = srcu_read_lock(&kvm->srcu);
-       kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
-       srcu_read_unlock(&kvm->srcu, idx);
-}
-
  typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
  
  typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
@@ -910,7 +893,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
  }
  
  static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
-       .invalidate_range       = kvm_mmu_notifier_invalidate_range,
         .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
         .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
         .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
author	Paolo Bonzini <pbonzini@redhat.com>
	Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
arch/x86/kvm/mmu/mmu.c		patch | blob | history
arch/x86/kvm/vmx/capabilities.h		patch | blob | history
arch/x86/kvm/vmx/nested.c		patch | blob | history
arch/x86/kvm/vmx/pmu_intel.c		patch | blob | history
arch/x86/kvm/vmx/sgx.c		patch | blob | history
arch/x86/kvm/vmx/vmenter.S		patch | blob | history
arch/x86/kvm/vmx/vmx.c		patch | blob | history
arch/x86/kvm/x86.c		patch | blob | history
include/linux/kvm_host.h		patch | blob | history
virt/kvm/kvm_main.c		patch | blob | history