Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 14 Mar 2021 19:35:02 +0000 (12:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 14 Mar 2021 19:35:02 +0000 (12:35 -0700)
Pull KVM fixes from Paolo Bonzini:
 "More fixes for ARM and x86"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: LAPIC: Advancing the timer expiration on guest initiated write
  KVM: x86/mmu: Skip !MMU-present SPTEs when removing SP in exclusive mode
  KVM: kvmclock: Fix vCPUs > 64 can't be online/hotplugged
  kvm: x86: annotate RCU pointers
  KVM: arm64: Fix exclusive limit for IPA size
  KVM: arm64: Reject VM creation when the default IPA size is unsupported
  KVM: arm64: Ensure I-cache isolation between vcpus of a same VM
  KVM: arm64: Don't use cbz/adr with external symbols
  KVM: arm64: Fix range alignment when walking page tables
  KVM: arm64: Workaround firmware wrongly advertising GICv2-on-v3 compatibility
  KVM: arm64: Rename __vgic_v3_get_ich_vtr_el2() to __vgic_v3_get_gic_config()
  KVM: arm64: Don't access PMSELR_EL0/PMUSERENR_EL0 when no PMU is available
  KVM: arm64: Turn kvm_arm_support_pmu_v3() into a static key
  KVM: arm64: Fix nVHE hyp panic host context restore
  KVM: arm64: Avoid corrupting vCPU context register in guest exit
  KVM: arm64: nvhe: Save the SPE context early
  kvm: x86: use NULL instead of using plain integer as pointer
  KVM: SVM: Connect 'npt' module param to KVM's internal 'npt_enabled'
  KVM: x86: Ensure deadline timer has truly expired before posting its IRQ

27 files changed:
Documentation/virt/kvm/api.rst
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/kernel/image-vars.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/entry.S
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/nvhe/debug-sr.c
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vgic-v3-sr.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/perf.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/kvmclock.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/x86.c
include/kvm/arm_pmu.h

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1a2b521..38e327d 100644
@@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can
 be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
 ioctl() at run-time.
 
+Creation of the VM will fail if the requested IPA size (whether it is
+implicit or explicit) is unsupported on the host.
+
 Please note that configuring the IPA size does not affect the capability
 exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
 size of the address translated by the stage2 level (guest physical to
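To make the documented flow concrete, here is a minimal, hypothetical userspace sketch (not taken from this series; the 40-bit request and the lack of real error handling are illustrative assumptions). It queries KVM_CAP_ARM_VM_IPA_SIZE with KVM_CHECK_EXTENSION and passes an explicit IPA size to KVM_CREATE_VM via KVM_VM_TYPE_ARM_IPA_SIZE(); with the change above, an unsupported implicit or explicit size makes VM creation fail up front instead of proceeding with a size the host cannot provide.

/*
 * Hypothetical VMM-side sketch: query the host's IPA size limit and
 * request an explicit IPA size at VM creation.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm < 0)
		return 1;

	/* Maximum guest IPA size supported by this host, in bits (0 if the cap is absent). */
	int ipa_max = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
	printf("host IPA size limit: %d bits\n", ipa_max);

	/* Ask for a 40-bit IPA space, clamped to the host limit when it is known. */
	int ipa_bits = 40;
	unsigned long type = 0;
	if (ipa_max > 0)
		type = KVM_VM_TYPE_ARM_IPA_SIZE(ipa_bits < ipa_max ? ipa_bits : ipa_max);

	int vm = ioctl(kvm, KVM_CREATE_VM, type);
	if (vm < 0)
		perror("KVM_CREATE_VM");	/* e.g. unsupported IPA size */

	return vm < 0 ? 1 : 0;
}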
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 22d933e..a7ab84f 100644
 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context           2
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa         3
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid             4
-#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid       5
+#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context          5
 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff          6
 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs                        7
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2                8
+#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config         8
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr              9
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr             10
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs               11
@@ -183,16 +183,16 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
 #define __bp_harden_hyp_vecs   CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
 
 extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
                                     int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
-extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern u64 __vgic_v3_get_gic_config(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);
 extern void __vgic_v3_init_lrs(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c045082..32ae676 100644
@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
@@ -97,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
 
 void __noreturn hyp_panic(void);
 #ifdef __KVM_NVHE_HYPERVISOR__
-void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
+void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+                              u64 elr, u64 par);
 #endif
 
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 23f1a55..5aa9ed1 100644
@@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table);
 /* Array containing bases of nVHE per-CPU memory regions. */
 KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
 
+/* PMU available static key */
+KVM_NVHE_ALIAS(kvm_arm_pmu_available);
+
 #endif /* CONFIG_KVM */
 
 #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fc4c95d..7f06ba7 100644
@@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
        /*
+        * We guarantee that both TLBs and I-cache are private to each
+        * vcpu. If detecting that a vcpu from the same VM has
+        * previously run on the same physical CPU, call into the
+        * hypervisor code to nuke the relevant contexts.
+        *
         * We might get preempted before the vCPU actually runs, but
         * over-invalidation doesn't affect correctness.
         */
        if (*last_ran != vcpu->vcpu_id) {
-               kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
+               kvm_call_hyp(__kvm_flush_cpu_context, mmu);
                *last_ran = vcpu->vcpu_id;
        }
 
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index b0afad7..e831d3d 100644
@@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
 
        // If the hyp context is loaded, go straight to hyp_panic
        get_loaded_vcpu x0, x1
-       cbz     x0, hyp_panic
+       cbnz    x0, 1f
+       b       hyp_panic
 
+1:
        // The hyp context is saved so make sure it is restored to allow
        // hyp_panic to run at hyp and, subsequently, panic to run in the host.
        // This makes use of __guest_exit to avoid duplication but sets the
@@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
        // current state is saved to the guest context but it will only be
        // accurate if the guest had been completely restored.
        adr_this_cpu x0, kvm_hyp_ctxt, x1
-       adr     x1, hyp_panic
+       adr_l   x1, hyp_panic
        str     x1, [x0, #CPU_XREG_OFFSET(30)]
 
        get_vcpu_ptr    x1, x0
@@ -146,7 +148,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
        // Now restore the hyp regs
        restore_callee_saved_regs x2
 
-       set_loaded_vcpu xzr, x1, x2
+       set_loaded_vcpu xzr, x2, x3
 
 alternative_if ARM64_HAS_RAS_EXTN
        // If we have the RAS extensions we can consume a pending error
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 54f4860..6c1f51f 100644
@@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
         * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
         * EL1 instead of being trapped to EL2.
         */
-       write_sysreg(0, pmselr_el0);
-       write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+       if (kvm_arm_support_pmu_v3()) {
+               write_sysreg(0, pmselr_el0);
+               write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+       }
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
 static inline void __deactivate_traps_common(void)
 {
        write_sysreg(0, hstr_el2);
-       write_sysreg(0, pmuserenr_el0);
+       if (kvm_arm_support_pmu_v3())
+               write_sysreg(0, pmuserenr_el0);
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 91a711a..f401724 100644
@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1)
        write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
        /* Disable and flush SPE data generation */
        __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
        __debug_switch_to_guest_common(vcpu);
 }
 
-void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
        __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
        __debug_switch_to_host_common(vcpu);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 6585a7c..5d94584 100644
@@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter)
 SYM_FUNC_END(__host_enter)
 
 /*
- * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
+ * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+ *                               u64 elr, u64 par);
  */
 SYM_FUNC_START(__hyp_do_panic)
        /* Prepare and exit to the host's panic funciton. */
@@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic)
        hyp_kimg_va lr, x6
        msr     elr_el2, lr
 
-       /* Set the panic format string. Use the, now free, LR as scratch. */
-       ldr     lr, =__hyp_panic_string
-       hyp_kimg_va lr, x6
+       mov     x29, x0
+
+       /* Load the format string into x0 and arguments into x1-7 */
+       ldr     x0, =__hyp_panic_string
+       hyp_kimg_va x0, x6
 
        /* Load the format arguments into x1-7. */
        mov     x6, x3
@@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic)
        mrs     x5, hpfar_el2
 
        /* Enter the host, conditionally restoring the host context. */
-       cmp     x0, xzr
-       mov     x0, lr
-       b.eq    __host_enter_without_restoring
+       cbz     x29, __host_enter_without_restoring
        b       __host_enter_for_panic
 SYM_FUNC_END(__hyp_do_panic)
 
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f012f86..9363282 100644
@@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
        __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
 }
 
-static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt)
+static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
 
-       __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
+       __kvm_flush_cpu_context(kern_hyp_va(mmu));
 }
 
 static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
@@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
        write_sysreg_el2(tmp, SYS_SCTLR);
 }
 
-static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
 {
-       cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2();
+       cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
 }
 
 static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
@@ -115,10 +115,10 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_flush_vm_context),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
-       HANDLE_FUNC(__kvm_tlb_flush_local_vmid),
+       HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
        HANDLE_FUNC(__kvm_enable_ssbs),
-       HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
+       HANDLE_FUNC(__vgic_v3_get_gic_config),
        HANDLE_FUNC(__vgic_v3_read_vmcr),
        HANDLE_FUNC(__vgic_v3_write_vmcr),
        HANDLE_FUNC(__vgic_v3_init_lrs),
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f3d0e9e..68ab6b4 100644
@@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
        pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
 
        __sysreg_save_state_nvhe(host_ctxt);
+       /*
+        * We must flush and disable the SPE buffer for nVHE, as
+        * the translation regime(EL1&0) is going to be loaded with
+        * that of the guest. And we must do this before we change the
+        * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
+        * before we load guest Stage1.
+        */
+       __debug_save_host_buffers_nvhe(vcpu);
 
        __adjust_pc(vcpu);
 
@@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
        if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
 
+       __debug_switch_to_host(vcpu);
        /*
         * This must come after restoring the host sysregs, since a non-VHE
         * system may enable SPE here and make use of the TTBRs.
         */
-       __debug_switch_to_host(vcpu);
+       __debug_restore_host_buffers_nvhe(vcpu);
 
        if (pmu_switch_needed)
                __pmu_switch_to_host(host_ctxt);
@@ -257,7 +266,6 @@ void __noreturn hyp_panic(void)
        u64 spsr = read_sysreg_el2(SYS_SPSR);
        u64 elr = read_sysreg_el2(SYS_ELR);
        u64 par = read_sysreg_par();
-       bool restore_host = true;
        struct kvm_cpu_context *host_ctxt;
        struct kvm_vcpu *vcpu;
 
@@ -271,7 +279,7 @@ void __noreturn hyp_panic(void)
                __sysreg_restore_state_nvhe(host_ctxt);
        }
 
-       __hyp_do_panic(restore_host, spsr, elr, par);
+       __hyp_do_panic(host_ctxt, spsr, elr, par);
        unreachable();
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index fbde89a..229b067 100644
@@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_host(&cxt);
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
 
@@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_guest(mmu, &cxt);
 
        __tlbi(vmalle1);
+       asm volatile("ic iallu");
        dsb(nsh);
        isb();
 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 4d177ce..926fc07 100644
@@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
                goto out;
 
        if (!table) {
+               data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
                data->addr += kvm_granule_size(level);
                goto out;
        }
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 80406f4..ee3682b 100644
@@ -405,9 +405,45 @@ void __vgic_v3_init_lrs(void)
                __gic_v3_set_lr(0, i);
 }
 
-u64 __vgic_v3_get_ich_vtr_el2(void)
+/*
+ * Return the GIC CPU configuration:
+ * - [31:0]  ICH_VTR_EL2
+ * - [62:32] RES0
+ * - [63]    MMIO (GICv2) capable
+ */
+u64 __vgic_v3_get_gic_config(void)
 {
-       return read_gicreg(ICH_VTR_EL2);
+       u64 val, sre = read_gicreg(ICC_SRE_EL1);
+       unsigned long flags = 0;
+
+       /*
+        * To check whether we have a MMIO-based (GICv2 compatible)
+        * CPU interface, we need to disable the system register
+        * view. To do that safely, we have to prevent any interrupt
+        * from firing (which would be deadly).
+        *
+        * Note that this only makes sense on VHE, as interrupts are
+        * already masked for nVHE as part of the exception entry to
+        * EL2.
+        */
+       if (has_vhe())
+               flags = local_daif_save();
+
+       write_gicreg(0, ICC_SRE_EL1);
+       isb();
+
+       val = read_gicreg(ICC_SRE_EL1);
+
+       write_gicreg(sre, ICC_SRE_EL1);
+       isb();
+
+       if (has_vhe())
+               local_daif_restore(flags);
+
+       val  = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
+       val |= read_gicreg(ICH_VTR_EL2);
+
+       return val;
 }
 
 u64 __vgic_v3_read_vmcr(void)
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index fd78959..66f1734 100644
@@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_host(&cxt);
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
 
@@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_guest(mmu, &cxt);
 
        __tlbi(vmalle1);
+       asm volatile("ic iallu");
        dsb(nsh);
        isb();
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 77cb2d2..8711894 100644
@@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         * Prevent userspace from creating a memory region outside of the IPA
         * space addressable by the KVM guest IPA space.
         */
-       if (memslot->base_gfn + memslot->npages >=
-           (kvm_phys_size(kvm) >> PAGE_SHIFT))
+       if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
                return -EFAULT;
 
        mmap_read_lock(current->mm);
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index d45b8b9..7391643 100644
@@ -11,6 +11,8 @@
 
 #include <asm/kvm_emulate.h>
 
+DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
 static int kvm_is_in_guest(void)
 {
         return kvm_get_running_vcpu() != NULL;
@@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 int kvm_perf_init(void)
 {
+       /*
+        * Check if HW_PERF_EVENTS are supported by checking the number of
+        * hardware performance counters. This could ensure the presence of
+        * a physical PMU and CONFIG_PERF_EVENT is selected.
+        */
+       if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0)
+               static_branch_enable(&kvm_arm_pmu_available);
+
        return perf_register_guest_info_callbacks(&kvm_guest_cbs);
 }
 
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index e9ec08b..e32c6e1 100644
@@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        return val & mask;
 }
 
-bool kvm_arm_support_pmu_v3(void)
-{
-       /*
-        * Check if HW_PERF_EVENTS are supported by checking the number of
-        * hardware performance counters. This could ensure the presence of
-        * a physical PMU and CONFIG_PERF_EVENT is selected.
-        */
-       return (perf_num_counters() > 0);
-}
-
 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 {
        if (!kvm_vcpu_has_pmu(vcpu))
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e81c7ec..bd354cd 100644
@@ -326,10 +326,9 @@ int kvm_set_ipa_limit(void)
        }
 
        kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
-       WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
-            "KVM IPA Size Limit (%d bits) is smaller than default size\n",
-            kvm_ipa_limit);
-       kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
+       kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
+                ((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
+                 " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
 
        return 0;
 }
@@ -358,6 +357,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
                        return -EINVAL;
        } else {
                phys_shift = KVM_PHYS_SHIFT;
+               if (phys_shift > kvm_ipa_limit) {
+                       pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
+                                    current->comm);
+                       return -EINVAL;
+               }
        }
 
        mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 52915b3..6f53092 100644
@@ -574,9 +574,13 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
  */
 int vgic_v3_probe(const struct gic_kvm_info *info)
 {
-       u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2);
+       u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
+       bool has_v2;
        int ret;
 
+       has_v2 = ich_vtr_el2 >> 63;
+       ich_vtr_el2 = (u32)ich_vtr_el2;
+
        /*
         * The ListRegs field is 5 bits, but there is an architectural
         * maximum of 16 list registers. Just ignore bit 4...
@@ -594,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
                         gicv4_enable ? "en" : "dis");
        }
 
+       kvm_vgic_global_state.vcpu_base = 0;
+
        if (!info->vcpu.start) {
                kvm_info("GICv3: no GICV resource entry\n");
-               kvm_vgic_global_state.vcpu_base = 0;
+       } else if (!has_v2) {
+               pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
        } else if (!PAGE_ALIGNED(info->vcpu.start)) {
                pr_warn("GICV physical address 0x%llx not page aligned\n",
                        (unsigned long long)info->vcpu.start);
-               kvm_vgic_global_state.vcpu_base = 0;
        } else {
                kvm_vgic_global_state.vcpu_base = info->vcpu.start;
                kvm_vgic_global_state.can_emulate_gicv2 = true;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 877a402..9bc091e 100644
@@ -963,7 +963,7 @@ struct kvm_arch {
        struct kvm_pit *vpit;
        atomic_t vapics_in_nmi_mode;
        struct mutex apic_map_lock;
-       struct kvm_apic_map *apic_map;
+       struct kvm_apic_map __rcu *apic_map;
        atomic_t apic_map_dirty;
 
        bool apic_access_page_done;
@@ -1036,7 +1036,7 @@ struct kvm_arch {
 
        bool bus_lock_detection_enabled;
 
-       struct kvm_pmu_event_filter *pmu_event_filter;
+       struct kvm_pmu_event_filter __rcu *pmu_event_filter;
        struct task_struct *nx_lpage_recovery_thread;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa59374..1fc0962 100644
@@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void)
 
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
-#ifdef CONFIG_X86_64
-       u8 flags;
+       kvmclock_init_mem();
 
-       if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
-               return 0;
+#ifdef CONFIG_X86_64
+       if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
+               u8 flags;
 
-       flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
-       if (!(flags & PVCLOCK_TSC_STABLE_BIT))
-               return 0;
+               flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
+               if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+                       return 0;
 
-       kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+               kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+       }
 #endif
 
-       kvmclock_init_mem();
-
        return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 45d40bf..cc369b9 100644
@@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
        }
 
        if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
-               kvm_wait_lapic_expire(vcpu);
+               /*
+                * Ensure the guest's timer has truly expired before posting an
+                * interrupt.  Open code the relevant checks to avoid querying
+                * lapic_timer_int_injected(), which will be false since the
+                * interrupt isn't yet injected.  Waiting until after injecting
+                * is not an option since that won't help a posted interrupt.
+                */
+               if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+                   vcpu->arch.apic->lapic_timer.timer_advance_ns)
+                       __kvm_wait_lapic_expire(vcpu);
                kvm_apic_inject_pending_timer_irqs(apic);
                return;
        }
@@ -2595,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 
        apic_update_ppr(apic);
        hrtimer_cancel(&apic->lapic_timer.timer);
+       apic->lapic_timer.expired_tscdeadline = 0;
        apic_update_lvtt(apic);
        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
        update_divide_count(apic);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c926c6b..d789150 100644
@@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
                                cpu_relax();
                        }
                } else {
+                       /*
+                        * If the SPTE is not MMU-present, there is no backing
+                        * page associated with the SPTE and so no side effects
+                        * that need to be recorded, and exclusive ownership of
+                        * mmu_lock ensures the SPTE can't be made present.
+                        * Note, zapping MMIO SPTEs is also unnecessary as they
+                        * are guarded by the memslots generation, not by being
+                        * unreachable.
+                        */
                        old_child_spte = READ_ONCE(*sptep);
+                       if (!is_shadow_present_pte(old_child_spte))
+                               continue;
 
                        /*
                         * Marking the SPTE as a removed SPTE is not
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index baee91c..58a45bb 100644
@@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs {
        { .index = MSR_INVALID,                         .always = false },
 };
 
-/* enable NPT for AMD64 and X86 with PAE */
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-bool npt_enabled = true;
-#else
-bool npt_enabled;
-#endif
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * pause_filter_count: On processors that support Pause filtering(indicated
@@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444);
 static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
 module_param(pause_filter_count_max, ushort, 0444);
 
-/* allow nested paging (virtualized MMU) for all guests */
-static int npt = true;
-module_param(npt, int, S_IRUGO);
+/*
+ * Use nested page tables by default.  Note, NPT may get forced off by
+ * svm_hardware_setup() if it's unsupported by hardware or the host kernel.
+ */
+bool npt_enabled = true;
+module_param_named(npt, npt_enabled, bool, 0444);
 
 /* allow nested virtualization in KVM/SVM */
 static int nested = true;
@@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void)
                        goto err;
        }
 
-       if (!boot_cpu_has(X86_FEATURE_NPT))
+       /*
+        * KVM's MMU doesn't support using 2-level paging for itself, and thus
+        * NPT isn't supported if the host is using 2-level paging since host
+        * CR4 is unchanged on VMRUN.
+        */
+       if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
                npt_enabled = false;
 
-       if (npt_enabled && !npt)
+       if (!boot_cpu_has(X86_FEATURE_NPT))
                npt_enabled = false;
 
        kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2a20ce6..47e021b 100644
@@ -10601,7 +10601,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                        return (void __user *)hva;
        } else {
                if (!slot || !slot->npages)
-                       return 0;
+                       return NULL;
 
                old_npages = slot->npages;
                hva = slot->userspace_addr;
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 8dcb3e1..6fd3cda 100644
 #define ARMV8_PMU_CYCLE_IDX            (ARMV8_PMU_MAX_COUNTERS - 1)
 #define ARMV8_PMU_MAX_COUNTER_PAIRS    ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
 
+DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
+static __always_inline bool kvm_arm_support_pmu_v3(void)
+{
+       return static_branch_likely(&kvm_arm_pmu_available);
+}
+
 #ifdef CONFIG_HW_PERF_EVENTS
 
 struct kvm_pmc {
@@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
                                    u64 select_idx);
-bool kvm_arm_support_pmu_v3(void);
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
                            struct kvm_device_attr *attr);
 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
@@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
                                                  u64 data, u64 select_idx) {}
-static inline bool kvm_arm_support_pmu_v3(void) { return false; }
 static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
                                          struct kvm_device_attr *attr)
 {