KVM: VMX: Configure list of user return MSRs at module init
authorSean Christopherson <seanjc@google.com>
Tue, 4 May 2021 17:17:27 +0000 (10:17 -0700)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 7 May 2021 10:06:17 +0000 (06:06 -0400)
Configure the list of user return MSRs that are actually supported at
module init instead of reprobing the list of possible MSRs every time a
vCPU is created.  Curating the list on a per-vCPU basis is pointless; KVM
is completely hosed if the set of supported MSRs changes after module init,
or if the set of MSRs differs per physical PCU.

The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and
creates corner cases that _should_ be impossible, but theoretically exist
in KVM, e.g. advertising RDTSCP to userspace without actually being able to
virtualize RDTSCP if probing MSR_TSC_AUX fails.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h

index b304e37..887db1a 100644 (file)
@@ -461,7 +461,7 @@ static unsigned long host_idt_base;
  * support this emulation, IA32_STAR must always be included in
  * vmx_uret_msrs_list[], even in i386 builds.
  */
-static const u32 vmx_uret_msrs_list[] = {
+static u32 vmx_uret_msrs_list[] = {
 #ifdef CONFIG_X86_64
        MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
@@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = {
        MSR_IA32_TSX_CTRL,
 };
 
+/*
+ * Number of user return MSRs that are actually supported in hardware.
+ * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
+ */
+static int vmx_nr_uret_msrs;
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
 
-       for (i = 0; i < vmx->nr_uret_msrs; ++i)
+       /*
+        * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
+        * but is ordered differently.  The MSR is matched against the list of
+        * supported uret MSRs using "slot", but the index that is returned is
+        * the index into guest_uret_msrs.
+        */
+       for (i = 0; i < vmx_nr_uret_msrs; ++i) {
                if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
                        return i;
+       }
        return -1;
 }
 
@@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
                        goto free_vpid;
        }
 
-       BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
-       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-               u32 index = vmx_uret_msrs_list[i];
-               int j = vmx->nr_uret_msrs;
+       for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+               vmx->guest_uret_msrs[i].data = 0;
 
-               if (kvm_probe_user_return_msr(index))
-                       continue;
-
-               vmx->guest_uret_msrs[j].slot = i;
-               vmx->guest_uret_msrs[j].data = 0;
-               switch (index) {
+               switch (vmx_uret_msrs_list[i]) {
                case MSR_IA32_TSX_CTRL:
                        /*
                         * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
@@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
                         * host so that TSX remains always disabled.
                         */
                        if (boot_cpu_has(X86_FEATURE_RTM))
-                               vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+                               vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
                        else
-                               vmx->guest_uret_msrs[j].mask = 0;
+                               vmx->guest_uret_msrs[i].mask = 0;
                        break;
                default:
-                       vmx->guest_uret_msrs[j].mask = -1ull;
+                       vmx->guest_uret_msrs[i].mask = -1ull;
                        break;
                }
-               ++vmx->nr_uret_msrs;
        }
 
        err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
+static __init void vmx_setup_user_return_msrs(void)
+{
+       u32 msr;
+       int i;
+
+       BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
+               msr = vmx_uret_msrs_list[i];
+
+               if (kvm_probe_user_return_msr(msr))
+                       continue;
+
+               kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
+               vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
+       }
+}
+
 static __init int hardware_setup(void)
 {
        unsigned long host_bndcfgs;
        struct desc_ptr dt;
-       int r, i, ept_lpage_level;
+       int r, ept_lpage_level;
 
        store_idt(&dt);
        host_idt_base = dt.address;
 
-       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
-               kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+       vmx_setup_user_return_msrs();
 
        if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
                return -EIO;
index 008cb87..d71ed8b 100644 (file)
@@ -245,8 +245,16 @@ struct vcpu_vmx {
        u32                   idt_vectoring_info;
        ulong                 rflags;
 
+       /*
+        * User return MSRs are always emulated when enabled in the guest, but
+        * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+        * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+        * be loaded into hardware if those conditions aren't met.
+        * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
+        * into hardware when running the guest.  guest_uret_msrs[] is resorted
+        * whenever the number of "active" uret MSRs is modified.
+        */
        struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-       int                   nr_uret_msrs;
        int                   nr_active_uret_msrs;
        bool                  guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64