kvm: x86: only provide PV features if enabled in guest's CPUID
author Oliver Upton <oupton@google.com>
Tue, 18 Aug 2020 15:24:28 +0000 (15:24 +0000)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 21 Oct 2020 21:36:32 +0000 (17:36 -0400)
KVM unconditionally provides PV features to the guest, regardless of the
configured CPUID. An unwitting guest that doesn't check
KVM_CPUID_FEATURES before use could access paravirt features that
userspace did not intend to provide. Fix this by checking the guest's
CPUID before performing any paravirtual operations.

Introduce a capability, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, to gate the
aforementioned enforcement. Migrating a VM from a host without this
patch to a host with this patch could silently change the ABI exposed
to the guest, so the old behavior remains the default and the new
enforcement is an explicit opt-in.

Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Change-Id: I202a0926f65035b872bfe8ad15307c026de59a98
Message-Id: <20200818152429.1923996-4-oupton@google.com>
Reviewed-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/x86.c
include/uapi/linux/kvm.h

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9ece9a8..7631722 100644
@@ -6380,3 +6380,14 @@ ranges that KVM should reject access to.
 In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
+
+
+8.26 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
+-------------------------------------
+
+Architectures: x86
+
+When enabled, KVM disables paravirtual features that are not exposed
+to the guest through the KVM_CPUID_FEATURES CPUID leaf (0x40000001).
+Otherwise, a guest may use paravirtual features regardless of what
+has actually been exposed through the CPUID leaf.
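For illustration only (not part of this patch): userspace turns the
enforcement on per vCPU with the standard KVM_ENABLE_CAP ioctl, passing
a non-zero args[0]. A minimal sketch, with error handling elided and
the function name made up for the example:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: enable PV feature enforcement on an existing vCPU fd. */
static int enable_pv_cpuid_enforcement(int vcpu_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID,
		.args = { 1 },	/* non-zero enables enforcement */
	};

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}

A VMM would typically set this before the guest boots; enabling it
under a running guest retroactively hides any PV features the guest's
CPUID does not advertise.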
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d0f7723..15e5134 100644
@@ -789,6 +789,21 @@ struct kvm_vcpu_arch {
 
        /* AMD MSRC001_0015 Hardware Configuration */
        u64 msr_hwcr;
+
+       /* pv related cpuid info */
+       struct {
+               /*
+                * value of the eax register in the KVM_CPUID_FEATURES CPUID
+                * leaf.
+                */
+               u32 features;
+
+               /*
+                * indicates whether pv emulation should be disabled if features
+                * are not present in the guest's cpuid
+                */
+               bool enforce;
+       } pv_cpuid;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 37c3668..d253c02 100644
@@ -107,6 +107,13 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
                (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
                best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
 
+       /*
+        * save the feature bitmap to avoid cpuid lookup for every PV
+        * operation
+        */
+       if (best)
+               vcpu->arch.pv_cpuid.features = best->eax;
+
        if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
                best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
                if (best)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 1d2c4f2..bf85779 100644
@@ -5,6 +5,7 @@
 #include "x86.h"
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
 
 extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
 void kvm_set_cpu_caps(void);
@@ -313,4 +314,13 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
        return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
 }
 
+static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
+                                        unsigned int kvm_feature)
+{
+       if (!vcpu->arch.pv_cpuid.enforce)
+               return true;
+
+       return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
+}
+
 #endif
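Design note: guest_pv_has() collapses to "always true" unless userspace
opted in, which preserves the old ABI for existing VMs; with enforcement
on, each PV path costs a single bit test against the cached eax of leaf
0x40000001 rather than a cpuid-entry lookup. An illustrative userspace
model of the same semantics (all names below are invented for the
sketch, not kernel code):

#include <stdbool.h>
#include <stdint.h>

/* Toy model of the enforcement semantics. */
struct pv_cpuid_model {
	uint32_t features;	/* cached CPUID.0x40000001:EAX */
	bool enforce;		/* set via KVM_CAP_ENFORCE_PV_FEATURE_CPUID */
};

static bool model_pv_has(const struct pv_cpuid_model *pv, unsigned int bit)
{
	if (!pv->enforce)
		return true;	/* legacy behavior: everything permitted */
	return pv->features & (1u << bit);
}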
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b928e09..ca940de 100644
@@ -2877,6 +2877,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
        if (data & 0x30)
                return 1;
 
+       if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
+           (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
+               return 1;
+
+       if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
+           (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
+               return 1;
+
        if (!lapic_in_kernel(vcpu))
                return data ? 1 : 0;
 
@@ -2954,10 +2962,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
         * Doing a TLB flush here, on the guest's behalf, can avoid
         * expensive IPIs.
         */
-       trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
-               st->preempted & KVM_VCPU_FLUSH_TLB);
-       if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-               kvm_vcpu_flush_tlb_guest(vcpu);
+       if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+               trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+                                      st->preempted & KVM_VCPU_FLUSH_TLB);
+               if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+                       kvm_vcpu_flush_tlb_guest(vcpu);
+       }
 
        vcpu->arch.st.preempted = 0;
 
@@ -3118,30 +3128,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                vcpu->arch.smi_count = data;
                break;
        case MSR_KVM_WALL_CLOCK_NEW:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+                       return 1;
+
+               kvm_write_wall_clock(vcpu->kvm, data);
+               break;
        case MSR_KVM_WALL_CLOCK:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+                       return 1;
+
                kvm_write_wall_clock(vcpu->kvm, data);
                break;
        case MSR_KVM_SYSTEM_TIME_NEW:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+                       return 1;
+
                kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
                break;
        case MSR_KVM_SYSTEM_TIME:
-               kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+                       return 1;
+
+               kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
                break;
        case MSR_KVM_ASYNC_PF_EN:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+                       return 1;
+
                if (kvm_pv_enable_async_pf(vcpu, data))
                        return 1;
                break;
        case MSR_KVM_ASYNC_PF_INT:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+                       return 1;
+
                if (kvm_pv_enable_async_pf_int(vcpu, data))
                        return 1;
                break;
        case MSR_KVM_ASYNC_PF_ACK:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+                       return 1;
                if (data & 0x1) {
                        vcpu->arch.apf.pageready_pending = false;
                        kvm_check_async_pf_completion(vcpu);
                }
                break;
        case MSR_KVM_STEAL_TIME:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+                       return 1;
 
                if (unlikely(!sched_info_on()))
                        return 1;
@@ -3158,11 +3192,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
                break;
        case MSR_KVM_PV_EOI_EN:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+                       return 1;
+
                if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
                        return 1;
                break;
 
        case MSR_KVM_POLL_CONTROL:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+                       return 1;
+
                /* only enable bit supported */
                if (data & (-1ULL << 1))
                        return 1;
@@ -3658,6 +3698,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_LAST_CPU:
        case KVM_CAP_X86_USER_SPACE_MSR:
        case KVM_CAP_X86_MSR_FILTER:
+       case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
                r = 1;
                break;
        case KVM_CAP_SYNC_REGS:
@@ -4528,6 +4569,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
                return kvm_x86_ops.enable_direct_tlbflush(vcpu);
 
+       case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+               vcpu->arch.pv_cpuid.enforce = cap->args[0];
+
+               return 0;
+
        default:
                return -EINVAL;
        }
@@ -8000,11 +8046,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                goto out;
        }
 
+       ret = -KVM_ENOSYS;
+
        switch (nr) {
        case KVM_HC_VAPIC_POLL_IRQ:
                ret = 0;
                break;
        case KVM_HC_KICK_CPU:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
+                       break;
+
                kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
                kvm_sched_yield(vcpu->kvm, a1);
                ret = 0;
@@ -8015,9 +8066,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                break;
 #endif
        case KVM_HC_SEND_IPI:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
+                       break;
+
                ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
                break;
        case KVM_HC_SCHED_YIELD:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
+                       break;
+
                kvm_sched_yield(vcpu->kvm, a0);
                ret = 0;
                break;
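With enforcement on, a hypercall whose feature bit is clear falls
through to the -KVM_ENOSYS default now set ahead of the switch. A
guest-side sketch, assuming the usual KVM hypercall convention (nr in
RAX, arguments in RBX/RCX, result in RAX; Intel's vmcall shown, AMD
uses vmmcall):

/* Illustrative guest code; KVM_ENOSYS is 1000 in uapi/linux/kvm_para.h. */
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
				  unsigned long p2)
{
	long ret;

	asm volatile("vmcall"
		     : "=a"(ret)
		     : "a"(nr), "b"(p1), "c"(p2)
		     : "memory");
	return ret;
}

/*
 * e.g. kvm_hypercall2(KVM_HC_KICK_CPU, flags, apic_id) now returns
 * -KVM_ENOSYS (-1000) when KVM_FEATURE_PV_UNHALT is hidden by CPUID.
 */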
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 58f43aa..ca41220 100644
@@ -1052,6 +1052,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_STEAL_TIME 187
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
+#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 
 #ifdef KVM_CAP_IRQ_ROUTING
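Since the capability is reported through the usual mechanism, userspace
should probe before enabling it. A hedged sketch using
KVM_CHECK_EXTENSION on the /dev/kvm fd (error handling elided; the
program itself is illustrative):

#include <fcntl.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	/* Returns 1 on kernels with this patch, 0 on older kernels. */
	int has = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
			KVM_CAP_ENFORCE_PV_FEATURE_CPUID);

	return has > 0 ? 0 : 1;
}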