KVM: x86: nVMX: close leak of L0's x2APIC MSRs (CVE-2019-3887)
authorMarc Orr <marcorr@google.com>
Tue, 2 Apr 2019 06:55:59 +0000 (23:55 -0700)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 5 Apr 2019 19:08:22 +0000 (21:08 +0200)
The nested_vmx_prepare_msr_bitmap() function doesn't directly guard the
x2APIC MSR intercepts with the "virtualize x2APIC mode" MSR. As a
result, we discovered the potential for a buggy or malicious L1 to get
access to L0's x2APIC MSRs, via an L2, as follows.

1. L1 executes WRMSR(IA32_SPEC_CTRL, 1). This causes the spec_ctrl
variable, in nested_vmx_prepare_msr_bitmap() to become true.
2. L1 disables "virtualize x2APIC mode" in VMCS12.
3. L1 enables "APIC-register virtualization" in VMCS12.

Now, KVM will set VMCS02's x2APIC MSR intercepts from VMCS12, and then
set "virtualize x2APIC mode" to 0 in VMCS02. Oops.

This patch closes the leak by explicitly guarding VMCS02's x2APIC MSR
intercepts with VMCS12's "virtualize x2APIC mode" control.

The scenario outlined above and fix prescribed here, were verified with
a related patch in kvm-unit-tests titled "Add leak scenario to
virt_x2apic_mode_test".

Note, it looks like this issue may have been introduced inadvertently
during a merge---see 15303ba5d1cd.

Signed-off-by: Marc Orr <marcorr@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/vmx/nested.c

index 153e539..897d70e 100644 (file)
@@ -500,6 +500,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
        }
 }
 
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+       int msr;
+
+       for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+               unsigned word = msr / BITS_PER_LONG;
+
+               msr_bitmap[word] = ~0;
+               msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+       }
+}
+
 /*
  * Merge L0's and L1's MSR bitmap, return false to indicate that
  * we do not use the hardware.
@@ -541,39 +552,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
                return false;
 
        msr_bitmap_l1 = (unsigned long *)kmap(page);
-       if (nested_cpu_has_apic_reg_virt(vmcs12)) {
-               /*
-                * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
-                * just lets the processor take the value from the virtual-APIC page;
-                * take those 256 bits directly from the L1 bitmap.
-                */
-               for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-                       unsigned word = msr / BITS_PER_LONG;
-                       msr_bitmap_l0[word] = msr_bitmap_l1[word];
-                       msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
-               }
-       } else {
-               for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-                       unsigned word = msr / BITS_PER_LONG;
-                       msr_bitmap_l0[word] = ~0;
-                       msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
-               }
-       }
 
-       nested_vmx_disable_intercept_for_msr(
-               msr_bitmap_l1, msr_bitmap_l0,
-               X2APIC_MSR(APIC_TASKPRI),
-               MSR_TYPE_W);
+       /*
+        * To keep the control flow simple, pay eight 8-byte writes (sixteen
+        * 4-byte writes on 32-bit systems) up front to enable intercepts for
+        * the x2APIC MSR range and selectively disable them below.
+        */
+       enable_x2apic_msr_intercepts(msr_bitmap_l0);
+
+       if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+               if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+                       /*
+                        * L0 need not intercept reads for MSRs between 0x800
+                        * and 0x8ff, it just lets the processor take the value
+                        * from the virtual-APIC page; take those 256 bits
+                        * directly from the L1 bitmap.
+                        */
+                       for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+                               unsigned word = msr / BITS_PER_LONG;
+
+                               msr_bitmap_l0[word] = msr_bitmap_l1[word];
+                       }
+               }
 
-       if (nested_cpu_has_vid(vmcs12)) {
-               nested_vmx_disable_intercept_for_msr(
-                       msr_bitmap_l1, msr_bitmap_l0,
-                       X2APIC_MSR(APIC_EOI),
-                       MSR_TYPE_W);
                nested_vmx_disable_intercept_for_msr(
                        msr_bitmap_l1, msr_bitmap_l0,
-                       X2APIC_MSR(APIC_SELF_IPI),
+                       X2APIC_MSR(APIC_TASKPRI),
                        MSR_TYPE_W);
+
+               if (nested_cpu_has_vid(vmcs12)) {
+                       nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
+                               X2APIC_MSR(APIC_EOI),
+                               MSR_TYPE_W);
+                       nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
+                               X2APIC_MSR(APIC_SELF_IPI),
+                               MSR_TYPE_W);
+               }
        }
 
        if (spec_ctrl)