KVM: nVMX: Allow L1 to use 5-level page walks for nested EPT
authorSean Christopherson <sean.j.christopherson@intel.com>
Tue, 3 Mar 2020 02:02:36 +0000 (18:02 -0800)
committerPaolo Bonzini <pbonzini@redhat.com>
Mon, 16 Mar 2020 16:57:44 +0000 (17:57 +0100)
Add support for 5-level nested EPT, and advertise said support in the
EPT capabilities MSR.  KVM's MMU can already handle 5-level legacy page
tables; there's no reason to force an L1 VMM to use shadow paging if it
wants to employ 5-level page tables.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/vmx.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c

index 8521af3..5e090d1 100644 (file)
@@ -500,6 +500,18 @@ enum vmcs_field {
                                                 VMX_EPT_EXECUTABLE_MASK)
 #define VMX_EPT_MT_MASK                                (7ull << VMX_EPT_MT_EPTE_SHIFT)
 
+static inline u8 vmx_eptp_page_walk_level(u64 eptp)
+{
+       u64 encoded_level = eptp & VMX_EPTP_PWL_MASK;
+
+       if (encoded_level == VMX_EPTP_PWL_5)
+               return 5;
+
+       /* @eptp must be pre-validated by the caller. */
+       WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4);
+       return 4;
+}
+
 /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
 #define VMX_EPT_MISCONFIG_WX_VALUE             (VMX_EPT_WRITABLE_MASK |       \
                                                 VMX_EPT_EXECUTABLE_MASK)
index 374ccbc..a214e10 100644 (file)
@@ -5008,14 +5008,14 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
 static union kvm_mmu_role
 kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
-                                  bool execonly)
+                                  bool execonly, u8 level)
 {
        union kvm_mmu_role role = {0};
 
        /* SMM flag is inherited from root_mmu */
        role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
 
-       role.base.level = PT64_ROOT_4LEVEL;
+       role.base.level = level;
        role.base.gpte_is_8_bytes = true;
        role.base.direct = false;
        role.base.ad_disabled = !accessed_dirty;
@@ -5039,16 +5039,17 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                             bool accessed_dirty, gpa_t new_eptp)
 {
        struct kvm_mmu *context = vcpu->arch.mmu;
+       u8 level = vmx_eptp_page_walk_level(new_eptp);
        union kvm_mmu_role new_role =
                kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
-                                                  execonly);
+                                                  execonly, level);
 
        __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
 
        if (new_role.as_u64 == context->mmu_role.as_u64)
                return;
 
-       context->shadow_root_level = PT64_ROOT_4LEVEL;
+       context->shadow_root_level = level;
 
        context->nx = true;
        context->ept_ad = accessed_dirty;
@@ -5057,7 +5058,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
        context->sync_page = ept_sync_page;
        context->invlpg = ept_invlpg;
        context->update_pte = ept_update_pte;
-       context->root_level = PT64_ROOT_4LEVEL;
+       context->root_level = level;
        context->direct_map = false;
        context->mmu_role.as_u64 = new_role.as_u64;
 
index e4c8a4c..6b15b58 100644 (file)
@@ -66,7 +66,7 @@
        #define PT_GUEST_ACCESSED_SHIFT 8
        #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
        #define CMPXCHG cmpxchg64
-       #define PT_MAX_FULL_LEVELS 4
+       #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
 #else
        #error Invalid PTTYPE value
 #endif
index 300d87f..5f19b90 100644 (file)
@@ -2582,9 +2582,19 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
                return false;
        }
 
-       /* only 4 levels page-walk length are valid */
-       if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
+       /* The page-walk level must be supported per the EPT capabilities MSR. */
+       switch (address & VMX_EPTP_PWL_MASK) {
+       case VMX_EPTP_PWL_5:
+               if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
+                       return false;
+               break;
+       case VMX_EPTP_PWL_4:
+               if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
+                       return false;
+               break;
+       default:
                return false;
+       }
 
        /* Reserved bits should not be set */
        if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
@@ -6119,8 +6129,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
                /* nested EPT: emulate EPT also to L1 */
                msrs->secondary_ctls_high |=
                        SECONDARY_EXEC_ENABLE_EPT;
-               msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
-                        VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
+               msrs->ept_caps =
+                       VMX_EPT_PAGE_WALK_4_BIT |
+                       VMX_EPT_PAGE_WALK_5_BIT |
+                       VMX_EPTP_WB_BIT |
+                       VMX_EPT_INVEPT_BIT;
                if (cpu_has_vmx_ept_execute_only())
                        msrs->ept_caps |=
                                VMX_EPT_EXECUTE_ONLY_BIT;
index de4bf79..c369ab3 100644 (file)
@@ -2985,9 +2985,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 static int get_ept_level(struct kvm_vcpu *vcpu)
 {
-       /* Nested EPT currently only supports 4-level walks. */
        if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
-               return 4;
+               return vmx_eptp_page_walk_level(nested_ept_get_cr3(vcpu));
        if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
                return 5;
        return 4;