KVM: nSVM: implement on demand allocation of the nested state
author Maxim Levitsky <mlevitsk@redhat.com>
Thu, 1 Oct 2020 11:29:54 +0000 (14:29 +0300)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 21 Oct 2020 21:48:48 +0000 (17:48 -0400)
This way we don't waste memory on VMs which don't use nested
virtualization, even when the host has enabled it for them.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20201001112954.6258-5-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h

index ba50ff6..9e4c226 100644 (file)
@@ -481,6 +481,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
 
        vmcb12 = map.hva;
 
+       if (WARN_ON_ONCE(!svm->nested.initialized))
+               return -EINVAL;
+
        if (!nested_vmcb_checks(svm, vmcb12)) {
                vmcb12->control.exit_code    = SVM_EXIT_ERR;
                vmcb12->control.exit_code_hi = 0;
@@ -698,6 +701,45 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        return 0;
 }
 
+/*
+ * Allocate the per-vCPU nested state on demand: a zeroed page for
+ * nested.hsave and a buffer for nested.msrpm, which is then initialized
+ * with svm_vcpu_init_msrpm().
+ *
+ * Does nothing and returns 0 when nested.initialized is already set.
+ * Returns -ENOMEM when either allocation fails; in that case nothing
+ * stays allocated, no stale pointer is left behind and
+ * nested.initialized remains false.
+ */
+int svm_allocate_nested(struct vcpu_svm *svm)
+{
+       struct page *hsave_page;
+
+       if (svm->nested.initialized)
+               return 0;
+
+       hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+       if (!hsave_page)
+               return -ENOMEM;
+       svm->nested.hsave = page_address(hsave_page);
+
+       svm->nested.msrpm = svm_vcpu_alloc_msrpm();
+       if (!svm->nested.msrpm)
+               goto err_free_hsave;
+       svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
+
+       svm->nested.initialized = true;
+       return 0;
+
+err_free_hsave:
+       /*
+        * Drop the pointer together with the page so that a failed
+        * allocation does not leave nested.hsave dangling, matching what
+        * svm_free_nested() does on teardown.
+        */
+       svm->nested.hsave = NULL;
+       __free_page(hsave_page);
+       return -ENOMEM;
+}
+
+/*
+ * Release the nested state set up by svm_allocate_nested().
+ *
+ * A no-op when nested.initialized is false. Otherwise the msrpm buffer
+ * and the hsave page are freed, both pointers are reset to NULL and the
+ * initialized flag is cleared, so a repeated call returns early.
+ */
+void svm_free_nested(struct vcpu_svm *svm)
+{
+       struct svm_nested_state *nested = &svm->nested;
+
+       if (!nested->initialized)
+               return;
+
+       /* Release in the reverse order of allocation. */
+       svm_vcpu_free_msrpm(nested->msrpm);
+       nested->msrpm = NULL;
+
+       __free_page(virt_to_page(nested->hsave));
+       nested->hsave = NULL;
+
+       nested->initialized = false;
+}
+
 /*
  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
  */
index 57e0f27..dc4fe57 100644 (file)
@@ -266,6 +266,7 @@ static int get_max_npt_level(void)
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       u64 old_efer = vcpu->arch.efer;
        vcpu->arch.efer = efer;
 
        if (!npt_enabled) {
@@ -276,9 +277,27 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
                        efer &= ~EFER_LME;
        }
 
-       if (!(efer & EFER_SVME)) {
-               svm_leave_nested(svm);
-               svm_set_gif(svm, true);
+       if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+               if (!(efer & EFER_SVME)) {
+                       svm_leave_nested(svm);
+                       svm_set_gif(svm, true);
+
+                       /*
+                        * Free the nested guest state, unless we are in SMM.
+                        * In this case we will return to the nested guest
+                        * as soon as we leave SMM.
+                        */
+                       if (!is_smm(&svm->vcpu))
+                               svm_free_nested(svm);
+
+               } else {
+                       int ret = svm_allocate_nested(svm);
+
+                       if (ret) {
+                               vcpu->arch.efer = old_efer;
+                               return ret;
+                       }
+               }
        }
 
        svm->vmcb->save.efer = efer | EFER_SVME;
@@ -650,7 +669,7 @@ static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
        set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
 }
 
-static u32 *svm_vcpu_alloc_msrpm(void)
+u32 *svm_vcpu_alloc_msrpm(void)
 {
        struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
        u32 *msrpm;
@@ -664,7 +683,7 @@ static u32 *svm_vcpu_alloc_msrpm(void)
        return msrpm;
 }
 
-static void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
+void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
 {
        int i;
 
@@ -675,7 +694,8 @@ static void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
        }
 }
 
-static void svm_vcpu_free_msrpm(u32 *msrpm)
+
+void svm_vcpu_free_msrpm(u32 *msrpm)
 {
        __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER);
 }
@@ -1268,7 +1288,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm;
        struct page *vmcb_page;
-       struct page *hsave_page;
        int err;
 
        BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1279,13 +1298,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (!vmcb_page)
                goto out;
 
-       hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-       if (!hsave_page)
-               goto error_free_vmcb_page;
-
        err = avic_init_vcpu(svm);
        if (err)
-               goto error_free_hsave_page;
+               goto error_free_vmcb_page;
 
        /* We initialize this flag to true to make sure that the is_running
         * bit would be set the first time the vcpu is loaded.
@@ -1293,21 +1308,12 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
                svm->avic_is_running = true;
 
-       svm->nested.hsave = page_address(hsave_page);
-
        svm->msrpm = svm_vcpu_alloc_msrpm();
        if (!svm->msrpm)
-               goto error_free_hsave_page;
+               goto error_free_vmcb_page;
 
        svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
-       svm->nested.msrpm = svm_vcpu_alloc_msrpm();
-       if (!svm->nested.msrpm)
-               goto error_free_msrpm;
-
-       /* We only need the L1 pass-through MSR state, so leave vcpu as NULL */
-       svm_vcpu_init_msrpm(vcpu, svm->nested.msrpm);
-
        svm->vmcb = page_address(vmcb_page);
        svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
        svm->asid_generation = 0;
@@ -1318,10 +1324,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
        return 0;
 
-error_free_msrpm:
-       svm_vcpu_free_msrpm(svm->msrpm);
-error_free_hsave_page:
-       __free_page(hsave_page);
 error_free_vmcb_page:
        __free_page(vmcb_page);
 out:
@@ -1347,10 +1349,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
         */
        svm_clear_current_vmcb(svm->vmcb);
 
+       svm_free_nested(svm);
+
        __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
-       __free_page(virt_to_page(svm->nested.hsave));
-       __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -4038,6 +4040,9 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
                                         gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
                                return 1;
 
+                       if (svm_allocate_nested(svm))
+                               return 1;
+
                        ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva);
                        kvm_vcpu_unmap(&svm->vcpu, &map, true);
                }
index e7af21e..1d853fe 100644 (file)
@@ -97,6 +97,8 @@ struct svm_nested_state {
 
        /* cache for control fields of the guest */
        struct vmcb_control_area ctl;
+
+       bool initialized;
 };
 
 struct vcpu_svm {
@@ -350,6 +352,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
 #define MSR_INVALID                            0xffffffffU
 
 u32 svm_msrpm_offset(u32 msr);
+u32 *svm_vcpu_alloc_msrpm(void);
+void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
+void svm_vcpu_free_msrpm(u32 *msrpm);
+
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
@@ -391,6 +397,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
 int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
                         struct vmcb *nested_vmcb);
 void svm_leave_nested(struct vcpu_svm *svm);
+void svm_free_nested(struct vcpu_svm *svm);
+int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct vcpu_svm *svm);
 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);