KVM: x86: only allocate gfn_track when necessary
author David Stevens <stevensd@chromium.org>
Wed, 22 Sep 2021 04:58:59 +0000 (13:58 +0900)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 1 Oct 2021 07:44:58 +0000 (03:44 -0400)
Avoid allocating the gfn_track arrays if nothing needs them. If there
are no users of the API external to KVM (i.e. no GVT-g), then page
tracking is only needed for shadow page tables. This means that when TDP
is enabled and there are no external users, the gfn_track arrays can be
lazily allocated when the shadow MMU is actually used. This avoids
allocations equal to 0.05% of guest memory (one 2-byte counter per
4 KiB page, and 2/4096 ~= 0.05%) when nested virtualization is not used
and the kernel is compiled without GVT-g.
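
In outline, the enable path is one-time lazy initialization with
double-checked locking: a lock-free fast path behind an acquire load, a
recheck under kvm->slots_arch_lock, allocation, and a release store that
publishes the flag only after every gfn_track pointer is written. Below
is a minimal, self-contained sketch of the same pattern (names such as
lazy_track are illustrative, and a single kcalloc() stands in for the
per-memslot loop; it is not the patch itself):

  #include <asm/barrier.h>
  #include <linux/mutex.h>
  #include <linux/slab.h>

  /*
   * 'enabled' stands in for memslots_mmu_write_tracking, 'counters' for
   * a single slot's gfn_track[KVM_PAGE_TRACK_WRITE] array. Assume the
   * mutex was initialized with mutex_init() at setup time.
   */
  struct lazy_track {
          struct mutex lock;
          unsigned short *counters;       /* written before 'enabled' */
          bool enabled;                   /* published last */
  };

  static bool lazy_track_enabled(struct lazy_track *t)
  {
          /* Pairs with the smp_store_release() in lazy_track_enable(). */
          return smp_load_acquire(&t->enabled);
  }

  static int lazy_track_enable(struct lazy_track *t, unsigned long npages)
  {
          if (lazy_track_enabled(t))      /* lock-free fast path */
                  return 0;

          mutex_lock(&t->lock);
          if (t->enabled) {               /* recheck under the lock */
                  mutex_unlock(&t->lock);
                  return 0;
          }

          t->counters = kcalloc(npages, sizeof(*t->counters),
                                GFP_KERNEL_ACCOUNT);
          if (!t->counters) {
                  mutex_unlock(&t->lock);
                  return -ENOMEM;
          }

          /* Order the counter store before the flag becomes visible. */
          smp_store_release(&t->enabled, true);
          mutex_unlock(&t->lock);
          return 0;
  }

A reader that observes enabled == true through the acquire load is thus
guaranteed to also observe the initialized counters pointer, which is why
the is-active check can dereference the array without taking a lock.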

Signed-off-by: David Stevens <stevensd@chromium.org>
Message-Id: <20210922045859.2011227-3-stevensd@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_page_track.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/x86.c

index 1b28029..5271fce 100644
@@ -1211,6 +1211,14 @@ struct kvm_arch {
         */
        bool memslots_have_rmaps;
 
+       /*
+        * Set when the KVM mmu needs guest write access page tracking. If
+        * set, the necessary gfn_track arrays have been allocated for
+        * all memslots and should be allocated for any newly created or
+        * modified memslots.
+        */
+       bool memslots_mmu_write_tracking;
+
 #if IS_ENABLED(CONFIG_HYPERV)
        hpa_t   hv_root_tdp;
        spinlock_t hv_root_tdp_lock;
index 5c12f97..79d84a9 100644
@@ -49,8 +49,11 @@ struct kvm_page_track_notifier_node {
 int kvm_page_track_init(struct kvm *kvm);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
+int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm);
+
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot,
                                  unsigned long npages);
 
 void kvm_slot_page_track_add_page(struct kvm *kvm,
@@ -59,7 +62,8 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 void kvm_slot_page_track_remove_page(struct kvm *kvm,
                                     struct kvm_memory_slot *slot, gfn_t gfn,
                                     enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm_memory_slot *slot, gfn_t gfn,
+bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
+                                  struct kvm_memory_slot *slot, gfn_t gfn,
                                   enum kvm_page_track_mode mode);
 
 void
index 9129200..24a9f4c 100644
@@ -2583,7 +2583,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
         * track machinery is used to write-protect upper-level shadow pages,
         * i.e. this guards the role.level == 4K assertion below!
         */
-       if (kvm_slot_page_track_is_active(slot, gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_slot_page_track_is_active(vcpu, slot, gfn, KVM_PAGE_TRACK_WRITE))
                return -EPERM;
 
        /*
@@ -3431,6 +3431,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        if (r)
                return r;
 
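+       /* May sleep (mutex and kvcalloc), so call before write_lock(). */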
+       r = kvm_page_track_enable_mmu_write_tracking(vcpu->kvm);
+       if (r)
+               return r;
+
        write_lock(&vcpu->kvm->mmu_lock);
        r = make_mmu_pages_available(vcpu);
        if (r < 0)
@@ -3790,7 +3794,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
         * guest is writing the page which is write tracked which can
         * not be fixed by page fault handler.
         */
-       if (kvm_slot_page_track_is_active(fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_slot_page_track_is_active(vcpu, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
                return true;
 
        return false;
@@ -5607,6 +5611,9 @@ void kvm_mmu_init_vm(struct kvm *kvm)
                 */
                kvm->arch.memslots_have_rmaps = true;
 
+       if (!tdp_enabled)
+               kvm->arch.memslots_mmu_write_tracking = true;
+
        node->track_write = kvm_mmu_pte_write;
        node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
        kvm_page_track_register_notifier(kvm, node);
index 16e7176..bb5d60b 100644
 #include "mmu.h"
 #include "mmu_internal.h"
 
+static bool write_tracking_enabled(struct kvm *kvm)
+{
+       /*
+        * Read memslots_mmu_write_tracking before gfn_track pointers. Pairs
+        * with smp_store_release in kvm_page_track_enable_mmu_write_tracking.
+        */
+       return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
+              smp_load_acquire(&kvm->arch.memslots_mmu_write_tracking);
+}
+
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 {
        int i;
@@ -29,12 +39,16 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
        }
 }
 
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot,
                                  unsigned long npages)
 {
-       int  i;
+       int i;
 
        for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
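+               /* KVM_PAGE_TRACK_WRITE is allocated lazily when needed */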
+               if (i == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(kvm))
+                       continue;
+
                slot->arch.gfn_track[i] =
                        kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
                                 GFP_KERNEL_ACCOUNT);
@@ -57,6 +71,46 @@ static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
        return true;
 }
 
+int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *slot;
+       unsigned short **gfn_track;
+       int i;
+
+       if (write_tracking_enabled(kvm))
+               return 0;
+
+       mutex_lock(&kvm->slots_arch_lock);
+
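+       /* Recheck under slots_arch_lock: another task may have raced in. */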
+       if (write_tracking_enabled(kvm)) {
+               mutex_unlock(&kvm->slots_arch_lock);
+               return 0;
+       }
+
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               slots = __kvm_memslots(kvm, i);
+               kvm_for_each_memslot(slot, slots) {
+                       gfn_track = slot->arch.gfn_track + KVM_PAGE_TRACK_WRITE;
+                       *gfn_track = kvcalloc(slot->npages, sizeof(**gfn_track),
+                                             GFP_KERNEL_ACCOUNT);
+                       if (*gfn_track == NULL) {
+                               mutex_unlock(&kvm->slots_arch_lock);
+                               return -ENOMEM;
+                       }
+               }
+       }
+
+       /*
+        * Ensure that memslots_mmu_write_tracking becomes true strictly
+        * after all the pointers are set.
+        */
+       smp_store_release(&kvm->arch.memslots_mmu_write_tracking, true);
+       mutex_unlock(&kvm->slots_arch_lock);
+
+       return 0;
+}
+
 static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
                             enum kvm_page_track_mode mode, short count)
 {
@@ -92,6 +146,10 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
        if (WARN_ON(!page_track_mode_is_valid(mode)))
                return;
 
+       if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
+                   !write_tracking_enabled(kvm)))
+               return;
+
        update_gfn_track(slot, gfn, mode, 1);
 
        /*
@@ -126,6 +184,10 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
        if (WARN_ON(!page_track_mode_is_valid(mode)))
                return;
 
+       if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
+                   !write_tracking_enabled(kvm)))
+               return;
+
        update_gfn_track(slot, gfn, mode, -1);
 
        /*
@@ -139,7 +201,8 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 /*
  * check if the corresponding access on the specified guest page is tracked.
  */
-bool kvm_slot_page_track_is_active(struct kvm_memory_slot *slot, gfn_t gfn,
+bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
+                                  struct kvm_memory_slot *slot, gfn_t gfn,
                                   enum kvm_page_track_mode mode)
 {
        int index;
@@ -150,6 +213,9 @@ bool kvm_slot_page_track_is_active(struct kvm_memory_slot *slot, gfn_t gfn,
        if (!slot)
                return false;
 
+       if (mode == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(vcpu->kvm))
+               return false;
+
        index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
        return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
 }
index 03091a2..db7fa13 100644
@@ -11476,7 +11476,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
                }
        }
 
-       if (kvm_page_track_create_memslot(slot, npages))
+       if (kvm_page_track_create_memslot(kvm, slot, npages))
                goto out_free;
 
        return 0;