KVM: x86/mmu: do not pass vcpu to root freeing functions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 593093b..07cc9b1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -104,15 +104,6 @@ static int max_huge_page_level __read_mostly;
 static int tdp_root_level __read_mostly;
 static int max_tdp_level __read_mostly;
 
-enum {
-       AUDIT_PRE_PAGE_FAULT,
-       AUDIT_POST_PAGE_FAULT,
-       AUDIT_PRE_PTE_WRITE,
-       AUDIT_POST_PTE_WRITE,
-       AUDIT_PRE_SYNC,
-       AUDIT_POST_SYNC
-};
-
 #ifdef MMU_DEBUG
 bool dbg = 0;
 module_param(dbg, bool, 0644);
@@ -529,6 +520,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
        u64 old_spte = *sptep;
 
        WARN_ON(!is_shadow_present_pte(new_spte));
+       check_spte_writable_invariants(new_spte);
 
        if (!is_shadow_present_pte(old_spte)) {
                mmu_spte_set(sptep, new_spte);
@@ -548,11 +540,9 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
 /* Rules for using mmu_spte_update:
  * Update the state bits, it means the mapped pfn is not changed.
  *
- * Whenever we overwrite a writable spte with a read-only one we
- * should flush remote TLBs. Otherwise rmap_write_protect
- * will find a read-only spte, even though the writable spte
- * might be cached on a CPU's TLB, the return value indicates this
- * case.
+ * Whenever an MMU-writable SPTE is overwritten with a read-only SPTE, remote
+ * TLBs must be flushed. Otherwise rmap_write_protect will find a read-only
+ * spte, even though the writable spte might be cached on a CPU's TLB.
  *
  * Returns true if the TLB needs to be flushed
  */
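The boolean return value is how that flush requirement reaches the caller. A minimal sketch of the intended pattern when a caller strips the writable bit (the caller shown is illustrative; spte_write_protect() below is the in-tree equivalent):

	u64 spte = *sptep;

	spte &= ~PT_WRITABLE_MASK;

	/*
	 * True means a previously MMU-writable SPTE was made read-only, so a
	 * stale writable translation may still be cached in a remote TLB.
	 */
	if (mmu_spte_update(sptep, spte))
		kvm_flush_remote_tlbs(kvm);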
@@ -646,24 +636,6 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
        return __get_spte_lockless(sptep);
 }
 
-/* Restore an acc-track PTE back to a regular PTE */
-static u64 restore_acc_track_spte(u64 spte)
-{
-       u64 new_spte = spte;
-       u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT)
-                        & SHADOW_ACC_TRACK_SAVED_BITS_MASK;
-
-       WARN_ON_ONCE(spte_ad_enabled(spte));
-       WARN_ON_ONCE(!is_access_track_spte(spte));
-
-       new_spte &= ~shadow_acc_track_mask;
-       new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
-                     SHADOW_ACC_TRACK_SAVED_BITS_SHIFT);
-       new_spte |= saved_bits;
-
-       return new_spte;
-}
-
 /* Returns the Accessed status of the PTE and resets it at the same time. */
 static bool mmu_spte_age(u64 *sptep)
 {
@@ -1229,9 +1201,8 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
        return mmu_spte_update(sptep, spte);
 }
 
-static bool __rmap_write_protect(struct kvm *kvm,
-                                struct kvm_rmap_head *rmap_head,
-                                bool pt_protect)
+static bool rmap_write_protect(struct kvm_rmap_head *rmap_head,
+                              bool pt_protect)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -1311,7 +1282,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
        while (mask) {
                rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
                                        PG_LEVEL_4K, slot);
-               __rmap_write_protect(kvm, rmap_head, false);
+               rmap_write_protect(rmap_head, false);
 
                /* clear the first set bit */
                mask &= mask - 1;
@@ -1378,6 +1349,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
                gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
 
+               if (READ_ONCE(eager_page_split))
+                       kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+
                kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
 
                /* Cross two large pages? */
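Eagerly splitting the huge pages that cover the masked range down to 4K lets dirty state be tracked at 4K granularity, so the PG_LEVEL_2M write protection below only has to catch huge pages that could not be split. The knob being read lives outside this file; a sketch of what backs it (the exact declaration is an assumption, not part of this hunk):

	/* Assumed declaration backing READ_ONCE(eager_page_split). */
	bool __read_mostly eager_page_split = true;
	module_param(eager_page_split, bool, 0644);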
@@ -1410,7 +1384,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
        if (kvm_memslots_have_rmaps(kvm)) {
                for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
                        rmap_head = gfn_to_rmap(gfn, i, slot);
-                       write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+                       write_protected |= rmap_write_protect(rmap_head, true);
                }
        }
 
@@ -1421,7 +1395,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
        return write_protected;
 }
 
-static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn)
 {
        struct kvm_memory_slot *slot;
 
@@ -1921,13 +1895,6 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
        return true;
 }
 
-#ifdef CONFIG_KVM_MMU_AUDIT
-#include "mmu_audit.c"
-#else
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
-static void mmu_audit_disable(void) { }
-#endif
-
 static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        if (sp->role.invalid)
@@ -2024,7 +1991,7 @@ static int mmu_sync_children(struct kvm_vcpu *vcpu,
                bool protected = false;
 
                for_each_sp(pages, sp, parents, i)
-                       protected |= rmap_write_protect(vcpu, sp->gfn);
+                       protected |= kvm_vcpu_write_protect_gfn(vcpu, sp->gfn);
 
                if (protected) {
                        kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, true);
@@ -2149,7 +2116,7 @@ trace_get_page:
        hlist_add_head(&sp->hash_link, sp_list);
        if (!direct) {
                account_shadowed(vcpu->kvm, sp);
-               if (level == PG_LEVEL_4K && rmap_write_protect(vcpu, gfn))
+               if (level == PG_LEVEL_4K && kvm_vcpu_write_protect_gfn(vcpu, gfn))
                        kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
        }
        trace_kvm_mmu_get_page(sp, true);
@@ -2179,7 +2146,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
                 * prev_root is currently only used for 64-bit hosts. So only
                 * the active root_hpa is valid here.
                 */
-               BUG_ON(root != vcpu->arch.mmu->root_hpa);
+               BUG_ON(root != vcpu->arch.mmu->root.hpa);
 
                iterator->shadow_addr
                        = vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
@@ -2193,7 +2160,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
 static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
                             struct kvm_vcpu *vcpu, u64 addr)
 {
-       shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa,
+       shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root.hpa,
                                    addr);
 }
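These hunks switch from the root_hpa/root_pgd pair to a single cached-root field, giving the active root the same type as the entries in prev_roots[] so the two can be swapped directly (see cached_root_available() further down). The structure being reused is the pre-existing kvm_mmu_root_info from kvm_host.h, roughly:

	struct kvm_mmu_root_info {
		gpa_t pgd;
		hpa_t hpa;
	};

The new 'root' member on struct kvm_mmu itself comes from a companion header change that is not part of this mmu.c diff.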
 
@@ -2307,7 +2274,7 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm,
        return zapped;
 }
 
-static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
+static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -2351,7 +2318,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
        ++kvm->stat.mmu_shadow_zapped;
        *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
        *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
-       kvm_mmu_unlink_parents(kvm, sp);
+       kvm_mmu_unlink_parents(sp);
 
        /* Zapping children means active_mmu_pages has become unstable. */
        list_unstable = *nr_zapped;
@@ -3239,6 +3206,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
                return;
 
        sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
+       if (WARN_ON(!sp))
+               return;
 
        if (is_tdp_mmu_page(sp))
                kvm_tdp_mmu_put_root(kvm, sp, false);
@@ -3249,18 +3218,20 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 }
 
 /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                        ulong roots_to_free)
 {
-       struct kvm *kvm = vcpu->kvm;
        int i;
        LIST_HEAD(invalid_list);
-       bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
+       bool free_active_root;
 
        BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
        /* Before acquiring the MMU lock, see if we need to do any real work. */
-       if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
+       free_active_root = (roots_to_free & KVM_MMU_ROOT_CURRENT)
+               && VALID_PAGE(mmu->root.hpa);
+
+       if (!free_active_root) {
                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
                        if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
                            VALID_PAGE(mmu->prev_roots[i].hpa))
@@ -3278,9 +3249,8 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                                           &invalid_list);
 
        if (free_active_root) {
-               if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
-                   (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
-                       mmu_free_root_page(kvm, &mmu->root_hpa, &invalid_list);
+               if (to_shadow_page(mmu->root.hpa)) {
+                       mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
                } else if (mmu->pae_root) {
                        for (i = 0; i < 4; ++i) {
                                if (!IS_VALID_PAE_ROOT(mmu->pae_root[i]))
@@ -3291,8 +3261,8 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                                mmu->pae_root[i] = INVALID_PAE_ROOT;
                        }
                }
-               mmu->root_hpa = INVALID_PAGE;
-               mmu->root_pgd = 0;
+               mmu->root.hpa = INVALID_PAGE;
+               mmu->root.pgd = 0;
        }
 
        kvm_mmu_commit_zap_page(kvm, &invalid_list);
@@ -3300,7 +3270,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 
-void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 {
        unsigned long roots_to_free = 0;
        hpa_t root_hpa;
@@ -3322,7 +3292,7 @@ void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
                        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
        }
 
-       kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+       kvm_mmu_free_roots(kvm, mmu, roots_to_free);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
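Per the subject line, the root freeing functions now take the struct kvm directly, so freeing roots no longer has to be driven by the vCPU that owns the MMU. A minimal usage sketch of the new signatures (the calling context is illustrative, not taken from this patch):

	struct kvm *kvm = vcpu->kvm;

	/* Free every root, current and previous, of this MMU. */
	kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);

	/* Free only the cached roots that were created for guest mode (L2). */
	kvm_mmu_free_guest_mode_roots(kvm, &vcpu->arch.root_mmu);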
 
@@ -3365,10 +3335,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
        if (is_tdp_mmu_enabled(vcpu->kvm)) {
                root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
-               mmu->root_hpa = root;
+               mmu->root.hpa = root;
        } else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
                root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true);
-               mmu->root_hpa = root;
+               mmu->root.hpa = root;
        } else if (shadow_root_level == PT32E_ROOT_LEVEL) {
                if (WARN_ON_ONCE(!mmu->pae_root)) {
                        r = -EIO;
@@ -3383,15 +3353,15 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        mmu->pae_root[i] = root | PT_PRESENT_MASK |
                                           shadow_me_mask;
                }
-               mmu->root_hpa = __pa(mmu->pae_root);
+               mmu->root.hpa = __pa(mmu->pae_root);
        } else {
                WARN_ONCE(1, "Bad TDP root level = %d\n", shadow_root_level);
                r = -EIO;
                goto out_unlock;
        }
 
-       /* root_pgd is ignored for direct MMUs. */
-       mmu->root_pgd = 0;
+       /* root.pgd is ignored for direct MMUs. */
+       mmu->root.pgd = 0;
 out_unlock:
        write_unlock(&vcpu->kvm->mmu_lock);
        return r;
@@ -3504,7 +3474,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        if (mmu->root_level >= PT64_ROOT_4LEVEL) {
                root = mmu_alloc_root(vcpu, root_gfn, 0,
                                      mmu->shadow_root_level, false);
-               mmu->root_hpa = root;
+               mmu->root.hpa = root;
                goto set_root_pgd;
        }
 
@@ -3554,14 +3524,14 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        }
 
        if (mmu->shadow_root_level == PT64_ROOT_5LEVEL)
-               mmu->root_hpa = __pa(mmu->pml5_root);
+               mmu->root.hpa = __pa(mmu->pml5_root);
        else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
-               mmu->root_hpa = __pa(mmu->pml4_root);
+               mmu->root.hpa = __pa(mmu->pml4_root);
        else
-               mmu->root_hpa = __pa(mmu->pae_root);
+               mmu->root.hpa = __pa(mmu->pae_root);
 
 set_root_pgd:
-       mmu->root_pgd = root_pgd;
+       mmu->root.pgd = root_pgd;
 out_unlock:
        write_unlock(&vcpu->kvm->mmu_lock);
 
@@ -3674,30 +3644,25 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
        if (vcpu->arch.mmu->direct_map)
                return;
 
-       if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+       if (!VALID_PAGE(vcpu->arch.mmu->root.hpa))
                return;
 
        vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 
        if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
-               hpa_t root = vcpu->arch.mmu->root_hpa;
+               hpa_t root = vcpu->arch.mmu->root.hpa;
                sp = to_shadow_page(root);
 
                if (!is_unsync_root(root))
                        return;
 
                write_lock(&vcpu->kvm->mmu_lock);
-               kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
-
                mmu_sync_children(vcpu, sp, true);
-
-               kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
                write_unlock(&vcpu->kvm->mmu_lock);
                return;
        }
 
        write_lock(&vcpu->kvm->mmu_lock);
-       kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 
        for (i = 0; i < 4; ++i) {
                hpa_t root = vcpu->arch.mmu->pae_root[i];
@@ -3709,7 +3674,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
                }
        }
 
-       kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
        write_unlock(&vcpu->kvm->mmu_lock);
 }
 
@@ -3723,7 +3687,7 @@ void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu)
                        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 
        /* sync prev_roots by simply freeing them */
-       kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
+       kvm_mmu_free_roots(vcpu->kvm, vcpu->arch.mmu, roots_to_free);
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
@@ -3971,7 +3935,7 @@ out_retry:
 static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
                                struct kvm_page_fault *fault, int mmu_seq)
 {
-       struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root_hpa);
+       struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
 
        /* Special roots, e.g. pae_root, are not backed by shadow pages. */
        if (sp && is_obsolete_sp(vcpu->kvm, sp))
@@ -4128,34 +4092,27 @@ static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
 /*
  * Find out if a previously cached root matching the new pgd/role is available.
  * The current root is also inserted into the cache.
- * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
+ * If a matching root was found, it is assigned to kvm_mmu->root.hpa and true is
  * returned.
- * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
+ * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root.hpa and
  * false is returned. This root should now be freed by the caller.
  */
 static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd,
                                  union kvm_mmu_page_role new_role)
 {
        uint i;
-       struct kvm_mmu_root_info root;
        struct kvm_mmu *mmu = vcpu->arch.mmu;
 
-       root.pgd = mmu->root_pgd;
-       root.hpa = mmu->root_hpa;
-
-       if (is_root_usable(&root, new_pgd, new_role))
+       if (is_root_usable(&mmu->root, new_pgd, new_role))
                return true;
 
        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
-               swap(root, mmu->prev_roots[i]);
+               swap(mmu->root, mmu->prev_roots[i]);
 
-               if (is_root_usable(&root, new_pgd, new_role))
+               if (is_root_usable(&mmu->root, new_pgd, new_role))
                        break;
        }
 
-       mmu->root_hpa = root.hpa;
-       mmu->root_pgd = root.pgd;
-
        return i < KVM_MMU_NUM_PREV_ROOTS;
 }
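With the active root held in the same kvm_mmu_root_info type as prev_roots[], the lookup rotates mmu->root through the cache with swap() instead of staging it in a local copy; on a miss, the least-recently-used entry is left in mmu->root for the caller to free. A worked example of the rotation (assuming KVM_MMU_NUM_PREV_ROOTS == 3 and no cached root matching the new pgd/role):

	/*
	 *   start:      root = R0, prev_roots = { R1, R2, R3 }
	 *   after i=0:  root = R1, prev_roots = { R0, R2, R3 }
	 *   after i=1:  root = R2, prev_roots = { R0, R1, R3 }
	 *   after i=2:  root = R3, prev_roots = { R0, R1, R2 }
	 *
	 * R3, the LRU entry, ends up in mmu->root and the function returns
	 * false, telling the caller it must free that root before installing
	 * the new one.
	 */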
 
@@ -4179,8 +4136,10 @@ static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd,
 static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
                              union kvm_mmu_page_role new_role)
 {
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
+
        if (!fast_pgd_switch(vcpu, new_pgd, new_role)) {
-               kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOT_CURRENT);
+               kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
                return;
        }
 
@@ -4211,7 +4170,7 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
         */
        if (!new_role.direct)
                __clear_sp_write_flooding_count(
-                               to_shadow_page(vcpu->arch.mmu->root_hpa));
+                               to_shadow_page(vcpu->arch.mmu->root.hpa));
 }
 
 void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
@@ -4474,8 +4433,7 @@ static inline bool boot_cpu_is_amd(void)
  * possible, however, kvm currently does not do execution-protection.
  */
 static void
-reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
-                               struct kvm_mmu *context)
+reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
 {
        struct rsvd_bits_validate *shadow_zero_check;
        int i;
@@ -4506,8 +4464,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
  * is the shadow page table for intel nested guest.
  */
 static void
-reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
-                               struct kvm_mmu *context, bool execonly)
+reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
 {
        __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
                                    reserved_hpa_bits(), execonly,
@@ -4794,7 +4751,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
                context->gva_to_gpa = paging32_gva_to_gpa;
 
        reset_guest_paging_metadata(vcpu, context);
-       reset_tdp_shadow_zero_bits_mask(vcpu, context);
+       reset_tdp_shadow_zero_bits_mask(context);
 }
 
 static union kvm_mmu_role
@@ -4948,7 +4905,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
        update_permission_bitmask(context, true);
        context->pkru_mask = 0;
        reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level);
-       reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
+       reset_ept_shadow_zero_bits_mask(context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
@@ -5100,17 +5057,19 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
        kvm_mmu_sync_roots(vcpu);
 
        kvm_mmu_load_pgd(vcpu);
-       static_call(kvm_x86_tlb_flush_current)(vcpu);
+       static_call(kvm_x86_flush_tlb_current)(vcpu);
 out:
        return r;
 }
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-       kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
-       WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa));
-       kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
-       WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
+       struct kvm *kvm = vcpu->kvm;
+
+       kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
+       WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
+       kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+       WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -5260,7 +5219,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
 
        ++vcpu->kvm->stat.mmu_pte_write;
-       kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
        for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
                if (detect_write_misaligned(sp, gpa, bytes) ||
@@ -5285,7 +5243,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                }
        }
        kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush);
-       kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
        write_unlock(&vcpu->kvm->mmu_lock);
 }
 
@@ -5295,7 +5252,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
        int r, emulation_type = EMULTYPE_PF;
        bool direct = vcpu->arch.mmu->direct_map;
 
-       if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
+       if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
                return RET_PF_RETRY;
 
        r = RET_PF_INVALID;
@@ -5360,14 +5317,14 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                if (is_noncanonical_address(gva, vcpu))
                        return;
 
-               static_call(kvm_x86_tlb_flush_gva)(vcpu, gva);
+               static_call(kvm_x86_flush_tlb_gva)(vcpu, gva);
        }
 
        if (!mmu->invlpg)
                return;
 
        if (root_hpa == INVALID_PAGE) {
-               mmu->invlpg(vcpu, gva, mmu->root_hpa);
+               mmu->invlpg(vcpu, gva, mmu->root.hpa);
 
                /*
                 * INVLPG is required to invalidate any global mappings for the VA,
@@ -5403,7 +5360,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
        uint i;
 
        if (pcid == kvm_get_active_pcid(vcpu)) {
-               mmu->invlpg(vcpu, gva, mmu->root_hpa);
+               mmu->invlpg(vcpu, gva, mmu->root.hpa);
                tlb_flush = true;
        }
 
@@ -5416,7 +5373,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
        }
 
        if (tlb_flush)
-               static_call(kvm_x86_tlb_flush_gva)(vcpu, gva);
+               static_call(kvm_x86_flush_tlb_gva)(vcpu, gva);
 
        ++vcpu->stat.invlpg;
 
@@ -5516,8 +5473,8 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
        struct page *page;
        int i;
 
-       mmu->root_hpa = INVALID_PAGE;
-       mmu->root_pgd = 0;
+       mmu->root.hpa = INVALID_PAGE;
+       mmu->root.pgd = 0;
        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
                mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
@@ -5802,7 +5759,7 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
                                    struct kvm_rmap_head *rmap_head,
                                    const struct kvm_memory_slot *slot)
 {
-       return __rmap_write_protect(kvm, rmap_head, false);
+       return rmap_write_protect(rmap_head, false);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -5846,12 +5803,52 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
         * will clear a separate software-only bit (MMU-writable) and skip the
         * flush if-and-only-if this bit was already clear.
         *
-        * See DEFAULT_SPTE_MMU_WRITEABLE for more details.
+        * See is_writable_pte() for more details.
         */
        if (flush)
                kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
+/* Must be called with the mmu_lock held in write-mode. */
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot,
+                                  u64 start, u64 end,
+                                  int target_level)
+{
+       if (is_tdp_mmu_enabled(kvm))
+               kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end,
+                                                target_level, false);
+
+       /*
+        * A TLB flush is unnecessary at this point for the same reasons as in
+        * kvm_mmu_slot_try_split_huge_pages().
+        */
+}
+
+void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
+                                       const struct kvm_memory_slot *memslot,
+                                       int target_level)
+{
+       u64 start = memslot->base_gfn;
+       u64 end = start + memslot->npages;
+
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+               kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, true);
+               read_unlock(&kvm->mmu_lock);
+       }
+
+       /*
+        * No TLB flush is necessary here. KVM will flush TLBs after
+        * write-protecting and/or clearing dirty on the newly split SPTEs to
+        * ensure that guest writes are reflected in the dirty log before the
+        * ioctl to enable dirty logging on this memslot completes. Since the
+        * split SPTEs retain the write and dirty bits of the huge SPTE, it is
+        * safe for KVM to decide if a TLB flush is necessary based on the split
+        * SPTEs.
+        */
+}
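Both helpers only act on the TDP MMU at this point; kvm_mmu_try_split_huge_pages() relies on the caller already holding mmu_lock for write (as in kvm_arch_mmu_enable_log_dirty_pt_masked() above), while the slot-wide variant takes mmu_lock for read itself. A sketch of how the slot-wide variant is expected to be used when dirty logging is enabled on a memslot (the caller shown is an assumption; the real one lives outside this file):

	/* The memslot has just had dirty logging enabled. */
	if (READ_ONCE(eager_page_split))
		kvm_mmu_slot_try_split_huge_pages(kvm, memslot, PG_LEVEL_4K);

	/*
	 * Write protection at 4K follows; the split SPTEs retained the huge
	 * SPTE's write and dirty bits, so the usual flush logic still applies.
	 */
	kvm_mmu_slot_remove_write_access(kvm, memslot, PG_LEVEL_4K);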
+
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
                                         struct kvm_rmap_head *rmap_head,
                                         const struct kvm_memory_slot *slot)
@@ -6191,7 +6188,6 @@ void kvm_mmu_module_exit(void)
        mmu_destroy_caches();
        percpu_counter_destroy(&kvm_total_used_mmu_pages);
        unregister_shrinker(&mmu_shrinker);
-       mmu_audit_disable();
 }
 
 /*