kvm: x86/mmu: Add access tracking for tdp_mmu
[linux-2.6-microblaze.git] arch/x86/kvm/mmu/tdp_mmu.c
// SPDX-License-Identifier: GPL-2.0

#include "mmu.h"
#include "mmu_internal.h"
#include "mmutrace.h"
#include "tdp_iter.h"
#include "tdp_mmu.h"
#include "spte.h"

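/*
 * Whether the TDP MMU may be used for new VMs. Sampled once per VM in
 * kvm_mmu_init_tdp_mmu(); the per-VM copy (kvm->arch.tdp_mmu_enabled) then
 * stays fixed for the lifetime of that VM.
 */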
static bool __read_mostly tdp_mmu_enabled = false;

static bool is_tdp_mmu_enabled(void)
{
#ifdef CONFIG_X86_64
        return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
#else
        return false;
#endif /* CONFIG_X86_64 */
}

/* Initializes the TDP MMU for the VM, if enabled. */
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
        if (!is_tdp_mmu_enabled())
                return;

        /* This should not be changed for the lifetime of the VM. */
        kvm->arch.tdp_mmu_enabled = true;

        INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
        INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
}

void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
        if (!kvm->arch.tdp_mmu_enabled)
                return;

        WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}

#define for_each_tdp_mmu_root(_kvm, _root)                          \
        list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)

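/*
 * Returns true if the root page backing @hpa was allocated by the TDP MMU
 * and is still in use (i.e. has a non-zero root reference count).
 */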
bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
        struct kvm_mmu_page *sp;

        sp = to_shadow_page(hpa);

        return sp->tdp_mmu_page && sp->root_count;
}

static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                          gfn_t start, gfn_t end, bool can_yield);

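/*
 * Frees a TDP MMU root whose reference count has dropped to zero: the root is
 * unlinked, the entire paging structure beneath it is zapped without
 * yielding, and the backing pages are freed. The caller must hold
 * kvm->mmu_lock.
 */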
void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
        gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);

        lockdep_assert_held(&kvm->mmu_lock);

        WARN_ON(root->root_count);
        WARN_ON(!root->tdp_mmu_page);

        list_del(&root->link);

        zap_gfn_range(kvm, root, 0, max_gfn, false);

        free_page((unsigned long)root->spt);
        kmem_cache_free(mmu_page_header_cache, root);
}

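/*
 * Computes the page role for a TDP MMU page table at the given level. TDP MMU
 * pages are always direct mapped with 8-byte PTEs and full access; only the
 * level (and the vCPU's base role, e.g. SMM) varies.
 */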
static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
                                                   int level)
{
        union kvm_mmu_page_role role;

        role = vcpu->arch.mmu->mmu_role.base;
        role.level = level;
        role.direct = true;
        role.gpte_is_8_bytes = true;
        role.access = ACC_ALL;

        return role;
}

static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
                                               int level)
{
        struct kvm_mmu_page *sp;

        sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
        sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
        set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

        sp->role.word = page_role_for_level(vcpu, level).word;
        sp->gfn = gfn;
        sp->tdp_mmu_page = true;

        return sp;
}

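/*
 * Returns the vCPU's TDP MMU root page, taking a reference on an existing
 * root with a matching role if one is found, and otherwise allocating and
 * publishing a new one under kvm->mmu_lock.
 */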
static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
{
        union kvm_mmu_page_role role;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_page *root;

        role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);

        spin_lock(&kvm->mmu_lock);

        /* Check for an existing root before allocating a new one. */
        for_each_tdp_mmu_root(kvm, root) {
                if (root->role.word == role.word) {
                        kvm_mmu_get_root(kvm, root);
                        spin_unlock(&kvm->mmu_lock);
                        return root;
                }
        }

        root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
        root->root_count = 1;

        list_add(&root->link, &kvm->arch.tdp_mmu_roots);

        spin_unlock(&kvm->mmu_lock);

        return root;
}

hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu_page *root;

        root = get_tdp_mmu_vcpu_root(vcpu);
        if (!root)
                return INVALID_PAGE;

        return __pa(root->spt);
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                                u64 old_spte, u64 new_spte, int level);

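/* Returns the address space ID of the paging structure: 1 for SMM, else 0. */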
static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
        return sp->role.smm ? 1 : 0;
}

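/*
 * Propagates the accessed state of a leaf SPTE to the primary MM: if an
 * accessed SPTE is being dropped, replaced with an unaccessed SPTE, or
 * repointed at a different PFN, mark the old PFN as accessed.
 */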
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
        bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

        if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
                return;

        if (is_accessed_spte(old_spte) &&
            (!is_accessed_spte(new_spte) || pfn_changed))
                kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}

/**
 * handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
 * @old_spte: The value of the SPTE before the change
 * @new_spte: The value of the SPTE after the change
 * @level: the level of the PT the SPTE is part of in the paging structure
 *
 * Handle bookkeeping that might result from the modification of a SPTE.
 * This function must be called for all TDP SPTE modifications.
 */
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                                u64 old_spte, u64 new_spte, int level)
{
        bool was_present = is_shadow_present_pte(old_spte);
        bool is_present = is_shadow_present_pte(new_spte);
        bool was_leaf = was_present && is_last_spte(old_spte, level);
        bool is_leaf = is_present && is_last_spte(new_spte, level);
        bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
        u64 *pt;
        struct kvm_mmu_page *sp;
        u64 old_child_spte;
        int i;

        WARN_ON(level > PT64_ROOT_MAX_LEVEL);
        WARN_ON(level < PG_LEVEL_4K);
        WARN_ON(gfn % KVM_PAGES_PER_HPAGE(level));

        /*
         * If this warning were to trigger it would indicate that there was a
         * missing MMU notifier or a race with some notifier handler.
         * A present, leaf SPTE should never be directly replaced with another
         * present leaf SPTE pointing to a different PFN. A notifier handler
         * should be zapping the SPTE before the main MM's page table is
         * changed, or the SPTE should be zeroed, and the TLBs flushed by the
         * thread before replacement.
         */
        if (was_leaf && is_leaf && pfn_changed) {
                pr_err("Invalid SPTE change: cannot replace a present leaf\n"
                       "SPTE with another present leaf SPTE mapping a\n"
                       "different PFN!\n"
                       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
                       as_id, gfn, old_spte, new_spte, level);

                /*
                 * Crash the host to prevent error propagation and guest data
                 * corruption.
                 */
                BUG();
        }

        if (old_spte == new_spte)
                return;

        /*
         * The only times a SPTE should be changed from a non-present to
         * non-present state is when an MMIO entry is installed/modified/
         * removed. In that case, there is nothing to do here.
         */
        if (!was_present && !is_present) {
                /*
                 * If this change does not involve a MMIO SPTE, it is
                 * unexpected. Log the change, though it should not impact the
                 * guest since both the former and current SPTEs are nonpresent.
                 */
                if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
                        pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
                               "should not be replaced with another,\n"
                               "different nonpresent SPTE, unless one or both\n"
                               "are MMIO SPTEs.\n"
                               "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
                               as_id, gfn, old_spte, new_spte, level);
                return;
        }

        if (was_leaf && is_dirty_spte(old_spte) &&
            (!is_dirty_spte(new_spte) || pfn_changed))
                kvm_set_pfn_dirty(spte_to_pfn(old_spte));

        /*
         * Recursively handle child PTs if the change removed a subtree from
         * the paging structure.
         */
        if (was_present && !was_leaf && (pfn_changed || !is_present)) {
                pt = spte_to_child_pt(old_spte, level);
                sp = sptep_to_sp(pt);

                list_del(&sp->link);

                for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
                        old_child_spte = READ_ONCE(*(pt + i));
                        WRITE_ONCE(*(pt + i), 0);
                        handle_changed_spte(kvm, as_id,
                                gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
                                old_child_spte, 0, level - 1);
                }

                kvm_flush_remote_tlbs_with_address(kvm, gfn,
                                                   KVM_PAGES_PER_HPAGE(level));

                free_page((unsigned long)pt);
                kmem_cache_free(mmu_page_header_cache, sp);
        }
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                                u64 old_spte, u64 new_spte, int level)
{
        __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
        handle_changed_spte_acc_track(old_spte, new_spte, level);
}

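/*
 * __tdp_mmu_set_spte - install a new SPTE value at the iterator's position
 * and handle the associated bookkeeping. If @record_acc_track is false, the
 * accessed state of the old SPTE is not propagated to the primary MM; this is
 * used by the aging path, which clears the accessed bit and must not mark the
 * underlying page accessed as a side effect.
 */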
static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
                                      u64 new_spte, bool record_acc_track)
{
        u64 *root_pt = tdp_iter_root_pt(iter);
        struct kvm_mmu_page *root = sptep_to_sp(root_pt);
        int as_id = kvm_mmu_page_as_id(root);

        WRITE_ONCE(*iter->sptep, new_spte);

        __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
                              iter->level);
        if (record_acc_track)
                handle_changed_spte_acc_track(iter->old_spte, new_spte,
                                              iter->level);
}

static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
                                    u64 new_spte)
{
        __tdp_mmu_set_spte(kvm, iter, new_spte, true);
}

static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
                                                 struct tdp_iter *iter,
                                                 u64 new_spte)
{
        __tdp_mmu_set_spte(kvm, iter, new_spte, false);
}

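/*
 * Iterator helpers: tdp_root_for_each_pte() walks all SPTEs of a root in the
 * GFN range [_start, _end); the _leaf_ variant additionally skips SPTEs that
 * are not present, last-level mappings. tdp_mmu_for_each_pte() walks the
 * paging structure rooted at the given MMU's root_hpa.
 */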
#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
        for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)  \
        tdp_root_for_each_pte(_iter, _root, _start, _end)               \
                if (!is_shadow_present_pte(_iter.old_spte) ||           \
                    !is_last_spte(_iter.old_spte, _iter.level))         \
                        continue;                                       \
                else

#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)         \
        for_each_tdp_pte(_iter, __va(_mmu->root_hpa),           \
                         _mmu->shadow_root_level, _start, _end)

/*
 * Flush the TLB if the process should drop kvm->mmu_lock.
 * Return whether the caller still needs to flush the TLB.
 */
static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
        if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
                kvm_flush_remote_tlbs(kvm);
                cond_resched_lock(&kvm->mmu_lock);
                tdp_iter_refresh_walk(iter);
                return false;
        } else {
                return true;
        }
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 * If can_yield is true, will release the MMU lock and reschedule if the
 * scheduler needs the CPU or there is contention on the MMU lock. If this
 * function cannot yield, it will not release the MMU lock or reschedule and
 * the caller must ensure it does not supply too large a GFN range, or the
 * operation can cause a soft lockup.
 */
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                          gfn_t start, gfn_t end, bool can_yield)
{
        struct tdp_iter iter;
        bool flush_needed = false;

        tdp_root_for_each_pte(iter, root, start, end) {
                if (!is_shadow_present_pte(iter.old_spte))
                        continue;

                /*
                 * If this is a non-last-level SPTE that covers a larger range
                 * than should be zapped, continue, and zap the mappings at a
                 * lower level.
                 */
                if ((iter.gfn < start ||
                     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
                    !is_last_spte(iter.old_spte, iter.level))
                        continue;

                tdp_mmu_set_spte(kvm, &iter, 0);

                if (can_yield)
                        flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
                else
                        flush_needed = true;
        }
        return flush_needed;
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 */
bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
{
        struct kvm_mmu_page *root;
        bool flush = false;

        for_each_tdp_mmu_root(kvm, root) {
                /*
                 * Take a reference on the root so that it cannot be freed if
                 * this thread releases the MMU lock and yields in this loop.
                 */
                kvm_mmu_get_root(kvm, root);

                flush |= zap_gfn_range(kvm, root, start, end, true);

                kvm_mmu_put_root(kvm, root);
        }

        return flush;
}

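/*
 * Zaps every SPTE in every TDP MMU root, over the full guest physical address
 * range, and flushes remote TLBs if anything was cleared.
 */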
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
        gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
        bool flush;

        flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
        if (flush)
                kvm_flush_remote_tlbs(kvm);
}

/*
 * Installs a last-level SPTE to handle a TDP page fault.
 * (NPT/EPT violation/misconfiguration)
 */
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
                                          int map_writable,
                                          struct tdp_iter *iter,
                                          kvm_pfn_t pfn, bool prefault)
{
        u64 new_spte;
        int ret = 0;
        int make_spte_ret = 0;

        if (unlikely(is_noslot_pfn(pfn))) {
                new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
                trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
        } else
                make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
                                         pfn, iter->old_spte, prefault, true,
                                         map_writable, !shadow_accessed_mask,
                                         &new_spte);

        if (new_spte == iter->old_spte)
                ret = RET_PF_SPURIOUS;
        else
                tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);

        /*
         * If the page fault was caused by a write but the page is write
         * protected, emulation is needed. If the emulation was skipped,
         * the vCPU would have the same fault again.
         */
        if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
                if (write)
                        ret = RET_PF_EMULATE;
                kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
        }

        /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
        if (unlikely(is_mmio_spte(new_spte)))
                ret = RET_PF_EMULATE;

        trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
        if (!prefault)
                vcpu->stat.pf_fixed++;

        return ret;
}

/*
 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
 * page tables and SPTEs to translate the faulting guest physical address.
 */
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
                    int map_writable, int max_level, kvm_pfn_t pfn,
                    bool prefault)
{
        bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
        bool write = error_code & PFERR_WRITE_MASK;
        bool exec = error_code & PFERR_FETCH_MASK;
        bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
        struct kvm_mmu *mmu = vcpu->arch.mmu;
        struct tdp_iter iter;
        struct kvm_mmu_page *sp;
        u64 *child_pt;
        u64 new_spte;
        int ret;
        gfn_t gfn = gpa >> PAGE_SHIFT;
        int level;
        int req_level;

        if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
                return RET_PF_RETRY;
        if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
                return RET_PF_RETRY;

        level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
                                        huge_page_disallowed, &req_level);

        trace_kvm_mmu_spte_requested(gpa, level, pfn);
        tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
                if (nx_huge_page_workaround_enabled)
                        disallowed_hugepage_adjust(iter.old_spte, gfn,
                                                   iter.level, &pfn, &level);

                if (iter.level == level)
                        break;

                /*
                 * If there is an SPTE mapping a large page at a higher level
                 * than the target, that SPTE must be cleared and replaced
                 * with a non-leaf SPTE.
                 */
                if (is_shadow_present_pte(iter.old_spte) &&
                    is_large_pte(iter.old_spte)) {
                        tdp_mmu_set_spte(vcpu->kvm, &iter, 0);

                        kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
                                        KVM_PAGES_PER_HPAGE(iter.level));

                        /*
                         * The iter must explicitly re-read the spte here
                         * because the new value informs the !present
                         * path below.
                         */
                        iter.old_spte = READ_ONCE(*iter.sptep);
                }

                if (!is_shadow_present_pte(iter.old_spte)) {
                        sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
                        list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
                        child_pt = sp->spt;
                        clear_page(child_pt);
                        new_spte = make_nonleaf_spte(child_pt,
                                                     !shadow_accessed_mask);

                        trace_kvm_mmu_get_page(sp, true);
                        tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
                }
        }

        if (WARN_ON(iter.level != level))
                return RET_PF_RETRY;

        ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
                                              pfn, prefault);

        return ret;
}

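/*
 * Invokes @handler on every TDP MMU root, for each memslot GFN range that
 * overlaps the host virtual address range [start, end). A reference is held
 * on each root while it is processed so the root cannot be freed if the
 * handler yields the MMU lock. Returns the OR of the handler return values.
 */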
static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
                unsigned long end, unsigned long data,
                int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
                               struct kvm_mmu_page *root, gfn_t start,
                               gfn_t end, unsigned long data))
{
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        struct kvm_mmu_page *root;
        int ret = 0;
        int as_id;

        for_each_tdp_mmu_root(kvm, root) {
                /*
                 * Take a reference on the root so that it cannot be freed if
                 * this thread releases the MMU lock and yields in this loop.
                 */
                kvm_mmu_get_root(kvm, root);

                as_id = kvm_mmu_page_as_id(root);
                slots = __kvm_memslots(kvm, as_id);
                kvm_for_each_memslot(memslot, slots) {
                        unsigned long hva_start, hva_end;
                        gfn_t gfn_start, gfn_end;

                        hva_start = max(start, memslot->userspace_addr);
                        hva_end = min(end, memslot->userspace_addr +
                                      (memslot->npages << PAGE_SHIFT));
                        if (hva_start >= hva_end)
                                continue;
                        /*
                         * {gfn(page) | page intersects with [hva_start, hva_end)} =
                         * {gfn_start, gfn_start+1, ..., gfn_end-1}.
                         */
                        gfn_start = hva_to_gfn_memslot(hva_start, memslot);
                        gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

                        ret |= handler(kvm, memslot, root, gfn_start,
                                       gfn_end, data);
                }

                kvm_mmu_put_root(kvm, root);
        }

        return ret;
}

static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
                                     struct kvm_memory_slot *slot,
                                     struct kvm_mmu_page *root, gfn_t start,
                                     gfn_t end, unsigned long unused)
{
        return zap_gfn_range(kvm, root, start, end, false);
}

int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
                              unsigned long end)
{
        return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
                                            zap_gfn_range_hva_wrapper);
}

/*
 * Mark the SPTEs in the range of GFNs [start, end) unaccessed and return
 * non-zero if any of the GFNs in the range have been accessed.
 */
static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
                         struct kvm_mmu_page *root, gfn_t start, gfn_t end,
                         unsigned long unused)
{
        struct tdp_iter iter;
        int young = 0;
        u64 new_spte = 0;

        tdp_root_for_each_leaf_pte(iter, root, start, end) {
                /*
                 * If we have a non-accessed entry we don't need to change the
                 * pte.
                 */
                if (!is_accessed_spte(iter.old_spte))
                        continue;

                new_spte = iter.old_spte;

                if (spte_ad_enabled(new_spte)) {
                        clear_bit((ffs(shadow_accessed_mask) - 1),
                                  (unsigned long *)&new_spte);
                } else {
                        /*
                         * Capture the dirty status of the page, so that it
                         * doesn't get lost when the SPTE is marked for access
                         * tracking.
                         */
                        if (is_writable_pte(new_spte))
                                kvm_set_pfn_dirty(spte_to_pfn(new_spte));

                        new_spte = mark_spte_for_access_track(new_spte);
                }

                tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
                young = 1;
        }

        return young;
}

int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
                              unsigned long end)
{
        return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
                                            age_gfn_range);
}

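/*
 * Returns non-zero if the SPTE mapping the GFN is marked accessed. Unlike
 * age_gfn_range(), the accessed state is only tested, not cleared.
 */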
static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
                        struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
                        unsigned long unused2)
{
        struct tdp_iter iter;

        tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1)
                if (is_accessed_spte(iter.old_spte))
                        return 1;

        return 0;
}

int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
{
        return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
                                            test_age_gfn);
}