// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables kernel and guest-mode vCPU access to guest physical
 * memory with suitable invalidation mechanisms.
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Authors:
 *   David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/errno.h>

#include "kvm_mm.h"
/*
 * MMU notifier 'invalidate_range_start' hook.
 */
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
				       unsigned long end, bool may_block)
{
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	struct gfn_to_pfn_cache *gpc;
	bool evict_vcpus = false;

	spin_lock(&kvm->gpc_lock);
	list_for_each_entry(gpc, &kvm->gpc_list, list) {
		write_lock_irq(&gpc->lock);

		/* Only a single page so no need to care about length */
		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
		    gpc->uhva >= start && gpc->uhva < end) {
			gpc->valid = false;

			/*
			 * If a guest vCPU could be using the physical address,
			 * it needs to be forced out of guest mode.
			 */
			if (gpc->usage & KVM_GUEST_USES_PFN) {
				if (!evict_vcpus) {
					evict_vcpus = true;
					bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
				}
				__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
			}

			/*
			 * We cannot call mark_page_dirty() from here because
			 * this physical CPU might not have an active vCPU
			 * with which to do the KVM dirty tracking.
			 *
			 * Neither is there any point in telling the kernel MM
			 * that the underlying page is dirty. A vCPU in guest
			 * mode might still be writing to it up to the point
			 * where we wake it a few lines further down anyway.
			 *
			 * So all the dirty marking happens on the unmap.
			 */
		}
		write_unlock_irq(&gpc->lock);
	}
	spin_unlock(&kvm->gpc_lock);

	if (evict_vcpus) {
		/*
		 * KVM needs to ensure the vCPU is fully out of guest context
		 * before allowing the invalidation to continue.
		 */
		unsigned int req = KVM_REQ_OUTSIDE_GUEST_MODE;
		bool called;

		/*
		 * If the OOM reaper is active, then all vCPUs should have
		 * been stopped already, so perform the request without
		 * KVM_REQUEST_WAIT and be sad if any needed to be IPI'd.
		 */
		if (!may_block)
			req &= ~KVM_REQUEST_WAIT;

		called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);

		WARN_ON_ONCE(called && !may_block);
	}
}
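/*
 * Illustrative sketch only: how the generic KVM MMU notifier is assumed
 * to reach the hook above. The caller-side details are simplified and
 * are not a verbatim copy of kvm_main.c:
 *
 *	static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 *					const struct mmu_notifier_range *range)
 *	{
 *		struct kvm *kvm = container_of(mn, struct kvm, mmu_notifier);
 *
 *		gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
 *						  mmu_notifier_range_blockable(range));
 *		...
 *	}
 */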
bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				gpa_t gpa, unsigned long len)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);

	/* The requested range must fit within a single page */
	if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
		return false;

	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
	    kvm_is_error_hva(gpc->uhva))
		return false;

	return gpc->valid;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
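/*
 * A minimal sketch of the intended check/refresh pattern, assuming the
 * caller holds gpc->lock for read around any actual access; 'do_access'
 * is a placeholder, not a real helper:
 *
 *	read_lock(&gpc->lock);
 *	while (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpa, len)) {
 *		read_unlock(&gpc->lock);
 *		if (kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty))
 *			goto fail;
 *		read_lock(&gpc->lock);
 *	}
 *	do_access(gpc->khva, len);
 *	read_unlock(&gpc->lock);
 */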
static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
			  gpa_t gpa, bool dirty)
{
	/* Unmap the old page if it was mapped before, and release it */
	if (!is_error_noslot_pfn(pfn)) {
		if (khva) {
			if (pfn_valid(pfn))
				kunmap(pfn_to_page(pfn));
#ifdef CONFIG_HAS_IOMEM
			else
				memunmap(khva);
#endif
		}
		kvm_release_pfn(pfn, dirty);
		if (dirty)
			mark_page_dirty(kvm, gpa);
	}
}
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
{
	unsigned long mmu_seq;
	kvm_pfn_t new_pfn;
	int retry;

	do {
		mmu_seq = kvm->mmu_notifier_seq;
		smp_rmb();

		/* We always request a writeable mapping */
		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
		if (is_error_noslot_pfn(new_pfn))
			break;

		KVM_MMU_READ_LOCK(kvm);
		retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
		KVM_MMU_READ_UNLOCK(kvm);
		if (!retry)
			break;

		cond_resched();
	} while (1);

	return new_pfn;
}
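/*
 * Why the mmu_notifier_seq dance above matters, as a sketch of the race
 * it closes (timeline is illustrative):
 *
 *	refresh path			MMU notifier
 *	------------			------------
 *	mmu_seq = mmu_notifier_seq
 *	pfn = hva_to_pfn(uhva)
 *					invalidate_range_start(uhva)
 *					mmu_notifier_seq bumped
 *	mmu_notifier_retry_hva() sees the
 *	bumped sequence, so we retry the
 *	lookup instead of caching a stale pfn.
 */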
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				 gpa_t gpa, unsigned long len, bool dirty)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	unsigned long page_offset = gpa & ~PAGE_MASK;
	kvm_pfn_t old_pfn, new_pfn;
	unsigned long old_uhva;
	gpa_t old_gpa;
	void *old_khva;
	bool old_valid, old_dirty;
	int ret = 0;

	/*
	 * The request must fit within a single page. The 'len' argument
	 * exists only to enforce that.
	 */
	if (page_offset + len > PAGE_SIZE)
		return -EINVAL;

	write_lock_irq(&gpc->lock);

	old_gpa = gpc->gpa;
	old_pfn = gpc->pfn;
	old_khva = gpc->khva - offset_in_page(gpc->khva);
	old_uhva = gpc->uhva;
	old_valid = gpc->valid;
	old_dirty = gpc->dirty;

	/* If the userspace HVA is invalid, refresh that first */
	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
	    kvm_is_error_hva(gpc->uhva)) {
		gfn_t gfn = gpa_to_gfn(gpa);

		gpc->dirty = false;
		gpc->gpa = gpa;
		gpc->generation = slots->generation;
		gpc->memslot = __gfn_to_memslot(slots, gfn);
		gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);

		if (kvm_is_error_hva(gpc->uhva)) {
			gpc->pfn = KVM_PFN_ERR_FAULT;
			ret = -EFAULT;
			goto out;
		}

		gpc->uhva += page_offset;
	}

	/*
	 * If the userspace HVA changed or the PFN was already invalid,
	 * drop the lock and do the HVA to PFN lookup again.
	 */
	if (!old_valid || old_uhva != gpc->uhva) {
		unsigned long uhva = gpc->uhva;
		void *new_khva = NULL;

		/* Placeholders for "hva is valid but not yet mapped" */
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->khva = NULL;
		gpc->valid = true;

		write_unlock_irq(&gpc->lock);

		new_pfn = hva_to_pfn_retry(kvm, uhva);
		if (is_error_noslot_pfn(new_pfn)) {
			ret = -EFAULT;
			goto map_done;
		}

		if (gpc->usage & KVM_HOST_USES_PFN) {
			if (new_pfn == old_pfn) {
				/* Reuse the existing mapping; don't unmap it */
				new_khva = old_khva;
				old_pfn = KVM_PFN_ERR_FAULT;
				old_khva = NULL;
			} else if (pfn_valid(new_pfn)) {
				new_khva = kmap(pfn_to_page(new_pfn));
#ifdef CONFIG_HAS_IOMEM
			} else {
				new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
#endif
			}
			if (new_khva)
				new_khva += page_offset;
			else
				ret = -EFAULT;
		}

	map_done:
		write_lock_irq(&gpc->lock);
		if (ret) {
			gpc->valid = false;
			gpc->pfn = KVM_PFN_ERR_FAULT;
			gpc->khva = NULL;
		} else {
			/* At this point, gpc->valid may already have been cleared */
			gpc->pfn = new_pfn;
			gpc->khva = new_khva;
		}
	} else {
		/* If the HVA→PFN mapping was already valid, don't unmap it. */
		old_pfn = KVM_PFN_ERR_FAULT;
		old_khva = NULL;
	}

 out:
	if (ret)
		gpc->dirty = false;
	else
		gpc->dirty = dirty;

	write_unlock_irq(&gpc->lock);

	__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);
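/*
 * A minimal sketch of the dirty-tracking contract for host-side writers
 * (illustrative only; re-validation under the lock, as shown earlier, is
 * elided): passing dirty == true to the refresh merely arms gpc->dirty;
 * the mark_page_dirty() call itself is deferred to __release_gpc(), i.e.
 * to the next unmap, refresh to a different page, or destroy.
 *
 *	kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, true);
 *	read_lock(&gpc->lock);
 *	memcpy(gpc->khva, data, len);
 *	read_unlock(&gpc->lock);
 *	...
 *	kvm_gfn_to_pfn_cache_unmap(kvm, gpc);	<- page marked dirty here
 */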
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	void *old_khva;
	kvm_pfn_t old_pfn;
	bool old_dirty;
	gpa_t old_gpa;

	write_lock_irq(&gpc->lock);

	gpc->valid = false;

	old_khva = gpc->khva - offset_in_page(gpc->khva);
	old_dirty = gpc->dirty;
	old_gpa = gpc->gpa;
	old_pfn = gpc->pfn;

	/*
	 * We can leave the GPA → uHVA map cache intact but the PFN
	 * lookup will need to be redone even for the same page.
	 */
	gpc->khva = NULL;
	gpc->pfn = KVM_PFN_ERR_FAULT;

	write_unlock_irq(&gpc->lock);

	__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
			      gpa_t gpa, unsigned long len, bool dirty)
{
	WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);

	if (!gpc->active) {
		rwlock_init(&gpc->lock);

		gpc->khva = NULL;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->uhva = KVM_HVA_ERR_BAD;
		gpc->vcpu = vcpu;
		gpc->usage = usage;
		gpc->valid = false;
		gpc->active = true;

		spin_lock(&kvm->gpc_lock);
		list_add(&gpc->list, &kvm->gpc_list);
		spin_unlock(&kvm->gpc_lock);
	}
	return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	if (gpc->active) {
		spin_lock(&kvm->gpc_lock);
		list_del(&gpc->list);
		spin_unlock(&kvm->gpc_lock);

		kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
		gpc->active = false;
	}
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);
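/*
 * End-to-end lifecycle sketch, assuming a hypothetical user ('my_dev' and
 * its fields are invented for illustration): a cache is initialized once,
 * checked/refreshed while in use, and torn down exactly once.
 *
 *	int my_dev_start(struct kvm *kvm, struct my_dev *dev, gpa_t gpa)
 *	{
 *		return kvm_gfn_to_pfn_cache_init(kvm, &dev->gpc, NULL,
 *						 KVM_HOST_USES_PFN, gpa,
 *						 PAGE_SIZE, true);
 *	}
 *
 *	void my_dev_stop(struct kvm *kvm, struct my_dev *dev)
 *	{
 *		kvm_gfn_to_pfn_cache_destroy(kvm, &dev->gpc);
 *	}
 *
 * Note that a NULL vcpu is only safe when usage does not include
 * KVM_GUEST_USES_PFN, since the invalidation path dereferences gpc->vcpu
 * to force that vCPU out of guest mode.
 */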