virt/kvm/pfncache.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables kernel and guest-mode vCPU access to guest physical
 * memory with suitable invalidation mechanisms.
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Authors:
 *   David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/errno.h>

#include "kvm_mm.h"

/*
 * MMU notifier 'invalidate_range_start' hook.
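 *
 * Any cache whose userspace HVA overlaps the invalidated range is marked
 * invalid, so that the next kvm_gfn_to_pfn_cache_check() fails and the user
 * is forced into kvm_gfn_to_pfn_cache_refresh().  vCPUs which may have the
 * PFN mapped into guest context are kicked out of guest mode (via
 * KVM_REQ_OUTSIDE_GUEST_MODE) before the invalidation is allowed to proceed.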
 */
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
                                       unsigned long end, bool may_block)
{
        DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
        struct gfn_to_pfn_cache *gpc;
        bool evict_vcpus = false;

        spin_lock(&kvm->gpc_lock);
        list_for_each_entry(gpc, &kvm->gpc_list, list) {
                write_lock_irq(&gpc->lock);

                /* Only a single page so no need to care about length */
                if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
                    gpc->uhva >= start && gpc->uhva < end) {
                        gpc->valid = false;

                        /*
                         * If a guest vCPU could be using the physical address,
                         * it needs to be forced out of guest mode.
                         */
                        if (gpc->usage & KVM_GUEST_USES_PFN) {
                                if (!evict_vcpus) {
                                        evict_vcpus = true;
                                        bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
                                }
                                __set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
                        }

                        /*
                         * We cannot call mark_page_dirty() from here because
                         * this physical CPU might not have an active vCPU
                         * with which to do the KVM dirty tracking.
                         *
                         * Neither is there any point in telling the kernel MM
                         * that the underlying page is dirty. A vCPU in guest
                         * mode might still be writing to it up to the point
                         * where we wake them a few lines further down anyway.
                         *
                         * So all the dirty marking happens on the unmap.
                         */
                }
                write_unlock_irq(&gpc->lock);
        }
        spin_unlock(&kvm->gpc_lock);

        if (evict_vcpus) {
                /*
                 * KVM needs to ensure the vCPU is fully out of guest context
                 * before allowing the invalidation to continue.
                 */
                unsigned int req = KVM_REQ_OUTSIDE_GUEST_MODE;
                bool called;

                /*
                 * If the OOM reaper is active, then all vCPUs should have
                 * been stopped already, so perform the request without
                 * KVM_REQUEST_WAIT and be sad if any needed to be IPI'd.
                 */
                if (!may_block)
                        req &= ~KVM_REQUEST_WAIT;

                called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);

                WARN_ON_ONCE(called && !may_block);
        }
}

bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
                                gpa_t gpa, unsigned long len)
{
        struct kvm_memslots *slots = kvm_memslots(kvm);

        if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
                return false;

        if (gpc->gpa != gpa || gpc->generation != slots->generation ||
            kvm_is_error_hva(gpc->uhva))
                return false;

        if (!gpc->valid)
                return false;

        return true;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
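
/*
 * Illustrative sketch only (not part of this file): the check/refresh pair is
 * meant to be used roughly as below by a host-side consumer, under the
 * cache's read lock.  'gpa', 'len', 'dirty' and 'flags' are placeholders
 * supplied by the caller; error handling is elided.
 *
 *	read_lock_irqsave(&gpc->lock, flags);
 *	while (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpa, len)) {
 *		read_unlock_irqrestore(&gpc->lock, flags);
 *
 *		if (kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty))
 *			goto out_error;
 *
 *		read_lock_irqsave(&gpc->lock, flags);
 *	}
 *
 *	... access the mapping through gpc->khva ...
 *
 *	read_unlock_irqrestore(&gpc->lock, flags);
 */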
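/*
 * Unmap and release a previously cached PFN, propagating any dirty state
 * both to the kernel MM (via kvm_release_pfn()) and to the KVM dirty log
 * (via mark_page_dirty()).
 */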
static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
                          gpa_t gpa, bool dirty)
{
        /* Unmap the old page if it was mapped before, and release it */
        if (!is_error_noslot_pfn(pfn)) {
                if (khva) {
                        if (pfn_valid(pfn))
                                kunmap(pfn_to_page(pfn));
#ifdef CONFIG_HAS_IOMEM
                        else
                                memunmap(khva);
#endif
                }

                kvm_release_pfn(pfn, dirty);
                if (dirty)
                        mark_page_dirty(kvm, gpa);
        }
}

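/*
 * Translate a userspace HVA to a PFN, retrying if an MMU notifier
 * invalidation runs concurrently: the notifier sequence count is snapshotted
 * (with a read barrier) before the translation, and mmu_notifier_retry_hva()
 * then detects whether an invalidation affecting this HVA began or completed
 * in the meantime.
 */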
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
{
        unsigned long mmu_seq;
        kvm_pfn_t new_pfn;
        int retry;

        do {
                mmu_seq = kvm->mmu_notifier_seq;
                smp_rmb();

                /* We always request a writeable mapping */
                new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
                if (is_error_noslot_pfn(new_pfn))
                        break;

                KVM_MMU_READ_LOCK(kvm);
                retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
                KVM_MMU_READ_UNLOCK(kvm);
                if (!retry)
                        break;

                cond_resched();
        } while (1);

        return new_pfn;
}

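/*
 * Revalidate the cached GPA→HVA→PFN translation after a change of GPA,
 * a memslot generation update or an MMU notifier invalidation.  For
 * KVM_HOST_USES_PFN caches the page is also (re)mapped into the kernel.
 * Any previously cached page is released, and marked dirty if it was used
 * for writes.
 */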
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
                                 gpa_t gpa, unsigned long len, bool dirty)
{
        struct kvm_memslots *slots = kvm_memslots(kvm);
        unsigned long page_offset = gpa & ~PAGE_MASK;
        kvm_pfn_t old_pfn, new_pfn;
        unsigned long old_uhva;
        gpa_t old_gpa;
        void *old_khva;
        bool old_valid, old_dirty;
        int ret = 0;

        /*
         * It must fit within a single page. The 'len' argument is
         * only to enforce that.
         */
        if (page_offset + len > PAGE_SIZE)
                return -EINVAL;

        write_lock_irq(&gpc->lock);

        old_gpa = gpc->gpa;
        old_pfn = gpc->pfn;
        old_khva = gpc->khva - offset_in_page(gpc->khva);
        old_uhva = gpc->uhva;
        old_valid = gpc->valid;
        old_dirty = gpc->dirty;

        /*
         * If the GPA has changed, the memslot generation is stale or the
         * userspace HVA is invalid, refresh the GPA→HVA mapping first.
         */
        if (gpc->gpa != gpa || gpc->generation != slots->generation ||
            kvm_is_error_hva(gpc->uhva)) {
                gfn_t gfn = gpa_to_gfn(gpa);

                gpc->dirty = false;
                gpc->gpa = gpa;
                gpc->generation = slots->generation;
                gpc->memslot = __gfn_to_memslot(slots, gfn);
                gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);

                if (kvm_is_error_hva(gpc->uhva)) {
                        gpc->pfn = KVM_PFN_ERR_FAULT;
                        ret = -EFAULT;
                        goto out;
                }

                gpc->uhva += page_offset;
        }

        /*
         * If the userspace HVA changed or the PFN was already invalid,
         * drop the lock and do the HVA to PFN lookup again.
         */
        if (!old_valid || old_uhva != gpc->uhva) {
                unsigned long uhva = gpc->uhva;
                void *new_khva = NULL;

                /* Placeholders for "hva is valid but not yet mapped" */
                gpc->pfn = KVM_PFN_ERR_FAULT;
                gpc->khva = NULL;
                gpc->valid = true;

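                /*
                 * The write lock must be dropped here: hva_to_pfn_retry()
                 * and the kmap()/memremap() below may sleep.  Invalidations
                 * which race with the lookup itself are caught by the MMU
                 * notifier sequence check in hva_to_pfn_retry().
                 */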
                write_unlock_irq(&gpc->lock);

                new_pfn = hva_to_pfn_retry(kvm, uhva);
                if (is_error_noslot_pfn(new_pfn)) {
                        ret = -EFAULT;
                        goto map_done;
                }

                if (gpc->usage & KVM_HOST_USES_PFN) {
                        if (new_pfn == old_pfn) {
                                new_khva = old_khva;
                                old_pfn = KVM_PFN_ERR_FAULT;
                                old_khva = NULL;
                        } else if (pfn_valid(new_pfn)) {
                                new_khva = kmap(pfn_to_page(new_pfn));
#ifdef CONFIG_HAS_IOMEM
                        } else {
                                new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
#endif
                        }
                        if (new_khva)
                                new_khva += page_offset;
                        else
                                ret = -EFAULT;
                }

        map_done:
                write_lock_irq(&gpc->lock);
                if (ret) {
                        gpc->valid = false;
                        gpc->pfn = KVM_PFN_ERR_FAULT;
                        gpc->khva = NULL;
                } else {
                        /* At this point, gpc->valid may already have been cleared */
                        gpc->pfn = new_pfn;
                        gpc->khva = new_khva;
                }
        } else {
                /* If the HVA→PFN mapping was already valid, don't unmap it. */
                old_pfn = KVM_PFN_ERR_FAULT;
                old_khva = NULL;
        }

 out:
        if (ret)
                gpc->dirty = false;
        else
                gpc->dirty = dirty;

        write_unlock_irq(&gpc->lock);

        __release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);

        return ret;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);

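/*
 * Drop the cached PFN mapping and write back any dirty state, while leaving
 * the GPA→HVA translation and the cache's place on kvm->gpc_list intact so
 * that a subsequent refresh can re-establish the mapping.
 */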
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
        void *old_khva;
        kvm_pfn_t old_pfn;
        bool old_dirty;
        gpa_t old_gpa;

        write_lock_irq(&gpc->lock);

        gpc->valid = false;

        old_khva = gpc->khva - offset_in_page(gpc->khva);
        old_dirty = gpc->dirty;
        old_gpa = gpc->gpa;
        old_pfn = gpc->pfn;

        /*
         * We can leave the GPA → uHVA map cache intact but the PFN
         * lookup will need to be redone even for the same page.
         */
        gpc->khva = NULL;
        gpc->pfn = KVM_PFN_ERR_FAULT;

        write_unlock_irq(&gpc->lock);

        __release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);

int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
                              struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
                              gpa_t gpa, unsigned long len, bool dirty)
{
        WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);

        if (!gpc->active) {
                rwlock_init(&gpc->lock);

                gpc->khva = NULL;
                gpc->pfn = KVM_PFN_ERR_FAULT;
                gpc->uhva = KVM_HVA_ERR_BAD;
                gpc->vcpu = vcpu;
                gpc->usage = usage;
                gpc->valid = false;
                gpc->active = true;

                spin_lock(&kvm->gpc_lock);
                list_add(&gpc->list, &kvm->gpc_list);
                spin_unlock(&kvm->gpc_lock);
        }
        return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
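
/*
 * Illustrative sketch only (not part of this file): a typical host-only
 * cache lifecycle.  'gpc' would normally be embedded in a longer-lived
 * structure; 'gpa' and 'len' are placeholders, with 'len' required to fit
 * within the page containing 'gpa'.  A NULL vcpu is fine here because the
 * vcpu is only needed for KVM_GUEST_USES_PFN caches.
 *
 *	int ret;
 *
 *	ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
 *					gpa, len, false);
 *	if (ret)
 *		return ret;
 *
 *	... check/refresh and access gpc->khva as sketched above ...
 *
 *	kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
 */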

void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
        if (gpc->active) {
                spin_lock(&kvm->gpc_lock);
                list_del(&gpc->list);
                spin_unlock(&kvm->gpc_lock);

                kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
                gpc->active = false;
        }
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);