arch/arm64/kvm/hyp/nvhe/mem_protect.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>

#include <hyp/switch.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

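/*
 * The host stage-2 is an identity map of the PA space and is configured not
 * to rely on FEAT_S2FWB for forcing memory attributes.
 */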
#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;

static struct hyp_pool host_s2_pool;

/*
 * Copies of the host's CPU feature registers holding sanitized values.
 */
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;

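/*
 * Owner id used to mark pages the hypervisor has taken away from the host in
 * the host stage-2 page-table (the host itself uses the default owner id 0).
 */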
const u8 pkvm_hyp_id = 1;

static void *host_s2_zalloc_pages_exact(size_t size)
{
        return hyp_alloc_pages(&host_s2_pool, get_order(size));
}

static void *host_s2_zalloc_page(void *pool)
{
        return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
        hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
        hyp_put_page(&host_s2_pool, addr);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
        unsigned long nr_pages, pfn;
        int ret;

        pfn = hyp_virt_to_pfn(pgt_pool_base);
        nr_pages = host_s2_pgtable_pages();
        ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
        if (ret)
                return ret;

        host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
                .zalloc_pages_exact = host_s2_zalloc_pages_exact,
                .zalloc_page = host_s2_zalloc_page,
                .phys_to_virt = hyp_phys_to_virt,
                .virt_to_phys = hyp_virt_to_phys,
                .page_count = hyp_page_count,
                .get_page = host_s2_get_page,
                .put_page = host_s2_put_page,
        };

        return 0;
}

static void prepare_host_vtcr(void)
{
        u32 parange, phys_shift;

        /* The host stage 2 is id-mapped, so use parange for T0SZ */
        parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
        phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

        host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
                                          id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

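/*
 * Build the host's identity-mapped stage-2 page-table. Stage-2 translation
 * is not enabled here; that happens per-CPU in __pkvm_prot_finalize().
 */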
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
        struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
        int ret;

        prepare_host_vtcr();
        hyp_spin_lock_init(&host_kvm.lock);

        ret = prepare_s2_pool(pgt_pool_base);
        if (ret)
                return ret;

        ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
                                        &host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
                                        host_stage2_force_pte_cb);
        if (ret)
                return ret;

        mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
        mmu->arch = &host_kvm.arch;
        mmu->pgt = &host_kvm.pgt;
        WRITE_ONCE(mmu->vmid.vmid_gen, 0);
        WRITE_ONCE(mmu->vmid.vmid, 0);

        return 0;
}

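/*
 * Install the host's stage-2 page-table on the calling CPU and enable
 * stage-2 translation for host accesses by setting HCR_EL2.VM.
 */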
int __pkvm_prot_finalize(void)
{
        struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
        struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

        params->vttbr = kvm_get_vttbr(mmu);
        params->vtcr = host_kvm.arch.vtcr;
        params->hcr_el2 |= HCR_VM;
        kvm_flush_dcache_to_poc(params, sizeof(*params));

        write_sysreg(params->hcr_el2, hcr_el2);
        __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);

        /*
         * Make sure to have an ISB before the TLB maintenance below but only
         * when __load_stage2() doesn't include one already.
         */
        asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

        /* Invalidate stale HCR bits that may be cached in TLBs */
        __tlbi(vmalls12e1);
        dsb(nsh);
        isb();

        return 0;
}

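/*
 * Tear down all MMIO (non-memory) mappings so that their page-table pages
 * can be reused; they will be rebuilt lazily on the next host access.
 */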
static int host_stage2_unmap_dev_all(void)
{
        struct kvm_pgtable *pgt = &host_kvm.pgt;
        struct memblock_region *reg;
        u64 addr = 0;
        int i, ret;

        /* Unmap all non-memory regions to recycle the pages */
        for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
                reg = &hyp_memory[i];
                ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
                if (ret)
                        return ret;
        }
        return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
        u64 start;
        u64 end;
};

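/*
 * Find the memblock region containing addr. Returns true and sets range to
 * the region's boundaries if addr is memory; otherwise returns false and
 * sets range to the hole between the surrounding memblock regions.
 */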
static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
        int cur, left = 0, right = hyp_memblock_nr;
        struct memblock_region *reg;
        phys_addr_t end;

        range->start = 0;
        range->end = ULONG_MAX;

        /* The list of memblock regions is sorted, binary search it */
        while (left < right) {
                cur = (left + right) >> 1;
                reg = &hyp_memory[cur];
                end = reg->base + reg->size;
                if (addr < reg->base) {
                        right = cur;
                        range->end = reg->base;
                } else if (addr >= end) {
                        left = cur + 1;
                        range->start = end;
                } else {
                        range->start = reg->base;
                        range->end = end;
                        return true;
                }
        }

        return false;
}

bool addr_is_memory(phys_addr_t phys)
{
        struct kvm_mem_range range;

        return find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
        return range->start <= addr && addr < range->end;
}

static bool range_is_memory(u64 start, u64 end)
{
        struct kvm_mem_range r;

        if (!find_mem_range(start, &r))
                return false;

        return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
                                      enum kvm_pgtable_prot prot)
{
        return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
                                      prot, &host_s2_pool);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)                                        \
        ({                                                              \
                int __ret;                                              \
                hyp_assert_lock_held(&host_kvm.lock);                   \
                __ret = fn(__VA_ARGS__);                                \
                if (__ret == -ENOMEM) {                                 \
                        __ret = host_stage2_unmap_dev_all();            \
                        if (!__ret)                                     \
                                __ret = fn(__VA_ARGS__);                \
                }                                                       \
                __ret;                                                  \
         })

static inline bool range_included(struct kvm_mem_range *child,
                                  struct kvm_mem_range *parent)
{
        return parent->start <= child->start && child->end <= parent->end;
}

static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
        struct kvm_mem_range cur;
        kvm_pte_t pte;
        u32 level;
        int ret;

        hyp_assert_lock_held(&host_kvm.lock);
        ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
        if (ret)
                return ret;

        if (kvm_pte_valid(pte))
                return -EAGAIN;

        if (pte)
                return -EPERM;

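        /*
         * Find a block-mappable granule around addr that fits entirely
         * within the memory or MMIO range computed by the caller, falling
         * back to page granularity if no larger mapping is possible.
         */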
        do {
                u64 granule = kvm_granule_size(level);
                cur.start = ALIGN_DOWN(addr, granule);
                cur.end = cur.start + granule;
                level++;
        } while ((level < KVM_PGTABLE_MAX_LEVELS) &&
                        !(kvm_level_supports_block_mapping(level) &&
                          range_included(&cur, range)));

        *range = cur;

        return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
                             enum kvm_pgtable_prot prot)
{
        hyp_assert_lock_held(&host_kvm.lock);

        return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
        hyp_assert_lock_held(&host_kvm.lock);

        return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
                               addr, size, &host_s2_pool, owner_id);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
        /*
         * Block mappings must be used with care in the host stage-2 as a
         * kvm_pgtable_stage2_map() operation targeting a page in the range of
         * an existing block will delete the block under the assumption that
         * mappings in the rest of the block range can always be rebuilt lazily.
         * That assumption is correct for the host stage-2 with RWX mappings
         * targeting memory or RW mappings targeting MMIO ranges (see
         * host_stage2_idmap() below which implements some of the host memory
         * abort logic). However, this is not safe for any other mappings where
         * the host stage-2 page-table is in fact the only place where this
         * state is stored. In all those cases, it is safer to use page-level
         * mappings so the state cannot be lost as a side-effect of
         * kvm_pgtable_stage2_map().
         */
        if (range_is_memory(addr, end))
                return prot != PKVM_HOST_MEM_PROT;
        else
                return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
        struct kvm_mem_range range;
        bool is_memory = find_mem_range(addr, &range);
        enum kvm_pgtable_prot prot;
        int ret;

        prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

        hyp_spin_lock(&host_kvm.lock);
        ret = host_stage2_adjust_range(addr, &range);
        if (ret)
                goto unlock;

        ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
        hyp_spin_unlock(&host_kvm.lock);

        return ret;
}

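/* Check that all 'required' bits are set in prot and none of 'denied' are. */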
static inline bool check_prot(enum kvm_pgtable_prot prot,
                              enum kvm_pgtable_prot required,
                              enum kvm_pgtable_prot denied)
{
        return (prot & (required | denied)) == required;
}

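/*
 * Handle a host request to share the page at pfn with the hypervisor. The
 * page ends up SHARED_OWNED in the host stage-2 and SHARED_BORROWED in the
 * hypervisor stage-1; sharing the same page twice is tolerated.
 */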
int __pkvm_host_share_hyp(u64 pfn)
{
        phys_addr_t addr = hyp_pfn_to_phys(pfn);
        enum kvm_pgtable_prot prot, cur;
        void *virt = __hyp_va(addr);
        enum pkvm_page_state state;
        kvm_pte_t pte;
        int ret;

        if (!addr_is_memory(addr))
                return -EINVAL;

        hyp_spin_lock(&host_kvm.lock);
        hyp_spin_lock(&pkvm_pgd_lock);

        ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
        if (ret)
                goto unlock;
        if (!pte)
                goto map_shared;

        /*
         * Check attributes in the host stage-2 PTE. We need the page to be:
         *  - mapped RWX as we're sharing memory;
         *  - not borrowed, as that implies absence of ownership.
         * Otherwise, we can't let it go through.
         */
        cur = kvm_pgtable_stage2_pte_prot(pte);
        prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
        if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
                ret = -EPERM;
                goto unlock;
        }

        state = pkvm_getstate(cur);
        if (state == PKVM_PAGE_OWNED)
                goto map_shared;

        /*
         * Tolerate double-sharing the same page, but this requires
         * cross-checking the hypervisor stage-1.
         */
        if (state != PKVM_PAGE_SHARED_OWNED) {
                ret = -EPERM;
                goto unlock;
        }

        ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
        if (ret)
                goto unlock;

        /*
         * If the page has been shared with the hypervisor, it must already
         * be mapped as SHARED_BORROWED in its stage-1.
         */
        cur = kvm_pgtable_hyp_pte_prot(pte);
        prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
        if (!check_prot(cur, prot, ~prot))
                ret = -EPERM;
        goto unlock;

map_shared:
        /*
         * If the page is not yet shared, adjust mappings in both page-tables
         * while both locks are held.
         */
        prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
        ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
        BUG_ON(ret);

        prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
        ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
        BUG_ON(ret);

unlock:
        hyp_spin_unlock(&pkvm_pgd_lock);
        hyp_spin_unlock(&host_kvm.lock);

        return ret;
}

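/*
 * Host stage-2 fault handler: build the missing identity mapping for the
 * faulting address, or panic if the access cannot be allowed.
 */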
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
        struct kvm_vcpu_fault_info fault;
        u64 esr, addr;
        int ret = 0;

        esr = read_sysreg_el2(SYS_ESR);
        BUG_ON(!__get_fault_info(esr, &fault));

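        /*
         * HPFAR_EL2 reports the faulting IPA with the bottom 12 bits removed
         * in its FIPA field (starting at bit 4); shift it back up to recover
         * the page-aligned fault address.
         */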
        addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
        ret = host_stage2_idmap(addr);
        BUG_ON(ret && ret != -EAGAIN);
}