// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1
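
/*
 * The output address normally lives in PTE bits [47:PAGE_SHIFT]; with
 * 64KB pages and a 52-bit PA, bits [51:48] are carried in PTE bits
 * [15:12] instead.
 */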
#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
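
/*
 * Transient state tracked while walking [addr, end) of @pgt on behalf
 * of @walker.
 */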
struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

static u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}
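
/*
 * Can [addr, end) be mapped by a single block at @level, given a
 * suitably aligned @phys?
 */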
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
		return false;

	if (granule > (end - addr))
		return false;

	return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}
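
/* Index of @data->addr within the table page at @level. */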
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}
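
/*
 * Index of the page, within a potentially concatenated set of PGD
 * pages, that holds the start-level entry for @addr.
 */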
static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}
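
/* Number of PGD pages needed to cover @ia_bits of input address. */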
static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t pte = *ptep;
	WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}
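
/*
 * Install a table entry, using a release store so that the (zeroed)
 * child table is visible to other observers before the entry that
 * points at it.
 */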
static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}
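
/*
 * Install a valid leaf (page or block) entry. Returns false if a
 * different valid mapping was already present, in which case the entry
 * is left untouched.
 */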
static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
				   u32 level)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	/* Tolerate KVM recreating the exact same mapping. */
	if (kvm_pte_valid(old))
		return old == pte;

	smp_store_release(ptep, pte);
	return true;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);
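
/*
 * Visit a single entry: TABLE_PRE fires before descending into a table,
 * LEAF fires on leaf (or invalid) entries, and TABLE_POST fires once a
 * table's children have been visited. A LEAF callback may itself
 * install a table, in which case the entry is re-read and the walk
 * descends into it.
 */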
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}
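
/* Walk the PGD, which may be concatenated across multiple pages. */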
static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}
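
/*
 * Walk [addr, addr + size), invoking the walker's callback for each
 * entry as dictated by its flags. The range is expanded to page
 * granularity.
 */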
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}
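
/* State for a hyp stage-1 mapping request: next PA and leaf attributes. */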
struct hyp_map_data {
	u64		phys;
	kvm_pte_t	attr;
};

static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	data->attr = attr;
	return 0;
}
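
/*
 * Try to install a leaf mapping at this level, advancing @data->phys on
 * success.
 */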
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
	data->phys += granule;
	return true;
}
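
/*
 * LEAF walker callback: install a leaf mapping where possible,
 * otherwise allocate a new level of table and let the walk descend.
 */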
static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp);
	return 0;
}
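
/*
 * Map [addr, addr + size) to phys in the hyp stage-1 tables; the
 * DSB/ISB on the way out publish the new entries to the hardware
 * table walker before we return.
 */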
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}
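
/*
 * Typical usage (a minimal sketch; error handling elided, and the
 * hyp_va_bits, va, size and pa values are purely illustrative):
 *
 *	struct kvm_pgtable pgt;
 *
 *	kvm_pgtable_hyp_init(&pgt, hyp_va_bits);
 *	kvm_pgtable_hyp_map(&pgt, va, size, pa,
 *			    KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W);
 *	...
 *	kvm_pgtable_hyp_destroy(&pgt);
 */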
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mmu		= NULL;
	return 0;
}
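
/*
 * TABLE_POST callback: the subtree under this entry has already been
 * visited, so the table page it points to can be freed.
 */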
static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	free_page((unsigned long)kvm_pte_follow(*ptep));
	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}