X-Git-Url: http://git.monstr.eu/?p=linux-2.6-microblaze.git;a=blobdiff_plain;f=arch%2Farm64%2Fkvm%2Fhyp%2Fnvhe%2Fmem_protect.c;h=bacd493a4eacdefc1af400ec1a97309f02ac3510;hp=a6ce991b146793d6dbd18f708763cd6f04e98bd4;hb=fdfc346302a7b63e3d5b9168be74bb12b1975999;hpb=7ba88a2a09f47e2e4f3e34215677a1d78a9e6a73 diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index a6ce991b1467..bacd493a4eac 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; -static const u8 pkvm_hyp_id = 1; +const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { @@ -89,6 +89,8 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); + int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; @@ -101,16 +103,17 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) if (ret) return ret; - ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS); + ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch, + &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + host_stage2_force_pte_cb); if (ret) return ret; mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); mmu->arch = &host_kvm.arch; mmu->pgt = &host_kvm.pgt; - mmu->vmid.vmid_gen = 0; - mmu->vmid.vmid = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); + WRITE_ONCE(mmu->vmid.vmid, 0); return 0; } @@ -126,7 +129,7 @@ int __pkvm_prot_finalize(void) kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); /* * Make sure to have an ISB before the TLB maintenance below but only @@ -159,6 +162,11 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +struct kvm_mem_range { + u64 start; + u64 end; +}; + static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; @@ -189,16 +197,26 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) return false; } +bool addr_is_memory(phys_addr_t phys) +{ + struct kvm_mem_range range; + + return find_mem_range(phys, &range); +} + +static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) +{ + return range->start <= addr && addr < range->end; +} + static bool range_is_memory(u64 start, u64 end) { - struct kvm_mem_range r1, r2; + struct kvm_mem_range r; - if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) - return false; - if (r1.start != r2.start) + if (!find_mem_range(start, &r)) return false; - return true; + return is_in_mem_range(end - 1, &r); } static inline int __host_stage2_idmap(u64 start, u64 end, @@ -208,60 +226,208 @@ static inline int __host_stage2_idmap(u64 start, u64 end, prot, &host_s2_pool); } +/* + * The pool has been provided with enough pages to cover all of memory with + * page granularity, but it is difficult to know how much of the MMIO range + * we will need to cover upfront, so we may need to 'recycle' the pages if we + * run out. + */ +#define host_stage2_try(fn, ...) \ + ({ \ + int __ret; \ + hyp_assert_lock_held(&host_kvm.lock); \ + __ret = fn(__VA_ARGS__); \ + if (__ret == -ENOMEM) { \ + __ret = host_stage2_unmap_dev_all(); \ + if (!__ret) \ + __ret = fn(__VA_ARGS__); \ + } \ + __ret; \ + }) + +static inline bool range_included(struct kvm_mem_range *child, + struct kvm_mem_range *parent) +{ + return parent->start <= child->start && child->end <= parent->end; +} + +static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) +{ + struct kvm_mem_range cur; + kvm_pte_t pte; + u32 level; + int ret; + + hyp_assert_lock_held(&host_kvm.lock); + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + if (ret) + return ret; + + if (kvm_pte_valid(pte)) + return -EAGAIN; + + if (pte) + return -EPERM; + + do { + u64 granule = kvm_granule_size(level); + cur.start = ALIGN_DOWN(addr, granule); + cur.end = cur.start + granule; + level++; + } while ((level < KVM_PGTABLE_MAX_LEVELS) && + !(kvm_level_supports_block_mapping(level) && + range_included(&cur, range))); + + *range = cur; + + return 0; +} + +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, + enum kvm_pgtable_prot prot) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); +} + +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + addr, size, &host_s2_pool, owner_id); +} + +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) +{ + /* + * Block mappings must be used with care in the host stage-2 as a + * kvm_pgtable_stage2_map() operation targeting a page in the range of + * an existing block will delete the block under the assumption that + * mappings in the rest of the block range can always be rebuilt lazily. + * That assumption is correct for the host stage-2 with RWX mappings + * targeting memory or RW mappings targeting MMIO ranges (see + * host_stage2_idmap() below which implements some of the host memory + * abort logic). However, this is not safe for any other mappings where + * the host stage-2 page-table is in fact the only place where this + * state is stored. In all those cases, it is safer to use page-level + * mappings, hence avoiding to lose the state because of side-effects in + * kvm_pgtable_stage2_map(). + */ + if (range_is_memory(addr, end)) + return prot != PKVM_HOST_MEM_PROT; + else + return prot != PKVM_HOST_MMIO_PROT; +} + static int host_stage2_idmap(u64 addr) { - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); + enum kvm_pgtable_prot prot; int ret; - if (is_memory) - prot |= KVM_PGTABLE_PROT_X; + prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range); + ret = host_stage2_adjust_range(addr, &range); if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot); - if (ret != -ENOMEM) + ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); +unlock: + hyp_spin_unlock(&host_kvm.lock); + + return ret; +} + +static inline bool check_prot(enum kvm_pgtable_prot prot, + enum kvm_pgtable_prot required, + enum kvm_pgtable_prot denied) +{ + return (prot & (required | denied)) == required; +} + +int __pkvm_host_share_hyp(u64 pfn) +{ + phys_addr_t addr = hyp_pfn_to_phys(pfn); + enum kvm_pgtable_prot prot, cur; + void *virt = __hyp_va(addr); + enum pkvm_page_state state; + kvm_pte_t pte; + int ret; + + if (!addr_is_memory(addr)) + return -EINVAL; + + hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&pkvm_pgd_lock); + + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); + if (ret) goto unlock; + if (!pte) + goto map_shared; /* - * The pool has been provided with enough pages to cover all of memory - * with page granularity, but it is difficult to know how much of the - * MMIO range we will need to cover upfront, so we may need to 'recycle' - * the pages if we run out. + * Check attributes in the host stage-2 PTE. We need the page to be: + * - mapped RWX as we're sharing memory; + * - not borrowed, as that implies absence of ownership. + * Otherwise, we can't let it got through */ - ret = host_stage2_unmap_dev_all(); - if (ret) + cur = kvm_pgtable_stage2_pte_prot(pte); + prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { + ret = -EPERM; goto unlock; + } - ret = __host_stage2_idmap(range.start, range.end, prot); + state = pkvm_getstate(cur); + if (state == PKVM_PAGE_OWNED) + goto map_shared; -unlock: - hyp_spin_unlock(&host_kvm.lock); + /* + * Tolerate double-sharing the same page, but this requires + * cross-checking the hypervisor stage-1. + */ + if (state != PKVM_PAGE_SHARED_OWNED) { + ret = -EPERM; + goto unlock; + } - return ret; -} + ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); + if (ret) + goto unlock; -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - int ret; + /* + * If the page has been shared with the hypervisor, it must be + * already mapped as SHARED_BORROWED in its stage-1. + */ + cur = kvm_pgtable_hyp_pte_prot(pte); + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, prot, ~prot)) + ret = -EPERM; + goto unlock; +map_shared: /* - * host_stage2_unmap_dev_all() currently relies on MMIO mappings being - * non-persistent, so don't allow changing page ownership in MMIO range. + * If the page is not yet shared, adjust mappings in both page-tables + * while both locks are held. */ - if (!range_is_memory(start, end)) - return -EINVAL; + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); + BUG_ON(ret); - hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_pool, pkvm_hyp_id); + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); + BUG_ON(ret); + +unlock: + hyp_spin_unlock(&pkvm_pgd_lock); hyp_spin_unlock(&host_kvm.lock); - return ret != -EAGAIN ? ret : 0; + return ret; } void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)