KVM: arm64: Add support for creating kernel-agnostic stage-2 page tables
author     Will Deacon <will@kernel.org>
           Fri, 11 Sep 2020 13:25:13 +0000 (14:25 +0100)
committer  Marc Zyngier <maz@kernel.org>
           Fri, 11 Sep 2020 14:51:13 +0000 (15:51 +0100)
Introduce allocation and free functions (kvm_pgtable_stage2_init() and
kvm_pgtable_stage2_destroy()) to the generic page-table code for guest
stage-2 page-tables and plumb these into the existing KVM page-table
allocator. Subsequent patches will convert other operations within the
KVM allocator over to the generic code.
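
As a quick illustration, the intended call pattern for the new API is
the one wired up in the mmu.c hunk below (a minimal sketch with
abbreviated error handling, showing nothing beyond what the patch
itself does):

	struct kvm_pgtable *pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
	int err;

	if (!pgt)
		return -ENOMEM;

	/* Allocate and zero the (possibly concatenated) stage-2 PGD pages */
	err = kvm_pgtable_stage2_init(pgt, kvm);
	if (err) {
		kfree(pgt);
		return err;
	}

	/* ... the hardware walker is then pointed at __pa(pgt->pgd) ... */

	/* Teardown: free all mapped table pages, then the PGD itself */
	kvm_pgtable_stage2_destroy(pgt);
	kfree(pgt);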

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20200911132529.19844-6-will@kernel.org
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/mmu.c

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e52c927..0b7c702 100644
@@ -81,6 +81,7 @@ struct kvm_s2_mmu {
         */
        pgd_t           *pgd;
        phys_addr_t     pgd_phys;
+       struct kvm_pgtable *pgt;
 
        /* The last vcpu id that ran on each physical CPU */
        int __percpu *last_vcpu_ran;
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index ff5d7d2..21d7139 100644
@@ -122,6 +122,24 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
                        enum kvm_pgtable_prot prot);
 
+/**
+ * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
+ * @pgt:       Uninitialised page-table structure to initialise.
+ * @kvm:       KVM structure representing the guest virtual machine.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
+
+/**
+ * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
+ * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init().
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+
 /**
  * kvm_pgtable_walk() - Walk a page-table.
  * @pgt:       Page-table structure initialised by kvm_pgtable_*_init().
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 23a1006..16b34d1 100644
@@ -416,3 +416,57 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
        free_page((unsigned long)pgt->pgd);
        pgt->pgd = NULL;
 }
+
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
+{
+       size_t pgd_sz;
+       u64 vtcr = kvm->arch.vtcr;
+       u32 ia_bits = VTCR_EL2_IPA(vtcr);
+       u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
+       u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+
+       pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
+       pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO);
+       if (!pgt->pgd)
+               return -ENOMEM;
+
+       pgt->ia_bits            = ia_bits;
+       pgt->start_level        = start_level;
+       pgt->mmu                = &kvm->arch.mmu;
+
+       /* Ensure zeroed PGD pages are visible to the hardware walker */
+       dsb(ishst);
+       return 0;
+}
+
+static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+                             enum kvm_pgtable_walk_flags flag,
+                             void * const arg)
+{
+       kvm_pte_t pte = *ptep;
+
+       if (!kvm_pte_valid(pte))
+               return 0;
+
+       put_page(virt_to_page(ptep));
+
+       if (kvm_pte_table(pte, level))
+               free_page((unsigned long)kvm_pte_follow(pte));
+
+       return 0;
+}
+
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+       size_t pgd_sz;
+       struct kvm_pgtable_walker walker = {
+               .cb     = stage2_free_walker,
+               .flags  = KVM_PGTABLE_WALK_LEAF |
+                         KVM_PGTABLE_WALK_TABLE_POST,
+       };
+
+       WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
+       pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
+       free_pages_exact(pgt->pgd, pgd_sz);
+       pgt->pgd = NULL;
+}
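
To make the PGD sizing in kvm_pgtable_stage2_init() concrete: with a 4K
granule and a 40-bit IPA space (a common configuration, used here purely
as a worked example), SL0 decodes to a start level of 1; a single 4K
level-1 table spans 512 entries * 1GB = 2^39 bytes, so covering the full
2^40-byte IPA space takes two concatenated start-level pages, and
kvm_pgd_pages() returns 2, i.e. pgd_sz = 2 * PAGE_SIZE = 8KB.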
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index fabd72b..4607e9c 100644
@@ -668,47 +668,49 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
  * @kvm:       The pointer to the KVM structure
  * @mmu:       The pointer to the s2 MMU structure
  *
- * Allocates only the stage-2 HW PGD level table(s) of size defined by
- * stage2_pgd_size(mmu->kvm).
- *
+ * Allocates only the stage-2 HW PGD level table(s).
  * Note we don't need locking here as this is only called when the VM is
  * created, which can only be done once.
  */
 int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 {
-       phys_addr_t pgd_phys;
-       pgd_t *pgd;
-       int cpu;
+       int cpu, err;
+       struct kvm_pgtable *pgt;
 
-       if (mmu->pgd != NULL) {
+       if (mmu->pgt != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }
 
-       /* Allocate the HW PGD, making sure that each page gets its own refcount */
-       pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO);
-       if (!pgd)
+       pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
+       if (!pgt)
                return -ENOMEM;
 
-       pgd_phys = virt_to_phys(pgd);
-       if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
-               return -EINVAL;
+       err = kvm_pgtable_stage2_init(pgt, kvm);
+       if (err)
+               goto out_free_pgtable;
 
        mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
        if (!mmu->last_vcpu_ran) {
-               free_pages_exact(pgd, stage2_pgd_size(kvm));
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto out_destroy_pgtable;
        }
 
        for_each_possible_cpu(cpu)
                *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
 
        mmu->kvm = kvm;
-       mmu->pgd = pgd;
-       mmu->pgd_phys = pgd_phys;
+       mmu->pgt = pgt;
+       mmu->pgd_phys = __pa(pgt->pgd);
+       mmu->pgd = (void *)pgt->pgd;
        mmu->vmid.vmid_gen = 0;
-
        return 0;
+
+out_destroy_pgtable:
+       kvm_pgtable_stage2_destroy(pgt);
+out_free_pgtable:
+       kfree(pgt);
+       return err;
 }
 
 static void stage2_unmap_memslot(struct kvm *kvm,
@@ -781,20 +783,21 @@ void stage2_unmap_vm(struct kvm *kvm)
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 {
        struct kvm *kvm = mmu->kvm;
-       void *pgd = NULL;
+       struct kvm_pgtable *pgt = NULL;
 
        spin_lock(&kvm->mmu_lock);
-       if (mmu->pgd) {
-               unmap_stage2_range(mmu, 0, kvm_phys_size(kvm));
-               pgd = READ_ONCE(mmu->pgd);
+       pgt = mmu->pgt;
+       if (pgt) {
                mmu->pgd = NULL;
+               mmu->pgd_phys = 0;
+               mmu->pgt = NULL;
+               free_percpu(mmu->last_vcpu_ran);
        }
        spin_unlock(&kvm->mmu_lock);
 
-       /* Free the HW pgd, one page at a time */
-       if (pgd) {
-               free_pages_exact(pgd, stage2_pgd_size(kvm));
-               free_percpu(mmu->last_vcpu_ran);
+       if (pgt) {
+               kvm_pgtable_stage2_destroy(pgt);
+               kfree(pgt);
        }
 }
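
One detail worth calling out in the destroy path: stage2_free_walker()
is registered with both KVM_PGTABLE_WALK_LEAF and
KVM_PGTABLE_WALK_TABLE_POST, so table entries are visited only after all
of their children, which is what makes the unconditional
put_page()/free_page() pair safe. A conceptual equivalent of that
traversal (a hypothetical recursive helper for illustration only; the
real code drives the callback through kvm_pgtable_walk() and also
handles start-level PGD concatenation):

	static void stage2_free_postorder(kvm_pte_t *ptep, u32 level)
	{
		kvm_pte_t pte = *ptep;

		if (!kvm_pte_valid(pte))
			return;

		if (kvm_pte_table(pte, level)) {
			kvm_pte_t *childp = kvm_pte_follow(pte);
			int i;

			/*
			 * Visit children first: a table page is freed
			 * only once everything it points to has been
			 * torn down.
			 */
			for (i = 0; i < PAGE_SIZE / sizeof(kvm_pte_t); i++)
				stage2_free_postorder(&childp[i], level + 1);

			free_page((unsigned long)childp);
		}

		/*
		 * Drop the reference that this valid entry held on the
		 * page containing it.
		 */
		put_page(virt_to_page(ptep));
	}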