Merge branch kvm-arm64/mmu/mte into kvmarm-master/next
author Marc Zyngier <maz@kernel.org>
Thu, 24 Jun 2021 14:12:43 +0000 (15:12 +0100)
committer Marc Zyngier <maz@kernel.org>
Fri, 25 Jun 2021 13:25:56 +0000 (14:25 +0100)
Last-minute fix for MTE, making sure pages are flagged as
MTE-tagged before they are released.
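
The important part is the ordering: the PG_mte_tagged flag has to be set
while a reference to the page is still held, and the page released only
afterwards. A minimal sketch of that ordering (illustrative only, using
the stock kernel helpers of this era, not the exact hunk from the patch):

	set_bit(PG_mte_tagged, &page->flags);	/* tag state visible first */
	kvm_release_pfn_dirty(pfn);		/* only then drop our reference */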

* kvm-arm64/mmu/mte:
  KVM: arm64: Set the MTE tag bit before releasing the page

Signed-off-by: Marc Zyngier <maz@kernel.org>
71 files changed:
MAINTAINERS
arch/arm64/include/asm/alternative-macros.h
arch/arm64/include/asm/arch_gicv3.h
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/efi.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kernel/alternative.c
arch/arm64/kernel/efi-entry.S
arch/arm64/kernel/head.S
arch/arm64/kernel/hibernate-asm.S
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/idreg-override.c
arch/arm64/kernel/image-vars.h
arch/arm64/kernel/insn.c
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/machine_kexec.c
arch/arm64/kernel/probes/uprobes.c
arch/arm64/kernel/smp.c
arch/arm64/kernel/smp_spin_table.c
arch/arm64/kernel/sys_compat.c
arch/arm64/kvm/arch_timer.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/include/nvhe/gfp.h
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
arch/arm64/kvm/hyp/include/nvhe/memory.h
arch/arm64/kvm/hyp/include/nvhe/mm.h
arch/arm64/kvm/hyp/nvhe/cache.S
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/page_alloc.c
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/reserved_mem.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/vgic/vgic-init.c
arch/arm64/kvm/vgic/vgic-v2.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/arm64/kvm/vgic/vgic.c
arch/arm64/lib/uaccess_flushcache.c
arch/arm64/mm/cache.S
arch/arm64/mm/flush.c
drivers/irqchip/irq-apple-aic.c
drivers/irqchip/irq-gic-common.c
drivers/irqchip/irq-gic-common.h
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
include/kvm/arm_vgic.h
include/linux/irqchip/arm-gic-common.h
include/linux/irqchip/arm-vgic-info.h [new file with mode: 0644]
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/debug-exceptions.c [new file with mode: 0644]
tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c [deleted file]
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/include/aarch64/processor.h
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/lib/aarch64/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/x86_64/evmcs_test.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/kvm/x86_64/sync_regs_test.c
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 503fd21..b9d5999 100644
@@ -9993,6 +9993,8 @@ F:        arch/arm64/include/asm/kvm*
 F:     arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/kvm/
 F:     include/kvm/arm_*
+F:     tools/testing/selftests/kvm/*/aarch64/
+F:     tools/testing/selftests/kvm/aarch64/
 
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 M:     Huacai Chen <chenhuacai@kernel.org>
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 8a078fc..4777035 100644
@@ -197,11 +197,6 @@ alternative_endif
 #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
        alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
 
-.macro user_alt, label, oldinstr, newinstr, cond
-9999:  alternative_insn "\oldinstr", "\newinstr", \cond
-       _asm_extable 9999b, \label
-.endm
-
 #endif  /*  __ASSEMBLY__  */
 
 /*
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 934b9be..4ad22c3 100644
@@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void)
 #define gic_read_lpir(c)               readq_relaxed(c)
 #define gic_write_lpir(v, c)           writeq_relaxed(v, c)
 
-#define gic_flush_dcache_to_poc(a,l)   __flush_dcache_area((a), (l))
+#define gic_flush_dcache_to_poc(a,l)   \
+       dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
 
 #define gits_read_baser(c)             readq_relaxed(c)
 #define gits_write_baser(v, c)         writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1b..c4cecf8 100644
@@ -130,15 +130,27 @@ alternative_endif
        .endm
 
 /*
- * Emit an entry into the exception table
+ * Create an exception table entry for `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
  */
-       .macro          _asm_extable, from, to
+       .macro          _asm_extable, insn, fixup
        .pushsection    __ex_table, "a"
        .align          3
-       .long           (\from - .), (\to - .)
+       .long           (\insn - .), (\fixup - .)
        .popsection
        .endm
 
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+       .macro          _cond_extable, insn, fixup
+       .ifnc           \fixup,
+       _asm_extable    \insn, \fixup
+       .endif
+       .endm
+
+
 #define USER(l, x...)                          \
 9999:  x;                                      \
        _asm_extable    9999b, l
@@ -375,51 +387,53 @@ alternative_cb_end
        bfi     \tcr, \tmp0, \pos, #3
        .endm
 
+       .macro __dcache_op_workaround_clean_cache, op, addr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+       dc      \op, \addr
+alternative_else
+       dc      civac, \addr
+alternative_endif
+       .endm
+
 /*
  * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
+ * [start, end)
  *
  *     op:             operation passed to dc instruction
 *     domain:         domain used in dsb instruction
- *     kaddr:          starting virtual address of the region
- *     size:           size of the region
- *     Corrupts:       kaddr, size, tmp1, tmp2
+ *     start:          starting virtual address of the region
+ *     end:            end virtual address of the region
+ *     fixup:          optional label to branch to on user fault
+ *     Corrupts:       start, end, tmp1, tmp2
  */
-       .macro __dcache_op_workaround_clean_cache, op, kaddr
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-       dc      \op, \kaddr
-alternative_else
-       dc      civac, \kaddr
-alternative_endif
-       .endm
-
-       .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+       .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
        dcache_line_size \tmp1, \tmp2
-       add     \size, \kaddr, \size
        sub     \tmp2, \tmp1, #1
-       bic     \kaddr, \kaddr, \tmp2
-9998:
+       bic     \start, \start, \tmp2
+.Ldcache_op\@:
        .ifc    \op, cvau
-       __dcache_op_workaround_clean_cache \op, \kaddr
+       __dcache_op_workaround_clean_cache \op, \start
        .else
        .ifc    \op, cvac
-       __dcache_op_workaround_clean_cache \op, \kaddr
+       __dcache_op_workaround_clean_cache \op, \start
        .else
        .ifc    \op, cvap
-       sys     3, c7, c12, 1, \kaddr   // dc cvap
+       sys     3, c7, c12, 1, \start   // dc cvap
        .else
        .ifc    \op, cvadp
-       sys     3, c7, c13, 1, \kaddr   // dc cvadp
+       sys     3, c7, c13, 1, \start   // dc cvadp
        .else
-       dc      \op, \kaddr
+       dc      \op, \start
        .endif
        .endif
        .endif
        .endif
-       add     \kaddr, \kaddr, \tmp1
-       cmp     \kaddr, \size
-       b.lo    9998b
+       add     \start, \start, \tmp1
+       cmp     \start, \end
+       b.lo    .Ldcache_op\@
        dsb     \domain
+
+       _cond_extable .Ldcache_op\@, \fixup
        .endm
 
 /*
@@ -427,20 +441,22 @@ alternative_endif
  * [start, end)
  *
  *     start, end:     virtual addresses describing the region
- *     label:          A label to branch to on user fault.
+ *     fixup:          optional label to branch to on user fault
  *     Corrupts:       tmp1, tmp2
  */
-       .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+       .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
        icache_line_size \tmp1, \tmp2
        sub     \tmp2, \tmp1, #1
        bic     \tmp2, \start, \tmp2
-9997:
-USER(\label, ic        ivau, \tmp2)                    // invalidate I line PoU
+.Licache_op\@:
+       ic      ivau, \tmp2                     // invalidate I line PoU
        add     \tmp2, \tmp2, \tmp1
        cmp     \tmp2, \end
-       b.lo    9997b
+       b.lo    .Licache_op\@
        dsb     ish
        isb
+
+       _cond_extable .Licache_op\@, \fixup
        .endm
 
 /*
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 52e5c16..543c997 100644
  *     the implementation assumes non-aliasing VIPT D-cache and (aliasing)
  *     VIPT I-cache.
  *
- *     flush_icache_range(start, end)
+ *     All functions below apply to the interval [start, end)
+ *             - start  - virtual start address (inclusive)
+ *             - end    - virtual end address (exclusive)
  *
- *             Ensure coherency between the I-cache and the D-cache in the
- *             region described by start, end.
- *             - start  - virtual start address
- *             - end    - virtual end address
+ *     caches_clean_inval_pou(start, end)
  *
- *     invalidate_icache_range(start, end)
+ *             Ensure coherency between the I-cache and the D-cache region to
+ *             the Point of Unification.
  *
- *             Invalidate the I-cache in the region described by start, end.
- *             - start  - virtual start address
- *             - end    - virtual end address
+ *     caches_clean_inval_user_pou(start, end)
  *
- *     __flush_cache_user_range(start, end)
+ *             Ensure coherency between the I-cache and the D-cache region to
+ *             the Point of Unification.
+ *             Use only if the region might access user memory.
  *
- *             Ensure coherency between the I-cache and the D-cache in the
- *             region described by start, end.
- *             - start  - virtual start address
- *             - end    - virtual end address
+ *     icache_inval_pou(start, end)
  *
- *     __flush_dcache_area(kaddr, size)
+ *             Invalidate I-cache region to the Point of Unification.
  *
- *             Ensure that the data held in page is written back.
- *             - kaddr  - page address
- *             - size   - region size
+ *     dcache_clean_inval_poc(start, end)
+ *
+ *             Clean and invalidate D-cache region to the Point of Coherency.
+ *
+ *     dcache_inval_poc(start, end)
+ *
+ *             Invalidate D-cache region to the Point of Coherency.
+ *
+ *     dcache_clean_poc(start, end)
+ *
+ *             Clean D-cache region to the Point of Coherency.
+ *
+ *     dcache_clean_pop(start, end)
+ *
+ *             Clean D-cache region to the Point of Persistence.
+ *
+ *     dcache_clean_pou(start, end)
+ *
+ *             Clean D-cache region to the Point of Unification.
  */
-extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern int  invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
-extern void __inval_dcache_area(void *addr, size_t len);
-extern void __clean_dcache_area_poc(void *addr, size_t len);
-extern void __clean_dcache_area_pop(void *addr, size_t len);
-extern void __clean_dcache_area_pou(void *addr, size_t len);
-extern long __flush_cache_user_range(unsigned long start, unsigned long end);
-extern void sync_icache_aliases(void *kaddr, unsigned long len);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
 
 static inline void flush_icache_range(unsigned long start, unsigned long end)
 {
-       __flush_icache_range(start, end);
+       caches_clean_inval_pou(start, end);
 
        /*
         * IPI all online CPUs so that they undergo a context synchronization
@@ -122,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-static __always_inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
 {
        if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
                return;
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 3578aba..1bed37e 100644
@@ -137,7 +137,7 @@ void efi_virtmap_unload(void);
 
 static inline void efi_capsule_flush_cache_range(void *addr, int size)
 {
-       __flush_dcache_area(addr, size);
+       dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
 }
 
 #endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c93a719..61d9749 100644
@@ -46,6 +46,7 @@
 #define KVM_REQ_VCPU_RESET     KVM_ARCH_REQ(2)
 #define KVM_REQ_RECORD_STEAL   KVM_ARCH_REQ(3)
 #define KVM_REQ_RELOAD_GICv4   KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU     KVM_ARCH_REQ(5)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                     KVM_DIRTY_LOG_INITIALLY_SET)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 25ed956..b52c5c4 100644
@@ -180,17 +180,16 @@ static inline void *__kvm_vector_slot2addr(void *base,
 
 struct kvm;
 
-#define kvm_flush_dcache_to_poc(a,l)   __flush_dcache_area((a), (l))
+#define kvm_flush_dcache_to_poc(a,l)   \
+       dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
        return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+static inline void __clean_dcache_guest_page(void *va, size_t size)
 {
-       void *va = page_address(pfn_to_page(pfn));
-
        /*
         * With FWB, we ensure that the guest always accesses memory using
         * cacheable attributes, and we don't have to clean to PoC when
@@ -203,18 +202,14 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
        kvm_flush_dcache_to_poc(va, size);
 }
 
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
-                                                 unsigned long size)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
 {
        if (icache_is_aliasing()) {
                /* any kind of VIPT cache */
-               __flush_icache_all();
+               icache_inval_all_pou();
        } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
                /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
-               void *va = page_address(pfn_to_page(pfn));
-
-               invalidate_icache_range((unsigned long)va,
-                                       (unsigned long)va + size);
+               icache_inval_pou((unsigned long)va, (unsigned long)va + size);
        }
 }
 
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index c3674c4..f004c01 100644
@@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;
 
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
- * @zalloc_page:       Allocate a single zeroed memory page. The @arg parameter
- *                     can be used by the walker to pass a memcache. The
- *                     initial refcount of the page is 1.
- * @zalloc_pages_exact:        Allocate an exact number of zeroed memory pages. The
- *                     @size parameter is in bytes, and is rounded-up to the
- *                     next page boundary. The resulting allocation is
- *                     physically contiguous.
- * @free_pages_exact:  Free an exact number of memory pages previously
- *                     allocated by zalloc_pages_exact.
- * @get_page:          Increment the refcount on a page.
- * @put_page:          Decrement the refcount on a page. When the refcount
- *                     reaches 0 the page is automatically freed.
- * @page_count:                Return the refcount of a page.
- * @phys_to_virt:      Convert a physical address into a virtual address mapped
- *                     in the current context.
- * @virt_to_phys:      Convert a virtual address mapped in the current context
- *                     into a physical address.
+ * @zalloc_page:               Allocate a single zeroed memory page.
+ *                             The @arg parameter can be used by the walker
+ *                             to pass a memcache. The initial refcount of
+ *                             the page is 1.
+ * @zalloc_pages_exact:                Allocate an exact number of zeroed memory pages.
+ *                             The @size parameter is in bytes, and is rounded
+ *                             up to the next page boundary. The resulting
+ *                             allocation is physically contiguous.
+ * @free_pages_exact:          Free an exact number of memory pages previously
+ *                             allocated by zalloc_pages_exact.
+ * @get_page:                  Increment the refcount on a page.
+ * @put_page:                  Decrement the refcount on a page. When the
+ *                             refcount reaches 0 the page is automatically
+ *                             freed.
+ * @page_count:                        Return the refcount of a page.
+ * @phys_to_virt:              Convert a physical address into a virtual
+ *                             address mapped in the current context.
+ * @virt_to_phys:              Convert a virtual address mapped in the current
+ *                             context into a physical address.
+ * @dcache_clean_inval_poc:    Clean and invalidate the data cache to the PoC
+ *                             for the specified memory address range.
+ * @icache_inval_pou:          Invalidate the instruction cache to the PoU
+ *                             for the specified memory address range.
  */
 struct kvm_pgtable_mm_ops {
        void*           (*zalloc_page)(void *arg);
@@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
        int             (*page_count)(void *addr);
        void*           (*phys_to_virt)(phys_addr_t phys);
        phys_addr_t     (*virt_to_phys)(void *addr);
+       void            (*dcache_clean_inval_poc)(void *addr, size_t size);
+       void            (*icache_inval_pou)(void *addr, size_t size);
 };
 
 /**
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index c906d20..3fb79b7 100644
@@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
         */
        if (!is_module) {
                dsb(ish);
-               __flush_icache_all();
+               icache_inval_all_pou();
                isb();
 
                /* Ignore ARM64_CB bit from feature mask */
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 0073b24..61a87fa 100644
@@ -28,7 +28,8 @@ SYM_CODE_START(efi_enter_kernel)
         * stale icache entries from before relocation.
         */
        ldr     w1, =kernel_size
-       bl      __clean_dcache_area_poc
+       add     x1, x0, x1
+       bl      dcache_clean_poc
        ic      ialluis
 
        /*
@@ -36,8 +37,8 @@ SYM_CODE_START(efi_enter_kernel)
         * so that we can safely disable the MMU and caches.
         */
        adr     x0, 0f
-       ldr     w1, 3f
-       bl      __clean_dcache_area_poc
+       adr     x1, 3f
+       bl      dcache_clean_poc
 0:
        /* Turn off Dcache and MMU */
        mrs     x0, CurrentEL
@@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel)
        mov     x2, xzr
        mov     x3, xzr
        br      x19
+3:
 SYM_CODE_END(efi_enter_kernel)
-3:     .long   . - 0b
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873df..6928cb6 100644
@@ -117,8 +117,8 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
        dmb     sy                              // needed before dc ivac with
                                                // MMU off
 
-       mov     x1, #0x20                       // 4 x 8 bytes
-       b       __inval_dcache_area             // tail call
+       add     x1, x0, #0x20                   // 4 x 8 bytes
+       b       dcache_inval_poc                // tail call
 SYM_CODE_END(preserve_boot_args)
 
 /*
@@ -268,8 +268,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
         */
        adrp    x0, init_pg_dir
        adrp    x1, init_pg_end
-       sub     x1, x1, x0
-       bl      __inval_dcache_area
+       bl      dcache_inval_poc
 
        /*
         * Clear the init page tables.
@@ -382,13 +381,11 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 
        adrp    x0, idmap_pg_dir
        adrp    x1, idmap_pg_end
-       sub     x1, x1, x0
-       bl      __inval_dcache_area
+       bl      dcache_inval_poc
 
        adrp    x0, init_pg_dir
        adrp    x1, init_pg_end
-       sub     x1, x1, x0
-       bl      __inval_dcache_area
+       bl      dcache_inval_poc
 
        ret     x28
 SYM_FUNC_END(__create_page_tables)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 8ccca66..81c0186 100644
@@ -45,7 +45,7 @@
  * Because this code has to be copied to a 'safe' page, it can't call out to
  * other functions by PC-relative address. Also remember that it may be
  * mid-way through over-writing other functions. For this reason it contains
- * code from flush_icache_range() and uses the copy_page() macro.
+ * code from caches_clean_inval_pou() and uses the copy_page() macro.
  *
  * This 'safe' page is mapped via ttbr0, and executed from there. This function
  * switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -87,11 +87,12 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
        copy_page       x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
 
        add     x1, x10, #PAGE_SIZE
-       /* Clean the copied page to PoU - based on flush_icache_range() */
+       /* Clean the copied page to PoU - based on caches_clean_inval_pou() */
        raw_dcache_line_size x2, x3
        sub     x3, x2, #1
        bic     x4, x10, x3
-2:     dc      cvau, x4        /* clean D line / unified line */
+2:     /* clean D line / unified line */
+alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
        add     x4, x4, x2
        cmp     x4, x1
        b.lo    2b
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index b1cef37..46a0b4d 100644
@@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
                return -ENOMEM;
 
        memcpy(page, src_start, length);
-       __flush_icache_range((unsigned long)page, (unsigned long)page + length);
+       caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
        rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
        if (rc)
                return rc;
@@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length,
        return 0;
 }
 
-#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
-
 #ifdef CONFIG_ARM64_MTE
 
 static DEFINE_XARRAY(mte_pages);
@@ -383,13 +381,18 @@ int swsusp_arch_suspend(void)
                ret = swsusp_save();
        } else {
                /* Clean kernel core startup/idle code to PoC*/
-               dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
-               dcache_clean_range(__idmap_text_start, __idmap_text_end);
+               dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
+                                   (unsigned long)__mmuoff_data_end);
+               dcache_clean_inval_poc((unsigned long)__idmap_text_start,
+                                   (unsigned long)__idmap_text_end);
 
                /* Clean kvm setup code to PoC? */
                if (el2_reset_needed()) {
-                       dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
-                       dcache_clean_range(__hyp_text_start, __hyp_text_end);
+                       dcache_clean_inval_poc(
+                               (unsigned long)__hyp_idmap_text_start,
+                               (unsigned long)__hyp_idmap_text_end);
+                       dcache_clean_inval_poc((unsigned long)__hyp_text_start,
+                                           (unsigned long)__hyp_text_end);
                }
 
                swsusp_mte_restore_tags();
@@ -474,7 +477,8 @@ int swsusp_arch_resume(void)
         * The hibernate exit text contains a set of el2 vectors, that will
         * be executed at el2 with the mmu off in order to reload hyp-stub.
         */
-       __flush_dcache_area(hibernate_exit, exit_size);
+       dcache_clean_inval_poc((unsigned long)hibernate_exit,
+                           (unsigned long)hibernate_exit + exit_size);
 
        /*
         * KASLR will cause the el2 vectors to be in a different location in
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index e628c8c..53a381a 100644
@@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void)
 
        for (i = 0; i < ARRAY_SIZE(regs); i++) {
                if (regs[i]->override)
-                       __flush_dcache_area(regs[i]->override,
+                       dcache_clean_inval_poc((unsigned long)regs[i]->override,
+                                           (unsigned long)regs[i]->override +
                                            sizeof(*regs[i]->override));
        }
 }
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index bcf3c27..c96a9a0 100644
@@ -35,7 +35,7 @@ __efistub_strnlen             = __pi_strnlen;
 __efistub_strcmp               = __pi_strcmp;
 __efistub_strncmp              = __pi_strncmp;
 __efistub_strrchr              = __pi_strrchr;
-__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
+__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 __efistub___memcpy             = __pi_memcpy;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6c0de2f..51cb8dc 100644
@@ -198,7 +198,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 
        ret = aarch64_insn_write(tp, insn);
        if (ret == 0)
-               __flush_icache_range((uintptr_t)tp,
+               caches_clean_inval_pou((uintptr_t)tp,
                                     (uintptr_t)tp + AARCH64_INSN_SIZE);
 
        return ret;
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 341342b..cfa2cfd 100644
@@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void)
         * we end up running with module randomization disabled.
         */
        module_alloc_base = (u64)_etext - MODULES_VSIZE;
-       __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
+       dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+                           (unsigned long)&module_alloc_base +
+                                   sizeof(module_alloc_base));
 
        /*
         * Try to map the FDT early. If this fails, we simply bail,
@@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void)
        module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
        module_alloc_base &= PAGE_MASK;
 
-       __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
-       __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
+       dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+                           (unsigned long)&module_alloc_base +
+                                   sizeof(module_alloc_base));
+       dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
+                           (unsigned long)&memstart_offset_seed +
+                                   sizeof(memstart_offset_seed));
 
        return offset;
 }
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 90a335c..03ceabe 100644
@@ -68,10 +68,16 @@ int machine_kexec_post_load(struct kimage *kimage)
        kimage->arch.kern_reloc = __pa(reloc_code);
        kexec_image_info(kimage);
 
-       /* Flush the reloc_code in preparation for its execution. */
-       __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
-       flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
-                          arm64_relocate_new_kernel_size);
+       /*
+        * For execution with the MMU off, reloc_code needs to be cleaned to the
+        * PoC and invalidated from the I-cache.
+        */
+       dcache_clean_inval_poc((unsigned long)reloc_code,
+                           (unsigned long)reloc_code +
+                                   arm64_relocate_new_kernel_size);
+       icache_inval_pou((uintptr_t)reloc_code,
+                               (uintptr_t)reloc_code +
+                                       arm64_relocate_new_kernel_size);
 
        return 0;
 }
@@ -102,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage)
 
        for (entry = &kimage->head; ; entry++) {
                unsigned int flag;
-               void *addr;
+               unsigned long addr;
 
                /* flush the list entries. */
-               __flush_dcache_area(entry, sizeof(kimage_entry_t));
+               dcache_clean_inval_poc((unsigned long)entry,
+                                   (unsigned long)entry +
+                                           sizeof(kimage_entry_t));
 
                flag = *entry & IND_FLAGS;
                if (flag == IND_DONE)
                        break;
 
-               addr = phys_to_virt(*entry & PAGE_MASK);
+               addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
 
                switch (flag) {
                case IND_INDIRECTION:
@@ -120,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage)
                        break;
                case IND_SOURCE:
                        /* flush the source pages. */
-                       __flush_dcache_area(addr, PAGE_SIZE);
+                       dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
                        break;
                case IND_DESTINATION:
                        break;
@@ -147,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
                        kimage->segment[i].memsz,
                        kimage->segment[i].memsz /  PAGE_SIZE);
 
-               __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
-                       kimage->segment[i].memsz);
+               dcache_clean_inval_poc(
+                       (unsigned long)phys_to_virt(kimage->segment[i].mem),
+                       (unsigned long)phys_to_virt(kimage->segment[i].mem) +
+                               kimage->segment[i].memsz);
        }
 }
 
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 2c24763..9be668f 100644
@@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
        memcpy(dst, src, len);
 
        /* flush caches (dcache/icache) */
-       sync_icache_aliases(dst, len);
+       sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
 
        kunmap_atomic(xol_page_kaddr);
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041..9b4c111 100644
@@ -122,7 +122,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
        secondary_data.task = idle;
        secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
        update_cpu_boot_status(CPU_MMU_OFF);
-       __flush_dcache_area(&secondary_data, sizeof(secondary_data));
+       dcache_clean_inval_poc((unsigned long)&secondary_data,
+                           (unsigned long)&secondary_data +
+                                   sizeof(secondary_data));
 
        /* Now bring the CPU into our world */
        ret = boot_secondary(cpu, idle);
@@ -143,7 +145,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
        pr_crit("CPU%u: failed to come online\n", cpu);
        secondary_data.task = NULL;
        secondary_data.stack = NULL;
-       __flush_dcache_area(&secondary_data, sizeof(secondary_data));
+       dcache_clean_inval_poc((unsigned long)&secondary_data,
+                           (unsigned long)&secondary_data +
+                                   sizeof(secondary_data));
        status = READ_ONCE(secondary_data.status);
        if (status == CPU_MMU_OFF)
                status = READ_ONCE(__early_cpu_boot_status);
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c45a835..7e1624e 100644
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
        unsigned long size = sizeof(secondary_holding_pen_release);
 
        secondary_holding_pen_release = val;
-       __flush_dcache_area(start, size);
+       dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size);
 }
 
 
@@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
         * the boot protocol.
         */
        writeq_relaxed(pa_holding_pen, release_addr);
-       __flush_dcache_area((__force void *)release_addr,
-                           sizeof(*release_addr));
+       dcache_clean_inval_poc((__force unsigned long)release_addr,
+                           (__force unsigned long)release_addr +
+                                   sizeof(*release_addr));
 
        /*
         * Send an event to wake up the secondary CPU.
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 265fe3e..db5159a 100644
@@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
                        dsb(ish);
                }
 
-               ret = __flush_cache_user_range(start, start + chunk);
+               ret = caches_clean_inval_user_pou(start, start + chunk);
                if (ret)
                        return ret;
 
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 74e0699..3df67c1 100644
@@ -9,6 +9,7 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/uaccess.h>
 
 #include <clocksource/arm_arch_timer.h>
@@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
        return 0;
 }
 
-int kvm_timer_hyp_init(bool has_gic)
+static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
 {
-       struct arch_timer_kvm_info *info;
-       int err;
+       if (vcpu)
+               irqd_set_forwarded_to_vcpu(d);
+       else
+               irqd_clr_forwarded_to_vcpu(d);
 
-       info = arch_timer_get_kvm_info();
-       timecounter = &info->timecounter;
+       return 0;
+}
 
-       if (!timecounter->cc) {
-               kvm_err("kvm_arch_timer: uninitialized timecounter\n");
-               return -ENODEV;
+static int timer_irq_set_irqchip_state(struct irq_data *d,
+                                      enum irqchip_irq_state which, bool val)
+{
+       if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+               return irq_chip_set_parent_state(d, which, val);
+
+       if (val)
+               irq_chip_mask_parent(d);
+       else
+               irq_chip_unmask_parent(d);
+
+       return 0;
+}
+
+static void timer_irq_eoi(struct irq_data *d)
+{
+       if (!irqd_is_forwarded_to_vcpu(d))
+               irq_chip_eoi_parent(d);
+}
+
+static void timer_irq_ack(struct irq_data *d)
+{
+       d = d->parent_data;
+       if (d->chip->irq_ack)
+               d->chip->irq_ack(d);
+}
+
+static struct irq_chip timer_chip = {
+       .name                   = "KVM",
+       .irq_ack                = timer_irq_ack,
+       .irq_mask               = irq_chip_mask_parent,
+       .irq_unmask             = irq_chip_unmask_parent,
+       .irq_eoi                = timer_irq_eoi,
+       .irq_set_type           = irq_chip_set_type_parent,
+       .irq_set_vcpu_affinity  = timer_irq_set_vcpu_affinity,
+       .irq_set_irqchip_state  = timer_irq_set_irqchip_state,
+};
+
+static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+                                 unsigned int nr_irqs, void *arg)
+{
+       irq_hw_number_t hwirq = (uintptr_t)arg;
+
+       return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+                                            &timer_chip, NULL);
+}
+
+static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+                                 unsigned int nr_irqs)
+{
+}
+
+static const struct irq_domain_ops timer_domain_ops = {
+       .alloc  = timer_irq_domain_alloc,
+       .free   = timer_irq_domain_free,
+};
+
+static struct irq_ops arch_timer_irq_ops = {
+       .get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
+{
+       *flags = irq_get_trigger_type(virq);
+       if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
+               kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
+                       virq);
+               *flags = IRQF_TRIGGER_LOW;
        }
+}
 
-       /* First, do the virtual EL1 timer irq */
+static int kvm_irq_init(struct arch_timer_kvm_info *info)
+{
+       struct irq_domain *domain = NULL;
 
        if (info->virtual_irq <= 0) {
                kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
                        info->virtual_irq);
                return -ENODEV;
        }
+
        host_vtimer_irq = info->virtual_irq;
+       kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
+
+       if (kvm_vgic_global_state.no_hw_deactivation) {
+               struct fwnode_handle *fwnode;
+               struct irq_data *data;
+
+               fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
+               if (!fwnode)
+                       return -ENOMEM;
+
+               /* Assume both vtimer and ptimer in the same parent */
+               data = irq_get_irq_data(host_vtimer_irq);
+               domain = irq_domain_create_hierarchy(data->domain, 0,
+                                                    NR_KVM_TIMERS, fwnode,
+                                                    &timer_domain_ops, NULL);
+               if (!domain) {
+                       irq_domain_free_fwnode(fwnode);
+                       return -ENOMEM;
+               }
+
+               arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+               WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
+                                           (void *)TIMER_VTIMER));
+       }
 
-       host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
-       if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
-           host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
-               kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
-                       host_vtimer_irq);
-               host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+       if (info->physical_irq > 0) {
+               host_ptimer_irq = info->physical_irq;
+               kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+
+               if (domain)
+                       WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
+                                                   (void *)TIMER_PTIMER));
        }
 
+       return 0;
+}
+
+int kvm_timer_hyp_init(bool has_gic)
+{
+       struct arch_timer_kvm_info *info;
+       int err;
+
+       info = arch_timer_get_kvm_info();
+       timecounter = &info->timecounter;
+
+       if (!timecounter->cc) {
+               kvm_err("kvm_arch_timer: uninitialized timecounter\n");
+               return -ENODEV;
+       }
+
+       err = kvm_irq_init(info);
+       if (err)
+               return err;
+
+       /* First, do the virtual EL1 timer irq */
+
        err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
                                 "kvm guest vtimer", kvm_get_running_vcpus());
        if (err) {
@@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic)
        /* Now let's do the physical EL1 timer irq */
 
        if (info->physical_irq > 0) {
-               host_ptimer_irq = info->physical_irq;
-               host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
-               if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
-                   host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
-                       kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
-                               host_ptimer_irq);
-                       host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
-               }
-
                err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
                                         "kvm guest ptimer", kvm_get_running_vcpus());
                if (err) {
@@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
        ret = kvm_vgic_map_phys_irq(vcpu,
                                    map.direct_vtimer->host_timer_irq,
                                    map.direct_vtimer->irq.irq,
-                                   kvm_arch_timer_get_input_level);
+                                   &arch_timer_irq_ops);
        if (ret)
                return ret;
 
@@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
                ret = kvm_vgic_map_phys_irq(vcpu,
                                            map.direct_ptimer->host_timer_irq,
                                            map.direct_ptimer->irq.irq,
-                                           kvm_arch_timer_get_input_level);
+                                           &arch_timer_irq_ops);
        }
 
        if (ret)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 511f371..e0b8187 100644
@@ -698,6 +698,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
                        vgic_v4_load(vcpu);
                        preempt_enable();
                }
+
+               if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
+                       kvm_pmu_handle_pmcr(vcpu,
+                                           __vcpu_sys_reg(vcpu, PMCR_EL0));
        }
 }
 
@@ -1087,7 +1091,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
                if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
                        stage2_unmap_vm(vcpu->kvm);
                else
-                       __flush_icache_all();
+                       icache_inval_all_pou();
        }
 
        vcpu_reset_hcr(vcpu);
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5f49df4..9aa9b73 100644
@@ -76,6 +76,7 @@ el1_trap:
        b       __guest_exit
 
 el1_irq:
+el1_fiq:
        get_vcpu_ptr    x1, x0
        mov     x0, #ARM_EXCEPTION_IRQ
        b       __guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
        invalid_vector  el2t_error_invalid
        invalid_vector  el2h_irq_invalid
        invalid_vector  el2h_fiq_invalid
-       invalid_vector  el1_fiq_invalid
 
        .ltorg
 
@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)
 
        valid_vect      el1_sync                // Synchronous 64-bit EL1
        valid_vect      el1_irq                 // IRQ 64-bit EL1
-       invalid_vect    el1_fiq_invalid         // FIQ 64-bit EL1
+       valid_vect      el1_fiq                 // FIQ 64-bit EL1
        valid_vect      el1_error               // Error 64-bit EL1
 
        valid_vect      el1_sync                // Synchronous 32-bit EL1
        valid_vect      el1_irq                 // IRQ 32-bit EL1
-       invalid_vect    el1_fiq_invalid         // FIQ 32-bit EL1
+       valid_vect      el1_fiq                 // FIQ 32-bit EL1
        valid_vect      el1_error               // Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_vector)
 
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 18a4494..fb0f523 100644
@@ -7,7 +7,7 @@
 #include <nvhe/memory.h>
 #include <nvhe/spinlock.h>
 
-#define HYP_NO_ORDER   UINT_MAX
+#define HYP_NO_ORDER   USHRT_MAX
 
 struct hyp_pool {
        /*
@@ -19,48 +19,13 @@ struct hyp_pool {
        struct list_head free_area[MAX_ORDER];
        phys_addr_t range_start;
        phys_addr_t range_end;
-       unsigned int max_order;
+       unsigned short max_order;
 };
 
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
-       struct hyp_pool *pool = hyp_page_to_pool(p);
-
-       hyp_spin_lock(&pool->lock);
-       p->refcount++;
-       hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
-       struct hyp_pool *pool = hyp_page_to_pool(p);
-       int ret;
-
-       hyp_spin_lock(&pool->lock);
-       p->refcount--;
-       ret = (p->refcount == 0);
-       hyp_spin_unlock(&pool->lock);
-
-       return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
-       struct hyp_pool *pool = hyp_page_to_pool(p);
-
-       hyp_spin_lock(&pool->lock);
-       if (p->refcount) {
-               hyp_spin_unlock(&pool->lock);
-               BUG();
-       }
-       p->refcount = 1;
-       hyp_spin_unlock(&pool->lock);
-}
-
 /* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
-void hyp_get_page(void *addr);
-void hyp_put_page(void *addr);
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_get_page(struct hyp_pool *pool, void *addr);
+void hyp_put_page(struct hyp_pool *pool, void *addr);
 
 /* Used pages cannot be freed */
 int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 42d81ec..9c227d8 100644
@@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
 static __always_inline void __load_host_stage2(void)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index fd78bde..592b7ed 100644
@@ -7,12 +7,9 @@
 
 #include <linux/types.h>
 
-struct hyp_pool;
 struct hyp_page {
-       unsigned int refcount;
-       unsigned int order;
-       struct hyp_pool *pool;
-       struct list_head node;
+       unsigned short refcount;
+       unsigned short order;
 };
 
 extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 0095f62..8ec3a5a 100644
@@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
        return res;
 }
 
-static inline unsigned long host_s2_mem_pgtable_pages(void)
+static inline unsigned long host_s2_pgtable_pages(void)
 {
+       unsigned long res;
+
        /*
         * Include an extra 16 pages to safely upper-bound the worst case of
         * concatenated pgds.
         */
-       return __hyp_pgtable_total_pages() + 16;
-}
+       res = __hyp_pgtable_total_pages() + 16;
 
-static inline unsigned long host_s2_dev_pgtable_pages(void)
-{
        /* Allow 1 GiB for MMIO mappings */
-       return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+       res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+       return res;
 }
 
 #endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 36cef69..958734f 100644
@@ -7,7 +7,7 @@
 #include <asm/assembler.h>
 #include <asm/alternative.h>
 
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
        dcache_by_line_op civac, sy, x0, x1, x2, x3
        ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 4b60c00..d938ce9 100644
@@ -23,8 +23,7 @@
 extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
-static struct hyp_pool host_s2_mem;
-static struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_pool;
 
 /*
  * Copies of the host's CPU features registers holding sanitized values.
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
 
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
-       return hyp_alloc_pages(&host_s2_mem, get_order(size));
+       return hyp_alloc_pages(&host_s2_pool, get_order(size));
 }
 
 static void *host_s2_zalloc_page(void *pool)
@@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool)
        return hyp_alloc_pages(pool, 0);
 }
 
-static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+static void host_s2_get_page(void *addr)
+{
+       hyp_get_page(&host_s2_pool, addr);
+}
+
+static void host_s2_put_page(void *addr)
+{
+       hyp_put_page(&host_s2_pool, addr);
+}
+
+static int prepare_s2_pool(void *pgt_pool_base)
 {
        unsigned long nr_pages, pfn;
        int ret;
 
-       pfn = hyp_virt_to_pfn(mem_pgt_pool);
-       nr_pages = host_s2_mem_pgtable_pages();
-       ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
-       if (ret)
-               return ret;
-
-       pfn = hyp_virt_to_pfn(dev_pgt_pool);
-       nr_pages = host_s2_dev_pgtable_pages();
-       ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
+       pfn = hyp_virt_to_pfn(pgt_pool_base);
+       nr_pages = host_s2_pgtable_pages();
+       ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
        if (ret)
                return ret;
 
@@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
                .phys_to_virt = hyp_phys_to_virt,
                .virt_to_phys = hyp_virt_to_phys,
                .page_count = hyp_page_count,
-               .get_page = hyp_get_page,
-               .put_page = hyp_put_page,
+               .get_page = host_s2_get_page,
+               .put_page = host_s2_put_page,
        };
 
        return 0;
@@ -86,7 +89,7 @@ static void prepare_host_vtcr(void)
                                          id_aa64mmfr1_el1_sys_val, phys_shift);
 }
 
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+int kvm_host_prepare_stage2(void *pgt_pool_base)
 {
        struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
        int ret;
@@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
        prepare_host_vtcr();
        hyp_spin_lock_init(&host_kvm.lock);
 
-       ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+       ret = prepare_s2_pool(pgt_pool_base);
        if (ret)
                return ret;
 
@@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end)
 }
 
 static inline int __host_stage2_idmap(u64 start, u64 end,
-                                     enum kvm_pgtable_prot prot,
-                                     struct hyp_pool *pool)
+                                     enum kvm_pgtable_prot prot)
 {
        return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
-                                     prot, pool);
+                                     prot, &host_s2_pool);
 }
 
 static int host_stage2_idmap(u64 addr)
@@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr)
        enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
        struct kvm_mem_range range;
        bool is_memory = find_mem_range(addr, &range);
-       struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
        int ret;
 
        if (is_memory)
@@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr)
        if (ret)
                goto unlock;
 
-       ret = __host_stage2_idmap(range.start, range.end, prot, pool);
-       if (is_memory || ret != -ENOMEM)
+       ret = __host_stage2_idmap(range.start, range.end, prot);
+       if (ret != -ENOMEM)
                goto unlock;
 
        /*
-        * host_s2_mem has been provided with enough pages to cover all of
-        * memory with page granularity, so we should never hit the ENOMEM case.
-        * However, it is difficult to know how much of the MMIO range we will
-        * need to cover upfront, so we may need to 'recycle' the pages if we
-        * run out.
+        * The pool has been provided with enough pages to cover all of memory
+        * with page granularity, but it is difficult to know how much of the
+        * MMIO range we will need to cover upfront, so we may need to 'recycle'
+        * the pages if we run out.
         */
        ret = host_stage2_unmap_dev_all();
        if (ret)
                goto unlock;
 
-       ret = __host_stage2_idmap(range.start, range.end, prot, pool);
+       ret = __host_stage2_idmap(range.start, range.end, prot);
 
 unlock:
        hyp_spin_unlock(&host_kvm.lock);
@@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
 
        hyp_spin_lock(&host_kvm.lock);
        ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
-                                          &host_s2_mem, pkvm_hyp_id);
+                                          &host_s2_pool, pkvm_hyp_id);
        hyp_spin_unlock(&host_kvm.lock);
 
        return ret != -EAGAIN ? ret : 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 237e03b..41fc25b 100644
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
  */
 static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
                                             struct hyp_page *p,
-                                            unsigned int order)
+                                            unsigned short order)
 {
        phys_addr_t addr = hyp_page_to_phys(p);
 
@@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
 /* Find a buddy page currently available for allocation */
 static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
                                           struct hyp_page *p,
-                                          unsigned int order)
+                                          unsigned short order)
 {
        struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
 
-       if (!buddy || buddy->order != order || list_empty(&buddy->node))
+       if (!buddy || buddy->order != order || buddy->refcount)
                return NULL;
 
        return buddy;
 
 }
 
+/*
+ * Pages that are available for allocation are tracked in free-lists, so we use
+ * the pages themselves to store the list nodes to avoid wasting space. As the
+ * allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
+ * path to optimize allocation speed), we also need to clean-up the list node in
+ * each page when we take it out of the list.
+ */
+static inline void page_remove_from_list(struct hyp_page *p)
+{
+       struct list_head *node = hyp_page_to_virt(p);
+
+       __list_del_entry(node);
+       memset(node, 0, sizeof(*node));
+}
+
+static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
+{
+       struct list_head *node = hyp_page_to_virt(p);
+
+       INIT_LIST_HEAD(node);
+       list_add_tail(node, head);
+}
+
+static inline struct hyp_page *node_to_page(struct list_head *node)
+{
+       return hyp_virt_to_page(node);
+}
+
 static void __hyp_attach_page(struct hyp_pool *pool,
                              struct hyp_page *p)
 {
-       unsigned int order = p->order;
+       unsigned short order = p->order;
        struct hyp_page *buddy;
 
        memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool,
                        break;
 
                /* Take the buddy out of its list, and coalesce with @p */
-               list_del_init(&buddy->node);
+               page_remove_from_list(buddy);
                buddy->order = HYP_NO_ORDER;
                p = min(p, buddy);
        }
 
        /* Mark the new head, and insert it */
        p->order = order;
-       list_add_tail(&p->node, &pool->free_area[order]);
-}
-
-static void hyp_attach_page(struct hyp_page *p)
-{
-       struct hyp_pool *pool = hyp_page_to_pool(p);
-
-       hyp_spin_lock(&pool->lock);
-       __hyp_attach_page(pool, p);
-       hyp_spin_unlock(&pool->lock);
+       page_add_to_list(p, &pool->free_area[order]);
 }
 
 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
                                           struct hyp_page *p,
-                                          unsigned int order)
+                                          unsigned short order)
 {
        struct hyp_page *buddy;
 
-       list_del_init(&p->node);
+       page_remove_from_list(p);
        while (p->order > order) {
                /*
                 * The buddy of order n - 1 currently has HYP_NO_ORDER as it
@@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
                p->order--;
                buddy = __find_buddy_nocheck(pool, p, p->order);
                buddy->order = p->order;
-               list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+               page_add_to_list(buddy, &pool->free_area[buddy->order]);
        }
 
        return p;
 }
 
-void hyp_put_page(void *addr)
+static inline void hyp_page_ref_inc(struct hyp_page *p)
 {
-       struct hyp_page *p = hyp_virt_to_page(addr);
+       BUG_ON(p->refcount == USHRT_MAX);
+       p->refcount++;
+}
 
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+       p->refcount--;
+       return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+       BUG_ON(p->refcount);
+       p->refcount = 1;
+}
+
+static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
+{
        if (hyp_page_ref_dec_and_test(p))
-               hyp_attach_page(p);
+               __hyp_attach_page(pool, p);
+}
+
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
+void hyp_put_page(struct hyp_pool *pool, void *addr)
+{
+       struct hyp_page *p = hyp_virt_to_page(addr);
+
+       hyp_spin_lock(&pool->lock);
+       __hyp_put_page(pool, p);
+       hyp_spin_unlock(&pool->lock);
 }
 
-void hyp_get_page(void *addr)
+void hyp_get_page(struct hyp_pool *pool, void *addr)
 {
        struct hyp_page *p = hyp_virt_to_page(addr);
 
+       hyp_spin_lock(&pool->lock);
        hyp_page_ref_inc(p);
+       hyp_spin_unlock(&pool->lock);
 }
 
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
 {
-       unsigned int i = order;
+       unsigned short i = order;
        struct hyp_page *p;
 
        hyp_spin_lock(&pool->lock);
@@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
        }
 
        /* Extract it from the tree at the right order */
-       p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+       p = node_to_page(pool->free_area[i].next);
        p = __hyp_extract_page(pool, p, order);
 
-       hyp_spin_unlock(&pool->lock);
        hyp_set_page_refcounted(p);
+       hyp_spin_unlock(&pool->lock);
 
        return hyp_page_to_virt(p);
 }
@@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
 
        /* Init the vmemmap portion */
        p = hyp_phys_to_page(phys);
-       memset(p, 0, sizeof(*p) * nr_pages);
        for (i = 0; i < nr_pages; i++) {
-               p[i].pool = pool;
-               INIT_LIST_HEAD(&p[i].node);
+               p[i].order = 0;
+               hyp_set_page_refcounted(&p[i]);
        }
 
        /* Attach the unused pages to the buddy tree */
        for (i = reserved_pages; i < nr_pages; i++)
-               __hyp_attach_page(pool, &p[i]);
+               __hyp_put_page(pool, &p[i]);
 
        return 0;
 }
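
For orientation, an editorial sketch (not part of the patch) of how a caller now uses the pool-scoped API defined above; the pool name hpool and the order value are illustrative, borrowed from the setup code further down:

	/* Hypothetical usage, assuming an initialised 'hpool' */
	void *va = hyp_alloc_pages(&hpool, 0);	/* order-0 page, refcount == 1 */
	if (va) {
		hyp_get_page(&hpool, va);	/* refcount == 2 */
		hyp_put_page(&hpool, va);	/* refcount == 1 */
		hyp_put_page(&hpool, va);	/* refcount == 0: the page goes
						 * back to the buddy tree, all
						 * under the pool lock */
	}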
index a3d3a27..0b574d1 100644 (file)
@@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
 
 static void *vmemmap_base;
 static void *hyp_pgt_base;
-static void *host_s2_mem_pgt_base;
-static void *host_s2_dev_pgt_base;
+static void *host_s2_pgt_base;
 static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 
 static int divide_memory_pool(void *virt, unsigned long size)
@@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
        if (!hyp_pgt_base)
                return -ENOMEM;
 
-       nr_pages = host_s2_mem_pgtable_pages();
-       host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
-       if (!host_s2_mem_pgt_base)
-               return -ENOMEM;
-
-       nr_pages = host_s2_dev_pgtable_pages();
-       host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
-       if (!host_s2_dev_pgt_base)
+       nr_pages = host_s2_pgtable_pages();
+       host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
+       if (!host_s2_pgt_base)
                return -ENOMEM;
 
        return 0;
@@ -134,7 +128,8 @@ static void update_nvhe_init_params(void)
        for (i = 0; i < hyp_nr_cpus; i++) {
                params = per_cpu_ptr(&kvm_init_params, i);
                params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
-               __flush_dcache_area(params, sizeof(*params));
+               dcache_clean_inval_poc((unsigned long)params,
+                                   (unsigned long)params + sizeof(*params));
        }
 }
 
@@ -143,6 +138,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
        return hyp_alloc_pages(&hpool, 0);
 }
 
+static void hpool_get_page(void *addr)
+{
+       hyp_get_page(&hpool, addr);
+}
+
+static void hpool_put_page(void *addr)
+{
+       hyp_put_page(&hpool, addr);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
        struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -158,7 +163,7 @@ void __noreturn __pkvm_init_finalise(void)
        if (ret)
                goto out;
 
-       ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+       ret = kvm_host_prepare_stage2(host_s2_pgt_base);
        if (ret)
                goto out;
 
@@ -166,8 +171,8 @@ void __noreturn __pkvm_init_finalise(void)
                .zalloc_page = hyp_zalloc_hyp_page,
                .phys_to_virt = hyp_phys_to_virt,
                .virt_to_phys = hyp_virt_to_phys,
-               .get_page = hyp_get_page,
-               .put_page = hyp_put_page,
+               .get_page = hpool_get_page,
+               .put_page = hpool_put_page,
        };
        pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
 
index 83dc3b2..38ed0f6 100644 (file)
@@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
         * you should be running with VHE enabled.
         */
        if (icache_is_vpipt())
-               __flush_icache_all();
+               icache_inval_all_pou();
 
        __tlb_switch_to_host(&cxt);
 }
index c37c1dc..05321f4 100644 (file)
@@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
        mm_ops->put_page(ptep);
 }
 
+static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
+{
+       u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+       return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+       return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
                                      kvm_pte_t *ptep,
                                      struct stage2_map_data *data)
 {
        kvm_pte_t new, old = *ptep;
        u64 granule = kvm_granule_size(level), phys = data->phys;
+       struct kvm_pgtable *pgt = data->mmu->pgt;
        struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
        if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
                stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
        }
 
+       /* Perform CMOs before installation of the guest stage-2 PTE */
+       if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+               mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
+                                               granule);
+
+       if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+               mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
+
        smp_store_release(ptep, new);
        if (stage2_pte_is_counted(new))
                mm_ops->get_page(ptep);
@@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
        return ret;
 }
 
-static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
-{
-       u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
-       return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
-}
-
 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                               enum kvm_pgtable_walk_flags flag,
                               void * const arg)
@@ -839,8 +853,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
        stage2_put_pte(ptep, mmu, addr, level, mm_ops);
 
        if (need_flush) {
-               __flush_dcache_area(kvm_pte_follow(pte, mm_ops),
-                                   kvm_granule_size(level));
+               kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+
+               dcache_clean_inval_poc((unsigned long)pte_follow,
+                                   (unsigned long)pte_follow +
+                                           kvm_granule_size(level));
        }
 
        if (childp)
@@ -861,10 +878,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 }
 
 struct stage2_attr_data {
-       kvm_pte_t       attr_set;
-       kvm_pte_t       attr_clr;
-       kvm_pte_t       pte;
-       u32             level;
+       kvm_pte_t                       attr_set;
+       kvm_pte_t                       attr_clr;
+       kvm_pte_t                       pte;
+       u32                             level;
+       struct kvm_pgtable_mm_ops       *mm_ops;
 };
 
 static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -873,6 +891,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 {
        kvm_pte_t pte = *ptep;
        struct stage2_attr_data *data = arg;
+       struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
        if (!kvm_pte_valid(pte))
                return 0;
@@ -887,8 +906,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
         * but worst-case the access flag update gets lost and will be
         * set on the next access instead.
         */
-       if (data->pte != pte)
+       if (data->pte != pte) {
+               /*
+                * Invalidate instruction cache before updating the guest
+                * stage-2 PTE if we are going to add executable permission.
+                */
+               if (mm_ops->icache_inval_pou &&
+                   stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
+                       mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
+                                                 kvm_granule_size(level));
                WRITE_ONCE(*ptep, pte);
+       }
 
        return 0;
 }
@@ -903,6 +931,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
        struct stage2_attr_data data = {
                .attr_set       = attr_set & attr_mask,
                .attr_clr       = attr_clr & attr_mask,
+               .mm_ops         = pgt->mm_ops,
        };
        struct kvm_pgtable_walker walker = {
                .cb             = stage2_attr_walker,
@@ -988,11 +1017,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
        struct kvm_pgtable *pgt = arg;
        struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
        kvm_pte_t pte = *ptep;
+       kvm_pte_t *pte_follow;
 
        if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
                return 0;
 
-       __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
+       pte_follow = kvm_pte_follow(pte, mm_ops);
+       dcache_clean_inval_poc((unsigned long)pte_follow,
+                           (unsigned long)pte_follow +
+                                   kvm_granule_size(level));
        return 0;
 }
 
index 83ca23a..d654921 100644 (file)
@@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
        }
 
        hyp_mem_pages += hyp_s1_pgtable_pages();
-       hyp_mem_pages += host_s2_mem_pgtable_pages();
-       hyp_mem_pages += host_s2_dev_pgtable_pages();
+       hyp_mem_pages += host_s2_pgtable_pages();
 
        /*
         * The hyp_vmemmap needs to be backed by pages, but these pages
index c6a97d4..57292dc 100644 (file)
@@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys)
        return __va(phys);
 }
 
+static void clean_dcache_guest_page(void *va, size_t size)
+{
+       __clean_dcache_guest_page(va, size);
+}
+
+static void invalidate_icache_guest_page(void *va, size_t size)
+{
+       __invalidate_icache_guest_page(va, size);
+}
+
 /*
  * Unmapping vs dcache management:
  *
@@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
        .page_count             = kvm_host_page_count,
        .phys_to_virt           = kvm_host_va,
        .virt_to_phys           = kvm_host_pa,
+       .dcache_clean_inval_poc = clean_dcache_guest_page,
+       .icache_inval_pou       = invalidate_icache_guest_page,
 };
 
 /**
@@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
        kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-       __clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-       __invalidate_icache_guest_page(pfn, size);
-}
-
 static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
 {
        send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
@@ -822,6 +824,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
        return PAGE_SIZE;
 }
 
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+       unsigned long pa;
+
+       if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+               return huge_page_shift(hstate_vma(vma));
+
+       if (!(vma->vm_flags & VM_PFNMAP))
+               return PAGE_SHIFT;
+
+       VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+       pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+       if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+           ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+           ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+               return PUD_SHIFT;
+#endif
+
+       if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+           ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+           ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+               return PMD_SHIFT;
+
+       return PAGE_SHIFT;
+}
+
 /*
  * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
  * able to see the page's tags and therefore they must be initialised first. If
@@ -893,7 +924,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
        }
 
-       /* Let's check if we will get back a huge page backed by hugetlbfs */
+       /*
+        * Let's check if we will get back a huge page backed by hugetlbfs, or
+        * get block mapping for device MMIO region.
+        */
        mmap_read_lock(current->mm);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
        if (unlikely(!vma)) {
@@ -902,15 +936,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
        }
 
-       if (is_vm_hugetlb_page(vma))
-               vma_shift = huge_page_shift(hstate_vma(vma));
-       else
-               vma_shift = PAGE_SHIFT;
-
-       if (logging_active ||
-           (vma->vm_flags & VM_PFNMAP)) {
+       /*
+        * logging_active is guaranteed to never be true for VM_PFNMAP
+        * memslots.
+        */
+       if (logging_active) {
                force_pte = true;
                vma_shift = PAGE_SHIFT;
+       } else {
+               vma_shift = get_vma_page_shift(vma, hva);
        }
 
        shared = (vma->vm_flags & VM_PFNMAP);
@@ -985,8 +1019,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
 
        if (kvm_is_device_pfn(pfn)) {
+               /*
+                * If the page was identified as device early by looking at
+                * the VMA flags, vma_pagesize is already representing the
+                * largest quantity we can map.  If instead it was mapped
+                * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+                * and must not be upgraded.
+                *
+                * In both cases, we don't let transparent_hugepage_adjust()
+                * change things at the last minute.
+                */
                device = true;
-               force_pte = true;
        } else if (logging_active && !write_fault) {
                /*
                 * Only actually map the page as writable if this was a write
@@ -1007,29 +1050,25 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * If we are not forced to use page mapping, check if we are
         * backed by a THP and thus use block mapping if possible.
         */
-       if (vma_pagesize == PAGE_SIZE && !force_pte)
+       if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
                vma_pagesize = transparent_hugepage_adjust(memslot, hva,
                                                           &pfn, &fault_ipa);
-       if (writable)
-               prot |= KVM_PGTABLE_PROT_W;
 
-       if (fault_status != FSC_PERM && !device) {
+       if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
                /* Check the VMM hasn't introduced a new VM_SHARED VMA */
-               if (kvm_has_mte(kvm) && shared) {
+               if (!shared)
+                       ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+               else
                        ret = -EFAULT;
-                       goto out_unlock;
-               }
-               ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
                if (ret)
                        goto out_unlock;
-
-               clean_dcache_guest_page(pfn, vma_pagesize);
        }
 
-       if (exec_fault) {
+       if (writable)
+               prot |= KVM_PGTABLE_PROT_W;
+
+       if (exec_fault)
                prot |= KVM_PGTABLE_PROT_X;
-               invalidate_icache_guest_page(pfn, vma_pagesize);
-       }
 
        if (device)
                prot |= KVM_PGTABLE_PROT_DEVICE;
@@ -1232,12 +1271,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
                return false;
 
        /*
-        * We've moved a page around, probably through CoW, so let's treat it
-        * just like a translation fault and clean the cache to the PoC.
-        */
-       clean_dcache_guest_page(pfn, PAGE_SIZE);
-
-       /*
+        * We've moved a page around, probably through CoW, so let's treat
+        * it just like a translation fault; the map handler will clean
+        * the cache to the PoC.
+        *
         * The MMU notifiers will have unmapped a huge PMD before calling
         * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
         * therefore we never need to clear out a huge PMD through this
@@ -1403,7 +1440,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
        hva_t hva = mem->userspace_addr;
        hva_t reg_end = hva + mem->memory_size;
-       bool writable = !(mem->flags & KVM_MEM_READONLY);
        int ret = 0;
 
        if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1420,8 +1456,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
        mmap_read_lock(current->mm);
        /*
         * A memory region could potentially cover multiple VMAs, and any holes
-        * between them, so iterate over all of them to find out if we can map
-        * any of them right now.
+        * between them, so iterate over all of them.
         *
         *     +--------------------------------------------+
         * +---------------+----------------+   +----------------+
@@ -1432,7 +1467,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         */
        do {
                struct vm_area_struct *vma;
-               hva_t vm_start, vm_end;
 
                vma = find_vma_intersection(current->mm, hva, reg_end);
                if (!vma)
@@ -1446,45 +1480,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
                        return -EINVAL;
 
-               /*
-                * Take the intersection of this VMA with the memory region
-                */
-               vm_start = max(hva, vma->vm_start);
-               vm_end = min(reg_end, vma->vm_end);
-
                if (vma->vm_flags & VM_PFNMAP) {
-                       gpa_t gpa = mem->guest_phys_addr +
-                                   (vm_start - mem->userspace_addr);
-                       phys_addr_t pa;
-
-                       pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-                       pa += vm_start - vma->vm_start;
-
                        /* IO region dirty page logging not allowed */
                        if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                                ret = -EINVAL;
-                               goto out;
-                       }
-
-                       ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-                                                   vm_end - vm_start,
-                                                   writable);
-                       if (ret)
                                break;
+                       }
                }
-               hva = vm_end;
+               hva = min(reg_end, vma->vm_end);
        } while (hva < reg_end);
 
-       if (change == KVM_MR_FLAGS_ONLY)
-               goto out;
-
-       spin_lock(&kvm->mmu_lock);
-       if (ret)
-               unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-       else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-               stage2_flush_memslot(kvm, memslot);
-       spin_unlock(&kvm->mmu_lock);
-out:
        mmap_read_unlock(current->mm);
        return ret;
 }
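
A brief worked example for the new get_vma_page_shift() helper above (hypothetical addresses, assuming 4KiB pages so PMD_SIZE is 2MiB):

	/*
	 * VM_PFNMAP VMA: vm_start = 0x40000000, vm_end = 0x40400000,
	 * backed by PA 0x80000000 at vm_start; fault at hva = 0x40200000.
	 *
	 *   pa = 0x80000000 + (0x40200000 - 0x40000000) = 0x80200000
	 *   (hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1))   both are 0
	 *   ALIGN_DOWN(hva, PMD_SIZE) = 0x40200000 >= vm_start
	 *   ALIGN(hva, PMD_SIZE)      = 0x40200000 <= vm_end
	 *   => PMD_SHIFT, i.e. a 2MiB block mapping
	 *
	 * Had the PA started at 0x80001000 instead, the PMD offsets of hva
	 * and pa would differ and the helper would return PAGE_SHIFT.
	 */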
index fd167d4..f33825c 100644 (file)
@@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
                kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
 
        if (val & ARMV8_PMU_PMCR_P) {
+               mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
                for_each_set_bit(i, &mask, 32)
                        kvm_pmu_set_counter_value(vcpu, i, 0);
        }
@@ -850,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
                   return -EINVAL;
        }
 
+       /* One-off reload of the PMU on first run */
+       kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
        return 0;
 }
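
The added mask update reflects that a PMCR_EL0.P write architecturally resets the event counters but not the cycle counter. An editorial sketch of the effect (the initial mask value is illustrative; ARMV8_PMU_CYCLE_IDX is bit 31):

	unsigned long mask = 0x8000003f;	/* e.g. 6 event counters + cycle counter */
	mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);	/* -> 0x3f: only the event counters are
						 * zeroed by the for_each_set_bit() loop
						 * in kvm_pmu_handle_pmcr()             */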
 
index 58cbda0..340c51d 100644 (file)
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
        return IRQ_HANDLED;
 }
 
+static struct gic_kvm_info *gic_kvm_info;
+
+void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
+{
+       BUG_ON(gic_kvm_info != NULL);
+       gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (gic_kvm_info)
+               *gic_kvm_info = *info;
+}
+
 /**
  * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
  *
@@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void)
  */
 int kvm_vgic_hyp_init(void)
 {
-       const struct gic_kvm_info *gic_kvm_info;
+       bool has_mask;
        int ret;
 
-       gic_kvm_info = gic_get_kvm_info();
        if (!gic_kvm_info)
                return -ENODEV;
 
-       if (!gic_kvm_info->maint_irq) {
+       has_mask = !gic_kvm_info->no_maint_irq_mask;
+
+       if (has_mask && !gic_kvm_info->maint_irq) {
                kvm_err("No vgic maintenance irq\n");
                return -ENXIO;
        }
 
+       /*
+        * If we get one of these oddball non-GICs, taint the kernel,
+        * as we have no idea of how they *really* behave.
+        */
+       if (gic_kvm_info->no_hw_deactivation) {
+               kvm_info("Non-architectural vgic, tainting kernel\n");
+               add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+               kvm_vgic_global_state.no_hw_deactivation = true;
+       }
+
        switch (gic_kvm_info->type) {
        case GIC_V2:
                ret = vgic_v2_probe(gic_kvm_info);
@@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void)
                ret = -ENODEV;
        }
 
+       kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+
+       kfree(gic_kvm_info);
+       gic_kvm_info = NULL;
+
        if (ret)
                return ret;
 
-       kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+       if (!has_mask)
+               return 0;
+
        ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
                                 vgic_maintenance_handler,
                                 "vgic", kvm_get_running_vcpus());
index 11934c2..2c58020 100644 (file)
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
                 * If this causes us to lower the level, we have to also clear
                 * the physical active state, since we will otherwise never be
                 * told when the interrupt becomes asserted again.
+                *
+                * Another case is when the interrupt requires a helping hand
+                * on deactivation (no HW deactivation, for example).
                 */
-               if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
-                       irq->line_level = vgic_get_phys_line_level(irq);
+               if (vgic_irq_is_mapped_level(irq)) {
+                       bool resample = false;
+
+                       if (val & GICH_LR_PENDING_BIT) {
+                               irq->line_level = vgic_get_phys_line_level(irq);
+                               resample = !irq->line_level;
+                       } else if (vgic_irq_needs_resampling(irq) &&
+                                  !(irq->active || irq->pending_latch)) {
+                               resample = true;
+                       }
 
-                       if (!irq->line_level)
+                       if (resample)
                                vgic_irq_set_phys_active(irq, false);
                }
 
@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
        if (irq->group)
                val |= GICH_LR_GROUP1;
 
-       if (irq->hw) {
+       if (irq->hw && !vgic_irq_needs_resampling(irq)) {
                val |= GICH_LR_HW;
                val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
                /*
index 41ecf21..66004f6 100644 (file)
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
                 * If this causes us to lower the level, we have to also clear
                 * the physical active state, since we will otherwise never be
                 * told when the interrupt becomes asserted again.
+                *
+                * Another case is when the interrupt requires a helping hand
+                * on deactivation (no HW deactivation, for example).
                 */
-               if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
-                       irq->line_level = vgic_get_phys_line_level(irq);
+               if (vgic_irq_is_mapped_level(irq)) {
+                       bool resample = false;
+
+                       if (val & ICH_LR_PENDING_BIT) {
+                               irq->line_level = vgic_get_phys_line_level(irq);
+                               resample = !irq->line_level;
+                       } else if (vgic_irq_needs_resampling(irq) &&
+                                  !(irq->active || irq->pending_latch)) {
+                               resample = true;
+                       }
 
-                       if (!irq->line_level)
+                       if (resample)
                                vgic_irq_set_phys_active(irq, false);
                }
 
@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                }
        }
 
-       if (irq->hw) {
+       if (irq->hw && !vgic_irq_needs_resampling(irq)) {
                val |= ICH_LR_HW;
                val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
                /*
index 15b6662..111bff4 100644 (file)
@@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq)
 
        BUG_ON(!irq->hw);
 
-       if (irq->get_input_level)
-               return irq->get_input_level(irq->intid);
+       if (irq->ops && irq->ops->get_input_level)
+               return irq->ops->get_input_level(irq->intid);
 
        WARN_ON(irq_get_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
@@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 /* @irq->irq_lock must be held */
 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                            unsigned int host_irq,
-                           bool (*get_input_level)(int vindid))
+                           struct irq_ops *ops)
 {
        struct irq_desc *desc;
        struct irq_data *data;
@@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
        irq->hw = true;
        irq->host_irq = host_irq;
        irq->hwintid = data->hwirq;
-       irq->get_input_level = get_input_level;
+       irq->ops = ops;
        return 0;
 }
 
@@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 {
        irq->hw = false;
        irq->hwintid = 0;
-       irq->get_input_level = NULL;
+       irq->ops = NULL;
 }
 
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-                         u32 vintid, bool (*get_input_level)(int vindid))
+                         u32 vintid, struct irq_ops *ops)
 {
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;
@@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
        BUG_ON(!irq);
 
        raw_spin_lock_irqsave(&irq->irq_lock, flags);
-       ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
+       ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);
 
index c83bb5a..baee229 100644 (file)
@@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt)
         * barrier to order the cache maintenance against the memcpy.
         */
        memcpy(dst, src, cnt);
-       __clean_dcache_area_pop(dst, cnt);
+       dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt);
 }
 EXPORT_SYMBOL_GPL(memcpy_flushcache);
 
@@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
        rc = raw_copy_from_user(to, from, n);
 
        /* See above */
-       __clean_dcache_area_pop(to, n - rc);
+       dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc);
        return rc;
 }
index 2d881f3..5051b3c 100644 (file)
@@ -15,7 +15,7 @@
 #include <asm/asm-uaccess.h>
 
 /*
- *     flush_icache_range(start,end)
+ *     caches_clean_inval_pou_macro(start,end) [fixup]
  *
  *     Ensure that the I and D caches are coherent within specified region.
  *     This is typically used when code has been written to a memory region,
  *
  *     - start   - virtual start address of region
  *     - end     - virtual end address of region
+ *     - fixup   - optional label to branch to on user fault
  */
-SYM_FUNC_START(__flush_icache_range)
-       /* FALLTHROUGH */
+.macro caches_clean_inval_pou_macro, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+       dsb     ishst
+       b       .Ldc_skip_\@
+alternative_else_nop_endif
+       mov     x2, x0
+       mov     x3, x1
+       dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+       isb
+       b       .Lic_skip_\@
+alternative_else_nop_endif
+       invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
 
 /*
- *     __flush_cache_user_range(start,end)
+ *     caches_clean_inval_pou(start,end)
  *
  *     Ensure that the I and D caches are coherent within specified region.
  *     This is typically used when code has been written to a memory region,
@@ -37,117 +52,103 @@ SYM_FUNC_START(__flush_icache_range)
  *     - start   - virtual start address of region
  *     - end     - virtual end address of region
  */
-SYM_FUNC_START(__flush_cache_user_range)
+SYM_FUNC_START(caches_clean_inval_pou)
+       caches_clean_inval_pou_macro
+       ret
+SYM_FUNC_END(caches_clean_inval_pou)
+
+/*
+ *     caches_clean_inval_user_pou(start,end)
+ *
+ *     Ensure that the I and D caches are coherent within specified region.
+ *     This is typically used when code has been written to a memory region,
+ *     and will be executed.
+ *
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
+ */
+SYM_FUNC_START(caches_clean_inval_user_pou)
        uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
-       dsb     ishst
-       b       7f
-alternative_else_nop_endif
-       dcache_line_size x2, x3
-       sub     x3, x2, #1
-       bic     x4, x0, x3
-1:
-user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
-       add     x4, x4, x2
-       cmp     x4, x1
-       b.lo    1b
-       dsb     ish
 
-7:
-alternative_if ARM64_HAS_CACHE_DIC
-       isb
-       b       8f
-alternative_else_nop_endif
-       invalidate_icache_by_line x0, x1, x2, x3, 9f
-8:     mov     x0, #0
+       caches_clean_inval_pou_macro 2f
+       mov     x0, xzr
 1:
        uaccess_ttbr0_disable x1, x2
        ret
-9:
+2:
        mov     x0, #-EFAULT
        b       1b
-SYM_FUNC_END(__flush_icache_range)
-SYM_FUNC_END(__flush_cache_user_range)
+SYM_FUNC_END(caches_clean_inval_user_pou)
 
 /*
- *     invalidate_icache_range(start,end)
+ *     icache_inval_pou(start,end)
  *
  *     Ensure that the I cache is invalid within specified region.
  *
  *     - start   - virtual start address of region
  *     - end     - virtual end address of region
  */
-SYM_FUNC_START(invalidate_icache_range)
+SYM_FUNC_START(icache_inval_pou)
 alternative_if ARM64_HAS_CACHE_DIC
-       mov     x0, xzr
        isb
        ret
 alternative_else_nop_endif
 
-       uaccess_ttbr0_enable x2, x3, x4
-
-       invalidate_icache_by_line x0, x1, x2, x3, 2f
-       mov     x0, xzr
-1:
-       uaccess_ttbr0_disable x1, x2
+       invalidate_icache_by_line x0, x1, x2, x3
        ret
-2:
-       mov     x0, #-EFAULT
-       b       1b
-SYM_FUNC_END(invalidate_icache_range)
+SYM_FUNC_END(icache_inval_pou)
 
 /*
- *     __flush_dcache_area(kaddr, size)
+ *     dcache_clean_inval_poc(start, end)
  *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
  *     are cleaned and invalidated to the PoC.
  *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
  */
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
        dcache_by_line_op civac, sy, x0, x1, x2, x3
        ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
 
 /*
- *     __clean_dcache_area_pou(kaddr, size)
+ *     dcache_clean_pou(start, end)
  *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
  *     are cleaned to the PoU.
  *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
  */
-SYM_FUNC_START(__clean_dcache_area_pou)
+SYM_FUNC_START(dcache_clean_pou)
 alternative_if ARM64_HAS_CACHE_IDC
        dsb     ishst
        ret
 alternative_else_nop_endif
        dcache_by_line_op cvau, ish, x0, x1, x2, x3
        ret
-SYM_FUNC_END(__clean_dcache_area_pou)
+SYM_FUNC_END(dcache_clean_pou)
 
 /*
- *     __inval_dcache_area(kaddr, size)
+ *     dcache_inval_poc(start, end)
  *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
  *     are invalidated. Any partial lines at the ends of the interval are
  *     also cleaned to PoC to prevent data loss.
  *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - kernel start address of region
+ *     - end     - kernel end address of region
  */
 SYM_FUNC_START_LOCAL(__dma_inv_area)
-SYM_FUNC_START_PI(__inval_dcache_area)
+SYM_FUNC_START_PI(dcache_inval_poc)
        /* FALLTHROUGH */
 
 /*
- *     __dma_inv_area(start, size)
+ *     __dma_inv_area(start, end)
  *     - start   - virtual start address of region
- *     - size    - size in question
+ *     - end     - virtual end address of region
  */
-       add     x1, x1, x0
        dcache_line_size x2, x3
        sub     x3, x2, #1
        tst     x1, x3                          // end cache line aligned?
@@ -165,48 +166,48 @@ SYM_FUNC_START_PI(__inval_dcache_area)
        b.lo    2b
        dsb     sy
        ret
-SYM_FUNC_END_PI(__inval_dcache_area)
+SYM_FUNC_END_PI(dcache_inval_poc)
 SYM_FUNC_END(__dma_inv_area)
 
 /*
- *     __clean_dcache_area_poc(kaddr, size)
+ *     dcache_clean_poc(start, end)
  *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
  *     are cleaned to the PoC.
  *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
  */
 SYM_FUNC_START_LOCAL(__dma_clean_area)
-SYM_FUNC_START_PI(__clean_dcache_area_poc)
+SYM_FUNC_START_PI(dcache_clean_poc)
        /* FALLTHROUGH */
 
 /*
- *     __dma_clean_area(start, size)
+ *     __dma_clean_area(start, end)
  *     - start   - virtual start address of region
- *     - size    - size in question
+ *     - end     - virtual end address of region
  */
        dcache_by_line_op cvac, sy, x0, x1, x2, x3
        ret
-SYM_FUNC_END_PI(__clean_dcache_area_poc)
+SYM_FUNC_END_PI(dcache_clean_poc)
 SYM_FUNC_END(__dma_clean_area)
 
 /*
- *     __clean_dcache_area_pop(kaddr, size)
+ *     dcache_clean_pop(start, end)
  *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
  *     are cleaned to the PoP.
  *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
  */
-SYM_FUNC_START_PI(__clean_dcache_area_pop)
+SYM_FUNC_START_PI(dcache_clean_pop)
        alternative_if_not ARM64_HAS_DCPOP
-       b       __clean_dcache_area_poc
+       b       dcache_clean_poc
        alternative_else_nop_endif
        dcache_by_line_op cvap, sy, x0, x1, x2, x3
        ret
-SYM_FUNC_END_PI(__clean_dcache_area_pop)
+SYM_FUNC_END_PI(dcache_clean_pop)
 
 /*
  *     __dma_flush_area(start, size)
@@ -217,6 +218,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop)
  *     - size    - size in question
  */
 SYM_FUNC_START_PI(__dma_flush_area)
+       add     x1, x0, x1
        dcache_by_line_op civac, sy, x0, x1, x2, x3
        ret
 SYM_FUNC_END_PI(__dma_flush_area)
@@ -228,6 +230,7 @@ SYM_FUNC_END_PI(__dma_flush_area)
  *     - dir   - DMA direction
  */
 SYM_FUNC_START_PI(__dma_map_area)
+       add     x1, x0, x1
        cmp     w2, #DMA_FROM_DEVICE
        b.eq    __dma_inv_area
        b       __dma_clean_area
@@ -240,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area)
  *     - dir   - DMA direction
  */
 SYM_FUNC_START_PI(__dma_unmap_area)
+       add     x1, x0, x1
        cmp     w2, #DMA_TO_DEVICE
        b.ne    __dma_inv_area
        ret
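
A quick reference for the rename (editorial sketch, not part of the patch; addr and size are placeholder names): the renamed maintenance routines take a (start, end) virtual address range rather than the old (addr, size) pair, so C callers convert like this:

	/* before this series */
	__flush_dcache_area(addr, size);

	/* after this series: pass an explicit end address */
	dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);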
index 6d44c02..2aaf950 100644 (file)
 #include <asm/cache.h>
 #include <asm/tlbflush.h>
 
-void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(unsigned long start, unsigned long end)
 {
-       unsigned long addr = (unsigned long)kaddr;
-
        if (icache_is_aliasing()) {
-               __clean_dcache_area_pou(kaddr, len);
-               __flush_icache_all();
+               dcache_clean_pou(start, end);
+               icache_inval_all_pou();
        } else {
                /*
                 * Don't issue kick_all_cpus_sync() after I-cache invalidation
                 * for user mappings.
                 */
-               __flush_icache_range(addr, addr + len);
+               caches_clean_inval_pou(start, end);
        }
 }
 
-static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
-                               unsigned long uaddr, void *kaddr,
-                               unsigned long len)
+static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start,
+                               unsigned long end)
 {
        if (vma->vm_flags & VM_EXEC)
-               sync_icache_aliases(kaddr, len);
+               sync_icache_aliases(start, end);
 }
 
 /*
@@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
                       unsigned long len)
 {
        memcpy(dst, src, len);
-       flush_ptrace_access(vma, page, uaddr, dst, len);
+       flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len);
 }
 
 void __sync_icache_dcache(pte_t pte)
@@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte)
        struct page *page = pte_page(pte);
 
        if (!test_bit(PG_dcache_clean, &page->flags)) {
-               sync_icache_aliases(page_address(page), page_size(page));
+               sync_icache_aliases((unsigned long)page_address(page),
+                                   (unsigned long)page_address(page) +
+                                           page_size(page));
                set_bit(PG_dcache_clean, &page->flags);
        }
 }
@@ -77,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page);
 /*
  * Additional functions defined in assembly.
  */
-EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(caches_clean_inval_pou);
 
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
        /* Ensure order against any prior non-cacheable writes */
        dmb(osh);
-       __clean_dcache_area_pop(addr, size);
+       dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
-       __inval_dcache_area(addr, size);
+       dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
index c179e27..b8c06bd 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/cpuhotplug.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/arm-vgic-info.h>
 #include <linux/irqdomain.h>
 #include <linux/limits.h>
 #include <linux/of_address.h>
@@ -787,6 +788,12 @@ static int aic_init_cpu(unsigned int cpu)
        return 0;
 }
 
+static struct gic_kvm_info vgic_info __initdata = {
+       .type                   = GIC_V3,
+       .no_maint_irq_mask      = true,
+       .no_hw_deactivation     = true,
+};
+
 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
 {
        int i;
@@ -843,6 +850,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
                          "irqchip/apple-aic/ipi:starting",
                          aic_init_cpu, NULL);
 
+       vgic_set_kvm_info(&vgic_info);
+
        pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
                irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
 
index f47b41d..a610821 100644 (file)
 
 static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 
-static const struct gic_kvm_info *gic_kvm_info;
-
-const struct gic_kvm_info *gic_get_kvm_info(void)
-{
-       return gic_kvm_info;
-}
-
-void gic_set_kvm_info(const struct gic_kvm_info *info)
-{
-       BUG_ON(gic_kvm_info != NULL);
-       gic_kvm_info = info;
-}
-
 void gic_enable_of_quirks(const struct device_node *np,
                          const struct gic_quirk *quirks, void *data)
 {
index ccba8b0..27e3d4e 100644 (file)
@@ -28,6 +28,4 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 void gic_enable_of_quirks(const struct device_node *np,
                          const struct gic_quirk *quirks, void *data);
 
-void gic_set_kvm_info(const struct gic_kvm_info *info);
-
 #endif /* _IRQ_GIC_COMMON_H */
index 37a23aa..453fc42 100644 (file)
@@ -103,7 +103,7 @@ EXPORT_SYMBOL(gic_nonsecure_priorities);
 /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
 static refcount_t *ppi_nmi_refs;
 
-static struct gic_kvm_info gic_v3_kvm_info;
+static struct gic_kvm_info gic_v3_kvm_info __initdata;
 static DEFINE_PER_CPU(bool, has_rss);
 
 #define MPIDR_RS(mpidr)                        (((mpidr) & 0xF0UL) >> 4)
@@ -1852,7 +1852,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
 
        gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
        gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
-       gic_set_kvm_info(&gic_v3_kvm_info);
+       vgic_set_kvm_info(&gic_v3_kvm_info);
 }
 
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
@@ -2168,7 +2168,7 @@ static void __init gic_acpi_setup_kvm_info(void)
 
        gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
        gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
-       gic_set_kvm_info(&gic_v3_kvm_info);
+       vgic_set_kvm_info(&gic_v3_kvm_info);
 }
 
 static int __init
index b1d9c22..2de9ec8 100644 (file)
@@ -119,7 +119,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
 static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
 
-static struct gic_kvm_info gic_v2_kvm_info;
+static struct gic_kvm_info gic_v2_kvm_info __initdata;
 
 static DEFINE_PER_CPU(u32, sgi_intid);
 
@@ -1451,7 +1451,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
                return;
 
        if (static_branch_likely(&supports_deactivate_key))
-               gic_set_kvm_info(&gic_v2_kvm_info);
+               vgic_set_kvm_info(&gic_v2_kvm_info);
 }
 
 int __init
@@ -1618,7 +1618,7 @@ static void __init gic_acpi_setup_kvm_info(void)
 
        gic_v2_kvm_info.maint_irq = irq;
 
-       gic_set_kvm_info(&gic_v2_kvm_info);
+       vgic_set_kvm_info(&gic_v2_kvm_info);
 }
 
 static int __init gic_v2_acpi_init(union acpi_subtable_headers *header,
index ec62118..e602d84 100644 (file)
@@ -72,6 +72,9 @@ struct vgic_global {
        bool                    has_gicv4;
        bool                    has_gicv4_1;
 
+       /* Pseudo GICv3 from outer space */
+       bool                    no_hw_deactivation;
+
        /* GIC system register CPU interface */
        struct static_key_false gicv3_cpuif;
 
@@ -89,6 +92,26 @@ enum vgic_irq_config {
        VGIC_CONFIG_LEVEL
 };
 
+/*
+ * Per-irq ops overriding some common behaviours.
+ *
+ * Always called in non-preemptible section and the functions can use
+ * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs.
+ */
+struct irq_ops {
+       /* Per interrupt flags for special-cased interrupts */
+       unsigned long flags;
+
+#define VGIC_IRQ_SW_RESAMPLE   BIT(0)  /* Clear the active state for resampling */
+
+       /*
+        * Callback function pointer to in-kernel devices that can tell us the
+        * state of the input level of mapped level-triggered IRQ faster than
+        * peeking into the physical GIC.
+        */
+       bool (*get_input_level)(int vintid);
+};
+
 struct vgic_irq {
        raw_spinlock_t irq_lock;        /* Protects the content of the struct */
        struct list_head lpi_list;      /* Used to link all LPIs together */
@@ -126,21 +149,17 @@ struct vgic_irq {
        u8 group;                       /* 0 == group 0, 1 == group 1 */
        enum vgic_irq_config config;    /* Level or edge */
 
-       /*
-        * Callback function pointer to in-kernel devices that can tell us the
-        * state of the input level of mapped level-triggered IRQ faster than
-        * peaking into the physical GIC.
-        *
-        * Always called in non-preemptible section and the functions can use
-        * kvm_arm_get_running_vcpu() to get the vcpu pointer for private
-        * IRQs.
-        */
-       bool (*get_input_level)(int vintid);
+       struct irq_ops *ops;
 
        void *owner;                    /* Opaque pointer to reserve an interrupt
                                           for in-kernel devices. */
 };
 
+static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq)
+{
+       return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE);
+}
+
 struct vgic_register_region;
 struct vgic_its;
 
@@ -352,7 +371,7 @@ void kvm_vgic_init_cpu_hardware(void);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
                        bool level, void *owner);
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-                         u32 vintid, bool (*get_input_level)(int vindid));
+                         u32 vintid, struct irq_ops *ops);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
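
To illustrate the new per-IRQ ops (a hypothetical consumer, not taken from this patch; everything outside the vgic API is an assumed name): an in-kernel device that wants software resampling registers an irq_ops with VGIC_IRQ_SW_RESAMPLE set when mapping its interrupt, and the vgic then takes care of clearing the physical active state on deactivation instead of relying on HW deactivation.

	/* Hypothetical device code */
	static bool my_dev_get_input_level(int vintid)
	{
		return my_dev_line_asserted();	/* assumed device-specific helper */
	}

	static struct irq_ops my_dev_irq_ops = {
		.flags		 = VGIC_IRQ_SW_RESAMPLE,
		.get_input_level = my_dev_get_input_level,
	};

	err = kvm_vgic_map_phys_irq(vcpu, host_irq, vintid, &my_dev_irq_ops);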
 
index fa8c045..1177f3a 100644 (file)
@@ -7,8 +7,7 @@
 #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
 #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
 
-#include <linux/types.h>
-#include <linux/ioport.h>
+#include <linux/irqchip/arm-vgic-info.h>
 
 #define GICD_INT_DEF_PRI               0xa0
 #define GICD_INT_DEF_PRI_X4            ((GICD_INT_DEF_PRI << 24) |\
                                        (GICD_INT_DEF_PRI << 8) |\
                                        GICD_INT_DEF_PRI)
 
-enum gic_type {
-       GIC_V2,
-       GIC_V3,
-};
-
-struct gic_kvm_info {
-       /* GIC type */
-       enum gic_type   type;
-       /* Virtual CPU interface */
-       struct resource vcpu;
-       /* Interrupt number */
-       unsigned int    maint_irq;
-       /* Virtual control interface */
-       struct resource vctrl;
-       /* vlpi support */
-       bool            has_v4;
-       /* rvpeid support */
-       bool            has_v4_1;
-};
-
-const struct gic_kvm_info *gic_get_kvm_info(void);
-
 struct irq_domain;
 struct fwnode_handle;
 int gicv2m_init(struct fwnode_handle *parent_handle,
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
new file mode 100644 (file)
index 0000000..a75b2c7
--- /dev/null
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/irqchip/arm-vgic-info.h
+ *
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_VGIC_INFO_H
+#define __LINUX_IRQCHIP_ARM_VGIC_INFO_H
+
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+enum gic_type {
+       /* Full GICv2 */
+       GIC_V2,
+       /* Full GICv3, optionally with v2 compat */
+       GIC_V3,
+};
+
+struct gic_kvm_info {
+       /* GIC type */
+       enum gic_type   type;
+       /* Virtual CPU interface */
+       struct resource vcpu;
+       /* Interrupt number */
+       unsigned int    maint_irq;
+       /* No interrupt mask, no need to use the above field */
+       bool            no_maint_irq_mask;
+       /* Virtual control interface */
+       struct resource vctrl;
+       /* vlpi support */
+       bool            has_v4;
+       /* rvpeid support */
+       bool            has_v4_1;
+       /* Deactivation impaired, subpar stuff */
+       bool            no_hw_deactivation;
+};
+
+#ifdef CONFIG_KVM
+void vgic_set_kvm_info(const struct gic_kvm_info *info);
+#else
+static inline void vgic_set_kvm_info(const struct gic_kvm_info *info) {}
+#endif
+
+#endif
index 524c857..d4838b6 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/aarch64/debug-exceptions
 /aarch64/get-reg-list
-/aarch64/get-reg-list-sve
 /aarch64/vgic_init
 /s390x/memop
 /s390x/resets
index daaee18..a61b016 100644 (file)
@@ -35,7 +35,7 @@ endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
@@ -78,8 +78,8 @@ TEST_GEN_PROGS_x86_64 += memslot_perf_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644 (file)
index 0000000..e5e6c92
--- /dev/null
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MDSCR_KDE      (1 << 13)
+#define MDSCR_MDE      (1 << 15)
+#define MDSCR_SS       (1 << 0)
+
+#define DBGBCR_LEN8    (0xff << 5)
+#define DBGBCR_EXEC    (0x0 << 3)
+#define DBGBCR_EL1     (0x1 << 1)
+#define DBGBCR_E       (0x1 << 0)
+
+#define DBGWCR_LEN8    (0xff << 5)
+#define DBGWCR_RD      (0x1 << 3)
+#define DBGWCR_WR      (0x2 << 3)
+#define DBGWCR_EL1     (0x1 << 1)
+#define DBGWCR_E       (0x1 << 0)
+
+#define SPSR_D         (1 << 9)
+#define SPSR_SS                (1 << 21)
+
+extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define  PC(v)  ((uint64_t)&(v))
+
+static void reset_debug_state(void)
+{
+       asm volatile("msr daifset, #8");
+
+       write_sysreg(osdlr_el1, 0);
+       write_sysreg(oslar_el1, 0);
+       isb();
+
+       write_sysreg(mdscr_el1, 0);
+       /* This test only uses the first bp and wp slot. */
+       write_sysreg(dbgbvr0_el1, 0);
+       write_sysreg(dbgbcr0_el1, 0);
+       write_sysreg(dbgwcr0_el1, 0);
+       write_sysreg(dbgwvr0_el1, 0);
+       isb();
+}
+
+static void install_wp(uint64_t addr)
+{
+       uint32_t wcr;
+       uint32_t mdscr;
+
+       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+       write_sysreg(dbgwcr0_el1, wcr);
+       write_sysreg(dbgwvr0_el1, addr);
+       isb();
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+       write_sysreg(mdscr_el1, mdscr);
+       isb();
+}
+
+static void install_hw_bp(uint64_t addr)
+{
+       uint32_t bcr;
+       uint32_t mdscr;
+
+       bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+       write_sysreg(dbgbcr0_el1, bcr);
+       write_sysreg(dbgbvr0_el1, addr);
+       isb();
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+       write_sysreg(mdscr_el1, mdscr);
+       isb();
+}
+
+static void install_ss(void)
+{
+       uint32_t mdscr;
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+       write_sysreg(mdscr_el1, mdscr);
+       isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(void)
+{
+       GUEST_SYNC(0);
+
+       /* Software-breakpoint */
+       asm volatile("sw_bp: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+       GUEST_SYNC(1);
+
+       /* Hardware-breakpoint */
+       reset_debug_state();
+       install_hw_bp(PC(hw_bp));
+       asm volatile("hw_bp: nop");
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+       GUEST_SYNC(2);
+
+       /* Hardware-breakpoint + svc */
+       reset_debug_state();
+       install_hw_bp(PC(bp_svc));
+       asm volatile("bp_svc: svc #0");
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+       GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+       GUEST_SYNC(3);
+
+       /* Hardware-breakpoint + software-breakpoint */
+       reset_debug_state();
+       install_hw_bp(PC(bp_brk));
+       asm volatile("bp_brk: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+       GUEST_SYNC(4);
+
+       /* Watchpoint */
+       reset_debug_state();
+       install_wp(PC(write_data));
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+       GUEST_SYNC(5);
+
+       /* Single-step */
+       reset_debug_state();
+       install_ss();
+       ss_idx = 0;
+       asm volatile("ss_start:\n"
+                    "mrs x0, esr_el1\n"
+                    "add x0, x0, #1\n"
+                    "msr daifset, #8\n"
+                    : : : "x0");
+       GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+       GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+       GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+       GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+       sw_bp_addr = regs->pc;
+       regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+       hw_bp_addr = regs->pc;
+       regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+       wp_data_addr = read_sysreg(far_el1);
+       wp_addr = regs->pc;
+       regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+       GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+       ss_addr[ss_idx++] = regs->pc;
+       regs->pstate |= SPSR_SS;
+}
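+
+/*
+ * The handler above re-arms software step: taking the step exception clears
+ * PSTATE.SS, so setting SPSR_EL1.SS again makes the eret execute exactly one
+ * more instruction before the next exception is taken.
+ */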
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+       svc_addr = regs->pc;
+}
+
+static int debug_version(struct kvm_vm *vm)
+{
+       uint64_t id_aa64dfr0;
+
+       get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0);
+       return id_aa64dfr0 & 0xf;
+}
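+
+/*
+ * Bits [3:0] of ID_AA64DFR0_EL1 are the DebugVer field; 0x6 means the Armv8
+ * debug architecture is implemented, hence the "< 6" skip check in main().
+ */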
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage;
+
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
+       ucall_init(vm, NULL);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+       if (debug_version(vm) < 6) {
+               print_skip("Armv8 debug architecture not supported.");
+               kvm_vm_free(vm);
+               exit(KSFT_SKIP);
+       }
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_EC_BRK_INS, guest_sw_bp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_EC_WP_CURRENT, guest_wp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_EC_SSTEP_CURRENT, guest_ss_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_EC_SVC64, guest_svc_handler);
+
+       for (stage = 0; stage < 7; stage++) {
+               vcpu_run(vm, VCPU_ID);
+
+               switch (get_ucall(vm, VCPU_ID, &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(uc.args[1] == stage,
+                               "Stage %d: Unexpected sync ucall, got %lx",
+                               stage, (ulong)uc.args[1]);
+                       break;
+               case UCALL_ABORT:
+                       TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+                               (const char *)uc.args[0],
+                               __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
deleted file mode 100644 (file)
index efba766..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define REG_LIST_SVE
-#include "get-reg-list.c"
index 4869321..a16c8f0 100644 (file)
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 #include "kvm_util.h"
 #include "test_util.h"
 #include "processor.h"
 
-#ifdef REG_LIST_SVE
-#define reg_list_sve() (true)
-#else
-#define reg_list_sve() (false)
-#endif
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
 
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+struct reg_sublist {
+       const char *name;
+       long capability;
+       int feature;
+       bool finalize;
+       __u64 *regs;
+       __u64 regs_n;
+       __u64 *rejects_set;
+       __u64 rejects_set_n;
+};
+
+struct vcpu_config {
+       char *name;
+       struct reg_sublist sublists[];
+};
+
+static struct vcpu_config *vcpu_configs[];
+static int vcpu_configs_n;
+
+#define for_each_sublist(c, s)                                                 \
+       for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
 
 #define for_each_reg(i)                                                                \
        for ((i) = 0; (i) < reg_list->n; ++(i))
        for_each_reg_filtered(i)                                                \
                if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
 
+static const char *config_name(struct vcpu_config *c)
+{
+       struct reg_sublist *s;
+       int len = 0;
 
-static struct kvm_reg_list *reg_list;
+       if (c->name)
+               return c->name;
 
-static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
-static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
-static __u64 *blessed_reg, blessed_n;
+       for_each_sublist(c, s)
+               len += strlen(s->name) + 1;
+
+       c->name = malloc(len);
+
+       len = 0;
+       for_each_sublist(c, s) {
+               if (!strcmp(s->name, "base"))
+                       continue;
+               /* strcpy, not strcat: the buffer from malloc() is uninitialized */
+               strcpy(c->name + len, s->name);
+               len += strlen(s->name) + 1;
+               c->name[len - 1] = '+';
+       }
+       c->name[len - 1] = '\0';
+
+       return c->name;
+}
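+
+/*
+ * config_name() skips the always-present "base" sublist and joins the
+ * remaining sublist names with '+', so the configs defined at the bottom of
+ * this file are reported as "vregs", "vregs+pmu", "sve" and "sve+pmu".
+ */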
+
+static bool has_cap(struct vcpu_config *c, long capability)
+{
+       struct reg_sublist *s;
+
+       for_each_sublist(c, s)
+               if (s->capability == capability)
+                       return true;
+       return false;
+}
 
 static bool filter_reg(__u64 reg)
 {
@@ -96,11 +145,13 @@ static const char *str_with_index(const char *template, __u64 index)
        return (const char *)str;
 }
 
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
 #define CORE_REGS_XX_NR_WORDS  2
 #define CORE_SPSR_XX_NR_WORDS  2
 #define CORE_FPREGS_XX_NR_WORDS        4
 
-static const char *core_id_to_str(__u64 id)
+static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
 {
        __u64 core_off = id & ~REG_MASK, idx;
 
@@ -111,7 +162,7 @@ static const char *core_id_to_str(__u64 id)
        case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
             KVM_REG_ARM_CORE_REG(regs.regs[30]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
                return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
        case KVM_REG_ARM_CORE_REG(regs.sp):
                return "KVM_REG_ARM_CORE_REG(regs.sp)";
@@ -126,12 +177,12 @@ static const char *core_id_to_str(__u64 id)
        case KVM_REG_ARM_CORE_REG(spsr[0]) ...
             KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
-               TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
                return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
        case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
             KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
                return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
        case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
                return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
@@ -139,11 +190,11 @@ static const char *core_id_to_str(__u64 id)
                return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
        }
 
-       TEST_FAIL("Unknown core reg id: 0x%llx", id);
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
        return NULL;
 }
 
-static const char *sve_id_to_str(__u64 id)
+static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
 {
        __u64 sve_off, n, i;
 
@@ -153,37 +204,37 @@ static const char *sve_id_to_str(__u64 id)
        sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
        i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
 
-       TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
 
        switch (sve_off) {
        case KVM_REG_ARM64_SVE_ZREG_BASE ...
             KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
                n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
-                           "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
                return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
        case KVM_REG_ARM64_SVE_PREG_BASE ...
             KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
                n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
-                           "Unexpected bits set in SVE PREG id: 0x%llx", id);
+                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
                return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
        case KVM_REG_ARM64_SVE_FFR_BASE:
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
-                           "Unexpected bits set in SVE FFR id: 0x%llx", id);
+                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
                return "KVM_REG_ARM64_SVE_FFR(0)";
        }
 
        return NULL;
 }
 
-static void print_reg(__u64 id)
+static void print_reg(struct vcpu_config *c, __u64 id)
 {
        unsigned op0, op1, crn, crm, op2;
        const char *reg_size = NULL;
 
        TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
-                   "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
 
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U8:
@@ -214,17 +265,17 @@ static void print_reg(__u64 id)
                reg_size = "KVM_REG_SIZE_U2048";
                break;
        default:
-               TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
-                         (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+                         config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
        }
 
        switch (id & KVM_REG_ARM_COPROC_MASK) {
        case KVM_REG_ARM_CORE:
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
                break;
        case KVM_REG_ARM_DEMUX:
                TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
-                           "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
                printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
                       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
                break;
@@ -235,23 +286,23 @@ static void print_reg(__u64 id)
                crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
                op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
                TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
-                           "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
                printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
                break;
        case KVM_REG_ARM_FW:
                TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
-                           "Unexpected bits set in FW reg id: 0x%llx", id);
+                           "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
                printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
                break;
        case KVM_REG_ARM64_SVE:
-               if (reg_list_sve())
-                       printf("\t%s,\n", sve_id_to_str(id));
+               if (has_cap(c, KVM_CAP_ARM_SVE))
+                       printf("\t%s,\n", sve_id_to_str(c, id));
                else
-                       TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+                       TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
                break;
        default:
-               TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
-                         (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+               TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+                         config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
        }
 }
 
@@ -312,56 +363,58 @@ static void core_reg_fixup(void)
        reg_list = tmp;
 }
 
-static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
 {
-       if (reg_list_sve())
-               init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+       struct reg_sublist *s;
+
+       for_each_sublist(c, s)
+               if (s->capability)
+                       init->features[s->feature / 32] |= 1 << (s->feature % 32);
 }
 
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c)
 {
+       struct reg_sublist *s;
        int feature;
 
-       if (reg_list_sve()) {
-               feature = KVM_ARM_VCPU_SVE;
-               vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+       for_each_sublist(c, s) {
+               if (s->finalize) {
+                       feature = s->feature;
+                       vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+               }
        }
 }
 
-static void check_supported(void)
+static void check_supported(struct vcpu_config *c)
 {
-       if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
-               fprintf(stderr, "SVE not available, skipping tests\n");
-               exit(KSFT_SKIP);
+       struct reg_sublist *s;
+
+       for_each_sublist(c, s) {
+               if (s->capability && !kvm_check_cap(s->capability)) {
+                       fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name);
+                       exit(KSFT_SKIP);
+               }
        }
 }
 
-int main(int ac, char **av)
+static bool print_list;
+static bool print_filtered;
+static bool fixup_core_regs;
+
+static void run_test(struct vcpu_config *c)
 {
        struct kvm_vcpu_init init = { .target = -1, };
-       int new_regs = 0, missing_regs = 0, i;
+       int new_regs = 0, missing_regs = 0, i, n;
        int failed_get = 0, failed_set = 0, failed_reject = 0;
-       bool print_list = false, print_filtered = false, fixup_core_regs = false;
        struct kvm_vm *vm;
-       __u64 *vec_regs;
+       struct reg_sublist *s;
 
-       check_supported();
-
-       for (i = 1; i < ac; ++i) {
-               if (strcmp(av[i], "--core-reg-fixup") == 0)
-                       fixup_core_regs = true;
-               else if (strcmp(av[i], "--list") == 0)
-                       print_list = true;
-               else if (strcmp(av[i], "--list-filtered") == 0)
-                       print_filtered = true;
-               else
-                       TEST_FAIL("Unknown option: %s\n", av[i]);
-       }
+       check_supported(c);
 
        vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
-       prepare_vcpu_init(&init);
+       prepare_vcpu_init(c, &init);
        aarch64_vcpu_add_default(vm, 0, &init, NULL);
-       finalize_vcpu(vm, 0);
+       finalize_vcpu(vm, 0, c);
 
        reg_list = vcpu_get_reg_list(vm, 0);
 
@@ -374,10 +427,10 @@ int main(int ac, char **av)
                        __u64 id = reg_list->reg[i];
                        if ((print_list && !filter_reg(id)) ||
                            (print_filtered && filter_reg(id)))
-                               print_reg(id);
+                               print_reg(c, id);
                }
                putchar('\n');
-               return 0;
+               return;
        }
 
        /*
@@ -396,50 +449,52 @@ int main(int ac, char **av)
                        .id = reg_list->reg[i],
                        .addr = (__u64)&addr,
                };
+               bool reject_reg = false;
                int ret;
 
                ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
                if (ret) {
-                       puts("Failed to get ");
-                       print_reg(reg.id);
+                       printf("%s: Failed to get ", config_name(c));
+                       print_reg(c, reg.id);
                        putchar('\n');
                        ++failed_get;
                }
 
                /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
-               if (find_reg(rejects_set, rejects_set_n, reg.id)) {
-                       ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
-                       if (ret != -1 || errno != EPERM) {
-                               printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
-                               print_reg(reg.id);
-                               putchar('\n');
-                               ++failed_reject;
+               for_each_sublist(c, s) {
+                       if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+                               reject_reg = true;
+                               ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+                               if (ret != -1 || errno != EPERM) {
+                                       printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+                                       print_reg(c, reg.id);
+                                       putchar('\n');
+                                       ++failed_reject;
+                               }
+                               break;
                        }
-                       continue;
                }
 
-               ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
-               if (ret) {
-                       puts("Failed to set ");
-                       print_reg(reg.id);
-                       putchar('\n');
-                       ++failed_set;
+               if (!reject_reg) {
+                       ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+                       if (ret) {
+                               printf("%s: Failed to set ", config_name(c));
+                               print_reg(c, reg.id);
+                               putchar('\n');
+                               ++failed_set;
+                       }
                }
        }
 
-       if (reg_list_sve()) {
-               blessed_n = base_regs_n + sve_regs_n;
-               vec_regs = sve_regs;
-       } else {
-               blessed_n = base_regs_n + vregs_n;
-               vec_regs = vregs;
-       }
-
+       for_each_sublist(c, s)
+               blessed_n += s->regs_n;
        blessed_reg = calloc(blessed_n, sizeof(__u64));
-       for (i = 0; i < base_regs_n; ++i)
-               blessed_reg[i] = base_regs[i];
-       for (i = 0; i < blessed_n - base_regs_n; ++i)
-               blessed_reg[base_regs_n + i] = vec_regs[i];
+
+       n = 0;
+       for_each_sublist(c, s) {
+               for (i = 0; i < s->regs_n; ++i)
+                       blessed_reg[n++] = s->regs[i];
+       }
 
        for_each_new_reg(i)
                ++new_regs;
@@ -448,40 +503,141 @@ int main(int ac, char **av)
                ++missing_regs;
 
        if (new_regs || missing_regs) {
-               printf("Number blessed registers: %5lld\n", blessed_n);
-               printf("Number registers:         %5lld\n", reg_list->n);
+               printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+               printf("%s: Number registers:         %5lld\n", config_name(c), reg_list->n);
        }
 
        if (new_regs) {
-               printf("\nThere are %d new registers.\n"
+               printf("\n%s: There are %d new registers.\n"
                       "Consider adding them to the blessed reg "
-                      "list with the following lines:\n\n", new_regs);
+                      "list with the following lines:\n\n", config_name(c), new_regs);
                for_each_new_reg(i)
-                       print_reg(reg_list->reg[i]);
+                       print_reg(c, reg_list->reg[i]);
                putchar('\n');
        }
 
        if (missing_regs) {
-               printf("\nThere are %d missing registers.\n"
-                      "The following lines are missing registers:\n\n", missing_regs);
+               printf("\n%s: There are %d missing registers.\n"
+                      "The following lines are missing registers:\n\n", config_name(c), missing_regs);
                for_each_missing_reg(i)
-                       print_reg(blessed_reg[i]);
+                       print_reg(c, blessed_reg[i]);
                putchar('\n');
        }
 
        TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
-                   "There are %d missing registers; "
+                   "%s: There are %d missing registers; "
                    "%d registers failed get; %d registers failed set; %d registers failed reject",
-                   missing_regs, failed_get, failed_set, failed_reject);
+                   config_name(c), missing_regs, failed_get, failed_set, failed_reject);
 
-       return 0;
+       pr_info("%s: PASS\n", config_name(c));
+       blessed_n = 0;
+       free(blessed_reg);
+       free(reg_list);
+       kvm_vm_free(vm);
+}
+
+static void help(void)
+{
+       struct vcpu_config *c;
+       int i;
+
+       printf(
+       "\n"
+       "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
+       " --config=<selection>        Used to select a specific vcpu configuration for the test/listing\n"
+       "                             '<selection>' may be\n");
+
+       for (i = 0; i < vcpu_configs_n; ++i) {
+               c = vcpu_configs[i];
+               printf(
+       "                               '%s'\n", config_name(c));
+       }
+
+       printf(
+       "\n"
+       " --list                      Print the register list rather than test it (requires --config)\n"
+       " --list-filtered             Print registers that would normally be filtered out (requires --config)\n"
+       " --core-reg-fixup            Needed when running on old kernels with broken core reg listings\n"
+       "\n"
+       );
+}
+
+static struct vcpu_config *parse_config(const char *config)
+{
+       struct vcpu_config *c;
+       int i;
+
+       if (config[8] != '=')
+               help(), exit(1);
+
+       for (i = 0; i < vcpu_configs_n; ++i) {
+               c = vcpu_configs[i];
+               if (strcmp(config_name(c), &config[9]) == 0)
+                       break;
+       }
+
+       if (i == vcpu_configs_n)
+               help(), exit(1);
+
+       return c;
+}
+
+int main(int ac, char **av)
+{
+       struct vcpu_config *c, *sel = NULL;
+       int i, ret = 0;
+       pid_t pid;
+
+       for (i = 1; i < ac; ++i) {
+               if (strcmp(av[i], "--core-reg-fixup") == 0)
+                       fixup_core_regs = true;
+               else if (strncmp(av[i], "--config", 8) == 0)
+                       sel = parse_config(av[i]);
+               else if (strcmp(av[i], "--list") == 0)
+                       print_list = true;
+               else if (strcmp(av[i], "--list-filtered") == 0)
+                       print_filtered = true;
+               else if (strcmp(av[i], "--help") == 0 || strcmp(av[i], "-h") == 0)
+                       help(), exit(0);
+               else
+                       help(), exit(1);
+       }
+
+       if (print_list || print_filtered) {
+               /*
+                * We only want to print the register list of a single config.
+                */
+               if (!sel)
+                       help(), exit(1);
+       }
+
+       for (i = 0; i < vcpu_configs_n; ++i) {
+               c = vcpu_configs[i];
+               if (sel && c != sel)
+                       continue;
+
+               pid = fork();
+
+               if (!pid) {
+                       run_test(c);
+                       exit(0);
+               } else {
+                       int wstatus;
+                       pid_t wpid = wait(&wstatus);
+                       TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+                       if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+                               ret = KSFT_FAIL;
+               }
+       }
+
+       return ret;
 }
 
 /*
  * The current blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
  *
- * The blessed list is up to date with kernel version v5.10-rc5
+ * The blessed list is up to date with kernel version v5.13-rc3
  */
 static __u64 base_regs[] = {
        KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -673,8 +829,6 @@ static __u64 base_regs[] = {
        ARM64_SYS_REG(3, 0, 5, 2, 0),   /* ESR_EL1 */
        ARM64_SYS_REG(3, 0, 6, 0, 0),   /* FAR_EL1 */
        ARM64_SYS_REG(3, 0, 7, 4, 0),   /* PAR_EL1 */
-       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
-       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
        ARM64_SYS_REG(3, 0, 10, 2, 0),  /* MAIR_EL1 */
        ARM64_SYS_REG(3, 0, 10, 3, 0),  /* AMAIR_EL1 */
        ARM64_SYS_REG(3, 0, 12, 0, 0),  /* VBAR_EL1 */
@@ -683,6 +837,16 @@ static __u64 base_regs[] = {
        ARM64_SYS_REG(3, 0, 13, 0, 4),  /* TPIDR_EL1 */
        ARM64_SYS_REG(3, 0, 14, 1, 0),  /* CNTKCTL_EL1 */
        ARM64_SYS_REG(3, 2, 0, 0, 0),   /* CSSELR_EL1 */
+       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
+       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
+       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
        ARM64_SYS_REG(3, 3, 9, 12, 0),  /* PMCR_EL0 */
        ARM64_SYS_REG(3, 3, 9, 12, 1),  /* PMCNTENSET_EL0 */
        ARM64_SYS_REG(3, 3, 9, 12, 2),  /* PMCNTENCLR_EL0 */
@@ -692,8 +856,6 @@ static __u64 base_regs[] = {
        ARM64_SYS_REG(3, 3, 9, 13, 0),  /* PMCCNTR_EL0 */
        ARM64_SYS_REG(3, 3, 9, 14, 0),  /* PMUSERENR_EL0 */
        ARM64_SYS_REG(3, 3, 9, 14, 3),  /* PMOVSSET_EL0 */
-       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
-       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
        ARM64_SYS_REG(3, 3, 14, 8, 0),
        ARM64_SYS_REG(3, 3, 14, 8, 1),
        ARM64_SYS_REG(3, 3, 14, 8, 2),
@@ -757,11 +919,7 @@ static __u64 base_regs[] = {
        ARM64_SYS_REG(3, 3, 14, 15, 5),
        ARM64_SYS_REG(3, 3, 14, 15, 6),
        ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
-       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
-       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
-       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
 };
-static __u64 base_regs_n = ARRAY_SIZE(base_regs);
 
 static __u64 vregs[] = {
        KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
@@ -797,7 +955,6 @@ static __u64 vregs[] = {
        KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
        KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
 };
-static __u64 vregs_n = ARRAY_SIZE(vregs);
 
 static __u64 sve_regs[] = {
        KVM_REG_ARM64_SVE_VLS,
@@ -852,11 +1009,57 @@ static __u64 sve_regs[] = {
        KVM_REG_ARM64_SVE_FFR(0),
        ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
 };
-static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
 
-static __u64 rejects_set[] = {
-#ifdef REG_LIST_SVE
+static __u64 sve_rejects_set[] = {
        KVM_REG_ARM64_SVE_VLS,
-#endif
 };
-static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
+
+#define BASE_SUBLIST \
+       { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+       { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+       { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+       { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+         .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+         .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+
+static struct vcpu_config vregs_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_config vregs_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_config sve_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVE_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_config sve_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVE_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_config *vcpu_configs[] = {
+       &vregs_config,
+       &vregs_pmu_config,
+       &sve_config,
+       &sve_pmu_config,
+};
+static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
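+
+/*
+ * Covering another register group only needs a new sublist and config here,
+ * e.g. (hypothetical names, assuming a matching capability/feature pair):
+ *
+ *     static __u64 foo_regs[] = { ... };
+ *     #define FOO_SUBLIST \
+ *             { "foo", .capability = KVM_CAP_ARM_FOO, .feature = KVM_ARM_VCPU_FOO, \
+ *               .regs = foo_regs, .regs_n = ARRAY_SIZE(foo_regs), }
+ *
+ * plus a vcpu_config listing BASE_SUBLIST, FOO_SUBLIST, {0} and an entry for
+ * it in vcpu_configs[] above.
+ */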
index b7fa0c8..27dc5c2 100644 (file)
@@ -8,16 +8,20 @@
 #define SELFTEST_KVM_PROCESSOR_H
 
 #include "kvm_util.h"
+#include <linux/stringify.h>
 
 
 #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                           KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 
-#define CPACR_EL1      3, 0,  1, 0, 2
-#define TCR_EL1                3, 0,  2, 0, 2
-#define MAIR_EL1       3, 0, 10, 2, 0
-#define TTBR0_EL1      3, 0,  2, 0, 0
-#define SCTLR_EL1      3, 0,  1, 0, 0
+#define CPACR_EL1               3, 0,  1, 0, 2
+#define TCR_EL1                 3, 0,  2, 0, 2
+#define MAIR_EL1                3, 0, 10, 2, 0
+#define TTBR0_EL1               3, 0,  2, 0, 0
+#define SCTLR_EL1               3, 0,  1, 0, 0
+#define VBAR_EL1                3, 0, 12, 0, 0
+
+#define ID_AA64DFR0_EL1         3, 0,  0, 5, 0
 
 /*
  * Default MAIR
@@ -56,4 +60,73 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
                              struct kvm_vcpu_init *init, void *guest_code);
 
+struct ex_regs {
+       u64 regs[31];
+       u64 sp;
+       u64 pc;
+       u64 pstate;
+};
+
+#define VECTOR_NUM     16
+
+enum {
+       VECTOR_SYNC_CURRENT_SP0,
+       VECTOR_IRQ_CURRENT_SP0,
+       VECTOR_FIQ_CURRENT_SP0,
+       VECTOR_ERROR_CURRENT_SP0,
+
+       VECTOR_SYNC_CURRENT,
+       VECTOR_IRQ_CURRENT,
+       VECTOR_FIQ_CURRENT,
+       VECTOR_ERROR_CURRENT,
+
+       VECTOR_SYNC_LOWER_64,
+       VECTOR_IRQ_LOWER_64,
+       VECTOR_FIQ_LOWER_64,
+       VECTOR_ERROR_LOWER_64,
+
+       VECTOR_SYNC_LOWER_32,
+       VECTOR_IRQ_LOWER_32,
+       VECTOR_FIQ_LOWER_32,
+       VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+                          (v) == VECTOR_SYNC_CURRENT     || \
+                          (v) == VECTOR_SYNC_LOWER_64    || \
+                          (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM             64
+#define ESR_EC_SHIFT           26
+#define ESR_EC_MASK            (ESR_EC_NUM - 1)
+
+#define ESR_EC_SVC64           0x15
+#define ESR_EC_HW_BP_CURRENT   0x31
+#define ESR_EC_SSTEP_CURRENT   0x33
+#define ESR_EC_WP_CURRENT      0x35
+#define ESR_EC_BRK_INS         0x3c
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+               int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+               int vector, int ec, handler_fn handler);
+
+#define write_sysreg(reg, val)                                           \
+({                                                                       \
+       u64 __val = (u64)(val);                                           \
+       asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val));   \
+})
+
+#define read_sysreg(reg)                                                 \
+({     u64 val;                                                          \
+       asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
+       val;                                                              \
+})
+
+#define isb()  asm volatile("isb" : : : "memory")
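+
+/*
+ * Example usage (see the debug-exceptions test): the system register name is
+ * passed as a bare token, e.g.
+ *
+ *     write_sysreg(mdscr_el1, read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE);
+ *     isb();
+ */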
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
index fcd8e38..ce49e22 100644 (file)
@@ -349,6 +349,7 @@ enum {
        UCALL_SYNC,
        UCALL_ABORT,
        UCALL_DONE,
+       UCALL_UNHANDLED,
 };
 
 #define UCALL_MAX_ARGS 6
@@ -367,26 +368,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
                                ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)      ucall(UCALL_SYNC, 2, "hello", stage)
 #define GUEST_DONE()           ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do {      \
-       if (!(_condition))                                      \
-               ucall(UCALL_ABORT, 2 + _nargs,                  \
-                       "Failed guest assert: "                 \
-                       #_condition, __LINE__, _args);          \
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do {    \
+       if (!(_condition))                                              \
+               ucall(UCALL_ABORT, 2 + _nargs,                          \
+                       "Failed guest assert: "                         \
+                       _condstr, __LINE__, _args);                     \
 } while (0)
 
 #define GUEST_ASSERT(_condition) \
-       __GUEST_ASSERT((_condition), 0, 0)
+       __GUEST_ASSERT(_condition, #_condition, 0, 0)
 
 #define GUEST_ASSERT_1(_condition, arg1) \
-       __GUEST_ASSERT((_condition), 1, (arg1))
+       __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
 
 #define GUEST_ASSERT_2(_condition, arg1, arg2) \
-       __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+       __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
 
 #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
-       __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+       __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
 
 #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
-       __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+       __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
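+
+/*
+ * Example: GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)) aborts with the string
+ * "hw_bp_addr == PC(hw_bp)", the line number and both values, which the host
+ * can report from its UCALL_ABORT case.
+ */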
 
 #endif /* SELFTEST_KVM_UTIL_H */
index 0b30b4e..92a62c6 100644 (file)
@@ -53,8 +53,6 @@
 #define CPUID_PKU              (1ul << 3)
 #define CPUID_LA57             (1ul << 16)
 
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
-
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
        u64 rax;
@@ -391,7 +389,7 @@ struct ex_regs {
 
 void vm_init_descriptor_tables(struct kvm_vm *vm);
 void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
                        void (*handler)(struct ex_regs *));
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644 (file)
index 0000000..0e443ea
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+       add     sp, sp, #-16 * 17
+
+       stp     x0, x1, [sp, #16 * 0]
+       stp     x2, x3, [sp, #16 * 1]
+       stp     x4, x5, [sp, #16 * 2]
+       stp     x6, x7, [sp, #16 * 3]
+       stp     x8, x9, [sp, #16 * 4]
+       stp     x10, x11, [sp, #16 * 5]
+       stp     x12, x13, [sp, #16 * 6]
+       stp     x14, x15, [sp, #16 * 7]
+       stp     x16, x17, [sp, #16 * 8]
+       stp     x18, x19, [sp, #16 * 9]
+       stp     x20, x21, [sp, #16 * 10]
+       stp     x22, x23, [sp, #16 * 11]
+       stp     x24, x25, [sp, #16 * 12]
+       stp     x26, x27, [sp, #16 * 13]
+       stp     x28, x29, [sp, #16 * 14]
+
+       /*
+        * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+        * at it. It will _not_ be used to restore the sp on return from the
+        * exception, so handlers cannot update it.
+        */
+       add     x1, sp, #16 * 17
+       stp     x30, x1, [sp, #16 * 15] /* x30, SP */
+
+       mrs     x1, elr_el1
+       mrs     x2, spsr_el1
+       stp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
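+
+/*
+ * The 17 16-byte pairs stored above lay out exactly like struct ex_regs
+ * (x0-x30, sp, pc, pstate), so a handler can be handed sp as the
+ * struct ex_regs pointer with a single "mov x0, sp".
+ */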
+
+.macro restore_registers
+       ldp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+       msr     elr_el1, x1
+       msr     spsr_el1, x2
+
+       /* sp is not restored */
+       ldp     x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+       ldp     x28, x29, [sp, #16 * 14]
+       ldp     x26, x27, [sp, #16 * 13]
+       ldp     x24, x25, [sp, #16 * 12]
+       ldp     x22, x23, [sp, #16 * 11]
+       ldp     x20, x21, [sp, #16 * 10]
+       ldp     x18, x19, [sp, #16 * 9]
+       ldp     x16, x17, [sp, #16 * 8]
+       ldp     x14, x15, [sp, #16 * 7]
+       ldp     x12, x13, [sp, #16 * 6]
+       ldp     x10, x11, [sp, #16 * 5]
+       ldp     x8, x9, [sp, #16 * 4]
+       ldp     x6, x7, [sp, #16 * 3]
+       ldp     x4, x5, [sp, #16 * 2]
+       ldp     x2, x3, [sp, #16 * 1]
+       ldp     x0, x1, [sp, #16 * 0]
+
+       add     sp, sp, #16 * 17
+
+       eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set   vector, 0
+
+/*
+ * Build an exception handler for this vector and append a branch to it
+ * to the vectors table (keeping each entry 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+       save_registers
+       mov     x0, sp
+       mov     x1, #vector
+       bl      route_exception
+       restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+       b       handler_\label
+.popsection
+
+.set   vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+       mov     x0, #vector
+       mov     x1, #0 /* ec */
+       mov     x2, #0 /* valid_ec */
+       b       kvm_exit_unexpected_exception
+.popsection
+
+.set   vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+       HANDLER_INVALID                         // Synchronous EL1t
+       HANDLER_INVALID                         // IRQ EL1t
+       HANDLER_INVALID                         // FIQ EL1t
+       HANDLER_INVALID                         // Error EL1t
+
+       HANDLER el1h_sync                       // Synchronous EL1h
+       HANDLER el1h_irq                        // IRQ EL1h
+       HANDLER el1h_fiq                        // FIQ EL1h
+       HANDLER el1h_error                      // Error EL1h
+
+       HANDLER el0_sync_64                     // Synchronous 64-bit EL0
+       HANDLER el0_irq_64                      // IRQ 64-bit EL0
+       HANDLER el0_fiq_64                      // FIQ 64-bit EL0
+       HANDLER el0_error_64                    // Error 64-bit EL0
+
+       HANDLER el0_sync_32                     // Synchronous 32-bit EL0
+       HANDLER el0_irq_32                      // IRQ 32-bit EL0
+       HANDLER el0_fiq_32                      // FIQ 32-bit EL0
+       HANDLER el0_error_32                    // Error 32-bit EL0
index cee92d4..48b55c9 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/compiler.h>
+#include <assert.h>
 
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -14,6 +15,8 @@
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR         0x180000
 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN    0xac0000
 
+static vm_vaddr_t exception_handlers;
+
 static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
 {
        return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -334,6 +337,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
        va_end(ap);
 }
 
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+       ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+       while (1)
+               ;
+}
+
 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 {
+       struct ucall uc;
+
+       if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+               return;
+
+       if (uc.args[2]) /* valid_ec */ {
+               assert(VECTOR_IS_SYNC(uc.args[0]));
+               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+                         uc.args[0], uc.args[1]);
+       } else {
+               assert(!VECTOR_IS_SYNC(uc.args[0]));
+               TEST_FAIL("Unexpected exception (vector:0x%lx)",
+                         uc.args[0]);
+       }
+}
+
+struct handlers {
+       handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       extern char vectors;
+
+       set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+       struct handlers *handlers = (struct handlers *)exception_handlers;
+       bool valid_ec;
+       int ec = 0;
+
+       switch (vector) {
+       case VECTOR_SYNC_CURRENT:
+       case VECTOR_SYNC_LOWER_64:
+               ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+               valid_ec = true;
+               break;
+       case VECTOR_IRQ_CURRENT:
+       case VECTOR_IRQ_LOWER_64:
+       case VECTOR_FIQ_CURRENT:
+       case VECTOR_FIQ_LOWER_64:
+       case VECTOR_ERROR_CURRENT:
+       case VECTOR_ERROR_LOWER_64:
+               ec = 0;
+               valid_ec = false;
+               break;
+       default:
+               valid_ec = false;
+               goto unexpected_exception;
+       }
+
+       if (handlers && handlers->exception_handlers[vector][ec])
+               return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+       kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
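+
+/*
+ * Only synchronous exceptions carry a meaningful ESR_EL1.EC, which is why
+ * vm_install_sync_handler() takes an EC while vm_install_exception_handler()
+ * stores its handler in EC slot 0.
+ */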
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+       vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
+                       vm->page_size, 0, 0);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+                        void (*handler)(struct ex_regs *))
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(VECTOR_IS_SYNC(vector));
+       assert(vector < VECTOR_NUM);
+       assert(ec < ESR_EC_NUM);
+       handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                        void (*handler)(struct ex_regs *))
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(!VECTOR_IS_SYNC(vector));
+       assert(vector < VECTOR_NUM);
+       handlers->exception_handlers[vector][0] = handler;
 }
index efe2350..a217515 100644 (file)
@@ -1201,7 +1201,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
 
 void kvm_exit_unexpected_vector(uint32_t value)
 {
-       outl(UNEXPECTED_VECTOR_PORT, value);
+       ucall(UCALL_UNHANDLED, 1, value);
 }
 
 void route_exception(struct ex_regs *regs)
@@ -1244,8 +1244,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
        *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
 }
 
-void vm_handle_exception(struct kvm_vm *vm, int vector,
-                        void (*handler)(struct ex_regs *))
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                              void (*handler)(struct ex_regs *))
 {
        vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
 
@@ -1254,16 +1254,13 @@ void vm_handle_exception(struct kvm_vm *vm, int vector,
 
 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 {
-       if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
-               && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
-               && vcpu_state(vm, vcpuid)->io.size == 4) {
-               /* Grab pointer to io data */
-               uint32_t *data = (void *)vcpu_state(vm, vcpuid)
-                       + vcpu_state(vm, vcpuid)->io.data_offset;
-
-               TEST_ASSERT(false,
-                           "Unexpected vectored event in guest (vector:0x%x)",
-                           *data);
+       struct ucall uc;
+
+       if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
+               uint64_t vector = uc.args[0];
+
+               TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
+                         vector);
        }
 }
 
index 63096ce..0864b2e 100644 (file)
@@ -154,8 +154,8 @@ int main(int argc, char *argv[])
 
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vm, VCPU_ID);
-       vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
-       vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
 
        pr_info("Running L1 which uses EVMCS to run L2\n");
 
index 732b244..04ed975 100644 (file)
@@ -227,7 +227,7 @@ int main(void)
 
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vm, VCPU_ID);
-       vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 
        enter_guest(vm);
        kvm_vm_free(vm);
index d672f0a..fc03a15 100644 (file)
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
+struct ucall uc_none = {
+       .cmd = UCALL_NONE,
+};
+
 /*
  * ucall is embedded here to protect against compiler reshuffling registers
  * before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,7 +38,8 @@ void guest_code(void)
        asm volatile("1: in %[port], %%al\n"
                     "add $0x1, %%rbx\n"
                     "jmp 1b"
-                    : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+                    : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+                    : "rax", "rbx");
 }
 
 static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
index e357d8e..5a6a662 100644 (file)
 #define rounded_rdmsr(x)       ROUND(rdmsr(x))
 #define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vm, 0, x))
 
-#define GUEST_ASSERT_EQ(a, b) do {                             \
-       __typeof(a) _a = (a);                                   \
-       __typeof(b) _b = (b);                                   \
-       if (_a != _b)                                           \
-                ucall(UCALL_ABORT, 4,                          \
-                        "Failed guest assert: "                        \
-                        #a " == " #b, __LINE__, _a, _b);       \
-  } while(0)
-
 static void guest_code(void)
 {
        u64 val = 0;
index 72c0d07..e3e20e8 100644 (file)
@@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) {
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vm, VCPU_ID);
 
-       vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 
        /* Process guest code userspace exits. */
        run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
@@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) {
        run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
        run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
 
-       vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
        run_guest(vm);
-       vm_handle_exception(vm, UD_VECTOR, NULL);
+       vm_install_exception_handler(vm, UD_VECTOR, NULL);
 
        if (process_ucall(vm) != UCALL_DONE) {
-               vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+               vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
 
                /* Process emulated rdmsr and wrmsr instructions. */
                run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
index 2f964cd..ed27269 100644 (file)
@@ -462,7 +462,7 @@ int main(int argc, char *argv[])
 
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
-       vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
 
        virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);