Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Nov 2020 21:58:10 +0000 (13:58 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Nov 2020 21:58:10 +0000 (13:58 -0800)
Pull kvm fixes from Paolo Bonzini:
 "ARM:
   - fix compilation error when PMD and PUD are folded
   - fix regression in reads-as-zero behaviour of ID_AA64ZFR0_EL1
   - add aarch64 get-reg-list test

  x86:
   - fix semantic conflict between two series merged for 5.10
   - fix (and test) enforcement of paravirtual cpuid features

  selftests:
   - various cleanups to memory management selftests
   - new selftest measuring dirty logging performance"
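
For reference, the ID_AA64ZFR0_EL1 behaviour can be observed from userspace
with KVM_GET_ONE_REG: after these patches a vCPU created without SVE reads
the register back as zero instead of getting an error. The snippet below is
a minimal, hypothetical sketch only (it is not part of this merge, assumes an
arm64 host, and omits error handling; the real coverage lives in
tools/testing/selftests/kvm/aarch64/get-reg-list.c):

  /* Hypothetical illustration; not taken from the patches in this merge. */
  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  int main(void)
  {
          int kvm  = open("/dev/kvm", O_RDWR);
          int vm   = ioctl(kvm, KVM_CREATE_VM, 0);
          int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
          struct kvm_vcpu_init init;
          uint64_t val = ~0ULL;
          struct kvm_one_reg reg = {
                  /* ID_AA64ZFR0_EL1: Op0=3, Op1=0, CRn=0, CRm=4, Op2=4 */
                  .id   = ARM64_SYS_REG(3, 0, 0, 4, 4),
                  .addr = (uint64_t)&val,
          };

          ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);  /* leaves SVE disabled */
          ioctl(vcpu, KVM_ARM_VCPU_INIT, &init);
          ioctl(vcpu, KVM_GET_ONE_REG, &reg);          /* expect val == 0 */
          printf("ID_AA64ZFR0_EL1 = 0x%llx\n", (unsigned long long)val);
          return 0;
  }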

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  KVM: selftests: allow two iterations of dirty_log_perf_test
  KVM: selftests: Introduce the dirty log perf test
  KVM: selftests: Make the number of vcpus global
  KVM: selftests: Make the per vcpu memory size global
  KVM: selftests: Drop pointless vm_create wrapper
  KVM: selftests: Add wrfract to common guest code
  KVM: selftests: Simplify demand_paging_test with timespec_diff_now
  KVM: selftests: Remove address rounding in guest code
  KVM: selftests: Factor code out of demand_paging_test
  KVM: selftests: Use a single binary for dirty/clear log test
  KVM: selftests: Always clear dirty bitmap after iteration
  KVM: selftests: Add blessed SVE registers to get-reg-list
  KVM: selftests: Add aarch64 get-reg-list test
  selftests: kvm: test enforcement of paravirtual cpuid features
  selftests: kvm: Add exception handling to selftests
  selftests: kvm: Clear uc so UCALL_NONE is being properly reported
  selftests: kvm: Fix the segment descriptor layout to match the actual layout
  KVM: x86: handle MSR_IA32_DEBUGCTLMSR with report_ignored_msrs
  kvm: x86: request masterclock update any time guest uses different msr
  kvm: x86: ensure pv_cpuid.features is initialized when enabling cap
  ...
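
The paravirtual CPUID enforcement exercised by the new x86_64/kvm_pv_test is
opt-in per vCPU through KVM_ENABLE_CAP. A hypothetical sketch of turning it
on follows (not part of this merge; vcpu_fd is assumed to come from
KVM_CREATE_VCPU in the usual way). Once enabled, PV MSRs and hypercalls whose
KVM_CPUID_FEATURES bits were masked out of the guest CPUID are refused:

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Hypothetical helper, not from the patches in this merge. */
  static int enable_pv_cpuid_enforcement(int vcpu_fd)
  {
          struct kvm_enable_cap cap;

          memset(&cap, 0, sizeof(cap));
          cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
          cap.args[0] = 1;        /* non-zero enables enforcement */

          /* Per-vCPU capability, so this is a vCPU ioctl. */
          return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
  }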

32 files changed:
Documentation/virt/kvm/api.rst
arch/arm64/kvm/mmu.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/sys_regs.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c [new file with mode: 0644]
tools/testing/selftests/kvm/aarch64/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/clear_dirty_log_test.c [deleted file]
tools/testing/selftests/kvm/demand_paging_test.c
tools/testing/selftests/kvm/dirty_log_perf_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/dirty_log_test.c
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/perf_test_util.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/aarch64/ucall.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/kvm_util_internal.h
tools/testing/selftests/kvm/lib/s390x/processor.c
tools/testing/selftests/kvm/lib/s390x/ucall.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/kvm/lib/x86_64/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/ucall.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c [new file with mode: 0644]

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 36d5f1f..e00a66d 100644 (file)
@@ -6367,7 +6367,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.25 KVM_X86_SET_MSR_FILTER
+8.27 KVM_X86_SET_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
@@ -6381,8 +6381,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
 
-
-8.26 KVM_CAP_ENFORCE_PV_CPUID
+8.28 KVM_CAP_ENFORCE_PV_CPUID
 -----------------------------
 
 Architectures: x86
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 57972bd..1a01da9 100644 (file)
@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        }
 
        switch (vma_shift) {
+#ifndef __PAGETABLE_PMD_FOLDED
        case PUD_SHIFT:
                if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
                        break;
                fallthrough;
+#endif
        case CONT_PMD_SHIFT:
                vma_shift = PMD_SHIFT;
                fallthrough;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index fb12d3e..d0868d0 100644 (file)
@@ -1069,7 +1069,7 @@ static bool trap_ptrauth(struct kvm_vcpu *vcpu,
 static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu,
                        const struct sys_reg_desc *rd)
 {
-       return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST;
+       return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN;
 }
 
 #define __PTRAUTH_KEY(k)                                               \
@@ -1153,6 +1153,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
        return val;
 }
 
+static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
+                                 const struct sys_reg_desc *r)
+{
+       u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+                        (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+
+       switch (id) {
+       case SYS_ID_AA64ZFR0_EL1:
+               if (!vcpu_has_sve(vcpu))
+                       return REG_RAZ;
+               break;
+       }
+
+       return 0;
+}
+
 /* cpufeature ID register access trap handlers */
 
 static bool __access_id_reg(struct kvm_vcpu *vcpu,
@@ -1171,7 +1187,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
                          struct sys_reg_params *p,
                          const struct sys_reg_desc *r)
 {
-       return __access_id_reg(vcpu, p, r, false);
+       bool raz = sysreg_visible_as_raz(vcpu, r);
+
+       return __access_id_reg(vcpu, p, r, raz);
 }
 
 static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
@@ -1192,72 +1210,7 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
        if (vcpu_has_sve(vcpu))
                return 0;
 
-       return REG_HIDDEN_USER | REG_HIDDEN_GUEST;
-}
-
-/* Visibility overrides for SVE-specific ID registers */
-static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu,
-                                     const struct sys_reg_desc *rd)
-{
-       if (vcpu_has_sve(vcpu))
-               return 0;
-
-       return REG_HIDDEN_USER;
-}
-
-/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */
-static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu)
-{
-       if (!vcpu_has_sve(vcpu))
-               return 0;
-
-       return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1);
-}
-
-static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
-                                  struct sys_reg_params *p,
-                                  const struct sys_reg_desc *rd)
-{
-       if (p->is_write)
-               return write_to_read_only(vcpu, p, rd);
-
-       p->regval = guest_id_aa64zfr0_el1(vcpu);
-       return true;
-}
-
-static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
-               const struct sys_reg_desc *rd,
-               const struct kvm_one_reg *reg, void __user *uaddr)
-{
-       u64 val;
-
-       if (WARN_ON(!vcpu_has_sve(vcpu)))
-               return -ENOENT;
-
-       val = guest_id_aa64zfr0_el1(vcpu);
-       return reg_to_user(uaddr, &val, reg->id);
-}
-
-static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
-               const struct sys_reg_desc *rd,
-               const struct kvm_one_reg *reg, void __user *uaddr)
-{
-       const u64 id = sys_reg_to_index(rd);
-       int err;
-       u64 val;
-
-       if (WARN_ON(!vcpu_has_sve(vcpu)))
-               return -ENOENT;
-
-       err = reg_from_user(&val, uaddr, id);
-       if (err)
-               return err;
-
-       /* This is what we mean by invariant: you can't change it. */
-       if (val != guest_id_aa64zfr0_el1(vcpu))
-               return -EINVAL;
-
-       return 0;
+       return REG_HIDDEN;
 }
 
 /*
@@ -1299,13 +1252,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
 static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
                      const struct kvm_one_reg *reg, void __user *uaddr)
 {
-       return __get_id_reg(vcpu, rd, uaddr, false);
+       bool raz = sysreg_visible_as_raz(vcpu, rd);
+
+       return __get_id_reg(vcpu, rd, uaddr, raz);
 }
 
 static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
                      const struct kvm_one_reg *reg, void __user *uaddr)
 {
-       return __set_id_reg(vcpu, rd, uaddr, false);
+       bool raz = sysreg_visible_as_raz(vcpu, rd);
+
+       return __set_id_reg(vcpu, rd, uaddr, raz);
 }
 
 static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1397,6 +1354,7 @@ static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
        .access = access_id_reg,                \
        .get_user = get_id_reg,                 \
        .set_user = set_id_reg,                 \
+       .visibility = id_visibility,            \
 }
 
 /*
@@ -1518,7 +1476,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        ID_SANITISED(ID_AA64PFR1_EL1),
        ID_UNALLOCATED(4,2),
        ID_UNALLOCATED(4,3),
-       { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility },
+       ID_SANITISED(ID_AA64ZFR0_EL1),
        ID_UNALLOCATED(4,5),
        ID_UNALLOCATED(4,6),
        ID_UNALLOCATED(4,7),
@@ -2185,7 +2143,7 @@ static void perform_access(struct kvm_vcpu *vcpu,
        trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
 
        /* Check for regs disabled by runtime config */
-       if (sysreg_hidden_from_guest(vcpu, r)) {
+       if (sysreg_hidden(vcpu, r)) {
                kvm_inject_undefined(vcpu);
                return;
        }
@@ -2684,7 +2642,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
                return get_invariant_sys_reg(reg->id, uaddr);
 
        /* Check for regs disabled by runtime config */
-       if (sysreg_hidden_from_user(vcpu, r))
+       if (sysreg_hidden(vcpu, r))
                return -ENOENT;
 
        if (r->get_user)
@@ -2709,7 +2667,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
                return set_invariant_sys_reg(reg->id, uaddr);
 
        /* Check for regs disabled by runtime config */
-       if (sysreg_hidden_from_user(vcpu, r))
+       if (sysreg_hidden(vcpu, r))
                return -ENOENT;
 
        if (r->set_user)
@@ -2780,7 +2738,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
        if (!(rd->reg || rd->get_user))
                return 0;
 
-       if (sysreg_hidden_from_user(vcpu, rd))
+       if (sysreg_hidden(vcpu, rd))
                return 0;
 
        if (!copy_reg_to_user(rd, uind))
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 5a6fc30..0f95964 100644 (file)
@@ -59,8 +59,8 @@ struct sys_reg_desc {
                                   const struct sys_reg_desc *rd);
 };
 
-#define REG_HIDDEN_USER                (1 << 0) /* hidden from userspace ioctls */
-#define REG_HIDDEN_GUEST       (1 << 1) /* hidden from guest */
+#define REG_HIDDEN             (1 << 0) /* hidden from userspace and guest */
+#define REG_RAZ                        (1 << 1) /* RAZ from userspace and guest */
 
 static __printf(2, 3)
 inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -111,22 +111,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
        __vcpu_sys_reg(vcpu, r->reg) = r->val;
 }
 
-static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu,
-                                           const struct sys_reg_desc *r)
+static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
+                                const struct sys_reg_desc *r)
 {
        if (likely(!r->visibility))
                return false;
 
-       return r->visibility(vcpu, r) & REG_HIDDEN_GUEST;
+       return r->visibility(vcpu, r) & REG_HIDDEN;
 }
 
-static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu,
-                                          const struct sys_reg_desc *r)
+static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
+                                        const struct sys_reg_desc *r)
 {
        if (likely(!r->visibility))
                return false;
 
-       return r->visibility(vcpu, r) & REG_HIDDEN_USER;
+       return r->visibility(vcpu, r) & REG_RAZ;
 }
 
 static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 06a278b..d50041f 100644 (file)
@@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
        return 0;
 }
 
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+
+       /*
+        * save the feature bitmap to avoid cpuid lookup for every PV
+        * operation
+        */
+       if (best)
+               vcpu->arch.pv_cpuid.features = best->eax;
+}
+
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
                (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
                best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
 
-       /*
-        * save the feature bitmap to avoid cpuid lookup for every PV
-        * operation
-        */
-       if (best)
-               vcpu->arch.pv_cpuid.features = best->eax;
-
        if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
                best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
                if (best)
@@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                vcpu->arch.guest_supported_xcr0 =
                        (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
 
+       kvm_update_pv_runtime(vcpu);
+
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
        kvm_mmu_reset_context(vcpu);
 
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index bf85779..f7a6e8f 100644 (file)
@@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
 void kvm_set_cpu_caps(void);
 
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
                                              u32 function, u32 index);
 int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1f96adf..5bb1939 100644 (file)
@@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
        } else {
                rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-               while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
-                       desc = desc->more;
+               while (desc->sptes[PTE_LIST_EXT-1]) {
                        count += PTE_LIST_EXT;
-               }
-               if (desc->sptes[PTE_LIST_EXT-1]) {
-                       desc->more = mmu_alloc_pte_list_desc(vcpu);
+
+                       if (!desc->more) {
+                               desc->more = mmu_alloc_pte_list_desc(vcpu);
+                               desc = desc->more;
+                               break;
+                       }
                        desc = desc->more;
                }
                for (i = 0; desc->sptes[i]; ++i)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f5ede41..447edc0 100644 (file)
@@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache;
 
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
- * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want
- * to fail the caller.
+ * Return true if we want to ignore/silent this failed msr access.
  */
-static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
-                                u64 data, bool write)
+static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
+                                 u64 data, bool write)
 {
        const char *op = write ? "wrmsr" : "rdmsr";
 
@@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
                        kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
                                      op, msr, data);
                /* Mask the error */
-               return 0;
+               return true;
        } else {
                kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
                                      op, msr, data);
-               return -ENOENT;
+               return false;
        }
 }
 
@@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
        if (r == KVM_MSR_RET_INVALID) {
                /* Unconditionally clear the output for simplicity */
                *data = 0;
-               r = kvm_msr_ignored_check(vcpu, index, 0, false);
+               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+                       r = 0;
        }
 
        if (r)
@@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
        struct msr_data msr;
 
        if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
-               return -EPERM;
+               return KVM_MSR_RET_FILTERED;
 
        switch (index) {
        case MSR_FS_BASE:
@@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
        int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
 
        if (ret == KVM_MSR_RET_INVALID)
-               ret = kvm_msr_ignored_check(vcpu, index, data, true);
+               if (kvm_msr_ignored_check(vcpu, index, data, true))
+                       ret = 0;
 
        return ret;
 }
@@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
        int ret;
 
        if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
-               return -EPERM;
+               return KVM_MSR_RET_FILTERED;
 
        msr.index = index;
        msr.host_initiated = host_initiated;
@@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
        if (ret == KVM_MSR_RET_INVALID) {
                /* Unconditionally clear *data for simplicity */
                *data = 0;
-               ret = kvm_msr_ignored_check(vcpu, index, 0, false);
+               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+                       ret = 0;
        }
 
        return ret;
@@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
 static u64 kvm_msr_reason(int r)
 {
        switch (r) {
-       case -ENOENT:
+       case KVM_MSR_RET_INVALID:
                return KVM_MSR_EXIT_REASON_UNKNOWN;
-       case -EPERM:
+       case KVM_MSR_RET_FILTERED:
                return KVM_MSR_EXIT_REASON_FILTER;
        default:
                return KVM_MSR_EXIT_REASON_INVAL;
@@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
        struct kvm_arch *ka = &vcpu->kvm->arch;
 
        if (vcpu->vcpu_id == 0 && !host_initiated) {
-               if (ka->boot_vcpu_runs_old_kvmclock && old_msr)
+               if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
                        kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
                ka->boot_vcpu_runs_old_kvmclock = old_msr;
@@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        /* Values other than LBR and BTF are vendor-specific,
                           thus reserved and should throw a #GP */
                        return 1;
-               }
-               vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
-                           __func__, data);
+               } else if (report_ignored_msrs)
+                       vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+                                   __func__, data);
                break;
        case 0x200 ... 0x2ff:
                return kvm_mtrr_set_msr(vcpu, msr, data);
@@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = vcpu->arch.efer;
                break;
        case MSR_KVM_WALL_CLOCK:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+                       return 1;
+
+               msr_info->data = vcpu->kvm->arch.wall_clock;
+               break;
        case MSR_KVM_WALL_CLOCK_NEW:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+                       return 1;
+
                msr_info->data = vcpu->kvm->arch.wall_clock;
                break;
        case MSR_KVM_SYSTEM_TIME:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+                       return 1;
+
+               msr_info->data = vcpu->arch.time;
+               break;
        case MSR_KVM_SYSTEM_TIME_NEW:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+                       return 1;
+
                msr_info->data = vcpu->arch.time;
                break;
        case MSR_KVM_ASYNC_PF_EN:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+                       return 1;
+
                msr_info->data = vcpu->arch.apf.msr_en_val;
                break;
        case MSR_KVM_ASYNC_PF_INT:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+                       return 1;
+
                msr_info->data = vcpu->arch.apf.msr_int_val;
                break;
        case MSR_KVM_ASYNC_PF_ACK:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+                       return 1;
+
                msr_info->data = 0;
                break;
        case MSR_KVM_STEAL_TIME:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+                       return 1;
+
                msr_info->data = vcpu->arch.st.msr_val;
                break;
        case MSR_KVM_PV_EOI_EN:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+                       return 1;
+
                msr_info->data = vcpu->arch.pv_eoi.msr_val;
                break;
        case MSR_KVM_POLL_CONTROL:
+               if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+                       return 1;
+
                msr_info->data = vcpu->arch.msr_kvm_poll_control;
                break;
        case MSR_IA32_P5_MC_ADDR:
@@ -4575,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
        case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
                vcpu->arch.pv_cpuid.enforce = cap->args[0];
+               if (vcpu->arch.pv_cpuid.enforce)
+                       kvm_update_pv_runtime(vcpu);
 
                return 0;
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3900ab0..e7ca622 100644 (file)
@@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
 int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
 
-#define  KVM_MSR_RET_INVALID  2
+/*
+ * Internal error codes that are used to indicate that MSR emulation encountered
+ * an error that should result in #GP in the guest, unless userspace
+ * handles it.
+ */
+#define  KVM_MSR_RET_INVALID   2       /* in-kernel MSR emulation #GP condition */
+#define  KVM_MSR_RET_FILTERED  3       /* #GP due to userspace MSR filter */
 
 #define __cr4_reserved_bits(__cpu_has, __c)             \
 ({                                                      \
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index d2c2d62..7a2c242 100644 (file)
@@ -1,10 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/aarch64/get-reg-list
+/aarch64/get-reg-list-sve
 /s390x/memop
 /s390x/resets
 /s390x/sync_regs_test
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
 /x86_64/evmcs_test
+/x86_64/kvm_pv_test
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
@@ -24,6 +27,7 @@
 /clear_dirty_log_test
 /demand_paging_test
 /dirty_log_test
+/dirty_log_perf_test
 /kvm_create_max_vcpus
 /set_memory_region_test
 /steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 30afbad..3d14ef7 100644 (file)
@@ -34,13 +34,14 @@ ifeq ($(ARCH),s390)
 endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -58,14 +59,15 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
-TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
-TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
@@ -111,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 include ../lib.mk
 
 STATIC_LIBS := $(OUTPUT)/libkvm.a
-LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
+       $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
-$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
        $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
        $(AR) crs $@ $^
 
 x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
new file mode 100644 (file)
index 0000000..efba766
--- /dev/null
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define REG_LIST_SVE
+#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
new file mode 100644 (file)
index 0000000..33218a3
--- /dev/null
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * Note, the blessed list should be created from the oldest possible
+ * kernel. We can't go older than v4.15, though, because that's the first
+ * release to expose the ID system registers in KVM_GET_REG_LIST, see
+ * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
+ * from guests"). Also, one must use the --core-reg-fixup command line
+ * option when running on an older kernel that doesn't include df205b5c6328
+ * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#ifdef REG_LIST_SVE
+#define reg_list_sve() (true)
+#else
+#define reg_list_sve() (false)
+#endif
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+#define for_each_reg(i)                                                                \
+       for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_missing_reg(i)                                                        \
+       for ((i) = 0; (i) < blessed_n; ++(i))                                   \
+               if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
+
+#define for_each_new_reg(i)                                                    \
+       for ((i) = 0; (i) < reg_list->n; ++(i))                                 \
+               if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+
+static struct kvm_reg_list *reg_list;
+
+static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
+static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
+static __u64 *blessed_reg, blessed_n;
+
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+       int i;
+
+       for (i = 0; i < nr_regs; ++i)
+               if (reg == regs[i])
+                       return true;
+       return false;
+}
+
+static const char *str_with_index(const char *template, __u64 index)
+{
+       char *str, *p;
+       int n;
+
+       str = strdup(template);
+       p = strstr(str, "##");
+       n = sprintf(p, "%lld", index);
+       strcat(p + n, strstr(template, "##") + 2);
+
+       return (const char *)str;
+}
+
+#define CORE_REGS_XX_NR_WORDS  2
+#define CORE_SPSR_XX_NR_WORDS  2
+#define CORE_FPREGS_XX_NR_WORDS        4
+
+static const char *core_id_to_str(__u64 id)
+{
+       __u64 core_off = id & ~REG_MASK, idx;
+
+       /*
+        * core_off is the offset into struct kvm_regs
+        */
+       switch (core_off) {
+       case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+            KVM_REG_ARM_CORE_REG(regs.regs[30]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+               TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+               return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+       case KVM_REG_ARM_CORE_REG(regs.sp):
+               return "KVM_REG_ARM_CORE_REG(regs.sp)";
+       case KVM_REG_ARM_CORE_REG(regs.pc):
+               return "KVM_REG_ARM_CORE_REG(regs.pc)";
+       case KVM_REG_ARM_CORE_REG(regs.pstate):
+               return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+       case KVM_REG_ARM_CORE_REG(sp_el1):
+               return "KVM_REG_ARM_CORE_REG(sp_el1)";
+       case KVM_REG_ARM_CORE_REG(elr_el1):
+               return "KVM_REG_ARM_CORE_REG(elr_el1)";
+       case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+            KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+               TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+               return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+       case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+            KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+               TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+               return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+       case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+               return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+       case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+               return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+       }
+
+       TEST_FAIL("Unknown core reg id: 0x%llx", id);
+       return NULL;
+}
+
+static const char *sve_id_to_str(__u64 id)
+{
+       __u64 sve_off, n, i;
+
+       if (id == KVM_REG_ARM64_SVE_VLS)
+               return "KVM_REG_ARM64_SVE_VLS";
+
+       sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+       i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+       TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+
+       switch (sve_off) {
+       case KVM_REG_ARM64_SVE_ZREG_BASE ...
+            KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+               n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+                           "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+               return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+       case KVM_REG_ARM64_SVE_PREG_BASE ...
+            KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+               n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+                           "Unexpected bits set in SVE PREG id: 0x%llx", id);
+               return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+       case KVM_REG_ARM64_SVE_FFR_BASE:
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+                           "Unexpected bits set in SVE FFR id: 0x%llx", id);
+               return "KVM_REG_ARM64_SVE_FFR(0)";
+       }
+
+       return NULL;
+}
+
+static void print_reg(__u64 id)
+{
+       unsigned op0, op1, crn, crm, op2;
+       const char *reg_size = NULL;
+
+       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+                   "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+
+       switch (id & KVM_REG_SIZE_MASK) {
+       case KVM_REG_SIZE_U8:
+               reg_size = "KVM_REG_SIZE_U8";
+               break;
+       case KVM_REG_SIZE_U16:
+               reg_size = "KVM_REG_SIZE_U16";
+               break;
+       case KVM_REG_SIZE_U32:
+               reg_size = "KVM_REG_SIZE_U32";
+               break;
+       case KVM_REG_SIZE_U64:
+               reg_size = "KVM_REG_SIZE_U64";
+               break;
+       case KVM_REG_SIZE_U128:
+               reg_size = "KVM_REG_SIZE_U128";
+               break;
+       case KVM_REG_SIZE_U256:
+               reg_size = "KVM_REG_SIZE_U256";
+               break;
+       case KVM_REG_SIZE_U512:
+               reg_size = "KVM_REG_SIZE_U512";
+               break;
+       case KVM_REG_SIZE_U1024:
+               reg_size = "KVM_REG_SIZE_U1024";
+               break;
+       case KVM_REG_SIZE_U2048:
+               reg_size = "KVM_REG_SIZE_U2048";
+               break;
+       default:
+               TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
+                         (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+       }
+
+       switch (id & KVM_REG_ARM_COPROC_MASK) {
+       case KVM_REG_ARM_CORE:
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+               break;
+       case KVM_REG_ARM_DEMUX:
+               TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+                           "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+                      reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+               break;
+       case KVM_REG_ARM64_SYSREG:
+               op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+               op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+               crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+               crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+               op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+               TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+                           "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+               printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+               break;
+       case KVM_REG_ARM_FW:
+               TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+                           "Unexpected bits set in FW reg id: 0x%llx", id);
+               printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+               break;
+       case KVM_REG_ARM64_SVE:
+               if (reg_list_sve())
+                       printf("\t%s,\n", sve_id_to_str(id));
+               else
+                       TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+               break;
+       default:
+               TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+                         (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+       }
+}
+
+/*
+ * Older kernels listed each 32-bit word of CORE registers separately.
+ * For 64 and 128-bit registers we need to ignore the extra words. We
+ * also need to fixup the sizes, because the older kernels stated all
+ * registers were 64-bit, even when they weren't.
+ */
+static void core_reg_fixup(void)
+{
+       struct kvm_reg_list *tmp;
+       __u64 id, core_off;
+       int i;
+
+       tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
+
+       for (i = 0; i < reg_list->n; ++i) {
+               id = reg_list->reg[i];
+
+               if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
+                       tmp->reg[tmp->n++] = id;
+                       continue;
+               }
+
+               core_off = id & ~REG_MASK;
+
+               switch (core_off) {
+               case 0x52: case 0xd2: case 0xd6:
+                       /*
+                        * These offsets are pointing at padding.
+                        * We need to ignore them too.
+                        */
+                       continue;
+               case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+                    KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+                       if (core_off & 3)
+                               continue;
+                       id &= ~KVM_REG_SIZE_MASK;
+                       id |= KVM_REG_SIZE_U128;
+                       tmp->reg[tmp->n++] = id;
+                       continue;
+               case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+               case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+                       id &= ~KVM_REG_SIZE_MASK;
+                       id |= KVM_REG_SIZE_U32;
+                       tmp->reg[tmp->n++] = id;
+                       continue;
+               default:
+                       if (core_off & 1)
+                               continue;
+                       tmp->reg[tmp->n++] = id;
+                       break;
+               }
+       }
+
+       free(reg_list);
+       reg_list = tmp;
+}
+
+static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+{
+       if (reg_list_sve())
+               init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+}
+
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       int feature;
+
+       if (reg_list_sve()) {
+               feature = KVM_ARM_VCPU_SVE;
+               vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+       }
+}
+
+static void check_supported(void)
+{
+       if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
+               fprintf(stderr, "SVE not available, skipping tests\n");
+               exit(KSFT_SKIP);
+       }
+}
+
+int main(int ac, char **av)
+{
+       struct kvm_vcpu_init init = { .target = -1, };
+       int new_regs = 0, missing_regs = 0, i;
+       int failed_get = 0, failed_set = 0, failed_reject = 0;
+       bool print_list = false, fixup_core_regs = false;
+       struct kvm_vm *vm;
+       __u64 *vec_regs;
+
+       check_supported();
+
+       for (i = 1; i < ac; ++i) {
+               if (strcmp(av[i], "--core-reg-fixup") == 0)
+                       fixup_core_regs = true;
+               else if (strcmp(av[i], "--list") == 0)
+                       print_list = true;
+               else
+                       fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+       }
+
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+       prepare_vcpu_init(&init);
+       aarch64_vcpu_add_default(vm, 0, &init, NULL);
+       finalize_vcpu(vm, 0);
+
+       reg_list = vcpu_get_reg_list(vm, 0);
+
+       if (fixup_core_regs)
+               core_reg_fixup();
+
+       if (print_list) {
+               putchar('\n');
+               for_each_reg(i)
+                       print_reg(reg_list->reg[i]);
+               putchar('\n');
+               return 0;
+       }
+
+       /*
+        * We only test that we can get the register and then write back the
+        * same value. Some registers may allow other values to be written
+        * back, but others only allow some bits to be changed, and at least
+        * for ID registers set will fail if the value does not exactly match
+        * what was returned by get. If registers that allow other values to
+        * be written need to have the other values tested, then we should
+        * create a new set of tests for those in a new independent test
+        * executable.
+        */
+       for_each_reg(i) {
+               uint8_t addr[2048 / 8];
+               struct kvm_one_reg reg = {
+                       .id = reg_list->reg[i],
+                       .addr = (__u64)&addr,
+               };
+               int ret;
+
+               ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
+               if (ret) {
+                       puts("Failed to get ");
+                       print_reg(reg.id);
+                       putchar('\n');
+                       ++failed_get;
+               }
+
+               /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
+               if (find_reg(rejects_set, rejects_set_n, reg.id)) {
+                       ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+                       if (ret != -1 || errno != EPERM) {
+                               printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
+                               print_reg(reg.id);
+                               putchar('\n');
+                               ++failed_reject;
+                       }
+                       continue;
+               }
+
+               ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+               if (ret) {
+                       puts("Failed to set ");
+                       print_reg(reg.id);
+                       putchar('\n');
+                       ++failed_set;
+               }
+       }
+
+       if (reg_list_sve()) {
+               blessed_n = base_regs_n + sve_regs_n;
+               vec_regs = sve_regs;
+       } else {
+               blessed_n = base_regs_n + vregs_n;
+               vec_regs = vregs;
+       }
+
+       blessed_reg = calloc(blessed_n, sizeof(__u64));
+       for (i = 0; i < base_regs_n; ++i)
+               blessed_reg[i] = base_regs[i];
+       for (i = 0; i < blessed_n - base_regs_n; ++i)
+               blessed_reg[base_regs_n + i] = vec_regs[i];
+
+       for_each_new_reg(i)
+               ++new_regs;
+
+       for_each_missing_reg(i)
+               ++missing_regs;
+
+       if (new_regs || missing_regs) {
+               printf("Number blessed registers: %5lld\n", blessed_n);
+               printf("Number registers:         %5lld\n", reg_list->n);
+       }
+
+       if (new_regs) {
+               printf("\nThere are %d new registers.\n"
+                      "Consider adding them to the blessed reg "
+                      "list with the following lines:\n\n", new_regs);
+               for_each_new_reg(i)
+                       print_reg(reg_list->reg[i]);
+               putchar('\n');
+       }
+
+       if (missing_regs) {
+               printf("\nThere are %d missing registers.\n"
+                      "The following lines are missing registers:\n\n", missing_regs);
+               for_each_missing_reg(i)
+                       print_reg(blessed_reg[i]);
+               putchar('\n');
+       }
+
+       TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+                   "There are %d missing registers; "
+                   "%d registers failed get; %d registers failed set; %d registers failed reject",
+                   missing_regs, failed_get, failed_set, failed_reject);
+
+       return 0;
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+       KVM_REG_ARM_FW_REG(0),
+       KVM_REG_ARM_FW_REG(1),
+       KVM_REG_ARM_FW_REG(2),
+       ARM64_SYS_REG(3, 3, 14, 3, 1),  /* CNTV_CTL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 3, 2),  /* CNTV_CVAL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 0, 2),
+       ARM64_SYS_REG(3, 0, 0, 0, 0),   /* MIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 0, 6),   /* REVIDR_EL1 */
+       ARM64_SYS_REG(3, 1, 0, 0, 1),   /* CLIDR_EL1 */
+       ARM64_SYS_REG(3, 1, 0, 0, 7),   /* AIDR_EL1 */
+       ARM64_SYS_REG(3, 3, 0, 0, 1),   /* CTR_EL0 */
+       ARM64_SYS_REG(2, 0, 0, 0, 4),
+       ARM64_SYS_REG(2, 0, 0, 0, 5),
+       ARM64_SYS_REG(2, 0, 0, 0, 6),
+       ARM64_SYS_REG(2, 0, 0, 0, 7),
+       ARM64_SYS_REG(2, 0, 0, 1, 4),
+       ARM64_SYS_REG(2, 0, 0, 1, 5),
+       ARM64_SYS_REG(2, 0, 0, 1, 6),
+       ARM64_SYS_REG(2, 0, 0, 1, 7),
+       ARM64_SYS_REG(2, 0, 0, 2, 0),   /* MDCCINT_EL1 */
+       ARM64_SYS_REG(2, 0, 0, 2, 2),   /* MDSCR_EL1 */
+       ARM64_SYS_REG(2, 0, 0, 2, 4),
+       ARM64_SYS_REG(2, 0, 0, 2, 5),
+       ARM64_SYS_REG(2, 0, 0, 2, 6),
+       ARM64_SYS_REG(2, 0, 0, 2, 7),
+       ARM64_SYS_REG(2, 0, 0, 3, 4),
+       ARM64_SYS_REG(2, 0, 0, 3, 5),
+       ARM64_SYS_REG(2, 0, 0, 3, 6),
+       ARM64_SYS_REG(2, 0, 0, 3, 7),
+       ARM64_SYS_REG(2, 0, 0, 4, 4),
+       ARM64_SYS_REG(2, 0, 0, 4, 5),
+       ARM64_SYS_REG(2, 0, 0, 4, 6),
+       ARM64_SYS_REG(2, 0, 0, 4, 7),
+       ARM64_SYS_REG(2, 0, 0, 5, 4),
+       ARM64_SYS_REG(2, 0, 0, 5, 5),
+       ARM64_SYS_REG(2, 0, 0, 5, 6),
+       ARM64_SYS_REG(2, 0, 0, 5, 7),
+       ARM64_SYS_REG(2, 0, 0, 6, 4),
+       ARM64_SYS_REG(2, 0, 0, 6, 5),
+       ARM64_SYS_REG(2, 0, 0, 6, 6),
+       ARM64_SYS_REG(2, 0, 0, 6, 7),
+       ARM64_SYS_REG(2, 0, 0, 7, 4),
+       ARM64_SYS_REG(2, 0, 0, 7, 5),
+       ARM64_SYS_REG(2, 0, 0, 7, 6),
+       ARM64_SYS_REG(2, 0, 0, 7, 7),
+       ARM64_SYS_REG(2, 0, 0, 8, 4),
+       ARM64_SYS_REG(2, 0, 0, 8, 5),
+       ARM64_SYS_REG(2, 0, 0, 8, 6),
+       ARM64_SYS_REG(2, 0, 0, 8, 7),
+       ARM64_SYS_REG(2, 0, 0, 9, 4),
+       ARM64_SYS_REG(2, 0, 0, 9, 5),
+       ARM64_SYS_REG(2, 0, 0, 9, 6),
+       ARM64_SYS_REG(2, 0, 0, 9, 7),
+       ARM64_SYS_REG(2, 0, 0, 10, 4),
+       ARM64_SYS_REG(2, 0, 0, 10, 5),
+       ARM64_SYS_REG(2, 0, 0, 10, 6),
+       ARM64_SYS_REG(2, 0, 0, 10, 7),
+       ARM64_SYS_REG(2, 0, 0, 11, 4),
+       ARM64_SYS_REG(2, 0, 0, 11, 5),
+       ARM64_SYS_REG(2, 0, 0, 11, 6),
+       ARM64_SYS_REG(2, 0, 0, 11, 7),
+       ARM64_SYS_REG(2, 0, 0, 12, 4),
+       ARM64_SYS_REG(2, 0, 0, 12, 5),
+       ARM64_SYS_REG(2, 0, 0, 12, 6),
+       ARM64_SYS_REG(2, 0, 0, 12, 7),
+       ARM64_SYS_REG(2, 0, 0, 13, 4),
+       ARM64_SYS_REG(2, 0, 0, 13, 5),
+       ARM64_SYS_REG(2, 0, 0, 13, 6),
+       ARM64_SYS_REG(2, 0, 0, 13, 7),
+       ARM64_SYS_REG(2, 0, 0, 14, 4),
+       ARM64_SYS_REG(2, 0, 0, 14, 5),
+       ARM64_SYS_REG(2, 0, 0, 14, 6),
+       ARM64_SYS_REG(2, 0, 0, 14, 7),
+       ARM64_SYS_REG(2, 0, 0, 15, 4),
+       ARM64_SYS_REG(2, 0, 0, 15, 5),
+       ARM64_SYS_REG(2, 0, 0, 15, 6),
+       ARM64_SYS_REG(2, 0, 0, 15, 7),
+       ARM64_SYS_REG(2, 4, 0, 7, 0),   /* DBGVCR32_EL2 */
+       ARM64_SYS_REG(3, 0, 0, 0, 5),   /* MPIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 0),   /* ID_PFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 1),   /* ID_PFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 2),   /* ID_DFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 3),   /* ID_AFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 4),   /* ID_MMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 5),   /* ID_MMFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 6),   /* ID_MMFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 7),   /* ID_MMFR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 0),   /* ID_ISAR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 1),   /* ID_ISAR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 2),   /* ID_ISAR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 3),   /* ID_ISAR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 4),   /* ID_ISAR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 5),   /* ID_ISAR5_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 6),   /* ID_MMFR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 7),   /* ID_ISAR6_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 0),   /* MVFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 1),   /* MVFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 2),   /* MVFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 3),
+       ARM64_SYS_REG(3, 0, 0, 3, 4),   /* ID_PFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 5),   /* ID_DFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 6),   /* ID_MMFR5_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 7),
+       ARM64_SYS_REG(3, 0, 0, 4, 0),   /* ID_AA64PFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 1),   /* ID_AA64PFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 2),
+       ARM64_SYS_REG(3, 0, 0, 4, 3),
+       ARM64_SYS_REG(3, 0, 0, 4, 4),   /* ID_AA64ZFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 5),
+       ARM64_SYS_REG(3, 0, 0, 4, 6),
+       ARM64_SYS_REG(3, 0, 0, 4, 7),
+       ARM64_SYS_REG(3, 0, 0, 5, 0),   /* ID_AA64DFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 1),   /* ID_AA64DFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 2),
+       ARM64_SYS_REG(3, 0, 0, 5, 3),
+       ARM64_SYS_REG(3, 0, 0, 5, 4),   /* ID_AA64AFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 5),   /* ID_AA64AFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 6),
+       ARM64_SYS_REG(3, 0, 0, 5, 7),
+       ARM64_SYS_REG(3, 0, 0, 6, 0),   /* ID_AA64ISAR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 1),   /* ID_AA64ISAR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 2),
+       ARM64_SYS_REG(3, 0, 0, 6, 3),
+       ARM64_SYS_REG(3, 0, 0, 6, 4),
+       ARM64_SYS_REG(3, 0, 0, 6, 5),
+       ARM64_SYS_REG(3, 0, 0, 6, 6),
+       ARM64_SYS_REG(3, 0, 0, 6, 7),
+       ARM64_SYS_REG(3, 0, 0, 7, 0),   /* ID_AA64MMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 1),   /* ID_AA64MMFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 2),   /* ID_AA64MMFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 3),
+       ARM64_SYS_REG(3, 0, 0, 7, 4),
+       ARM64_SYS_REG(3, 0, 0, 7, 5),
+       ARM64_SYS_REG(3, 0, 0, 7, 6),
+       ARM64_SYS_REG(3, 0, 0, 7, 7),
+       ARM64_SYS_REG(3, 0, 1, 0, 0),   /* SCTLR_EL1 */
+       ARM64_SYS_REG(3, 0, 1, 0, 1),   /* ACTLR_EL1 */
+       ARM64_SYS_REG(3, 0, 1, 0, 2),   /* CPACR_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 0),   /* TTBR0_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 1),   /* TTBR1_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 2),   /* TCR_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 1, 0),   /* AFSR0_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 1, 1),   /* AFSR1_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 2, 0),   /* ESR_EL1 */
+       ARM64_SYS_REG(3, 0, 6, 0, 0),   /* FAR_EL1 */
+       ARM64_SYS_REG(3, 0, 7, 4, 0),   /* PAR_EL1 */
+       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
+       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 0),  /* MAIR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 3, 0),  /* AMAIR_EL1 */
+       ARM64_SYS_REG(3, 0, 12, 0, 0),  /* VBAR_EL1 */
+       ARM64_SYS_REG(3, 0, 12, 1, 1),  /* DISR_EL1 */
+       ARM64_SYS_REG(3, 0, 13, 0, 1),  /* CONTEXTIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 13, 0, 4),  /* TPIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 14, 1, 0),  /* CNTKCTL_EL1 */
+       ARM64_SYS_REG(3, 2, 0, 0, 0),   /* CSSELR_EL1 */
+       ARM64_SYS_REG(3, 3, 9, 12, 0),  /* PMCR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 1),  /* PMCNTENSET_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 2),  /* PMCNTENCLR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 3),  /* PMOVSCLR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 4),  /* PMSWINC_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 5),  /* PMSELR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 13, 0),  /* PMCCNTR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 14, 0),  /* PMUSERENR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 14, 3),  /* PMOVSSET_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 8, 0),
+       ARM64_SYS_REG(3, 3, 14, 8, 1),
+       ARM64_SYS_REG(3, 3, 14, 8, 2),
+       ARM64_SYS_REG(3, 3, 14, 8, 3),
+       ARM64_SYS_REG(3, 3, 14, 8, 4),
+       ARM64_SYS_REG(3, 3, 14, 8, 5),
+       ARM64_SYS_REG(3, 3, 14, 8, 6),
+       ARM64_SYS_REG(3, 3, 14, 8, 7),
+       ARM64_SYS_REG(3, 3, 14, 9, 0),
+       ARM64_SYS_REG(3, 3, 14, 9, 1),
+       ARM64_SYS_REG(3, 3, 14, 9, 2),
+       ARM64_SYS_REG(3, 3, 14, 9, 3),
+       ARM64_SYS_REG(3, 3, 14, 9, 4),
+       ARM64_SYS_REG(3, 3, 14, 9, 5),
+       ARM64_SYS_REG(3, 3, 14, 9, 6),
+       ARM64_SYS_REG(3, 3, 14, 9, 7),
+       ARM64_SYS_REG(3, 3, 14, 10, 0),
+       ARM64_SYS_REG(3, 3, 14, 10, 1),
+       ARM64_SYS_REG(3, 3, 14, 10, 2),
+       ARM64_SYS_REG(3, 3, 14, 10, 3),
+       ARM64_SYS_REG(3, 3, 14, 10, 4),
+       ARM64_SYS_REG(3, 3, 14, 10, 5),
+       ARM64_SYS_REG(3, 3, 14, 10, 6),
+       ARM64_SYS_REG(3, 3, 14, 10, 7),
+       ARM64_SYS_REG(3, 3, 14, 11, 0),
+       ARM64_SYS_REG(3, 3, 14, 11, 1),
+       ARM64_SYS_REG(3, 3, 14, 11, 2),
+       ARM64_SYS_REG(3, 3, 14, 11, 3),
+       ARM64_SYS_REG(3, 3, 14, 11, 4),
+       ARM64_SYS_REG(3, 3, 14, 11, 5),
+       ARM64_SYS_REG(3, 3, 14, 11, 6),
+       ARM64_SYS_REG(3, 3, 14, 12, 0),
+       ARM64_SYS_REG(3, 3, 14, 12, 1),
+       ARM64_SYS_REG(3, 3, 14, 12, 2),
+       ARM64_SYS_REG(3, 3, 14, 12, 3),
+       ARM64_SYS_REG(3, 3, 14, 12, 4),
+       ARM64_SYS_REG(3, 3, 14, 12, 5),
+       ARM64_SYS_REG(3, 3, 14, 12, 6),
+       ARM64_SYS_REG(3, 3, 14, 12, 7),
+       ARM64_SYS_REG(3, 3, 14, 13, 0),
+       ARM64_SYS_REG(3, 3, 14, 13, 1),
+       ARM64_SYS_REG(3, 3, 14, 13, 2),
+       ARM64_SYS_REG(3, 3, 14, 13, 3),
+       ARM64_SYS_REG(3, 3, 14, 13, 4),
+       ARM64_SYS_REG(3, 3, 14, 13, 5),
+       ARM64_SYS_REG(3, 3, 14, 13, 6),
+       ARM64_SYS_REG(3, 3, 14, 13, 7),
+       ARM64_SYS_REG(3, 3, 14, 14, 0),
+       ARM64_SYS_REG(3, 3, 14, 14, 1),
+       ARM64_SYS_REG(3, 3, 14, 14, 2),
+       ARM64_SYS_REG(3, 3, 14, 14, 3),
+       ARM64_SYS_REG(3, 3, 14, 14, 4),
+       ARM64_SYS_REG(3, 3, 14, 14, 5),
+       ARM64_SYS_REG(3, 3, 14, 14, 6),
+       ARM64_SYS_REG(3, 3, 14, 14, 7),
+       ARM64_SYS_REG(3, 3, 14, 15, 0),
+       ARM64_SYS_REG(3, 3, 14, 15, 1),
+       ARM64_SYS_REG(3, 3, 14, 15, 2),
+       ARM64_SYS_REG(3, 3, 14, 15, 3),
+       ARM64_SYS_REG(3, 3, 14, 15, 4),
+       ARM64_SYS_REG(3, 3, 14, 15, 5),
+       ARM64_SYS_REG(3, 3, 14, 15, 6),
+       ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
+       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
+};
+static __u64 base_regs_n = ARRAY_SIZE(base_regs);
+
+static __u64 vregs[] = {
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+static __u64 vregs_n = ARRAY_SIZE(vregs);
+
+static __u64 sve_regs[] = {
+       KVM_REG_ARM64_SVE_VLS,
+       KVM_REG_ARM64_SVE_ZREG(0, 0),
+       KVM_REG_ARM64_SVE_ZREG(1, 0),
+       KVM_REG_ARM64_SVE_ZREG(2, 0),
+       KVM_REG_ARM64_SVE_ZREG(3, 0),
+       KVM_REG_ARM64_SVE_ZREG(4, 0),
+       KVM_REG_ARM64_SVE_ZREG(5, 0),
+       KVM_REG_ARM64_SVE_ZREG(6, 0),
+       KVM_REG_ARM64_SVE_ZREG(7, 0),
+       KVM_REG_ARM64_SVE_ZREG(8, 0),
+       KVM_REG_ARM64_SVE_ZREG(9, 0),
+       KVM_REG_ARM64_SVE_ZREG(10, 0),
+       KVM_REG_ARM64_SVE_ZREG(11, 0),
+       KVM_REG_ARM64_SVE_ZREG(12, 0),
+       KVM_REG_ARM64_SVE_ZREG(13, 0),
+       KVM_REG_ARM64_SVE_ZREG(14, 0),
+       KVM_REG_ARM64_SVE_ZREG(15, 0),
+       KVM_REG_ARM64_SVE_ZREG(16, 0),
+       KVM_REG_ARM64_SVE_ZREG(17, 0),
+       KVM_REG_ARM64_SVE_ZREG(18, 0),
+       KVM_REG_ARM64_SVE_ZREG(19, 0),
+       KVM_REG_ARM64_SVE_ZREG(20, 0),
+       KVM_REG_ARM64_SVE_ZREG(21, 0),
+       KVM_REG_ARM64_SVE_ZREG(22, 0),
+       KVM_REG_ARM64_SVE_ZREG(23, 0),
+       KVM_REG_ARM64_SVE_ZREG(24, 0),
+       KVM_REG_ARM64_SVE_ZREG(25, 0),
+       KVM_REG_ARM64_SVE_ZREG(26, 0),
+       KVM_REG_ARM64_SVE_ZREG(27, 0),
+       KVM_REG_ARM64_SVE_ZREG(28, 0),
+       KVM_REG_ARM64_SVE_ZREG(29, 0),
+       KVM_REG_ARM64_SVE_ZREG(30, 0),
+       KVM_REG_ARM64_SVE_ZREG(31, 0),
+       KVM_REG_ARM64_SVE_PREG(0, 0),
+       KVM_REG_ARM64_SVE_PREG(1, 0),
+       KVM_REG_ARM64_SVE_PREG(2, 0),
+       KVM_REG_ARM64_SVE_PREG(3, 0),
+       KVM_REG_ARM64_SVE_PREG(4, 0),
+       KVM_REG_ARM64_SVE_PREG(5, 0),
+       KVM_REG_ARM64_SVE_PREG(6, 0),
+       KVM_REG_ARM64_SVE_PREG(7, 0),
+       KVM_REG_ARM64_SVE_PREG(8, 0),
+       KVM_REG_ARM64_SVE_PREG(9, 0),
+       KVM_REG_ARM64_SVE_PREG(10, 0),
+       KVM_REG_ARM64_SVE_PREG(11, 0),
+       KVM_REG_ARM64_SVE_PREG(12, 0),
+       KVM_REG_ARM64_SVE_PREG(13, 0),
+       KVM_REG_ARM64_SVE_PREG(14, 0),
+       KVM_REG_ARM64_SVE_PREG(15, 0),
+       KVM_REG_ARM64_SVE_FFR(0),
+       ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
+};
+static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
+
+static __u64 rejects_set[] = {
+#ifdef REG_LIST_SVE
+       KVM_REG_ARM64_SVE_VLS,
+#endif
+};
+static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
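
Note on the encodings above: ARM64_SYS_REG(op0, op1, crn, crm, op2) packs the five system-register operands into a KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG register ID. The standalone sketch below models only that packing; the field shifts are assumed from the arm64 uapi headers (they are not part of this diff) and the code is purely illustrative.

  #include <stdint.h>
  #include <stdio.h>

  /* Assumed field shifts, as defined in arch/arm64/include/uapi/asm/kvm.h */
  #define SYSREG_OP0_SHIFT 14
  #define SYSREG_OP1_SHIFT 11
  #define SYSREG_CRN_SHIFT  7
  #define SYSREG_CRM_SHIFT  3
  #define SYSREG_OP2_SHIFT  0

  /* Build the sysreg index that ARM64_SYS_REG() ORs into the register ID. */
  static uint16_t sysreg_index(int op0, int op1, int crn, int crm, int op2)
  {
          return (op0 << SYSREG_OP0_SHIFT) | (op1 << SYSREG_OP1_SHIFT) |
                 (crn << SYSREG_CRN_SHIFT) | (crm << SYSREG_CRM_SHIFT) |
                 (op2 << SYSREG_OP2_SHIFT);
  }

  int main(void)
  {
          /* ID_AA64ZFR0_EL1: op0=3, op1=0, CRn=0, CRm=4, op2=4 (see the list above) */
          printf("0x%04x\n", sysreg_index(3, 0, 0, 4, 4));
          return 0;
  }

With those shifts the ID_AA64ZFR0_EL1 entry (3, 0, 0, 4, 4) yields sysreg index 0xc024, the low bits of the register ID the test expects back from KVM_GET_REG_LIST.
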
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
deleted file mode 100644 (file)
index 11672ec..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#define USE_CLEAR_DIRTY_LOG
-#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
-#define KVM_DIRTY_LOG_INITIALLY_SET         (1 << 1)
-#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
-               KVM_DIRTY_LOG_INITIALLY_SET)
-#include "dirty_log_test.c"
index 360cd3e..3d96a7b 100644 (file)
 #include <linux/bitops.h>
 #include <linux/userfaultfd.h>
 
-#include "test_util.h"
-#include "kvm_util.h"
+#include "perf_test_util.h"
 #include "processor.h"
+#include "test_util.h"
 
 #ifdef __NR_userfaultfd
 
-/* The memory slot index demand page */
-#define TEST_MEM_SLOT_INDEX            1
-
-/* Default guest test virtual memory offset */
-#define DEFAULT_GUEST_TEST_MEM         0xc0000000
-
-#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
-
 #ifdef PRINT_PER_PAGE_UPDATES
 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
 #else
 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
 #endif
 
-#define MAX_VCPUS 512
-
-/*
- * Guest/Host shared variables. Ensure addr_gva2hva() and/or
- * sync_global_to/from_guest() are used when accessing from
- * the host. READ/WRITE_ONCE() should also be used with anything
- * that may change.
- */
-static uint64_t host_page_size;
-static uint64_t guest_page_size;
-
 static char *guest_data_prototype;
 
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-static uint64_t guest_test_phys_mem;
-
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-
-struct vcpu_args {
-       uint64_t gva;
-       uint64_t pages;
-
-       /* Only used by the host userspace part of the vCPU thread */
-       int vcpu_id;
-       struct kvm_vm *vm;
-};
-
-static struct vcpu_args vcpu_args[MAX_VCPUS];
-
-/*
- * Continuously write to the first 8 bytes of each page in the demand paging
- * memory region.
- */
-static void guest_code(uint32_t vcpu_id)
-{
-       uint64_t gva;
-       uint64_t pages;
-       int i;
-
-       /* Make sure vCPU args data structure is not corrupt. */
-       GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);
-
-       gva = vcpu_args[vcpu_id].gva;
-       pages = vcpu_args[vcpu_id].pages;
-
-       for (i = 0; i < pages; i++) {
-               uint64_t addr = gva + (i * guest_page_size);
-
-               addr &= ~(host_page_size - 1);
-               *(uint64_t *)addr = 0x0123456789ABCDEF;
-       }
-
-       GUEST_SYNC(1);
-}
-
 static void *vcpu_worker(void *data)
 {
        int ret;
-       struct vcpu_args *args = (struct vcpu_args *)data;
-       struct kvm_vm *vm = args->vm;
-       int vcpu_id = args->vcpu_id;
+       struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+       int vcpu_id = vcpu_args->vcpu_id;
+       struct kvm_vm *vm = perf_test_args.vm;
        struct kvm_run *run;
-       struct timespec start, end, ts_diff;
+       struct timespec start;
+       struct timespec ts_diff;
 
        vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
        run = vcpu_state(vm, vcpu_id);
@@ -133,52 +65,18 @@ static void *vcpu_worker(void *data)
                            exit_reason_str(run->exit_reason));
        }
 
-       clock_gettime(CLOCK_MONOTONIC, &end);
-       ts_diff = timespec_sub(end, start);
+       ts_diff = timespec_diff_now(start);
        PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
                       ts_diff.tv_sec, ts_diff.tv_nsec);
 
        return NULL;
 }
 
-#define PAGE_SHIFT_4K  12
-#define PTES_PER_4K_PT 512
-
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
-                               uint64_t vcpu_memory_bytes)
-{
-       struct kvm_vm *vm;
-       uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
-
-       /* Account for a few pages per-vCPU for stacks */
-       pages += DEFAULT_STACK_PGS * vcpus;
-
-       /*
-        * Reserve twice the ammount of memory needed to map the test region and
-        * the page table / stacks region, at 4k, for page tables. Do the
-        * calculation with 4K page size: the smallest of all archs. (e.g., 64K
-        * page size guest will need even less memory for page tables).
-        */
-       pages += (2 * pages) / PTES_PER_4K_PT;
-       pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
-                PTES_PER_4K_PT;
-       pages = vm_adjust_num_guest_pages(mode, pages);
-
-       pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-       vm = _vm_create(mode, pages, O_RDWR);
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
-       vm_create_irqchip(vm);
-#endif
-       return vm;
-}
-
 static int handle_uffd_page_request(int uffd, uint64_t addr)
 {
        pid_t tid;
        struct timespec start;
-       struct timespec end;
+       struct timespec ts_diff;
        struct uffdio_copy copy;
        int r;
 
@@ -186,7 +84,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 
        copy.src = (uint64_t)guest_data_prototype;
        copy.dst = addr;
-       copy.len = host_page_size;
+       copy.len = perf_test_args.host_page_size;
        copy.mode = 0;
 
        clock_gettime(CLOCK_MONOTONIC, &start);
@@ -198,12 +96,12 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
                return r;
        }
 
-       clock_gettime(CLOCK_MONOTONIC, &end);
+       ts_diff = timespec_diff_now(start);
 
        PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
-                      timespec_to_ns(timespec_sub(end, start)));
+                      timespec_to_ns(ts_diff));
        PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
-                      host_page_size, addr, tid);
+                      perf_test_args.host_page_size, addr, tid);
 
        return 0;
 }
@@ -223,7 +121,8 @@ static void *uffd_handler_thread_fn(void *arg)
        int pipefd = uffd_args->pipefd;
        useconds_t delay = uffd_args->delay;
        int64_t pages = 0;
-       struct timespec start, end, ts_diff;
+       struct timespec start;
+       struct timespec ts_diff;
 
        clock_gettime(CLOCK_MONOTONIC, &start);
        while (!quit_uffd_thread) {
@@ -292,8 +191,7 @@ static void *uffd_handler_thread_fn(void *arg)
                pages++;
        }
 
-       clock_gettime(CLOCK_MONOTONIC, &end);
-       ts_diff = timespec_sub(end, start);
+       ts_diff = timespec_diff_now(start);
        PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
                       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
                       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
@@ -351,99 +249,54 @@ static int setup_demand_paging(struct kvm_vm *vm,
 }
 
 static void run_test(enum vm_guest_mode mode, bool use_uffd,
-                    useconds_t uffd_delay, int vcpus,
-                    uint64_t vcpu_memory_bytes)
+                    useconds_t uffd_delay)
 {
        pthread_t *vcpu_threads;
        pthread_t *uffd_handler_threads = NULL;
        struct uffd_handler_args *uffd_args = NULL;
-       struct timespec start, end, ts_diff;
+       struct timespec start;
+       struct timespec ts_diff;
        int *pipefds = NULL;
        struct kvm_vm *vm;
-       uint64_t guest_num_pages;
        int vcpu_id;
        int r;
 
-       vm = create_vm(mode, vcpus, vcpu_memory_bytes);
-
-       guest_page_size = vm_get_page_size(vm);
-
-       TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
-                   "Guest memory size is not guest page size aligned.");
-
-       guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
-       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-
-       /*
-        * If there should be more memory in the guest test region than there
-        * can be pages in the guest, it will definitely cause problems.
-        */
-       TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
-                   "Requested more guest memory than address space allows.\n"
-                   "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
-                   guest_num_pages, vm_get_max_gfn(vm), vcpus,
-                   vcpu_memory_bytes);
-
-       host_page_size = getpagesize();
-       TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
-                   "Guest memory size is not host page size aligned.");
+       vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
-       guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
-                             guest_page_size;
-       guest_test_phys_mem &= ~(host_page_size - 1);
+       perf_test_args.wr_fract = 1;
 
-#ifdef __s390x__
-       /* Align to 1M (segment size) */
-       guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
-
-       pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
-       /* Add an extra memory slot for testing demand paging */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   guest_test_phys_mem,
-                                   TEST_MEM_SLOT_INDEX,
-                                   guest_num_pages, 0);
-
-       /* Do mapping for the demand paging memory slot */
-       virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
-
-       ucall_init(vm, NULL);
-
-       guest_data_prototype = malloc(host_page_size);
+       guest_data_prototype = malloc(perf_test_args.host_page_size);
        TEST_ASSERT(guest_data_prototype,
                    "Failed to allocate buffer for guest data pattern");
-       memset(guest_data_prototype, 0xAB, host_page_size);
+       memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
 
-       vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
+       vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
        TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
+       add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
        if (use_uffd) {
                uffd_handler_threads =
-                       malloc(vcpus * sizeof(*uffd_handler_threads));
+                       malloc(nr_vcpus * sizeof(*uffd_handler_threads));
                TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
 
-               uffd_args = malloc(vcpus * sizeof(*uffd_args));
+               uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
                TEST_ASSERT(uffd_args, "Memory allocation failed");
 
-               pipefds = malloc(sizeof(int) * vcpus * 2);
+               pipefds = malloc(sizeof(int) * nr_vcpus * 2);
                TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
-       }
-
-       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
-               vm_paddr_t vcpu_gpa;
-               void *vcpu_hva;
 
-               vm_vcpu_add_default(vm, vcpu_id, guest_code);
+               for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+                       vm_paddr_t vcpu_gpa;
+                       void *vcpu_hva;
 
-               vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
-               PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-                              vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+                       vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
+                       PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+                                      vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);
 
-               /* Cache the HVA pointer of the region */
-               vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+                       /* Cache the HVA pointer of the region */
+                       vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
 
-               if (use_uffd) {
                        /*
                         * Set up user fault fd to handle demand paging
                         * requests.
@@ -456,53 +309,41 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
                                                &uffd_handler_threads[vcpu_id],
                                                pipefds[vcpu_id * 2],
                                                uffd_delay, &uffd_args[vcpu_id],
-                                               vcpu_hva, vcpu_memory_bytes);
+                                               vcpu_hva, guest_percpu_mem_size);
                        if (r < 0)
                                exit(-r);
                }
-
-#ifdef __x86_64__
-               vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
-               vcpu_args[vcpu_id].vm = vm;
-               vcpu_args[vcpu_id].vcpu_id = vcpu_id;
-               vcpu_args[vcpu_id].gva = guest_test_virt_mem +
-                                        (vcpu_id * vcpu_memory_bytes);
-               vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
        }
 
        /* Export the shared variables to the guest */
-       sync_global_to_guest(vm, host_page_size);
-       sync_global_to_guest(vm, guest_page_size);
-       sync_global_to_guest(vm, vcpu_args);
+       sync_global_to_guest(vm, perf_test_args);
 
        pr_info("Finished creating vCPUs and starting uffd threads\n");
 
        clock_gettime(CLOCK_MONOTONIC, &start);
 
-       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
                pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-                              &vcpu_args[vcpu_id]);
+                              &perf_test_args.vcpu_args[vcpu_id]);
        }
 
        pr_info("Started all vCPUs\n");
 
        /* Wait for the vcpu threads to quit */
-       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
                pthread_join(vcpu_threads[vcpu_id], NULL);
                PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
        }
 
-       pr_info("All vCPU threads joined\n");
+       ts_diff = timespec_diff_now(start);
 
-       clock_gettime(CLOCK_MONOTONIC, &end);
+       pr_info("All vCPU threads joined\n");
 
        if (use_uffd) {
                char c;
 
                /* Tell the user fault fd handler threads to quit */
-               for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+               for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
                        r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
                        TEST_ASSERT(r == 1, "Unable to write to pipefd");
 
@@ -510,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
                }
        }
 
-       ts_diff = timespec_sub(end, start);
        pr_info("Total guest execution time: %ld.%.9lds\n",
                ts_diff.tv_sec, ts_diff.tv_nsec);
        pr_info("Overall demand paging rate: %f pgs/sec\n",
-               guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+               perf_test_args.vcpu_args[0].pages * nr_vcpus /
+               ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
 
        ucall_uninit(vm);
        kvm_vm_free(vm);
@@ -568,9 +409,8 @@ static void help(char *name)
 
 int main(int argc, char *argv[])
 {
+       int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
        bool mode_selected = false;
-       uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
-       int vcpus = 1;
        unsigned int mode;
        int opt, i;
        bool use_uffd = false;
@@ -619,15 +459,12 @@ int main(int argc, char *argv[])
                                    "A negative UFFD delay is not supported.");
                        break;
                case 'b':
-                       vcpu_memory_bytes = parse_size(optarg);
+                       guest_percpu_mem_size = parse_size(optarg);
                        break;
                case 'v':
-                       vcpus = atoi(optarg);
-                       TEST_ASSERT(vcpus > 0,
-                                   "Must have a positive number of vCPUs");
-                       TEST_ASSERT(vcpus <= MAX_VCPUS,
-                                   "This test does not currently support\n"
-                                   "more than %d vCPUs.", MAX_VCPUS);
+                       nr_vcpus = atoi(optarg);
+                       TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+                                   "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
                        break;
                case 'h':
                default:
@@ -642,7 +479,7 @@ int main(int argc, char *argv[])
                TEST_ASSERT(guest_modes[i].supported,
                            "Guest mode ID %d (%s) not supported.",
                            i, vm_guest_mode_string(i));
-               run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
+               run_test(i, use_uffd, uffd_delay);
        }
 
        return 0;
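
The timespec_diff_now(start) calls that replace the clock_gettime(&end)/timespec_sub(end, start) pairs above come from the shared test_util code. A minimal, self-contained sketch of the presumed helper behaviour follows; it is illustrative only, the real helpers live in tools/testing/selftests/kvm/lib/test_util.c.

  #include <time.h>

  /* Illustrative stand-ins for the shared timespec helpers. */
  static struct timespec timespec_sub(struct timespec a, struct timespec b)
  {
          struct timespec r = {
                  .tv_sec  = a.tv_sec - b.tv_sec,
                  .tv_nsec = a.tv_nsec - b.tv_nsec,
          };

          if (r.tv_nsec < 0) {
                  r.tv_sec--;
                  r.tv_nsec += 1000000000;
          }
          return r;
  }

  /* Sample the clock once and return the elapsed time since 'start'. */
  static struct timespec timespec_diff_now(struct timespec start)
  {
          struct timespec end;

          clock_gettime(CLOCK_MONOTONIC, &end);
          return timespec_sub(end, start);
  }

  int main(void)
  {
          struct timespec start, delta;

          clock_gettime(CLOCK_MONOTONIC, &start);
          delta = timespec_diff_now(start);
          return delta.tv_sec == 0 ? 0 : 1;
  }
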
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
new file mode 100644 (file)
index 0000000..85c9b8f
--- /dev/null
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging performance test
+ *
+ * Based on dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop) */
+#define TEST_HOST_LOOP_N               2UL
+
+/* Host variables */
+static bool host_quit;
+static uint64_t iteration;
+static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
+
+static void *vcpu_worker(void *data)
+{
+       int ret;
+       struct kvm_vm *vm = perf_test_args.vm;
+       uint64_t pages_count = 0;
+       struct kvm_run *run;
+       struct timespec start;
+       struct timespec ts_diff;
+       struct timespec total = (struct timespec){0};
+       struct timespec avg;
+       struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+       int vcpu_id = vcpu_args->vcpu_id;
+
+       vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+       run = vcpu_state(vm, vcpu_id);
+
+       while (!READ_ONCE(host_quit)) {
+               uint64_t current_iteration = READ_ONCE(iteration);
+
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               ret = _vcpu_run(vm, vcpu_id);
+               ts_diff = timespec_diff_now(start);
+
+               TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+               TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
+                           "Invalid guest sync status: exit_reason=%s\n",
+                           exit_reason_str(run->exit_reason));
+
+               pr_debug("Got sync event from vCPU %d\n", vcpu_id);
+               vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+               pr_debug("vCPU %d updated last completed iteration to %lu\n",
+                        vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
+
+               if (current_iteration) {
+                       pages_count += vcpu_args->pages;
+                       total = timespec_add(total, ts_diff);
+                       pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
+                               vcpu_id, current_iteration, ts_diff.tv_sec,
+                               ts_diff.tv_nsec);
+               } else {
+                       pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
+                               vcpu_id, current_iteration, ts_diff.tv_sec,
+                               ts_diff.tv_nsec);
+               }
+
+               while (current_iteration == READ_ONCE(iteration) &&
+                      !READ_ONCE(host_quit)) {}
+       }
+
+       avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
+       pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+               vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
+               total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+       return NULL;
+}
+
+#ifdef USE_CLEAR_DIRTY_LOG
+static u64 dirty_log_manual_caps;
+#endif
+
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+                    uint64_t phys_offset, int wr_fract)
+{
+       pthread_t *vcpu_threads;
+       struct kvm_vm *vm;
+       unsigned long *bmap;
+       uint64_t guest_num_pages;
+       uint64_t host_num_pages;
+       int vcpu_id;
+       struct timespec start;
+       struct timespec ts_diff;
+       struct timespec get_dirty_log_total = (struct timespec){0};
+       struct timespec vcpu_dirty_total = (struct timespec){0};
+       struct timespec avg;
+#ifdef USE_CLEAR_DIRTY_LOG
+       struct kvm_enable_cap cap = {};
+       struct timespec clear_dirty_log_total = (struct timespec){0};
+#endif
+
+       vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+
+       perf_test_args.wr_fract = wr_fract;
+
+       guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
+       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+       host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+       bmap = bitmap_alloc(host_num_pages);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+       cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+       cap.args[0] = dirty_log_manual_caps;
+       vm_enable_cap(vm, &cap);
+#endif
+
+       vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+       TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+       add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
+       sync_global_to_guest(vm, perf_test_args);
+
+       /* Start the iterations */
+       iteration = 0;
+       host_quit = false;
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+               pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+                              &perf_test_args.vcpu_args[vcpu_id]);
+       }
+
+       /* Allow the vCPU to populate memory */
+       pr_debug("Starting iteration %lu - Populating\n", iteration);
+       while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+               pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n",
+                       iteration);
+
+       ts_diff = timespec_diff_now(start);
+       pr_info("Populate memory time: %ld.%.9lds\n",
+               ts_diff.tv_sec, ts_diff.tv_nsec);
+
+       /* Enable dirty logging */
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+                               KVM_MEM_LOG_DIRTY_PAGES);
+       ts_diff = timespec_diff_now(start);
+       pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
+               ts_diff.tv_sec, ts_diff.tv_nsec);
+
+       while (iteration < iterations) {
+               /*
+                * Incrementing the iteration number will start the vCPUs
+                * dirtying memory again.
+                */
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               iteration++;
+
+               pr_debug("Starting iteration %lu\n", iteration);
+               for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+                       while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+                               pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
+                                        vcpu_id, iteration);
+               }
+
+               ts_diff = timespec_diff_now(start);
+               vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
+               pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
+                       iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+               ts_diff = timespec_diff_now(start);
+               get_dirty_log_total = timespec_add(get_dirty_log_total,
+                                                  ts_diff);
+               pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
+                       iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+                                      host_num_pages);
+
+               ts_diff = timespec_diff_now(start);
+               clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+                                                    ts_diff);
+               pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+                       iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+#endif
+       }
+
+       /* Tell the vcpu thread to quit */
+       host_quit = true;
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+               pthread_join(vcpu_threads[vcpu_id], NULL);
+
+       /* Disable dirty logging */
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+       ts_diff = timespec_diff_now(start);
+       pr_info("Disabling dirty logging time: %ld.%.9lds\n",
+               ts_diff.tv_sec, ts_diff.tv_nsec);
+
+       avg = timespec_div(get_dirty_log_total, iterations);
+       pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+               iterations, get_dirty_log_total.tv_sec,
+               get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+       avg = timespec_div(clear_dirty_log_total, iterations);
+       pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+               iterations, clear_dirty_log_total.tv_sec,
+               clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+#endif
+
+       free(bmap);
+       free(vcpu_threads);
+       ucall_uninit(vm);
+       kvm_vm_free(vm);
+}
+
+struct guest_mode {
+       bool supported;
+       bool enabled;
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+       guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+       int i;
+
+       puts("");
+       printf("usage: %s [-h] [-i iterations] [-p offset] "
+              "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
+       puts("");
+       printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+              TEST_HOST_LOOP_N);
+       printf(" -p: specify guest physical test memory offset\n"
+              "     Warning: a low offset can conflict with the loaded test code.\n");
+       printf(" -m: specify the guest mode ID to test "
+              "(default: test all supported modes)\n"
+              "     This option may be used multiple times.\n"
+              "     Guest mode IDs:\n");
+       for (i = 0; i < NUM_VM_MODES; ++i) {
+               printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+                      guest_modes[i].supported ? " (supported)" : "");
+       }
+       printf(" -b: specify the size of the memory region which should be\n"
+              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+              "     (default: 1G)\n");
+       printf(" -f: specify the fraction of pages which should be written to\n"
+              "     as opposed to simply read, in the form\n"
+              "     1/<fraction of pages to write>.\n"
+              "     (default: 1 i.e. all pages are written to.)\n");
+       printf(" -v: specify the number of vCPUs to run.\n");
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned long iterations = TEST_HOST_LOOP_N;
+       bool mode_selected = false;
+       uint64_t phys_offset = 0;
+       unsigned int mode;
+       int opt, i;
+       int wr_fract = 1;
+
+#ifdef USE_CLEAR_DIRTY_LOG
+       dirty_log_manual_caps =
+               kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+       if (!dirty_log_manual_caps) {
+               print_skip("KVM_CLEAR_DIRTY_LOG not available");
+               exit(KSFT_SKIP);
+       }
+       dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+                                 KVM_DIRTY_LOG_INITIALLY_SET);
+#endif
+
+#ifdef __x86_64__
+       guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+       guest_mode_init(VM_MODE_P40V48_4K, true, true);
+       guest_mode_init(VM_MODE_P40V48_64K, true, true);
+
+       {
+               unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+               if (limit >= 52)
+                       guest_mode_init(VM_MODE_P52V48_64K, true, true);
+               if (limit >= 48) {
+                       guest_mode_init(VM_MODE_P48V48_4K, true, true);
+                       guest_mode_init(VM_MODE_P48V48_64K, true, true);
+               }
+       }
+#endif
+#ifdef __s390x__
+       guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+       while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
+               switch (opt) {
+               case 'i':
+                       iterations = strtol(optarg, NULL, 10);
+                       break;
+               case 'p':
+                       phys_offset = strtoull(optarg, NULL, 0);
+                       break;
+               case 'm':
+                       if (!mode_selected) {
+                               for (i = 0; i < NUM_VM_MODES; ++i)
+                                       guest_modes[i].enabled = false;
+                               mode_selected = true;
+                       }
+                       mode = strtoul(optarg, NULL, 10);
+                       TEST_ASSERT(mode < NUM_VM_MODES,
+                                   "Guest mode ID %d too big", mode);
+                       guest_modes[mode].enabled = true;
+                       break;
+               case 'b':
+                       guest_percpu_mem_size = parse_size(optarg);
+                       break;
+               case 'f':
+                       wr_fract = atoi(optarg);
+                       TEST_ASSERT(wr_fract >= 1,
+                                   "Write fraction cannot be less than one");
+                       break;
+               case 'v':
+                       nr_vcpus = atoi(optarg);
+                       TEST_ASSERT(nr_vcpus > 0,
+                                   "Must have a positive number of vCPUs");
+                       TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
+                                   "This test does not currently support\n"
+                                   "more than %d vCPUs.", MAX_VCPUS);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");
+
+       pr_info("Test iterations: %"PRIu64"\n", iterations);
+
+       for (i = 0; i < NUM_VM_MODES; ++i) {
+               if (!guest_modes[i].enabled)
+                       continue;
+               TEST_ASSERT(guest_modes[i].supported,
+                           "Guest mode ID %d (%s) not supported.",
+                           i, vm_guest_mode_string(i));
+               run_test(i, iterations, phys_offset, wr_fract);
+       }
+
+       return 0;
+}
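
The -f option stores its value in perf_test_args.wr_fract, which the shared guest code (see perf_test_util.h below) consults per page index: indices divisible by wr_fract are written, all others are only read, so -f 1 writes every page and -f 2 writes roughly half. A small standalone model of that selection, assuming the same modulo interpretation:

  #include <stdio.h>

  /* Model of the wr_fract selection: with wr_fract == N, page indices
   * divisible by N are written, the remaining pages are only read. */
  int main(void)
  {
          int wr_fract = 2;       /* e.g. "-f 2": write about half the pages */
          int pages = 8;
          int i, writes = 0;

          for (i = 0; i < pages; i++) {
                  if (i % wr_fract == 0)
                          writes++;       /* stands in for the 64-bit store */
                  /* else: stands in for READ_ONCE() of the page */
          }
          printf("%d of %d pages written\n", writes, pages);
          return 0;
  }
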
index 752ec15..54da9cc 100644 (file)
@@ -128,6 +128,78 @@ static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
 static uint64_t host_track_next_count;
 
+enum log_mode_t {
+       /* Only use KVM_GET_DIRTY_LOG for logging */
+       LOG_MODE_DIRTY_LOG = 0,
+
+       /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
+       LOG_MODE_CLEAR_LOG = 1,
+
+       LOG_MODE_NUM,
+
+       /* Run all supported modes */
+       LOG_MODE_ALL = LOG_MODE_NUM,
+};
+
+/* Mode of logging to test.  Default is to run all supported modes */
+static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
+/* Logging mode for current run */
+static enum log_mode_t host_log_mode;
+
+static bool clear_log_supported(void)
+{
+       return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+}
+
+static void clear_log_create_vm_done(struct kvm_vm *vm)
+{
+       struct kvm_enable_cap cap = {};
+       u64 manual_caps;
+
+       manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+       TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
+       manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+                       KVM_DIRTY_LOG_INITIALLY_SET);
+       cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+       cap.args[0] = manual_caps;
+       vm_enable_cap(vm, &cap);
+}
+
+static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+                                         void *bitmap, uint32_t num_pages)
+{
+       kvm_vm_get_dirty_log(vm, slot, bitmap);
+}
+
+static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+                                         void *bitmap, uint32_t num_pages)
+{
+       kvm_vm_get_dirty_log(vm, slot, bitmap);
+       kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
+}
+
+struct log_mode {
+       const char *name;
+       /* Return true if this mode is supported, otherwise false */
+       bool (*supported)(void);
+       /* Hook when the vm creation is done (before vcpu creation) */
+       void (*create_vm_done)(struct kvm_vm *vm);
+       /* Hook to collect the dirty pages into the bitmap provided */
+       void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
+                                    void *bitmap, uint32_t num_pages);
+} log_modes[LOG_MODE_NUM] = {
+       {
+               .name = "dirty-log",
+               .collect_dirty_pages = dirty_log_collect_dirty_pages,
+       },
+       {
+               .name = "clear-log",
+               .supported = clear_log_supported,
+               .create_vm_done = clear_log_create_vm_done,
+               .collect_dirty_pages = clear_log_collect_dirty_pages,
+       },
+};
+
 /*
  * We use this bitmap to track some pages that should have its dirty
  * bit set in the _next_ iteration.  For example, if we detected the
@@ -137,6 +209,44 @@ static uint64_t host_track_next_count;
  */
 static unsigned long *host_bmap_track;
 
+static void log_modes_dump(void)
+{
+       int i;
+
+       printf("all");
+       for (i = 0; i < LOG_MODE_NUM; i++)
+               printf(", %s", log_modes[i].name);
+       printf("\n");
+}
+
+static bool log_mode_supported(void)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       if (mode->supported)
+               return mode->supported();
+
+       return true;
+}
+
+static void log_mode_create_vm_done(struct kvm_vm *vm)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       if (mode->create_vm_done)
+               mode->create_vm_done(vm);
+}
+
+static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
+                                        void *bitmap, uint32_t num_pages)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       TEST_ASSERT(mode->collect_dirty_pages != NULL,
+                   "collect_dirty_pages() is required for any log mode!");
+       mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
+}
+
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
        uint64_t i;
@@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
                                    page);
                }
 
-               if (test_bit_le(page, bmap)) {
+               if (test_and_clear_bit_le(page, bmap)) {
                        host_dirty_count++;
                        /*
                         * If the bit is set, the value written onto
@@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 
        pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-       vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+       vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
        kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
        vm_create_irqchip(vm);
 #endif
+       log_mode_create_vm_done(vm);
        vm_vcpu_add_default(vm, vcpuid, guest_code);
        return vm;
 }
@@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 #define DIRTY_MEM_BITS 30 /* 1G */
 #define PAGE_SHIFT_4K  12
 
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
-
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                     unsigned long interval, uint64_t phys_offset)
 {
@@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        struct kvm_vm *vm;
        unsigned long *bmap;
 
+       if (!log_mode_supported()) {
+               print_skip("Log mode '%s' not supported",
+                          log_modes[host_log_mode].name);
+               return;
+       }
+
        /*
         * We reserve page table for 2 times of extra dirty mem which
         * will definitely cover the original (1G+) test range.  Here
@@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        bmap = bitmap_alloc(host_num_pages);
        host_bmap_track = bitmap_alloc(host_num_pages);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-       struct kvm_enable_cap cap = {};
-
-       cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
-       cap.args[0] = dirty_log_manual_caps;
-       vm_enable_cap(vm, &cap);
-#endif
-
        /* Add an extra memory slot for testing dirty logging */
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
                                    guest_test_phys_mem,
@@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        while (iteration < iterations) {
                /* Give the vcpu thread some time to dirty some pages */
                usleep(interval * 1000);
-               kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-#ifdef USE_CLEAR_DIRTY_LOG
-               kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
-                                      host_num_pages);
-#endif
+               log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
+                                            bmap, host_num_pages);
                vm_dirty_log_verify(mode, bmap);
                iteration++;
                sync_global_to_guest(vm, iteration);
@@ -410,6 +512,9 @@ static void help(char *name)
               TEST_HOST_LOOP_INTERVAL);
        printf(" -p: specify guest physical test memory offset\n"
               "     Warning: a low offset can conflict with the loaded test code.\n");
+       printf(" -M: specify the host logging mode "
+              "(default: run all log modes).  Supported modes: \n\t");
+       log_modes_dump();
        printf(" -m: specify the guest mode ID to test "
               "(default: test all supported modes)\n"
               "     This option may be used multiple times.\n"
@@ -429,18 +534,7 @@ int main(int argc, char *argv[])
        bool mode_selected = false;
        uint64_t phys_offset = 0;
        unsigned int mode;
-       int opt, i;
-
-#ifdef USE_CLEAR_DIRTY_LOG
-       dirty_log_manual_caps =
-               kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-       if (!dirty_log_manual_caps) {
-               print_skip("KVM_CLEAR_DIRTY_LOG not available");
-               exit(KSFT_SKIP);
-       }
-       dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
-                                 KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
+       int opt, i, j;
 
 #ifdef __x86_64__
        guest_mode_init(VM_MODE_PXXV48_4K, true, true);
@@ -464,7 +558,7 @@ int main(int argc, char *argv[])
        guest_mode_init(VM_MODE_P40V48_4K, true, true);
 #endif
 
-       while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
+       while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
                switch (opt) {
                case 'i':
                        iterations = strtol(optarg, NULL, 10);
@@ -486,6 +580,26 @@ int main(int argc, char *argv[])
                                    "Guest mode ID %d too big", mode);
                        guest_modes[mode].enabled = true;
                        break;
+               case 'M':
+                       if (!strcmp(optarg, "all")) {
+                               host_log_mode_option = LOG_MODE_ALL;
+                               break;
+                       }
+                       for (i = 0; i < LOG_MODE_NUM; i++) {
+                               if (!strcmp(optarg, log_modes[i].name)) {
+                                       pr_info("Setting log mode to: '%s'\n",
+                                               optarg);
+                                       host_log_mode_option = i;
+                                       break;
+                               }
+                       }
+                       if (i == LOG_MODE_NUM) {
+                               printf("Log mode '%s' invalid. Please choose "
+                                      "from: ", optarg);
+                               log_modes_dump();
+                               exit(1);
+                       }
+                       break;
                case 'h':
                default:
                        help(argv[0]);
@@ -507,7 +621,18 @@ int main(int argc, char *argv[])
                TEST_ASSERT(guest_modes[i].supported,
                            "Guest mode ID %d (%s) not supported.",
                            i, vm_guest_mode_string(i));
-               run_test(i, iterations, interval, phys_offset);
+               if (host_log_mode_option == LOG_MODE_ALL) {
+                       /* Run each log mode */
+                       for (j = 0; j < LOG_MODE_NUM; j++) {
+                               pr_info("Testing Log Mode '%s'\n",
+                                       log_modes[j].name);
+                               host_log_mode = j;
+                               run_test(i, iterations, interval, phys_offset);
+                       }
+               } else {
+                       host_log_mode = host_log_mode_option;
+                       run_test(i, iterations, interval, phys_offset);
+               }
        }
 
        return 0;
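
A behavioural detail from the hunks above: vm_dirty_log_verify() now uses test_and_clear_bit_le() rather than test_bit_le(), so the host bitmap is cleared as it is scanned and every iteration starts from a clean slate regardless of log mode. Below is a simplified, non-atomic, little-endian-host model of that accessor; the real helper comes from the tools/ bitmap headers and also handles big-endian hosts.

  #include <limits.h>

  #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

  /* Simplified model of test_and_clear_bit_le(): report the bit, then clear it. */
  static int test_and_clear_bit_le_model(unsigned int nr, unsigned long *addr)
  {
          unsigned long mask = 1UL << (nr % BITS_PER_LONG);
          unsigned long *word = addr + nr / BITS_PER_LONG;
          int was_set = (*word & mask) != 0;

          *word &= ~mask;
          return was_set;
  }

  int main(void)
  {
          unsigned long bmap[2] = { 0 };

          bmap[0] |= 1UL << 5;
          /* First call reports the bit and clears it, second call sees it gone. */
          return (test_and_clear_bit_le_model(5, bmap) &&
                  !test_and_clear_bit_le_model(5, bmap)) ? 0 : 1;
  }
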
index 919e161..7d29aa7 100644 (file)
@@ -63,9 +63,11 @@ enum vm_mem_backing_src_type {
 
 int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+                   struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
@@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_guest_debug *debug);
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
                       struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 
@@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
        memcpy(&(g), _p, sizeof(g));                            \
 })
 
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
 /* Common ucalls */
 enum {
        UCALL_NONE,
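
vcpu_enable_cap(), declared above, is the per-vCPU counterpart of vm_enable_cap() and is what the new kvm_pv_test uses to turn on paravirtual CPUID enforcement. The raw-ioctl sketch below shows the general shape of enabling a per-vCPU capability outside the selftest library; the numeric fallback for the capability is an assumption taken from the 5.10 uapi, not from this diff.

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/kvm.h>

  #ifndef KVM_CAP_ENFORCE_PV_FEATURE_CPUID
  #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190    /* assumed 5.10 uapi value */
  #endif

  int main(void)
  {
          struct kvm_enable_cap cap = { 0 };
          int kvm, vm, vcpu;

          kvm = open("/dev/kvm", O_RDWR);
          if (kvm < 0)
                  return 1;
          vm = ioctl(kvm, KVM_CREATE_VM, 0);
          vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
          if (vm < 0 || vcpu < 0)
                  return 1;

          /* Enable the per-vCPU capability; real code should probe
           * KVM_CHECK_EXTENSION first, as the selftest helpers do. */
          cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
          cap.args[0] = 1;
          if (ioctl(vcpu, KVM_ENABLE_CAP, &cap))
                  perror("KVM_ENABLE_CAP");

          close(vcpu);
          close(vm);
          close(kvm);
          return 0;
  }
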
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
new file mode 100644 (file)
index 0000000..2618052
--- /dev/null
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tools/testing/selftests/kvm/include/perf_test_util.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
+#define SELFTEST_KVM_PERF_TEST_UTIL_H
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAX_VCPUS 512
+
+#define PAGE_SHIFT_4K  12
+#define PTES_PER_4K_PT 512
+
+#define TEST_MEM_SLOT_INDEX            1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM         0xc0000000
+
+#define DEFAULT_PER_VCPU_MEM_SIZE      (1 << 30) /* 1G */
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+/* Number of VCPUs for the test */
+static int nr_vcpus = 1;
+
+struct vcpu_args {
+       uint64_t gva;
+       uint64_t pages;
+
+       /* Only used by the host userspace part of the vCPU thread */
+       int vcpu_id;
+};
+
+struct perf_test_args {
+       struct kvm_vm *vm;
+       uint64_t host_page_size;
+       uint64_t guest_page_size;
+       int wr_fract;
+
+       struct vcpu_args vcpu_args[MAX_VCPUS];
+};
+
+static struct perf_test_args perf_test_args;
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+       struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+       uint64_t gva;
+       uint64_t pages;
+       int i;
+
+       /* Make sure vCPU args data structure is not corrupt. */
+       GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
+
+       gva = vcpu_args->gva;
+       pages = vcpu_args->pages;
+
+       while (true) {
+               for (i = 0; i < pages; i++) {
+                       uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+
+                       if (i % perf_test_args.wr_fract == 0)
+                               *(uint64_t *)addr = 0x0123456789ABCDEF;
+                       else
+                               READ_ONCE(*(uint64_t *)addr);
+               }
+
+               GUEST_SYNC(1);
+       }
+}
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
+                               uint64_t vcpu_memory_bytes)
+{
+       struct kvm_vm *vm;
+       uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
+       uint64_t guest_num_pages;
+
+       /* Account for a few pages per-vCPU for stacks */
+       pages += DEFAULT_STACK_PGS * vcpus;
+
+       /*
+        * Reserve twice the amount of memory needed to map the test region and
+        * the page table / stacks region for page tables. Do the calculation
+        * with the 4K page size, the smallest of all archs; a guest using e.g.
+        * 64K pages will need even less memory for page tables.
+        */
+       pages += (2 * pages) / PTES_PER_4K_PT;
+       pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
+                PTES_PER_4K_PT;
+       pages = vm_adjust_num_guest_pages(mode, pages);
+
+       pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+       vm = vm_create(mode, pages, O_RDWR);
+       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+       vm_create_irqchip(vm);
+#endif
+
+       perf_test_args.vm = vm;
+       perf_test_args.guest_page_size = vm_get_page_size(vm);
+       perf_test_args.host_page_size = getpagesize();
+
+       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+                   "Guest memory size is not guest page size aligned.");
+
+       guest_num_pages = (vcpus * vcpu_memory_bytes) /
+                         perf_test_args.guest_page_size;
+       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+       /*
+        * The guest test region must fit within the guest physical address
+        * space; otherwise the test cannot possibly work.
+        */
+       TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+                   "Requested more guest memory than address space allows.\n"
+                   "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx\n",
+                   guest_num_pages, vm_get_max_gfn(vm), vcpus,
+                   vcpu_memory_bytes);
+
+       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+                   "Guest memory size is not host page size aligned.");
+
+       guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+                             perf_test_args.guest_page_size;
+       guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+
+#ifdef __s390x__
+       /* Align to 1M (segment size) */
+       guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+       pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+       /* Add an extra memory slot for testing */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   guest_test_phys_mem,
+                                   TEST_MEM_SLOT_INDEX,
+                                   guest_num_pages, 0);
+
+       /* Do mapping for the test memory slot */
+       virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+       ucall_init(vm, NULL);
+
+       return vm;
+}
+
+static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+{
+       vm_paddr_t vcpu_gpa;
+       struct vcpu_args *vcpu_args;
+       int vcpu_id;
+
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+               vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+
+               vm_vcpu_add_default(vm, vcpu_id, guest_code);
+
+#ifdef __x86_64__
+               vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
+#endif
+
+               vcpu_args->vcpu_id = vcpu_id;
+               vcpu_args->gva = guest_test_virt_mem +
+                                (vcpu_id * vcpu_memory_bytes);
+               vcpu_args->pages = vcpu_memory_bytes /
+                                  perf_test_args.guest_page_size;
+
+               vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+               pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+                        vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+       }
+}
+
+#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
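
A rough sketch of how a test built on this header might wire the pieces
together is below; vcpu_worker() and the measurement/join logic are
illustrative placeholders, not part of this file.

    #include <pthread.h>
    #include "perf_test_util.h"

    /* Illustrative vCPU thread: run until the guest hits GUEST_SYNC(1). */
    static void *vcpu_worker(void *data)
    {
            struct vcpu_args *vcpu_args = data;

            vcpu_run(perf_test_args.vm, vcpu_args->vcpu_id);
            return NULL;
    }

    int main(void)
    {
            pthread_t threads[MAX_VCPUS];
            struct kvm_vm *vm;
            int i;

            vm = create_vm(VM_MODE_DEFAULT, nr_vcpus, guest_percpu_mem_size);
            perf_test_args.wr_fract = 1;    /* write to every page */
            add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);

            for (i = 0; i < nr_vcpus; i++) {
                    /* guest_code() takes the vcpu_id as its argument. */
                    vcpu_args_set(vm, i, 1, i);
                    pthread_create(&threads[i], NULL, vcpu_worker,
                                   &perf_test_args.vcpu_args[i]);
            }

            /* ... measure, pthread_join() the workers, kvm_vm_free(vm) ... */
            return 0;
    }
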
index 5eb01bf..ffffa56 100644 (file)
@@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts);
 struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
 struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
 struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_diff_now(struct timespec start);
+struct timespec timespec_div(struct timespec ts, int divisor);
 
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
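
The two new helpers cover the common pattern of timing a loop and reporting a
per-iteration average. A small illustrative snippet (not part of the patch):

    #include <time.h>
    #include "test_util.h"

    /* Time 'iterations' runs of a workload and print the average cost. */
    static void report_avg(void (*workload)(void), int iterations)
    {
            struct timespec start, total, avg;
            int i;

            clock_gettime(CLOCK_MONOTONIC, &start);
            for (i = 0; i < iterations; i++)
                    workload();

            total = timespec_diff_now(start);
            avg = timespec_div(total, iterations);
            pr_info("total: %ld.%.9lds, avg: %ld.%.9lds per iteration\n",
                    total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
    }
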
index 82b7fe1..8e61340 100644 (file)
@@ -36,6 +36,8 @@
 #define X86_CR4_SMAP           (1ul << 21)
 #define X86_CR4_PKE            (1ul << 22)
 
+#define UNEXPECTED_VECTOR_PORT 0xfff0u
+
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
        u64 rax;
@@ -59,7 +61,7 @@ struct gpr64_regs {
 struct desc64 {
        uint16_t limit0;
        uint16_t base0;
-       unsigned base1:8, s:1, type:4, dpl:2, p:1;
+       unsigned base1:8, type:4, s:1, dpl:2, p:1;
        unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
        uint32_t base3;
        uint32_t zero1;
@@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void)
        return idt;
 }
 
+static inline void outl(uint16_t port, uint32_t value)
+{
+       __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
 #define SET_XMM(__var, __xmm) \
        asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
 
@@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void);
 uint32_t kvm_get_cpuid_max_extended(void);
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
 
+struct ex_regs {
+       uint64_t rax, rcx, rdx, rbx;
+       uint64_t rbp, rsi, rdi;
+       uint64_t r8, r9, r10, r11;
+       uint64_t r12, r13, r14, r15;
+       uint64_t vector;
+       uint64_t error_code;
+       uint64_t rip;
+       uint64_t cs;
+       uint64_t rflags;
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+                       void (*handler)(struct ex_regs *));
+
+/*
+ * set_cpuid() - overwrites a matching cpuid entry with the provided value.
+ *              Matches based on ent->function and ent->index; returns true
+ *              if a match was found and successfully overwritten.
+ * @cpuid: the kvm cpuid list to modify.
+ * @ent: cpuid entry whose contents replace the matching entry
+ */
+bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+                      uint64_t a3);
+
 /*
  * Basic CPU control in CR0
  */
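
vm_init_descriptor_tables(), vcpu_init_descriptor_tables() and
vm_handle_exception() are the new guest exception-handling hooks: the first
two build and install the IDT plus a per-VM handler table, the third registers
a guest-side callback for a single vector. A condensed setup sketch, modeled
on kvm_pv_test further below (the handler and counter here are illustrative):

    #include "kvm_util.h"
    #include "processor.h"

    static int nr_faults;   /* guest-side counter, illustrative */

    static void guest_fault_handler(struct ex_regs *regs)
    {
            nr_faults++;
            /*
             * A real handler typically also advances regs->rip past the
             * faulting instruction, as guest_gp_handler() does below.
             */
    }

    static void setup_fault_handling(struct kvm_vm *vm, uint32_t vcpuid)
    {
            vm_init_descriptor_tables(vm);           /* guest IDT + handler table */
            vcpu_init_descriptor_tables(vm, vcpuid); /* load IDT/GDT on the vCPU */
            vm_handle_exception(vm, GP_VECTOR, guest_fault_handler);
    }
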
index 2afa661..d6c32c3 100644 (file)
@@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 
        va_end(ap);
 }
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
index c8e0ec2..2f37b90 100644 (file)
@@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
        struct kvm_run *run = vcpu_state(vm, vcpu_id);
        struct ucall ucall = {};
 
+       if (uc)
+               memset(uc, 0, sizeof(*uc));
+
        if (run->exit_reason == KVM_EXIT_MMIO &&
            run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
                vm_vaddr_t gva;
index 3327ceb..126c672 100644 (file)
@@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
        return ret;
 }
 
+/* VCPU Enable Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpu_id - VCPU
+ *   cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VCPU.
+ */
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+                   struct kvm_enable_cap *cap)
+{
+       struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
+       int r;
+
+       TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
+
+       r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
+       TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
+                       "  rc: %i, errno: %i", r, errno);
+
+       return r;
+}
+
 static void vm_open(struct kvm_vm *vm, int perm)
 {
        vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -152,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params)
  * descriptor to control the created VM is created with the permissions
  * given by perm (e.g. O_RDWR).
  */
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
        struct kvm_vm *vm;
 
@@ -243,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
        return vm;
 }
 
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
-{
-       return _vm_create(mode, phy_pages, perm);
-}
-
 /*
  * VM Restart
  *
@@ -1204,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
        do {
                rc = ioctl(vcpu->fd, KVM_RUN, NULL);
        } while (rc == -1 && errno == EINTR);
+
+       assert_on_unhandled_exception(vm, vcpuid);
+
        return rc;
 }
 
@@ -1260,6 +1286,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
                "rc: %i errno: %i", ret, errno);
 }
 
+/*
+ * VM VCPU Get Reg List
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   None
+ *
+ * Return:
+ *   A pointer to an allocated struct kvm_reg_list
+ *
+ * Get the list of guest registers which are supported for
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
+ */
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
+       int ret;
+
+       ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
+       TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
+       reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
+       reg_list->n = reg_list_n.n;
+       vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
+       return reg_list;
+}
+
 /*
  * VM VCPU Regs Get
  *
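
vcpu_get_reg_list() hides the usual two-step KVM_GET_REG_LIST dance: probe
with n = 0 (which must fail with E2BIG and report the count), then allocate
and fetch the real list. Callers own the returned allocation. An illustrative
consumer (not part of the patch):

    #include <stdio.h>
    #include <stdlib.h>
    #include "kvm_util.h"

    /* Print every register id the vCPU supports for GET/SET_ONE_REG. */
    static void dump_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
    {
            struct kvm_reg_list *list = vcpu_get_reg_list(vm, vcpuid);
            __u64 i;

            for (i = 0; i < list->n; i++)
                    printf("reg: 0x%llx\n", (unsigned long long)list->reg[i]);

            free(list);     /* the helper calloc()s the list */
    }
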
index 2ef4465..f07d383 100644 (file)
@@ -50,6 +50,8 @@ struct kvm_vm {
        vm_paddr_t pgd;
        vm_vaddr_t gdt;
        vm_vaddr_t tss;
+       vm_vaddr_t idt;
+       vm_vaddr_t handlers;
 };
 
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
index a88c5d6..7349bb2 100644 (file)
@@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
        fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
                indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
 }
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
index fd589dc..9d3b0f1 100644 (file)
@@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
        struct kvm_run *run = vcpu_state(vm, vcpu_id);
        struct ucall ucall = {};
 
+       if (uc)
+               memset(uc, 0, sizeof(*uc));
+
        if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
            run->s390_sieic.icptcode == 4 &&
            (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */
index 689e97c..8e04c0b 100644 (file)
@@ -4,10 +4,13 @@
  *
  * Copyright (C) 2020, Google LLC.
  */
-#include <stdlib.h>
+
+#include <assert.h>
 #include <ctype.h>
 #include <limits.h>
-#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
 #include "test_util.h"
 
 /*
@@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
        return timespec_add_ns((struct timespec){0}, ns1 - ns2);
 }
 
+struct timespec timespec_diff_now(struct timespec start)
+{
+       struct timespec end;
+
+       clock_gettime(CLOCK_MONOTONIC, &end);
+       return timespec_sub(end, start);
+}
+
+struct timespec timespec_div(struct timespec ts, int divisor)
+{
+       int64_t ns = timespec_to_ns(ts) / divisor;
+
+       return timespec_add_ns((struct timespec){0}, ns);
+}
+
 void print_skip(const char *fmt, ...)
 {
        va_list ap;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
new file mode 100644 (file)
index 0000000..aaf7bc7
--- /dev/null
@@ -0,0 +1,81 @@
+handle_exception:
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+       push %r11
+       push %r10
+       push %r9
+       push %r8
+
+       push %rdi
+       push %rsi
+       push %rbp
+       push %rbx
+       push %rdx
+       push %rcx
+       push %rax
+       mov %rsp, %rdi
+
+       call route_exception
+
+       pop %rax
+       pop %rcx
+       pop %rdx
+       pop %rbx
+       pop %rbp
+       pop %rsi
+       pop %rdi
+       pop %r8
+       pop %r9
+       pop %r10
+       pop %r11
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+
+       /* Discard vector and error code. */
+       add $16, %rsp
+       iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+       vector = \from
+       .rept \to - \from + 1
+       .align 8
+
+       /* Fetch current address and append it to idt_handlers. */
+       current_handler = .
+.pushsection .rodata
+.quad current_handler
+.popsection
+
+       .if ! \has_error
+       pushq $0
+       .endif
+       pushq $vector
+       jmp handle_exception
+       vector = vector + 1
+       .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+       HANDLERS has_error=0 from=0  to=7
+       HANDLERS has_error=1 from=8  to=8
+       HANDLERS has_error=0 from=9  to=9
+       HANDLERS has_error=1 from=10 to=14
+       HANDLERS has_error=0 from=15 to=16
+       HANDLERS has_error=1 from=17 to=17
+       HANDLERS has_error=0 from=18 to=255
+
+.section        .note.GNU-stack, "", %progbits
index f6eb34e..d10c5c0 100644 (file)
 #include "../kvm_util_internal.h"
 #include "processor.h"
 
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define DEFAULT_CODE_SELECTOR 0x8
+#define DEFAULT_DATA_SELECTOR 0x10
+
 /* Minimum physical address used for virtual translation tables. */
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
 
+vm_vaddr_t exception_handlers;
+
 /* Virtual translation table structure declarations */
 struct pageMapL4Entry {
        uint64_t present:1;
@@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
        desc->limit0 = segp->limit & 0xFFFF;
        desc->base0 = segp->base & 0xFFFF;
        desc->base1 = segp->base >> 16;
-       desc->s = segp->s;
        desc->type = segp->type;
+       desc->s = segp->s;
        desc->dpl = segp->dpl;
        desc->p = segp->present;
        desc->limit1 = segp->limit >> 16;
+       desc->avl = segp->avl;
        desc->l = segp->l;
        desc->db = segp->db;
        desc->g = segp->g;
@@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
                sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
 
                kvm_seg_set_unusable(&sregs.ldt);
-               kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
-               kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
-               kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+               kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
+               kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
+               kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
                kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
                break;
 
@@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
                *va_bits = (entry->eax >> 8) & 0xff;
        }
 }
+
+struct idt_entry {
+       uint16_t offset0;
+       uint16_t selector;
+       uint16_t ist : 3;
+       uint16_t : 5;
+       uint16_t type : 4;
+       uint16_t : 1;
+       uint16_t dpl : 2;
+       uint16_t p : 1;
+       uint16_t offset1;
+       uint32_t offset2;
+       uint32_t reserved;
+};
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+                         int dpl, unsigned short selector)
+{
+       struct idt_entry *base =
+               (struct idt_entry *)addr_gva2hva(vm, vm->idt);
+       struct idt_entry *e = &base[vector];
+
+       memset(e, 0, sizeof(*e));
+       e->offset0 = addr;
+       e->selector = selector;
+       e->ist = 0;
+       e->type = 14;
+       e->dpl = dpl;
+       e->p = 1;
+       e->offset1 = addr >> 16;
+       e->offset2 = addr >> 32;
+}
+
+void kvm_exit_unexpected_vector(uint32_t value)
+{
+       outl(UNEXPECTED_VECTOR_PORT, value);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+       typedef void(*handler)(struct ex_regs *);
+       handler *handlers = (handler *)exception_handlers;
+
+       if (handlers && handlers[regs->vector]) {
+               handlers[regs->vector](regs);
+               return;
+       }
+
+       kvm_exit_unexpected_vector(regs->vector);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+       extern void *idt_handlers;
+       int i;
+
+       vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
+       vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+       /* Handlers have the same address in both address spaces. */
+       for (i = 0; i < NUM_INTERRUPTS; i++)
+               set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
+                       DEFAULT_CODE_SELECTOR);
+}
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct kvm_sregs sregs;
+
+       vcpu_sregs_get(vm, vcpuid, &sregs);
+       sregs.idt.base = vm->idt;
+       sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+       sregs.gdt.base = vm->gdt;
+       sregs.gdt.limit = getpagesize() - 1;
+       kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
+       vcpu_sregs_set(vm, vcpuid, &sregs);
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+                        void (*handler)(struct ex_regs *))
+{
+       vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+       handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
+               && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
+               && vcpu_state(vm, vcpuid)->io.size == 4) {
+               /* Grab pointer to io data */
+               uint32_t *data = (void *)vcpu_state(vm, vcpuid)
+                       + vcpu_state(vm, vcpuid)->io.data_offset;
+
+               TEST_ASSERT(false,
+                           "Unexpected vectored event in guest (vector:0x%x)",
+                           *data);
+       }
+}
+
+bool set_cpuid(struct kvm_cpuid2 *cpuid,
+              struct kvm_cpuid_entry2 *ent)
+{
+       int i;
+
+       for (i = 0; i < cpuid->nent; i++) {
+               struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+               if (cur->function != ent->function || cur->index != ent->index)
+                       continue;
+
+               memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
+               return true;
+       }
+
+       return false;
+}
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+                      uint64_t a3)
+{
+       uint64_t r;
+
+       asm volatile("vmcall"
+                    : "=a"(r)
+                    : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+       return r;
+}
index da4d89a..a348997 100644 (file)
@@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
        struct kvm_run *run = vcpu_state(vm, vcpu_id);
        struct ucall ucall = {};
 
+       if (uc)
+               memset(uc, 0, sizeof(*uc));
+
        if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
                struct kvm_regs regs;
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644 (file)
index 0000000..b10a274
--- /dev/null
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+static u64 do_rdmsr(u32 idx)
+{
+       u32 lo, hi;
+
+       asm volatile("rdmsr_start: rdmsr;"
+                    "rdmsr_end:"
+                    : "=a"(lo), "=d"(hi)
+                    : "c"(idx));
+
+       return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+static void do_wrmsr(u32 idx, u64 val)
+{
+       u32 lo, hi;
+
+       lo = val;
+       hi = val >> 32;
+
+       asm volatile("wrmsr_start: wrmsr;"
+                    "wrmsr_end:"
+                    : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       unsigned char *rip = (unsigned char *)regs->rip;
+       bool r, w;
+
+       r = rip == &rdmsr_start;
+       w = rip == &wrmsr_start;
+       GUEST_ASSERT(r || w);
+
+       nr_gp++;
+
+       if (r)
+               regs->rip = (uint64_t)&rdmsr_end;
+       else
+               regs->rip = (uint64_t)&wrmsr_end;
+}
+
+struct msr_data {
+       uint32_t idx;
+       const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+       TEST_MSR(MSR_KVM_SYSTEM_TIME),
+       TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+       TEST_MSR(MSR_KVM_WALL_CLOCK),
+       TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+       TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+       TEST_MSR(MSR_KVM_STEAL_TIME),
+       TEST_MSR(MSR_KVM_PV_EOI_EN),
+       TEST_MSR(MSR_KVM_POLL_CONTROL),
+       TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+       TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+       PR_MSR(msr);
+       do_rdmsr(msr->idx);
+       GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+
+       nr_gp = 0;
+       do_wrmsr(msr->idx, 0);
+       GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+       nr_gp = 0;
+}
+
+struct hcall_data {
+       uint64_t nr;
+       const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+       TEST_HCALL(KVM_HC_KICK_CPU),
+       TEST_HCALL(KVM_HC_SEND_IPI),
+       TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+       uint64_t r;
+
+       PR_HCALL(hc);
+       r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+       GUEST_ASSERT(r == -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+               test_msr(&msrs_to_test[i]);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+               test_hcall(&hcalls_to_test[i]);
+       }
+
+       GUEST_DONE();
+}
+
+static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
+{
+       struct kvm_cpuid_entry2 ent = {0};
+
+       ent.function = KVM_CPUID_FEATURES;
+       TEST_ASSERT(set_cpuid(cpuid, &ent),
+                   "failed to clear KVM_CPUID_FEATURES leaf");
+}
+
+static void pr_msr(struct ucall *uc)
+{
+       struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+       pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+       struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+       pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+       TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
+                 __FILE__, uc->args[1]);
+}
+
+#define VCPU_ID 0
+
+static void enter_guest(struct kvm_vm *vm)
+{
+       struct kvm_run *run;
+       struct ucall uc;
+       int r;
+
+       run = vcpu_state(vm, VCPU_ID);
+
+       while (true) {
+               r = _vcpu_run(vm, VCPU_ID);
+               TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "unexpected exit reason: %u (%s)",
+                           run->exit_reason, exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vm, VCPU_ID, &uc)) {
+               case UCALL_PR_MSR:
+                       pr_msr(&uc);
+                       break;
+               case UCALL_PR_HCALL:
+                       pr_hcall(&uc);
+                       break;
+               case UCALL_ABORT:
+                       handle_abort(&uc);
+                       return;
+               case UCALL_DONE:
+                       return;
+               }
+       }
+}
+
+int main(void)
+{
+       struct kvm_enable_cap cap = {0};
+       struct kvm_cpuid2 *best;
+       struct kvm_vm *vm;
+
+       if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
+               pr_info("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported, skipping tests.\n");
+               return 0;
+       }
+
+       vm = vm_create_default(VCPU_ID, 0, guest_main);
+
+       cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
+       cap.args[0] = 1;
+       vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+       best = kvm_get_supported_cpuid();
+       clear_kvm_cpuid_features(best);
+       vcpu_set_cpuid(vm, VCPU_ID, best);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+       vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+       enter_guest(vm);
+       kvm_vm_free(vm);
+}