// SPDX-License-Identifier: GPL-2.0
/*
 * Test for x86 KVM_SET_PMU_EVENT_FILTER.
 *
 * Copyright (C) 2022, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Verifies the expected behavior of allow lists and deny lists for
 * virtual PMU events.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
/*
 * In lieu of copying perf_event.h into tools...
 */
#define ARCH_PERFMON_EVENTSEL_OS	(1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_ENABLE	(1ULL << 22)
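/*
 * EVENTSEL_OS (bit 17) counts events that occur at CPL0, which is
 * where the guest code below runs; EVENTSEL_ENABLE (bit 22) enables
 * the counter.
 */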
union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};
union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
};
/* End of stuff taken from perf_event.h. */

/* Oddly, this isn't in perf_event.h. */
#define ARCH_PERFMON_BRANCHES_RETIRED	5
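/*
 * This is the bit position in CPUID.0AH:EBX for the "branch
 * instruction retired" architectural event; a set bit there means the
 * event is unavailable.
 */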
#define VCPU_ID 0
#define NUM_BRANCHES 42
/*
 * This is how the event selector and unit mask are stored in an AMD
 * core performance event-select register. Intel's format is similar,
 * but the event selector is only 8 bits.
 */
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
			      (umask & 0xff) << 8)
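/*
 * For example, EVENT(0x1c2, 0) evaluates to 0x1000000c2: event select
 * bits 11:8 land in register bits 35:32, event select bits 7:0 stay
 * in bits 7:0, and the unit mask occupies bits 15:8. (0x1c2 is purely
 * illustrative; the events used below all have 8-bit selectors.)
 */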
64 * "Branch instructions retired", from the Intel SDM, volume 3,
65 * "Pre-defined Architectural Performance Events."
68 #define INTEL_BR_RETIRED EVENT(0xc4, 0)
71 * "Retired branch instructions", from Processor Programming Reference
72 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
73 * Preliminary Processor Programming Reference (PPR) for AMD Family
74 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
75 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
76 * B1 Processors Volume 1 of 2.
79 #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
/*
 * This event list comprises Intel's eight architectural events plus
 * AMD's "retired branch instructions" for Zen[123] (and possibly
 * other AMD CPUs).
 */
static const uint64_t event_list[] = {
	/* Intel's eight architectural events, in SDM order. */
	EVENT(0x3c, 0),
	EVENT(0xc0, 0),
	EVENT(0x3c, 1),
	EVENT(0x2e, 0x4f),
	EVENT(0x2e, 0x41),
	EVENT(0xc4, 0),
	EVENT(0xc5, 0),
	EVENT(0xa4, 1),
	AMD_ZEN_BR_RETIRED,
};
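/*
 * Note that the filter is keyed on event select and unit mask only,
 * so the entries above need not (and do not) encode any of the other
 * event-select bits, such as OS, USR, or EN.
 */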
/*
 * If we encounter a #GP during the guest PMU sanity check, then the guest
 * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
 */
static void guest_gp_handler(struct ex_regs *regs)
{
	GUEST_SYNC(0);
}
/*
 * Check that we can write a new value to the given MSR and read it back.
 * The caller should provide a non-empty set of bits that are safe to flip.
 *
 * Return on success. GUEST_SYNC(0) on error.
 */
static void check_msr(uint32_t msr, uint64_t bits_to_flip)
{
	uint64_t v = rdmsr(msr) ^ bits_to_flip;

	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);

	/* Flip the bits back and verify the readback once more. */
	v ^= bits_to_flip;
	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);
}
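/*
 * Each guest_code variant below first sanity-checks its PMU MSRs and
 * reports the result via GUEST_SYNC (1 on success, 0 on failure),
 * then loops forever: program "branch instructions retired" on
 * counter 0, retire NUM_BRANCHES branches via "loop .", and report
 * the counter delta via GUEST_SYNC. The delta is not required to be
 * exactly NUM_BRANCHES; the tests only distinguish zero from
 * non-zero.
 */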
static void intel_guest_code(void)
{
	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
	check_msr(MSR_P6_EVNTSEL0, 0xffff);
	check_msr(MSR_IA32_PMC0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		/* Stop counter 0 while it is reprogrammed. */
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
		br0 = rdmsr(MSR_IA32_PMC0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_IA32_PMC0);
		GUEST_SYNC(br1 - br0);
	}
}
/*
 * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
 * this code uses the always-available, legacy K7 PMU MSRs, which alias to
 * the first four of the six extended core PMU MSRs.
 */
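/*
 * Also note that, unlike the Intel flow above, there is no global
 * control MSR here: clearing and re-setting the enable bit in
 * MSR_K7_EVNTSEL0 is what stops and restarts counter 0 around
 * reprogramming.
 */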
static void amd_guest_code(void)
{
	check_msr(MSR_K7_EVNTSEL0, 0xffff);
	check_msr(MSR_K7_PERFCTR0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
		br0 = rdmsr(MSR_K7_PERFCTR0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_K7_PERFCTR0);
		GUEST_SYNC(br1 - br0);
	}
}
/*
 * Run the VM to the next GUEST_SYNC(value), and return the value passed
 * to the sync. Any other exit from the guest is fatal.
 */
static uint64_t run_vm_to_sync(struct kvm_vm *vm)
{
	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
	struct ucall uc;

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
		    run->exit_reason,
		    exit_reason_str(run->exit_reason));
	get_ucall(vm, VCPU_ID, &uc);
	TEST_ASSERT(uc.cmd == UCALL_SYNC,
		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
	return uc.args[1];	/* GUEST_SYNC(x) places x in args[1]. */
}
/*
 * In a nested environment or if the vPMU is disabled, the guest PMU
 * might not work as architected (accessing the PMU MSRs may raise
 * #GP, or writes could simply be discarded). In those situations,
 * there is no point in running these tests. The guest code will
 * perform a sanity check and then GUEST_SYNC(success). In the case
 * of failure, the behavior of the guest on resumption is undefined.
 */
static bool sanity_check_pmu(struct kvm_vm *vm)
{
	bool success;

	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
	success = run_vm_to_sync(vm);
	vm_install_exception_handler(vm, GP_VECTOR, NULL);

	return success;
}
static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
{
	struct kvm_pmu_event_filter *f;
	int size = sizeof(*f) + nevents * sizeof(f->events[0]);

	f = malloc(size);
	TEST_ASSERT(f, "Out of memory");
	memset(f, 0, size);
	f->nevents = nevents;
	return f;
}
static struct kvm_pmu_event_filter *event_filter(uint32_t action)
{
	struct kvm_pmu_event_filter *f;
	int i;

	f = make_pmu_event_filter(ARRAY_SIZE(event_list));
	f->action = action;
	for (i = 0; i < ARRAY_SIZE(event_list); i++)
		f->events[i] = event_list[i];

	return f;
}
/*
 * Remove the first occurrence of 'event' (if any) from the filter's
 * event list.
 */
static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
						 uint64_t event)
{
	bool found = false;
	int i;

	for (i = 0; i < f->nevents; i++) {
		if (found)
			f->events[i - 1] = f->events[i];
		else
			found = f->events[i] == event;
	}
	if (found)
		f->nevents--;
	return f;
}
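/*
 * E.g. remove_event(event_filter(KVM_PMU_EVENT_ALLOW), INTEL_BR_RETIRED)
 * yields an allow list on which "branch instructions retired" does not
 * appear (see the "not member" tests below).
 */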
static void test_without_filter(struct kvm_vm *vm)
{
	uint64_t count = run_vm_to_sync(vm);

	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
static uint64_t test_with_filter(struct kvm_vm *vm,
				 struct kvm_pmu_event_filter *f)
{
	vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
	return run_vm_to_sync(vm);
}
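/*
 * Note: the guest measurement loop reprograms its event selector on
 * every iteration, so each filter installed here is exercised by a
 * fresh guest WRMSR; KVM evaluates the filter when the guest programs
 * the event, not when the filter is set.
 */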
static void test_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}
static void test_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
static void test_not_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
static void test_not_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}
/*
 * Check for a non-zero PMU version, at least one general-purpose
 * counter per logical processor, an EBX bit vector of length greater
 * than 5, and EBX[5] clear.
 */
static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
{
	union cpuid10_eax eax = { .full = entry->eax };
	union cpuid10_ebx ebx = { .full = entry->ebx };

	return eax.split.version_id && eax.split.num_counters > 0 &&
	       eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
	       !ebx.split.no_branch_instruction_retired;
}
/*
 * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
 * clear on AMD hardware.
 */
static bool use_intel_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(0xa, 0);
	return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
}
static bool is_zen1(uint32_t eax)
{
	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
}
static bool is_zen2(uint32_t eax)
{
	return x86_family(eax) == 0x17 &&
	       x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
}
static bool is_zen3(uint32_t eax)
{
	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
}
/*
 * Determining AMD support for a PMU event requires consulting the AMD
 * PPR for the CPU or reference material derived therefrom. The AMD
 * test code herein has been verified to work on Zen1, Zen2, and Zen3.
 *
 * Feel free to add more AMD CPUs that are documented to support event
 * select 0xc2 umask 0 as "retired branch instructions."
 */
static bool use_amd_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(1, 0);
	return is_amd_cpu() && entry &&
	       (is_zen1(entry->eax) ||
		is_zen2(entry->eax) ||
		is_zen3(entry->eax));
}
int main(int argc, char *argv[])
{
	void (*guest_code)(void) = NULL;
	struct kvm_vm *vm;
	int r;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
	if (!r) {
		print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
		exit(KSFT_SKIP);
	}
	if (use_intel_pmu())
		guest_code = intel_guest_code;
	else if (use_amd_pmu())
		guest_code = amd_guest_code;

	if (!guest_code) {
		print_skip("Don't know how to test this guest PMU");
		exit(KSFT_SKIP);
	}
	vm = vm_create_default(VCPU_ID, 0, guest_code);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);

	if (!sanity_check_pmu(vm)) {
		print_skip("Guest PMU is not functional");
		exit(KSFT_SKIP);
	}
	test_without_filter(vm);
	test_member_deny_list(vm);
	test_member_allow_list(vm);
	test_not_member_deny_list(vm);
	test_not_member_allow_list(vm);

	kvm_vm_free(vm);

	return 0;
}