Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 315c7c2..12ade34 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
 #include "nested.h"
 #include "pmu.h"
 
-#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
-
-enum intel_pmu_architectural_events {
-       /*
-        * The order of the architectural events matters as support for each
-        * event is enumerated via CPUID using the index of the event.
-        */
-       INTEL_ARCH_CPU_CYCLES,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       INTEL_ARCH_REFERENCE_CYCLES,
-       INTEL_ARCH_LLC_REFERENCES,
-       INTEL_ARCH_LLC_MISSES,
-       INTEL_ARCH_BRANCHES_RETIRED,
-       INTEL_ARCH_BRANCHES_MISPREDICTED,
-
-       NR_REAL_INTEL_ARCH_EVENTS,
-
-       /*
-        * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
-        * TSC reference cycles.  The architectural reference cycles event may
-        * or may not actually use the TSC as the reference, e.g. might use the
-        * core crystal clock or the bus clock (yeah, "architectural").
-        */
-       PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
-       NR_INTEL_ARCH_EVENTS,
-};
+/*
+ * Perf's "BASE" is wildly misleading, architectural PMUs use bits 31:16 of ECX
+ * to encode the "type" of counter to read, i.e. this is not a "base".  And to
+ * further confuse things, non-architectural PMUs use bit 31 as a flag for
+ * "fast" reads, whereas the "type" is an explicit value.
+ */
+#define INTEL_RDPMC_GP         0
+#define INTEL_RDPMC_FIXED      INTEL_PMC_FIXED_RDPMC_BASE
 
-static struct {
-       u8 eventsel;
-       u8 unit_mask;
-} const intel_arch_events[] = {
-       [INTEL_ARCH_CPU_CYCLES]                 = { 0x3c, 0x00 },
-       [INTEL_ARCH_INSTRUCTIONS_RETIRED]       = { 0xc0, 0x00 },
-       [INTEL_ARCH_REFERENCE_CYCLES]           = { 0x3c, 0x01 },
-       [INTEL_ARCH_LLC_REFERENCES]             = { 0x2e, 0x4f },
-       [INTEL_ARCH_LLC_MISSES]                 = { 0x2e, 0x41 },
-       [INTEL_ARCH_BRANCHES_RETIRED]           = { 0xc4, 0x00 },
-       [INTEL_ARCH_BRANCHES_MISPREDICTED]      = { 0xc5, 0x00 },
-       [PSEUDO_ARCH_REFERENCE_CYCLES]          = { 0x00, 0x03 },
-};
+#define INTEL_RDPMC_TYPE_MASK  GENMASK(31, 16)
+#define INTEL_RDPMC_INDEX_MASK GENMASK(15, 0)
 
-/* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {
-       [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       [1] = INTEL_ARCH_CPU_CYCLES,
-       [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
-};
+#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
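/*
 * [Editorial worked example, not part of the patch.]  Decoding a few guest
 * RDPMC ECX values with the masks above, assuming the kernel's definition
 * of INTEL_PMC_FIXED_RDPMC_BASE as 0x40000000:
 *
 *   ECX = 0x00000002 => type 0x00000000 (INTEL_RDPMC_GP),    index 2
 *                       => reads GP counter PMC2
 *   ECX = 0x40000001 => type 0x40000000 (INTEL_RDPMC_FIXED), index 1
 *                       => reads IA32_FIXED_CTR1
 *   ECX = 0x20000005 => type 0x20000000, unknown to KVM
 *                       => intel_rdpmc_ecx_to_pmc() returns NULL, i.e. #GP
 */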
 
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
@@ -84,77 +50,61 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 
                pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
-               __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
+               __set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use);
                kvm_pmu_request_counter_reprogram(pmc);
        }
 }
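/*
 * [Editorial aside, not part of the patch.]  For context: the "data" being
 * unpacked above is IA32_FIXED_CTR_CTRL, which per the SDM dedicates a
 * 4-bit control field to each fixed counter (bit 0 = ring-0 enable,
 * bit 1 = ring-3 enable, bit 2 = AnyThread, bit 3 = PMI on overflow).
 * That is why intel_pmu_refresh() below un-reserves only 0xb per counter:
 * the AnyThread bit stays reserved, consistent with KVM not exposing it.
 */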
 
-static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
-{
-       if (pmc_idx < INTEL_PMC_IDX_FIXED) {
-               return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
-                                 MSR_P6_EVNTSEL0);
-       } else {
-               u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
-
-               return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
-       }
-}
-
-static bool intel_hw_event_available(struct kvm_pmc *pmc)
-{
-       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-       int i;
-
-       BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
-
-       /*
-        * Disallow events reported as unavailable in guest CPUID.  Note, this
-        * doesn't apply to pseudo-architectural events.
-        */
-       for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
-               if (intel_arch_events[i].eventsel != event_select ||
-                   intel_arch_events[i].unit_mask != unit_mask)
-                       continue;
-
-               return pmu->available_event_types & BIT(i);
-       }
-
-       return true;
-}
-
-static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
-{
-       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       bool fixed = idx & (1u << 30);
-
-       idx &= ~(3u << 30);
-
-       return fixed ? idx < pmu->nr_arch_fixed_counters
-                    : idx < pmu->nr_arch_gp_counters;
-}
-
 static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
                                            unsigned int idx, u64 *mask)
 {
+       unsigned int type = idx & INTEL_RDPMC_TYPE_MASK;
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       bool fixed = idx & (1u << 30);
        struct kvm_pmc *counters;
        unsigned int num_counters;
+       u64 bitmask;
 
-       idx &= ~(3u << 30);
-       if (fixed) {
+       /*
+        * The encoding of ECX for RDPMC is different for architectural versus
+        * non-architectural PMUs (PMUs with version '0').  For architectural
+        * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC
+        * index.  For non-architectural PMUs, bit 31 is a "fast" flag, and
+        * bits 30:0 specify the PMC index.
+        *
+        * Yell and reject attempts to read PMCs for a non-architectural PMU,
+        * as KVM doesn't support such PMUs.
+        */
+       if (WARN_ON_ONCE(!pmu->version))
+               return NULL;
+
+       /*
+        * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs
+        * are supported on all architectural PMUs, i.e. on all virtual PMUs
+        * supported by KVM.  Note, KVM only emulates fixed PMCs for PMU v2+,
+        * but the type itself is still valid, i.e. let RDPMC fail due to
+        * accessing a non-existent counter.  Reject attempts to read all other
+        * types, which are unknown/unsupported.
+        */
+       switch (type) {
+       case INTEL_RDPMC_FIXED:
                counters = pmu->fixed_counters;
                num_counters = pmu->nr_arch_fixed_counters;
-       } else {
+               bitmask = pmu->counter_bitmask[KVM_PMC_FIXED];
+               break;
+       case INTEL_RDPMC_GP:
                counters = pmu->gp_counters;
                num_counters = pmu->nr_arch_gp_counters;
+               bitmask = pmu->counter_bitmask[KVM_PMC_GP];
+               break;
+       default:
+               return NULL;
        }
+
+       idx &= INTEL_RDPMC_INDEX_MASK;
        if (idx >= num_counters)
                return NULL;
-       *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
+
+       *mask &= bitmask;
        return &counters[array_index_nospec(idx, num_counters)];
 }
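/*
 * [Illustrative sketch, not part of the patch.]  How the common RDPMC
 * emulation path consumes the NULL return and *mask contract above.
 * Loosely modeled on kvm_pmu_rdpmc() in arch/x86/kvm/pmu.c; the real
 * caller goes through the kvm_pmu_ops hook and does additional checks.
 */
static int rdpmc_emulation_sketch(struct kvm_vcpu *vcpu, unsigned int ecx,
				  u64 *data)
{
	u64 mask = ~0ull;
	struct kvm_pmc *pmc;

	pmc = intel_rdpmc_ecx_to_pmc(vcpu, ecx, &mask);
	if (!pmc)
		return 1;	/* unknown type or bad index => inject #GP */

	/* Truncate the counter value to its architectural width. */
	*data = pmc_read_counter(pmc) & mask;
	return 0;
}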
 
@@ -464,20 +414,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        return 0;
 }
 
-static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
+/*
+ * Map fixed counter events to architectural general purpose event encodings.
+ * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
+ * and so KVM instead programs the architectural event to effectively request
+ * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
+ * instead program the encoding into a general purpose counter, e.g. if a
+ * different perf_event is already utilizing the requested counter, but the end
+ * result is the same (ignoring the fact that using a general purpose counter
+ * will likely exacerbate counter contention).
+ *
+ * Forcibly inlined to allow asserting on @index at build time, and there should
+ * never be more than one user.
+ */
+static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
 {
-       int i;
-
-       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+       const enum perf_hw_id fixed_pmc_perf_ids[] = {
+               [0] = PERF_COUNT_HW_INSTRUCTIONS,
+               [1] = PERF_COUNT_HW_CPU_CYCLES,
+               [2] = PERF_COUNT_HW_REF_CPU_CYCLES,
+       };
+       u64 eventsel;
 
-       for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-               int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
-               struct kvm_pmc *pmc = &pmu->fixed_counters[index];
-               u32 event = fixed_pmc_events[index];
+       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_PMC_MAX_FIXED);
+       BUILD_BUG_ON(index >= KVM_PMC_MAX_FIXED);
 
-               pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-                                intel_arch_events[event].eventsel;
-       }
+       /*
+        * Yell if perf reports support for a fixed counter but perf doesn't
+        * have a known encoding for the associated general purpose event.
+        */
+       eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
+       WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+       return eventsel;
 }
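/*
 * [Editorial worked example, not part of the patch.]  On Intel hardware,
 * perf_get_hw_event_config() is expected to resolve the perf IDs above to
 * the same encodings as the deleted intel_arch_events[] table:
 *
 *   [0] PERF_COUNT_HW_INSTRUCTIONS   => event 0xc0, umask 0x00 (0x00c0)
 *   [1] PERF_COUNT_HW_CPU_CYCLES     => event 0x3c, umask 0x00 (0x003c)
 *   [2] PERF_COUNT_HW_REF_CPU_CYCLES => event 0x00, umask 0x03 (0x0300)
 */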
 
 static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
@@ -491,19 +459,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        u64 counter_mask;
        int i;
 
-       pmu->nr_arch_gp_counters = 0;
-       pmu->nr_arch_fixed_counters = 0;
-       pmu->counter_bitmask[KVM_PMC_GP] = 0;
-       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
-       pmu->version = 0;
-       pmu->reserved_bits = 0xffffffff00200000ull;
-       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
-       pmu->global_ctrl_mask = ~0ull;
-       pmu->global_status_mask = ~0ull;
-       pmu->fixed_ctr_ctrl_mask = ~0ull;
-       pmu->pebs_enable_mask = ~0ull;
-       pmu->pebs_data_cfg_mask = ~0ull;
-
        memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
 
        /*
@@ -515,8 +470,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                return;
 
        entry = kvm_find_cpuid_entry(vcpu, 0xa);
-       if (!entry || !vcpu->kvm->arch.enable_pmu)
+       if (!entry)
                return;
+
        eax.full = entry->eax;
        edx.full = entry->edx;
 
@@ -543,13 +499,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                                                  kvm_pmu_cap.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << edx.split.bit_width_fixed) - 1;
-               setup_fixed_pmc_eventsel(pmu);
        }
 
        for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
                pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
        counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
-               (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
+               (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
        pmu->global_ctrl_mask = counter_mask;
 
        /*
@@ -593,7 +548,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                        pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
                        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                                pmu->fixed_ctr_ctrl_mask &=
-                                       ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
+                                       ~(1ULL << (KVM_FIXED_PMC_BASE_IDX + i * 4));
                        }
                        pmu->pebs_data_cfg_mask = ~0xff00000full;
                } else {
@@ -619,8 +574,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
        for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
-               pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+               pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
                pmu->fixed_counters[i].current_config = 0;
+               pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
        }
 
        lbr_desc->records.nr = 0;
@@ -748,11 +704,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
        struct kvm_pmc *pmc = NULL;
        int bit, hw_idx;
 
-       for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
-                        X86_PMC_IDX_MAX) {
-               pmc = intel_pmc_idx_to_pmc(pmu, bit);
-
-               if (!pmc || !pmc_speculative_in_use(pmc) ||
+       kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
+               if (!pmc_speculative_in_use(pmc) ||
                    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
                        continue;
 
@@ -767,11 +720,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
 }
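/*
 * [Illustrative sketch, not part of the patch.]  One plausible shape for
 * the kvm_for_each_pmc() iterator used above, which folds the old NULL
 * check into the loop so that callers only ever see valid PMCs.  The real
 * macro lives in arch/x86/kvm/pmu.h; this paraphrase assumes a common
 * kvm_pmc_idx_to_pmc() helper replacing the per-vendor hook deleted below.
 */
#define kvm_for_each_pmc_sketch(pmu, pmc, i, bitmap)		\
	for_each_set_bit(i, bitmap, X86_PMC_IDX_MAX)		\
		if (!(pmc = kvm_pmc_idx_to_pmc(pmu, i)))	\
			continue;				\
		else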
 
 struct kvm_pmu_ops intel_pmu_ops __initdata = {
-       .hw_event_available = intel_hw_event_available,
-       .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = intel_msr_idx_to_pmc,
-       .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
        .is_valid_msr = intel_is_valid_msr,
        .get_msr = intel_pmu_get_msr,
        .set_msr = intel_pmu_set_msr,