perf/x86/intel: Support the PEBS event mask
Author:     Kan Liang <kan.liang@linux.intel.com>
AuthorDate: Wed, 26 Jun 2024 14:35:33 +0000 (07:35 -0700)
Commit:     Peter Zijlstra <peterz@infradead.org>
CommitDate: Thu, 4 Jul 2024 14:00:36 +0000 (16:00 +0200)
The current perf code assumes that the counters which support PEBS are
contiguous. That is no longer guaranteed once the new CPUID leaf 0x23 is
introduced: there, the counters are enumerated with a counter mask, and
the mask may contain holes on future platforms or in a virtualization
environment.

Store the PEBS event mask rather than the maximum number of PEBS
counters in the x86 PMU structures.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lkml.kernel.org/r/20240626143545.480761-2-kan.liang@linux.intel.com
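
For illustration only (not part of the patch): a minimal userspace
sketch of why a mask generalizes a count. With a hypothetical holey
counter mask such as 0xb (counters 0, 1 and 3 present, counter 2
missing), "(1ULL << max_pebs_events) - 1" would wrongly cover the hole,
while a stored mask plus an fls()-style scan bound handles it. The
helper names below mirror the new kernel helpers but are standalone
re-implementations.

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_PEBS_EVENTS		32
	#define MAX_PEBS_EVENTS_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)

	/* mirrors intel_pmu_pebs_mask(): clamp to PEBS-capable bits */
	static uint64_t pebs_mask(uint64_t cntr_mask)
	{
		return MAX_PEBS_EVENTS_MASK & cntr_mask;
	}

	/* mirrors the kernel's fls(): 1 + index of the highest set bit */
	static int fls_u32(uint32_t x)
	{
		int n = 0;

		while (x) {
			n++;
			x >>= 1;
		}
		return n;
	}

	int main(void)
	{
		uint64_t mask = 0xb;			/* hole at counter 2 */
		int size = fls_u32((uint32_t)mask);	/* scan bound: 4 */

		for (int bit = 0; bit < size; bit++) {
			if (!(pebs_mask(mask) & (1ULL << bit)))
				continue;		/* skip the hole */
			printf("PEBS counter %d usable\n", bit);
		}
		return 0;
	}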
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/intel_ds.h

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0e835dc..6e2e363 100644
@@ -4728,7 +4728,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
        intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
                                     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-       pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+       pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
        pmu->unconstrained = (struct event_constraint)
                             __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
                                                0, pmu->num_counters, 0, 0);
@@ -6070,7 +6070,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
 
                pmu->num_counters = x86_pmu.num_counters;
                pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-               pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+               pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
                pmu->unconstrained = (struct event_constraint)
                                     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
                                                        0, pmu->num_counters, 0, 0);
@@ -6193,7 +6193,7 @@ __init int intel_pmu_init(void)
        x86_pmu.events_maskl            = ebx.full;
        x86_pmu.events_mask_len         = eax.split.mask_length;
 
-       x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+       x86_pmu.pebs_events_mask        = intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
        x86_pmu.pebs_capable            = PEBS_COUNTER_MASK;
 
        /*
@@ -6822,7 +6822,7 @@ __init int intel_pmu_init(void)
                        pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
                }
 
-               pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+               pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
                pmu->unconstrained = (struct event_constraint)
                                        __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
                                                           0, pmu->num_counters, 0, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e010bfe..f6105b8 100644
@@ -1137,7 +1137,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
        struct debug_store *ds = cpuc->ds;
-       int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
+       int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
        u64 threshold;
        int reserved;
@@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
        void *base, *at, *top;
        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+       int max_pebs_events = intel_pmu_max_num_pebs(NULL);
        int bit, i, size;
        u64 mask;
 
@@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 
        ds->pebs_index = ds->pebs_buffer_base;
 
-       mask = (1ULL << x86_pmu.max_pebs_events) - 1;
-       size = x86_pmu.max_pebs_events;
+       mask = x86_pmu.pebs_events_mask;
+       size = max_pebs_events;
        if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
                mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
                size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
@@ -2208,8 +2209,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
                        pebs_status = p->status = cpuc->pebs_enabled;
 
                bit = find_first_bit((unsigned long *)&pebs_status,
-                                       x86_pmu.max_pebs_events);
-               if (bit >= x86_pmu.max_pebs_events)
+                                    max_pebs_events);
+
+               if (!(x86_pmu.pebs_events_mask & (1 << bit)))
                        continue;
 
                /*
@@ -2267,7 +2269,6 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 {
        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event;
@@ -2283,7 +2284,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 
        ds->pebs_index = ds->pebs_buffer_base;
 
-       mask = ((1ULL << max_pebs_events) - 1) |
+       mask = hybrid(cpuc->pmu, pebs_events_mask) |
               (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
        size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 72b022a..a7ba286 100644
@@ -684,7 +684,7 @@ struct x86_hybrid_pmu {
        cpumask_t                       supported_cpus;
        union perf_capabilities         intel_cap;
        u64                             intel_ctrl;
-       int                             max_pebs_events;
+       u64                             pebs_events_mask;
        int                             num_counters;
        int                             num_counters_fixed;
        struct event_constraint         unconstrained;
@@ -852,7 +852,7 @@ struct x86_pmu {
                        pebs_ept                :1;
        int             pebs_record_size;
        int             pebs_buffer_size;
-       int             max_pebs_events;
+       u64             pebs_events_mask;
        void            (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
        struct event_constraint *pebs_constraints;
        void            (*pebs_aliases)(struct perf_event *event);
@@ -1661,6 +1661,17 @@ static inline int is_ht_workaround_enabled(void)
        return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
 }
 
+static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
+{
+       return MAX_PEBS_EVENTS_MASK & cntr_mask;
+}
+
+static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
+{
+       static_assert(MAX_PEBS_EVENTS == 32);
+       return fls((u32)hybrid(pmu, pebs_events_mask));
+}
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 2f9eeb5..5dbeac4 100644
@@ -9,6 +9,7 @@
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4   8
 #define MAX_PEBS_EVENTS                32
+#define MAX_PEBS_EVENTS_MASK   GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
 #define MAX_FIXED_PEBS_EVENTS  16
 
 /*
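
For reference, GENMASK_ULL(), which the new MAX_PEBS_EVENTS_MASK define
relies on, builds a contiguous run of set bits. A rough userspace
equivalent (the kernel's version in include/linux/bits.h adds
compile-time bounds checking) is sketched below.

	#include <assert.h>

	/* all bits set from position l up to position h, inclusive */
	#define GENMASK_ULL(h, l) \
		((~0ULL << (l)) & (~0ULL >> (63 - (h))))

	int main(void)
	{
		/* MAX_PEBS_EVENTS_MASK == GENMASK_ULL(31, 0) == 0xffffffff */
		assert(GENMASK_ULL(31, 0) == 0xffffffffULL);
		/* e.g. 4 GP counters enumerated: GENMASK_ULL(3, 0) == 0xf */
		assert(GENMASK_ULL(3, 0) == 0xfULL);
		return 0;
	}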