perf/x86: Move cpuc->running into P4 specific code
author Kan Liang <kan.liang@linux.intel.com>
Wed, 14 Apr 2021 14:36:29 +0000 (07:36 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 16 Apr 2021 14:32:42 +0000 (16:32 +0200)
The 'running' variable is only used by the P4 PMU. Currently, perf sets the
variable in the hot-path function x86_pmu_start(), which wastes cycles
for everybody not running on P4.

Move cpuc->running into the P4 specific p4_pmu_enable_event().

Add a static per-CPU 'p4_running' variable to replace the 'running'
variable in struct cpu_hw_events. This also saves space in the generic
structure.

p4_pmu_enable_all() also invokes p4_pmu_enable_event(), but it should not
set the 'running' bit (previously only x86_pmu_start() did). Factor out
__p4_pmu_enable_event() for p4_pmu_enable_all() to call instead.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1618410990-21383-1-git-send-email-kan.liang@linux.intel.com
arch/x86/events/core.c
arch/x86/events/intel/p4.c
arch/x86/events/perf_event.h

index 18df171..dd9f3c2 100644 (file)
@@ -1480,7 +1480,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 
        cpuc->events[idx] = event;
        __set_bit(idx, cpuc->active_mask);
-       __set_bit(idx, cpuc->running);
        static_call(x86_pmu_enable)(event);
        perf_event_update_userpage(event);
 }
index a4cc660..9c10cbb 100644 (file)
@@ -947,7 +947,7 @@ static void p4_pmu_enable_pebs(u64 config)
        (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,      (u64)bind->metric_vert);
 }
 
-static void p4_pmu_enable_event(struct perf_event *event)
+static void __p4_pmu_enable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
        int thread = p4_ht_config_thread(hwc->config);
@@ -983,6 +983,16 @@ static void p4_pmu_enable_event(struct perf_event *event)
                                (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
 }
 
+static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(X86_PMC_IDX_MAX)], p4_running);
+
+static void p4_pmu_enable_event(struct perf_event *event)
+{
+       int idx = event->hw.idx;
+
+       __set_bit(idx, per_cpu(p4_running, smp_processor_id()));
+       __p4_pmu_enable_event(event);
+}
+
 static void p4_pmu_enable_all(int added)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -992,7 +1002,7 @@ static void p4_pmu_enable_all(int added)
                struct perf_event *event = cpuc->events[idx];
                if (!test_bit(idx, cpuc->active_mask))
                        continue;
-               p4_pmu_enable_event(event);
+               __p4_pmu_enable_event(event);
        }
 }
 
@@ -1012,7 +1022,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
                if (!test_bit(idx, cpuc->active_mask)) {
                        /* catch in-flight IRQs */
-                       if (__test_and_clear_bit(idx, cpuc->running))
+                       if (__test_and_clear_bit(idx, per_cpu(p4_running, smp_processor_id())))
                                handled++;
                        continue;
                }
index 53b2b5f..54a340e 100644 (file)
@@ -228,7 +228,6 @@ struct cpu_hw_events {
         */
        struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        int                     enabled;
 
        int                     n_events; /* the # of events in the below arrays */