perf/x86/amd: Add support for Large Increment per Cycle Events

author Kim Phillips <kim.phillips@amd.com>

Thu, 14 Nov 2019 18:37:20 +0000 (12:37 -0600)

committer Peter Zijlstra <peterz@infradead.org>

Fri, 17 Jan 2020 09:19:26 +0000 (10:19 +0100)
author Kim Phillips <kim.phillips@amd.com>
Thu, 14 Nov 2019 18:37:20 +0000 (12:37 -0600)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 17 Jan 2020 09:19:26 +0000 (10:19 +0100)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c

index 571168f..1f22b6b 100644 (file)
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -14,6 +14,10 @@
  static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
  static unsigned long perf_nmi_window;
  
+/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
+#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
+#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
+
  static __initconst const u64 amd_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -335,6 +339,9 @@ static int amd_core_hw_config(struct perf_event *event)
         else if (event->attr.exclude_guest)
                 event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
  
+       if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
+               event->hw.flags |= PERF_X86_EVENT_PAIR;
+
         return 0;
  }
  
@@ -880,6 +887,15 @@ amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
         return &unconstrained;
  }
  
+static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
+                                          struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (is_counter_pair(hwc))
+               --cpuc->n_pair;
+}
+
  static ssize_t amd_event_sysfs_show(char *page, u64 config)
  {
         u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -967,6 +983,8 @@ static int __init amd_core_pmu_init(void)
                                     PERF_X86_EVENT_PAIR);
  
                 x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
+               x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
+               x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
                 x86_pmu.flags |= PMU_FL_PAIR;
         }
  
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c

index f118af9..3bb738f 100644 (file)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -618,6 +618,7 @@ void x86_pmu_disable_all(void)
         int idx;
  
         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
                 u64 val;
  
                 if (!test_bit(idx, cpuc->active_mask))
@@ -627,6 +628,8 @@ void x86_pmu_disable_all(void)
                         continue;
                 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
                 wrmsrl(x86_pmu_config_addr(idx), val);
+               if (is_counter_pair(hwc))
+                       wrmsrl(x86_pmu_config_addr(idx + 1), 0);
         }
  }
  
@@ -699,7 +702,7 @@ struct sched_state {
         int     counter;        /* counter index */
         int     unassigned;     /* number of events to be assigned left */
         int     nr_gp;          /* number of GP counters used */
-       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       u64     used;
  };
  
  /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
@@ -756,8 +759,12 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
         sched->saved_states--;
         sched->state = sched->saved[sched->saved_states];
  
-       /* continue with next counter: */
-       clear_bit(sched->state.counter++, sched->state.used);
+       /* this assignment didn't work out */
+       /* XXX broken vs EVENT_PAIR */
+       sched->state.used &= ~BIT_ULL(sched->state.counter);
+
+       /* try the next one */
+       sched->state.counter++;
  
         return true;
  }
@@ -782,20 +789,32 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
         if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
                 idx = INTEL_PMC_IDX_FIXED;
                 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
-                       if (!__test_and_set_bit(idx, sched->state.used))
-                               goto done;
+                       u64 mask = BIT_ULL(idx);
+
+                       if (sched->state.used & mask)
+                               continue;
+
+                       sched->state.used |= mask;
+                       goto done;
                 }
         }
  
         /* Grab the first unused counter starting with idx */
         idx = sched->state.counter;
         for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-               if (!__test_and_set_bit(idx, sched->state.used)) {
-                       if (sched->state.nr_gp++ >= sched->max_gp)
-                               return false;
+               u64 mask = BIT_ULL(idx);
  
-                       goto done;
-               }
+               if (c->flags & PERF_X86_EVENT_PAIR)
+                       mask |= mask << 1;
+
+               if (sched->state.used & mask)
+                       continue;
+
+               if (sched->state.nr_gp++ >= sched->max_gp)
+                       return false;
+
+               sched->state.used |= mask;
+               goto done;
         }
  
         return false;
@@ -872,12 +891,10 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
  int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
  {
         struct event_constraint *c;
-       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
         struct perf_event *e;
         int n0, i, wmin, wmax, unsched = 0;
         struct hw_perf_event *hwc;
-
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+       u64 used_mask = 0;
  
         /*
          * Compute the number of events already present; see x86_pmu_add(),
@@ -920,6 +937,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
          * fastpath, try to reuse previous register
          */
         for (i = 0; i < n; i++) {
+               u64 mask;
+
                 hwc = &cpuc->event_list[i]->hw;
                 c = cpuc->event_constraint[i];
  
@@ -931,11 +950,16 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                 if (!test_bit(hwc->idx, c->idxmsk))
                         break;
  
+               mask = BIT_ULL(hwc->idx);
+               if (is_counter_pair(hwc))
+                       mask |= mask << 1;
+
                 /* not already used */
-               if (test_bit(hwc->idx, used_mask))
+               if (used_mask & mask)
                         break;
  
-               __set_bit(hwc->idx, used_mask);
+               used_mask |= mask;
+
                 if (assign)
                         assign[i] = hwc->idx;
         }
@@ -958,6 +982,15 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                     READ_ONCE(cpuc->excl_cntrs->exclusive_present))
                         gpmax /= 2;
  
+               /*
+                * Reduce the number of available counters to allow fitting
+                * the extra Merge events needed by large increment events.
+                */
+               if (x86_pmu.flags & PMU_FL_PAIR) {
+                       gpmax = x86_pmu.num_counters - cpuc->n_pair;
+                       WARN_ON(gpmax <= 0);
+               }
+
                 unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
                                              wmax, gpmax, assign);
         }
@@ -1038,6 +1071,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
                         return -EINVAL;
                 cpuc->event_list[n] = leader;
                 n++;
+               if (is_counter_pair(&leader->hw))
+                       cpuc->n_pair++;
         }
         if (!dogrp)
                 return n;
@@ -1052,6 +1087,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
  
                 cpuc->event_list[n] = event;
                 n++;
+               if (is_counter_pair(&event->hw))
+                       cpuc->n_pair++;
         }
         return n;
  }
@@ -1237,6 +1274,13 @@ int x86_perf_event_set_period(struct perf_event *event)
  
         wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
  
+       /*
+        * Clear the Merge event counter's upper 16 bits since
+        * we currently declare a 48-bit counter width
+        */
+       if (is_counter_pair(hwc))
+               wrmsrl(x86_pmu_event_addr(idx + 1), 0);
+
         /*
          * Due to erratum on certan cpu we need
          * a second write to be sure the register
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h

index e2fd363..f1cd1ca 100644 (file)
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -273,6 +273,7 @@ struct cpu_hw_events {
         struct amd_nb                   *amd_nb;
         /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
         u64                             perf_ctr_virt_mask;
+       int                             n_pair; /* Large increment events */
  
         void                            *kfree_on_online[X86_PERF_KFREE_MAX];
  };
@@ -695,6 +696,7 @@ struct x86_pmu {
          * AMD bits
          */
         unsigned int    amd_nb_constraints : 1;
+       u64             perf_ctr_pair_en;
  
         /*
          * Extra registers for events
@@ -840,6 +842,11 @@ int x86_pmu_hw_config(struct perf_event *event);
  
  void x86_pmu_disable_all(void);
  
+static inline bool is_counter_pair(struct hw_perf_event *hwc)
+{
+       return hwc->flags & PERF_X86_EVENT_PAIR;
+}
+
  static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                           u64 enable_mask)
  {
@@ -847,6 +854,14 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
  
         if (hwc->extra_reg.reg)
                 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+
+       /*
+        * Add enabled Merge event on next counter
+        * if large increment event being enabled on this counter
+        */
+       if (is_counter_pair(hwc))
+               wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
+
         wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
  }
  
@@ -863,6 +878,9 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
         struct hw_perf_event *hwc = &event->hw;
  
         wrmsrl(hwc->config_base, hwc->config);
+
+       if (is_counter_pair(hwc))
+               wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
  }
  
  void x86_pmu_enable_event(struct perf_event *event);
author	Kim Phillips <kim.phillips@amd.com>
	Thu, 14 Nov 2019 18:37:20 +0000 (12:37 -0600)
committer	Peter Zijlstra <peterz@infradead.org>
	Fri, 17 Jan 2020 09:19:26 +0000 (10:19 +0100)
arch/x86/events/amd/core.c		patch \| blob \| history
arch/x86/events/core.c		patch \| blob \| history
arch/x86/events/perf_event.h		patch \| blob \| history