Merge tag 'x86-asm-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
[linux-2.6-microblaze.git] / drivers / perf / arm_pmuv3.c
index 6ca7be0..23fa6c5 100644 (file)
@@ -15,6 +15,7 @@
 #include <clocksource/arm_arch_timer.h>
 
 #include <linux/acpi.h>
+#include <linux/bitfield.h>
 #include <linux/clocksource.h>
 #include <linux/of.h>
 #include <linux/perf/arm_pmu.h>
@@ -169,7 +170,11 @@ armv8pmu_events_sysfs_show(struct device *dev,
        PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
 
 static struct attribute *armv8_pmuv3_event_attrs[] = {
-       ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+       /*
+        * Don't expose the sw_incr event in /sys. It's not usable as writes to
+        * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation
+        * means we don't have a fixed event<->counter relationship regardless.
+        */
        ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
        ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
        ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
@@ -294,26 +299,66 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
        .is_visible = armv8pmu_event_attr_is_visible,
 };
 
-PMU_FORMAT_ATTR(event, "config:0-15");
-PMU_FORMAT_ATTR(long, "config1:0");
-PMU_FORMAT_ATTR(rdpmc, "config1:1");
+/* User ABI */
+#define ATTR_CFG_FLD_event_CFG         config
+#define ATTR_CFG_FLD_event_LO          0
+#define ATTR_CFG_FLD_event_HI          15
+#define ATTR_CFG_FLD_long_CFG          config1
+#define ATTR_CFG_FLD_long_LO           0
+#define ATTR_CFG_FLD_long_HI           0
+#define ATTR_CFG_FLD_rdpmc_CFG         config1
+#define ATTR_CFG_FLD_rdpmc_LO          1
+#define ATTR_CFG_FLD_rdpmc_HI          1
+#define ATTR_CFG_FLD_threshold_count_CFG       config1 /* PMEVTYPER.TC[0] */
+#define ATTR_CFG_FLD_threshold_count_LO                2
+#define ATTR_CFG_FLD_threshold_count_HI                2
+#define ATTR_CFG_FLD_threshold_compare_CFG     config1 /* PMEVTYPER.TC[2:1] */
+#define ATTR_CFG_FLD_threshold_compare_LO      3
+#define ATTR_CFG_FLD_threshold_compare_HI      4
+#define ATTR_CFG_FLD_threshold_CFG             config1 /* PMEVTYPER.TH */
+#define ATTR_CFG_FLD_threshold_LO              5
+#define ATTR_CFG_FLD_threshold_HI              16
+
+GEN_PMU_FORMAT_ATTR(event);
+GEN_PMU_FORMAT_ATTR(long);
+GEN_PMU_FORMAT_ATTR(rdpmc);
+GEN_PMU_FORMAT_ATTR(threshold_count);
+GEN_PMU_FORMAT_ATTR(threshold_compare);
+GEN_PMU_FORMAT_ATTR(threshold);
 
 static int sysctl_perf_user_access __read_mostly;
 
-static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
+static bool armv8pmu_event_is_64bit(struct perf_event *event)
+{
+       return ATTR_CFG_GET_FLD(&event->attr, long);
+}
+
+static bool armv8pmu_event_want_user_access(struct perf_event *event)
 {
-       return event->attr.config1 & 0x1;
+       return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
 }
 
-static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
+static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
 {
-       return event->attr.config1 & 0x2;
+       u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
+       u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count);
+
+       /*
+        * The count bit is always the bottom bit of the full control field, and
+        * the comparison is the upper two bits, but it's not explicitly
+        * labelled in the Arm ARM. For the Perf interface we split it into two
+        * fields, so reconstruct it here.
+        */
+       return (th_compare << 1) | th_count;
 }
 
 static struct attribute *armv8_pmuv3_format_attrs[] = {
        &format_attr_event.attr,
        &format_attr_long.attr,
        &format_attr_rdpmc.attr,
+       &format_attr_threshold.attr,
+       &format_attr_threshold_compare.attr,
+       &format_attr_threshold_count.attr,
        NULL,
 };
 
@@ -327,7 +372,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
 {
        struct pmu *pmu = dev_get_drvdata(dev);
        struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
-       u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
+       u32 slots = FIELD_GET(ARMV8_PMU_SLOTS, cpu_pmu->reg_pmmir);
 
        return sysfs_emit(page, "0x%08x\n", slots);
 }
@@ -339,8 +384,7 @@ static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
 {
        struct pmu *pmu = dev_get_drvdata(dev);
        struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
-       u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT)
-                       & ARMV8_PMU_BUS_SLOTS_MASK;
+       u32 bus_slots = FIELD_GET(ARMV8_PMU_BUS_SLOTS, cpu_pmu->reg_pmmir);
 
        return sysfs_emit(page, "0x%08x\n", bus_slots);
 }
@@ -352,8 +396,7 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
 {
        struct pmu *pmu = dev_get_drvdata(dev);
        struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
-       u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT)
-                       & ARMV8_PMU_BUS_WIDTH_MASK;
+       u32 bus_width = FIELD_GET(ARMV8_PMU_BUS_WIDTH, cpu_pmu->reg_pmmir);
        u32 val = 0;
 
        /* Encoded as Log2(number of bytes), plus one */
@@ -365,10 +408,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(bus_width);
 
+static u32 threshold_max(struct arm_pmu *cpu_pmu)
+{
+       /*
+        * PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be
+        * impossible to write the threshold in the upper 32 bits of PMEVTYPER.
+        */
+       if (IS_ENABLED(CONFIG_ARM))
+               return 0;
+
+       /*
+        * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
+        * (2 ^ PMMIR.THWIDTH) - 1.
+        */
+       return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
+}
+
+static ssize_t threshold_max_show(struct device *dev,
+                                 struct device_attribute *attr, char *page)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+       struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+       return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(threshold_max);
+
 static struct attribute *armv8_pmuv3_caps_attrs[] = {
        &dev_attr_slots.attr,
        &dev_attr_bus_slots.attr,
        &dev_attr_bus_width.attr,
+       &dev_attr_threshold_max.attr,
        NULL,
 };
 
@@ -397,7 +468,7 @@ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
        return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver));
 }
 
-static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
+static bool armv8pmu_event_has_user_read(struct perf_event *event)
 {
        return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT;
 }
@@ -407,7 +478,7 @@ static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
  * except when we have allocated the 64bit cycle counter (for CPU
  * cycles event) or when user space counter access is enabled.
  */
-static inline bool armv8pmu_event_is_chained(struct perf_event *event)
+static bool armv8pmu_event_is_chained(struct perf_event *event)
 {
        int idx = event->hw.idx;
        struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
@@ -428,36 +499,36 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event)
 #define        ARMV8_IDX_TO_COUNTER(x) \
        (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
 
-static inline u64 armv8pmu_pmcr_read(void)
+static u64 armv8pmu_pmcr_read(void)
 {
        return read_pmcr();
 }
 
-static inline void armv8pmu_pmcr_write(u64 val)
+static void armv8pmu_pmcr_write(u64 val)
 {
        val &= ARMV8_PMU_PMCR_MASK;
        isb();
        write_pmcr(val);
 }
 
-static inline int armv8pmu_has_overflowed(u32 pmovsr)
+static int armv8pmu_has_overflowed(u32 pmovsr)
 {
        return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
 }
 
-static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
+static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 {
        return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
-static inline u64 armv8pmu_read_evcntr(int idx)
+static u64 armv8pmu_read_evcntr(int idx)
 {
        u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 
        return read_pmevcntrn(counter);
 }
 
-static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
+static u64 armv8pmu_read_hw_counter(struct perf_event *event)
 {
        int idx = event->hw.idx;
        u64 val = armv8pmu_read_evcntr(idx);
@@ -519,14 +590,14 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
        return  armv8pmu_unbias_long_counter(event, value);
 }
 
-static inline void armv8pmu_write_evcntr(int idx, u64 value)
+static void armv8pmu_write_evcntr(int idx, u64 value)
 {
        u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 
        write_pmevcntrn(counter, value);
 }
 
-static inline void armv8pmu_write_hw_counter(struct perf_event *event,
+static void armv8pmu_write_hw_counter(struct perf_event *event,
                                             u64 value)
 {
        int idx = event->hw.idx;
@@ -552,15 +623,22 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
                armv8pmu_write_hw_counter(event, value);
 }
 
-static inline void armv8pmu_write_evtype(int idx, u32 val)
+static void armv8pmu_write_evtype(int idx, unsigned long val)
 {
        u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+       unsigned long mask = ARMV8_PMU_EVTYPE_EVENT |
+                            ARMV8_PMU_INCLUDE_EL2 |
+                            ARMV8_PMU_EXCLUDE_EL0 |
+                            ARMV8_PMU_EXCLUDE_EL1;
 
-       val &= ARMV8_PMU_EVTYPE_MASK;
+       if (IS_ENABLED(CONFIG_ARM64))
+               mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH;
+
+       val &= mask;
        write_pmevtypern(counter, val);
 }
 
-static inline void armv8pmu_write_event_type(struct perf_event *event)
+static void armv8pmu_write_event_type(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
@@ -594,7 +672,7 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
        return mask;
 }
 
-static inline void armv8pmu_enable_counter(u32 mask)
+static void armv8pmu_enable_counter(u32 mask)
 {
        /*
         * Make sure event configuration register writes are visible before we
@@ -604,7 +682,7 @@ static inline void armv8pmu_enable_counter(u32 mask)
        write_pmcntenset(mask);
 }
 
-static inline void armv8pmu_enable_event_counter(struct perf_event *event)
+static void armv8pmu_enable_event_counter(struct perf_event *event)
 {
        struct perf_event_attr *attr = &event->attr;
        u32 mask = armv8pmu_event_cnten_mask(event);
@@ -616,7 +694,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event)
                armv8pmu_enable_counter(mask);
 }
 
-static inline void armv8pmu_disable_counter(u32 mask)
+static void armv8pmu_disable_counter(u32 mask)
 {
        write_pmcntenclr(mask);
        /*
@@ -626,7 +704,7 @@ static inline void armv8pmu_disable_counter(u32 mask)
        isb();
 }
 
-static inline void armv8pmu_disable_event_counter(struct perf_event *event)
+static void armv8pmu_disable_event_counter(struct perf_event *event)
 {
        struct perf_event_attr *attr = &event->attr;
        u32 mask = armv8pmu_event_cnten_mask(event);
@@ -638,18 +716,18 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event)
                armv8pmu_disable_counter(mask);
 }
 
-static inline void armv8pmu_enable_intens(u32 mask)
+static void armv8pmu_enable_intens(u32 mask)
 {
        write_pmintenset(mask);
 }
 
-static inline void armv8pmu_enable_event_irq(struct perf_event *event)
+static void armv8pmu_enable_event_irq(struct perf_event *event)
 {
        u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
        armv8pmu_enable_intens(BIT(counter));
 }
 
-static inline void armv8pmu_disable_intens(u32 mask)
+static void armv8pmu_disable_intens(u32 mask)
 {
        write_pmintenclr(mask);
        isb();
@@ -658,13 +736,13 @@ static inline void armv8pmu_disable_intens(u32 mask)
        isb();
 }
 
-static inline void armv8pmu_disable_event_irq(struct perf_event *event)
+static void armv8pmu_disable_event_irq(struct perf_event *event)
 {
        u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
        armv8pmu_disable_intens(BIT(counter));
 }
 
-static inline u32 armv8pmu_getreset_flags(void)
+static u32 armv8pmu_getreset_flags(void)
 {
        u32 value;
 
@@ -672,7 +750,7 @@ static inline u32 armv8pmu_getreset_flags(void)
        value = read_pmovsclr();
 
        /* Write to clear flags */
-       value &= ARMV8_PMU_OVSR_MASK;
+       value &= ARMV8_PMU_OVERFLOWED_MASK;
        write_pmovsclr(value);
 
        return value;
@@ -914,9 +992,15 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
                                     struct perf_event_attr *attr)
 {
        unsigned long config_base = 0;
-
-       if (attr->exclude_idle)
-               return -EPERM;
+       struct perf_event *perf_event = container_of(attr, struct perf_event,
+                                                    attr);
+       struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
+       u32 th;
+
+       if (attr->exclude_idle) {
+               pr_debug("ARM performance counters do not support mode exclusion\n");
+               return -EOPNOTSUPP;
+       }
 
        /*
         * If we're running in hyp mode, then we *are* the hypervisor.
@@ -945,6 +1029,22 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
        if (attr->exclude_user)
                config_base |= ARMV8_PMU_EXCLUDE_EL0;
 
+       /*
+        * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
+        * be 0 and will also trigger this check, preventing it from being used.
+        */
+       th = ATTR_CFG_GET_FLD(attr, threshold);
+       if (th > threshold_max(cpu_pmu)) {
+               pr_debug("PMU event threshold exceeds max value\n");
+               return -EINVAL;
+       }
+
+       if (IS_ENABLED(CONFIG_ARM64) && th) {
+               config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
+               config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
+                                         armv8pmu_event_threshold_control(attr));
+       }
+
        /*
         * Install the filter into config_base as this is used to
         * construct the event type.
@@ -1107,8 +1207,7 @@ static void __armv8pmu_probe_pmu(void *info)
        probe->present = true;
 
        /* Read the nb of CNTx counters supported from PMNC */
-       cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
-               & ARMV8_PMU_PMCR_N_MASK;
+       cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
 
        /* Add the CPU cycles counter */
        cpu_pmu->num_events += 1;
@@ -1221,6 +1320,12 @@ static int name##_pmu_init(struct arm_pmu *cpu_pmu)                      \
        return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event);   \
 }
 
+#define PMUV3_INIT_MAP_EVENT(name, map_event)                          \
+static int name##_pmu_init(struct arm_pmu *cpu_pmu)                    \
+{                                                                      \
+       return armv8_pmu_init(cpu_pmu, #name, map_event);               \
+}
+
 PMUV3_INIT_SIMPLE(armv8_pmuv3)
 
 PMUV3_INIT_SIMPLE(armv8_cortex_a34)
@@ -1247,51 +1352,24 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
 PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
 PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
 
-static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event);
-}
-
-static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event);
-}
-
-static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event);
-}
-
-static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event);
-}
-
-static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event);
-}
-
-static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event);
-}
-
-static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
-{
-       return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event);
-}
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a72, armv8_a57_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a73, armv8_a73_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cavium_thunder, armv8_thunder_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event)
 
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
        {.compatible = "arm,armv8-pmuv3",       .data = armv8_pmuv3_pmu_init},
        {.compatible = "arm,cortex-a34-pmu",    .data = armv8_cortex_a34_pmu_init},
-       {.compatible = "arm,cortex-a35-pmu",    .data = armv8_a35_pmu_init},
-       {.compatible = "arm,cortex-a53-pmu",    .data = armv8_a53_pmu_init},
+       {.compatible = "arm,cortex-a35-pmu",    .data = armv8_cortex_a35_pmu_init},
+       {.compatible = "arm,cortex-a53-pmu",    .data = armv8_cortex_a53_pmu_init},
        {.compatible = "arm,cortex-a55-pmu",    .data = armv8_cortex_a55_pmu_init},
-       {.compatible = "arm,cortex-a57-pmu",    .data = armv8_a57_pmu_init},
+       {.compatible = "arm,cortex-a57-pmu",    .data = armv8_cortex_a57_pmu_init},
        {.compatible = "arm,cortex-a65-pmu",    .data = armv8_cortex_a65_pmu_init},
-       {.compatible = "arm,cortex-a72-pmu",    .data = armv8_a72_pmu_init},
-       {.compatible = "arm,cortex-a73-pmu",    .data = armv8_a73_pmu_init},
+       {.compatible = "arm,cortex-a72-pmu",    .data = armv8_cortex_a72_pmu_init},
+       {.compatible = "arm,cortex-a73-pmu",    .data = armv8_cortex_a73_pmu_init},
        {.compatible = "arm,cortex-a75-pmu",    .data = armv8_cortex_a75_pmu_init},
        {.compatible = "arm,cortex-a76-pmu",    .data = armv8_cortex_a76_pmu_init},
        {.compatible = "arm,cortex-a77-pmu",    .data = armv8_cortex_a77_pmu_init},
@@ -1309,8 +1387,8 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
        {.compatible = "arm,neoverse-n1-pmu",   .data = armv8_neoverse_n1_pmu_init},
        {.compatible = "arm,neoverse-n2-pmu",   .data = armv9_neoverse_n2_pmu_init},
        {.compatible = "arm,neoverse-v1-pmu",   .data = armv8_neoverse_v1_pmu_init},
-       {.compatible = "cavium,thunder-pmu",    .data = armv8_thunder_pmu_init},
-       {.compatible = "brcm,vulcan-pmu",       .data = armv8_vulcan_pmu_init},
+       {.compatible = "cavium,thunder-pmu",    .data = armv8_cavium_thunder_pmu_init},
+       {.compatible = "brcm,vulcan-pmu",       .data = armv8_brcm_vulcan_pmu_init},
        {.compatible = "nvidia,carmel-pmu",     .data = armv8_nvidia_carmel_pmu_init},
        {.compatible = "nvidia,denver-pmu",     .data = armv8_nvidia_denver_pmu_init},
        {},