drivers/perf: hisi: Add new functions for L3C PMU
authorShaokun Zhang <zhangshaokun@hisilicon.com>
Mon, 8 Mar 2021 06:50:32 +0000 (14:50 +0800)
committerWill Deacon <will@kernel.org>
Thu, 25 Mar 2021 13:03:46 +0000 (13:03 +0000)
On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU:

* tt_req: it is the abbreviation of tracetag request and allows user to
count only read/write/atomic operations. tt_req is 3-bit and details are
listed in the hisi-pmu document.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5

* tt_core: it is the abbreviation of tracetag core and allows user to
filter by core/thread within the cluster, it is a 8-bit bitmap that each
bit represents the corresponding core/thread in this L3C.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5

* datasrc_cfg: it is the abbreviation of data source configuration and
allows user to check where the data comes from, such as: from local DDR,
cross-die DDR or cross-socket DDR. Its is 5-bit and represents different
data source in the SoC.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0xe/ sleep 5

* datasrc_skt: it is the abbreviation of data source from another socket
and is used in the multi-chips, if user wants to check the cross-socket
datat source, it shall be added in perf command. Only one bit is used to
control this.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0x10,datasrc_skt=1/ sleep 5

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Co-developed-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1615186237-22263-5-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.h

index 2b26386..bf9f777 100644 (file)
 #define L3C_INT_MASK           0x0800
 #define L3C_INT_STATUS         0x0808
 #define L3C_INT_CLEAR          0x080c
+#define L3C_CORE_CTRL           0x1b04
+#define L3C_TRACETAG_CTRL       0x1b20
+#define L3C_DATSRC_TYPE         0x1b48
+#define L3C_DATSRC_CTRL         0x1bf0
 #define L3C_EVENT_CTRL         0x1c00
 #define L3C_VERSION            0x1cf0
 #define L3C_EVENT_TYPE0                0x1d00
 /*
- * Each counter is 48-bits and [48:63] are reserved
- * which are Read-As-Zero and Writes-Ignored.
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
  */
 #define L3C_CNTR0_LOWER                0x1e00
 
 #define L3C_NR_COUNTERS                0x8
 
 #define L3C_PERF_CTRL_EN       0x10000
+#define L3C_TRACETAG_EN                BIT(31)
+#define L3C_TRACETAG_REQ_SHIFT 7
+#define L3C_TRACETAG_MARK_EN   BIT(0)
+#define L3C_TRACETAG_REQ_EN    (L3C_TRACETAG_MARK_EN | BIT(2))
+#define L3C_TRACETAG_CORE_EN   (L3C_TRACETAG_MARK_EN | BIT(3))
+#define L3C_CORE_EN            BIT(20)
+#define L3C_COER_NONE          0x0
+#define L3C_DATSRC_MASK                0xFF
+#define L3C_DATSRC_SKT_EN      BIT(23)
+#define L3C_DATSRC_NONE                0x0
 #define L3C_EVTYPE_NONE                0xff
 #define L3C_V1_NR_EVENTS       0x59
+#define L3C_V2_NR_EVENTS       0xFF
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       u32 tt_req = hisi_get_tt_req(event);
+
+       if (tt_req) {
+               u32 val;
+
+               /* Set request-type for tracetag */
+               val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+               val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
+               val |= L3C_TRACETAG_REQ_EN;
+               writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+               /* Enable request-tracetag statistics */
+               val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+               val |= L3C_TRACETAG_EN;
+               writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+       }
+}
+
+static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       u32 tt_req = hisi_get_tt_req(event);
+
+       if (tt_req) {
+               u32 val;
+
+               /* Clear request-type */
+               val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+               val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
+               val &= ~L3C_TRACETAG_REQ_EN;
+               writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+               /* Disable request-tracetag statistics */
+               val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+               val &= ~L3C_TRACETAG_EN;
+               writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+       }
+}
+
+static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       u32 reg, reg_idx, shift, val;
+       int idx = hwc->idx;
+
+       /*
+        * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
+        * There are 2 datasource ctrl register for the 8 hardware counters.
+        * Datasrc is 8-bits and for the former 4 hardware counters,
+        * L3C_DATSRC_TYPE0 is chosen. For the latter 4 hardware counters,
+        * L3C_DATSRC_TYPE1 is chosen.
+        */
+       reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
+       reg_idx = idx % 4;
+       shift = 8 * reg_idx;
+
+       val = readl(l3c_pmu->base + reg);
+       val &= ~(L3C_DATSRC_MASK << shift);
+       val |= ds_cfg << shift;
+       writel(val, l3c_pmu->base + reg);
+}
+
+static void hisi_l3c_pmu_config_ds(struct perf_event *event)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       u32 ds_cfg = hisi_get_datasrc_cfg(event);
+       u32 ds_skt = hisi_get_datasrc_skt(event);
+
+       if (ds_cfg)
+               hisi_l3c_pmu_write_ds(event, ds_cfg);
+
+       if (ds_skt) {
+               u32 val;
+
+               val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+               val |= L3C_DATSRC_SKT_EN;
+               writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+       }
+}
+
+static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       u32 ds_cfg = hisi_get_datasrc_cfg(event);
+       u32 ds_skt = hisi_get_datasrc_skt(event);
+
+       if (ds_cfg)
+               hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);
+
+       if (ds_skt) {
+               u32 val;
+
+               val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+               val &= ~L3C_DATSRC_SKT_EN;
+               writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+       }
+}
+
+static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       u32 core = hisi_get_tt_core(event);
+
+       if (core) {
+               u32 val;
+
+               /* Config and enable core information */
+               writel(core, l3c_pmu->base + L3C_CORE_CTRL);
+               val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+               val |= L3C_CORE_EN;
+               writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+               /* Enable core-tracetag statistics */
+               val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+               val |= L3C_TRACETAG_CORE_EN;
+               writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+       }
+}
+
+static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
+{
+       struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+       u32 core = hisi_get_tt_core(event);
+
+       if (core) {
+               u32 val;
+
+               /* Clear core information */
+               writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
+               val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+               val &= ~L3C_CORE_EN;
+               writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+               /* Disable core-tracetag statistics */
+               val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+               val &= ~L3C_TRACETAG_CORE_EN;
+               writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+       }
+}
+
+static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
+{
+       if (event->attr.config1 != 0x0) {
+               hisi_l3c_pmu_config_req_tracetag(event);
+               hisi_l3c_pmu_config_core_tracetag(event);
+               hisi_l3c_pmu_config_ds(event);
+       }
+}
+
+static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
+{
+       if (event->attr.config1 != 0x0) {
+               hisi_l3c_pmu_clear_ds(event);
+               hisi_l3c_pmu_clear_core_tracetag(event);
+               hisi_l3c_pmu_clear_req_tracetag(event);
+       }
+}
 
 /*
  * Select the counter register offset using the counter index
@@ -50,14 +233,12 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
 static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
                                     struct hw_perf_event *hwc)
 {
-       /* Read 64-bits and the upper 16 bits are RAZ */
        return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }
 
 static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
                                       struct hw_perf_event *hwc, u64 val)
 {
-       /* Write 64-bits and the upper 16 bits are WI */
        writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }
 
@@ -166,23 +347,14 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
 
 static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
        { "HISI0213", },
-       {},
+       { "HISI0214", },
+       {}
 };
 MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
 
 static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
                                  struct hisi_pmu *l3c_pmu)
 {
-       unsigned long long id;
-       acpi_status status;
-
-       status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
-                                      "_UID", NULL, &id);
-       if (ACPI_FAILURE(status))
-               return -EINVAL;
-
-       l3c_pmu->index_id = id;
-
        /*
         * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
         * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
@@ -220,6 +392,20 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
        .attrs = hisi_l3c_pmu_v1_format_attr,
 };
 
+static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
+       HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+       HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+       HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+       HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
+       HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
+       NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
+       .name = "format",
+       .attrs = hisi_l3c_pmu_v2_format_attr,
+};
+
 static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
        HISI_PMU_EVENT_ATTR(rd_cpipe,           0x00),
        HISI_PMU_EVENT_ATTR(wr_cpipe,           0x01),
@@ -242,6 +428,19 @@ static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
        .attrs = hisi_l3c_pmu_v1_events_attr,
 };
 
+static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
+       HISI_PMU_EVENT_ATTR(l3c_hit,            0x48),
+       HISI_PMU_EVENT_ATTR(cycles,             0x7f),
+       HISI_PMU_EVENT_ATTR(l3c_ref,            0xb8),
+       HISI_PMU_EVENT_ATTR(dat_access,         0xb9),
+       NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
+       .name = "events",
+       .attrs = hisi_l3c_pmu_v2_events_attr,
+};
+
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
 
 static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
@@ -273,6 +472,14 @@ static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
        NULL,
 };
 
+static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
+       &hisi_l3c_pmu_v2_format_group,
+       &hisi_l3c_pmu_v2_events_group,
+       &hisi_l3c_pmu_cpumask_attr_group,
+       &hisi_l3c_pmu_identifier_group,
+       NULL
+};
+
 static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
        .write_evtype           = hisi_l3c_pmu_write_evtype,
        .get_event_idx          = hisi_uncore_pmu_get_event_idx,
@@ -286,6 +493,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
        .read_counter           = hisi_l3c_pmu_read_counter,
        .get_int_status         = hisi_l3c_pmu_get_int_status,
        .clear_int_status       = hisi_l3c_pmu_clear_int_status,
+       .enable_filter          = hisi_l3c_pmu_enable_filter,
+       .disable_filter         = hisi_l3c_pmu_disable_filter,
 };
 
 static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
@@ -301,12 +510,20 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
        if (ret)
                return ret;
 
+       if (l3c_pmu->identifier >= HISI_PMU_V2) {
+               l3c_pmu->counter_bits = 64;
+               l3c_pmu->check_event = L3C_V2_NR_EVENTS;
+               l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
+       } else {
+               l3c_pmu->counter_bits = 48;
+               l3c_pmu->check_event = L3C_V1_NR_EVENTS;
+               l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
+       }
+
        l3c_pmu->num_counters = L3C_NR_COUNTERS;
-       l3c_pmu->counter_bits = 48;
        l3c_pmu->ops = &hisi_uncore_l3c_ops;
        l3c_pmu->dev = &pdev->dev;
        l3c_pmu->on_cpu = -1;
-       l3c_pmu->check_event = L3C_V1_NR_EVENTS;
 
        return 0;
 }
@@ -334,8 +551,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
                return ret;
        }
 
+       /*
+        * CCL_ID is used to identify the L3C in the same SCCL which was
+        * used _UID by mistake.
+        */
        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
-                             l3c_pmu->sccl_id, l3c_pmu->index_id);
+                             l3c_pmu->sccl_id, l3c_pmu->ccl_id);
        l3c_pmu->pmu = (struct pmu) {
                .name           = name,
                .module         = THIS_MODULE,
@@ -348,7 +569,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
                .start          = hisi_uncore_pmu_start,
                .stop           = hisi_uncore_pmu_stop,
                .read           = hisi_uncore_pmu_read,
-               .attr_groups    = hisi_l3c_pmu_v1_attr_groups,
+               .attr_groups    = l3c_pmu->pmu_events.attr_groups,
                .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
        };
 
index c9d8e2e..13c68b5 100644 (file)
@@ -21,7 +21,7 @@
 #include "hisi_uncore_pmu.h"
 
 #define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
-#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
+#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
 
 /*
  * PMU format attributes
@@ -245,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event)
        hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
                                    HISI_GET_EVENTID(event));
 
+       if (hisi_pmu->ops->enable_filter)
+               hisi_pmu->ops->enable_filter(event);
+
        hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
        hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
 }
@@ -259,6 +262,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event)
 
        hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
        hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
+
+       if (hisi_pmu->ops->disable_filter)
+               hisi_pmu->ops->disable_filter(event);
 }
 
 void hisi_uncore_pmu_set_event_period(struct perf_event *event)
index 933020c..1591dbc 100644 (file)
@@ -11,6 +11,7 @@
 #ifndef __HISI_UNCORE_PMU_H__
 #define __HISI_UNCORE_PMU_H__
 
+#include <linux/bitfield.h>
 #include <linux/cpumask.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
@@ -22,6 +23,7 @@
 #undef pr_fmt
 #define pr_fmt(fmt)     "hisi_pmu: " fmt
 
+#define HISI_PMU_V2            0x30
 #define HISI_MAX_COUNTERS 0x10
 #define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
 
 #define HISI_PMU_EVENT_ATTR(_name, _config)            \
        HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
 
+#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo)        \
+       static inline u32 hisi_get_##name(struct perf_event *event)            \
+       {                                                                  \
+               return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config);  \
+       }
+
 struct hisi_pmu;
 
 struct hisi_uncore_ops {
@@ -50,11 +58,14 @@ struct hisi_uncore_ops {
        void (*stop_counters)(struct hisi_pmu *);
        u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
        void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
+       void (*enable_filter)(struct perf_event *event);
+       void (*disable_filter)(struct perf_event *event);
 };
 
 struct hisi_pmu_hwevents {
        struct perf_event *hw_events[HISI_MAX_COUNTERS];
        DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+       const struct attribute_group **attr_groups;
 };
 
 /* Generic pmu struct for different pmu types */