Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64...
[linux-2.6-microblaze.git] / drivers / perf / arm-cmn.c
index 6b50bc5..014010d 100644 (file)
 
 #define CMN_DTM_PMEVCNTSR              0x240
 
-#define CMN_DTM_UNIT_INFO              0x0910
+#define CMN650_DTM_UNIT_INFO           0x0910
+#define CMN_DTM_UNIT_INFO              0x0960
+#define CMN_DTM_UNIT_INFO_DTC_DOMAIN   GENMASK_ULL(1, 0)
 
 #define CMN_DTM_NUM_COUNTERS           4
 /* Want more local counters? Why not replicate the whole DTM! Ugh... */
@@ -279,16 +281,13 @@ struct arm_cmn_node {
        u16 id, logid;
        enum cmn_node_type type;
 
-       int dtm;
-       union {
-               /* DN/HN-F/CXHA */
-               struct {
-                       u8 val : 4;
-                       u8 count : 4;
-               } occupid[SEL_MAX];
-               /* XP */
-               u8 dtc;
-       };
+       u8 dtm;
+       s8 dtc;
+       /* DN/HN-F/CXHA */
+       struct {
+               u8 val : 4;
+               u8 count : 4;
+       } occupid[SEL_MAX];
        union {
                u8 event[4];
                __le32 event_sel;
@@ -538,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
 
                seq_puts(s, "\n     |");
                for (x = 0; x < cmn->mesh_x; x++) {
-                       u8 dtc = cmn->xps[xp_base + x].dtc;
+                       s8 dtc = cmn->xps[xp_base + x].dtc;
 
-                       if (dtc & (dtc - 1))
+                       if (dtc < 0)
                                seq_puts(s, " DTC ?? |");
                        else
-                               seq_printf(s, " DTC %ld  |", __ffs(dtc));
+                               seq_printf(s, " DTC %d  |", dtc);
                }
                seq_puts(s, "\n     |");
                for (x = 0; x < cmn->mesh_x; x++)
@@ -587,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
 struct arm_cmn_hw_event {
        struct arm_cmn_node *dn;
        u64 dtm_idx[4];
-       unsigned int dtc_idx;
-       u8 dtcs_used;
+       s8 dtc_idx[CMN_MAX_DTCS];
        u8 num_dns;
        u8 dtm_offset;
        bool wide_sel;
@@ -598,6 +596,10 @@ struct arm_cmn_hw_event {
 #define for_each_hw_dn(hw, dn, i) \
        for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
 
+/* @i is the DTC number, @idx is the counter index on that DTC */
+#define for_each_hw_dtc_idx(hw, i, idx) \
+       for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)
+
 static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
 {
        BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@@ -1427,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event)
 {
        struct arm_cmn *cmn = to_cmn(event->pmu);
        struct arm_cmn_hw_event *hw = to_cmn_hw(event);
-       unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
        u64 count;
 
-       for (i = 0; hw->dtcs_used & (1U << i); i++) {
-               writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
-               cmn->dtc[i].counters[hw->dtc_idx] = event;
+       for_each_hw_dtc_idx(hw, i, idx) {
+               writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+               cmn->dtc[i].counters[idx] = event;
        }
 
        count = arm_cmn_read_dtm(cmn, hw, false);
@@ -1445,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event)
        struct arm_cmn_hw_event *hw = to_cmn_hw(event);
        u64 delta, new, prev;
        unsigned long flags;
-       unsigned int i;
 
-       if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
-               i = __ffs(hw->dtcs_used);
-               delta = arm_cmn_read_cc(cmn->dtc + i);
+       if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
+               delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
                local64_add(delta, &event->count);
                return;
        }
@@ -1459,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event)
        delta = new - prev;
 
        local_irq_save(flags);
-       for (i = 0; hw->dtcs_used & (1U << i); i++) {
-               new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+       for_each_hw_dtc_idx(hw, i, idx) {
+               new = arm_cmn_read_counter(cmn->dtc + i, idx);
                delta += new << 16;
        }
        local_irq_restore(flags);
@@ -1516,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
        int i;
 
        if (type == CMN_TYPE_DTC) {
-               i = __ffs(hw->dtcs_used);
+               i = hw->dtc_idx[0];
                writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
                cmn->dtc[i].cc_active = true;
        } else if (type == CMN_TYPE_WP) {
@@ -1547,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
        int i;
 
        if (type == CMN_TYPE_DTC) {
-               i = __ffs(hw->dtcs_used);
+               i = hw->dtc_idx[0];
                cmn->dtc[i].cc_active = false;
        } else if (type == CMN_TYPE_WP) {
                int wp_idx = arm_cmn_wp_idx(event);
@@ -1571,7 +1570,7 @@ struct arm_cmn_val {
        u8 dtm_count[CMN_MAX_DTMS];
        u8 occupid[CMN_MAX_DTMS][SEL_MAX];
        u8 wp[CMN_MAX_DTMS][4];
-       int dtc_count;
+       int dtc_count[CMN_MAX_DTCS];
        bool cycles;
 };
 
@@ -1592,7 +1591,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
                return;
        }
 
-       val->dtc_count++;
+       for_each_hw_dtc_idx(hw, dtc, idx)
+               val->dtc_count[dtc]++;
 
        for_each_hw_dn(hw, dn, i) {
                int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1639,8 +1639,9 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
                goto done;
        }
 
-       if (val->dtc_count == CMN_DT_NUM_COUNTERS)
-               goto done;
+       for (i = 0; i < CMN_MAX_DTCS; i++)
+               if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
+                       goto done;
 
        for_each_hw_dn(hw, dn, i) {
                int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1733,12 +1734,19 @@ static int arm_cmn_event_init(struct perf_event *event)
        hw->dn = arm_cmn_node(cmn, type);
        if (!hw->dn)
                return -EINVAL;
+
+       memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx));
        for (dn = hw->dn; dn->type == type; dn++) {
                if (bynodeid && dn->id != nodeid) {
                        hw->dn++;
                        continue;
                }
                hw->num_dns++;
+               if (dn->dtc < 0)
+                       memset(hw->dtc_idx, 0, cmn->num_dtcs);
+               else
+                       hw->dtc_idx[dn->dtc] = 0;
+
                if (bynodeid)
                        break;
        }
@@ -1750,12 +1758,6 @@ static int arm_cmn_event_init(struct perf_event *event)
                        nodeid, nid.x, nid.y, nid.port, nid.dev, type);
                return -EINVAL;
        }
-       /*
-        * Keep assuming non-cycles events count in all DTC domains; turns out
-        * it's hard to make a worthwhile optimisation around this, short of
-        * going all-in with domain-local counter allocation as well.
-        */
-       hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
 
        return arm_cmn_validate_group(cmn, event);
 }
@@ -1781,46 +1783,48 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
        }
        memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
 
-       for (i = 0; hw->dtcs_used & (1U << i); i++)
-               cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+       for_each_hw_dtc_idx(hw, j, idx)
+               cmn->dtc[j].counters[idx] = NULL;
 }
 
 static int arm_cmn_event_add(struct perf_event *event, int flags)
 {
        struct arm_cmn *cmn = to_cmn(event->pmu);
        struct arm_cmn_hw_event *hw = to_cmn_hw(event);
-       struct arm_cmn_dtc *dtc = &cmn->dtc[0];
        struct arm_cmn_node *dn;
        enum cmn_node_type type = CMN_EVENT_TYPE(event);
-       unsigned int i, dtc_idx, input_sel;
+       unsigned int input_sel, i = 0;
 
        if (type == CMN_TYPE_DTC) {
-               i = 0;
                while (cmn->dtc[i].cycles)
                        if (++i == cmn->num_dtcs)
                                return -ENOSPC;
 
                cmn->dtc[i].cycles = event;
-               hw->dtc_idx = CMN_DT_NUM_COUNTERS;
-               hw->dtcs_used = 1U << i;
+               hw->dtc_idx[0] = i;
 
                if (flags & PERF_EF_START)
                        arm_cmn_event_start(event, 0);
                return 0;
        }
 
-       /* Grab a free global counter first... */
-       dtc_idx = 0;
-       while (dtc->counters[dtc_idx])
-               if (++dtc_idx == CMN_DT_NUM_COUNTERS)
-                       return -ENOSPC;
-
-       hw->dtc_idx = dtc_idx;
+       /* Grab the global counters first... */
+       for_each_hw_dtc_idx(hw, j, idx) {
+               if (cmn->part == PART_CMN600 && j > 0) {
+                       idx = hw->dtc_idx[0];
+               } else {
+                       idx = 0;
+                       while (cmn->dtc[j].counters[idx])
+                               if (++idx == CMN_DT_NUM_COUNTERS)
+                                       goto free_dtms;
+               }
+               hw->dtc_idx[j] = idx;
+       }
 
-       /* ...then the local counters to feed it. */
+       /* ...then the local counters to feed them */
        for_each_hw_dn(hw, dn, i) {
                struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
-               unsigned int dtm_idx, shift;
+               unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
                u64 reg;
 
                dtm_idx = 0;
@@ -1839,11 +1843,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
 
                        tmp = dtm->wp_event[wp_idx ^ 1];
                        if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
-                                       CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+                                       CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
                                goto free_dtms;
 
                        input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
-                       dtm->wp_event[wp_idx] = dtc_idx;
+                       dtm->wp_event[wp_idx] = hw->dtc_idx[d];
                        writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
                } else {
                        struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
@@ -1863,7 +1867,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
                dtm->input_sel[dtm_idx] = input_sel;
                shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
                dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
-               dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+               dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift;
                dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
                reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
                writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
@@ -1891,7 +1895,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
        arm_cmn_event_stop(event, PERF_EF_UPDATE);
 
        if (type == CMN_TYPE_DTC)
-               cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+               cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
        else
                arm_cmn_event_clear(cmn, event, hw->num_dns);
 }
@@ -2072,7 +2076,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 {
        struct arm_cmn_node *dn, *xp;
        int dtc_idx = 0;
-       u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
 
        cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
        if (!cmn->dtc)
@@ -2082,23 +2085,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 
        cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
 
+       if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) {
+               /* We do at least know that a DTC's XP must be in that DTC's domain */
+               dn = arm_cmn_node(cmn, CMN_TYPE_DTC);
+               for (int i = 0; i < cmn->num_dtcs; i++)
+                       arm_cmn_node_to_xp(cmn, dn + i)->dtc = i;
+       }
+
        for (dn = cmn->dns; dn->type; dn++) {
-               if (dn->type == CMN_TYPE_XP) {
-                       dn->dtc &= dtcs_present;
+               if (dn->type == CMN_TYPE_XP)
                        continue;
-               }
 
                xp = arm_cmn_node_to_xp(cmn, dn);
+               dn->dtc = xp->dtc;
                dn->dtm = xp->dtm;
                if (cmn->multi_dtm)
                        dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
 
                if (dn->type == CMN_TYPE_DTC) {
-                       int err;
-                       /* We do at least know that a DTC's XP must be in that DTC's domain */
-                       if (xp->dtc == 0xf)
-                               xp->dtc = 1 << dtc_idx;
-                       err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+                       int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+
                        if (err)
                                return err;
                }
@@ -2117,6 +2123,16 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
        return 0;
 }
 
+static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region)
+{
+       int offset = CMN_DTM_UNIT_INFO;
+
+       if (cmn->part == PART_CMN650 || cmn->part == PART_CI700)
+               offset = CMN650_DTM_UNIT_INFO;
+
+       return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
+}
+
 static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
 {
        int level;
@@ -2246,9 +2262,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                        cmn->mesh_x = xp->logid;
 
                if (cmn->part == PART_CMN600)
-                       xp->dtc = 0xf;
+                       xp->dtc = -1;
                else
-                       xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
+                       xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
 
                xp->dtm = dtm - cmn->dtms;
                arm_cmn_init_dtm(dtm++, xp, 0);