perf header: Record non-CPU PMU capabilities
authorRavi Bangoria <ravi.bangoria@amd.com>
Sat, 4 Jun 2022 04:45:16 +0000 (10:15 +0530)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Fri, 24 Jun 2022 16:18:55 +0000 (13:18 -0300)
PMUs advertise their capabilities via sysfs attribute files but
the perf tool currently parses only core(CPU) or hybrid core PMU
capabilities. Add support of recording non-core PMU capabilities
int perf.data header.

Note that a newly proposed HEADER_PMU_CAPS is replacing existing
HEADER_HYBRID_CPU_PMU_CAPS. Special care is taken for hybrid core
PMUs by writing their capabilities first in the perf.data header
to make sure new perf.data file being read by old perf tool does
not break.

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <rrichter@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: like.xu.linux@gmail.com
Cc: x86@kernel.org
Link: https://lore.kernel.org/r/20220604044519.594-6-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf.data-file-format.txt
tools/perf/builtin-inject.c
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/header.c
tools/perf/util/header.h

index bc9f0aa..635ba04 100644 (file)
@@ -419,18 +419,20 @@ Example:
   cpu_core cpu list : 0-15
   cpu_atom cpu list : 16-23
 
-       HEADER_HYBRID_CPU_PMU_CAPS = 31,
+       HEADER_PMU_CAPS = 31,
 
-       A list of hybrid CPU PMU capabilities.
+       List of pmu capabilities (except cpu pmu which is already
+       covered by HEADER_CPU_PMU_CAPS). Note that hybrid cpu pmu
+       capabilities are also stored here.
 
 struct {
        u32 nr_pmu;
        struct {
-               u32 nr_cpu_pmu_caps;
+               u32 nr_caps;
                {
                        char    name[];
                        char    value[];
-               } [nr_cpu_pmu_caps];
+               } [nr_caps];
                char pmu_name[];
        } [nr_pmu];
 };
index 42e2918..b9022d4 100644 (file)
@@ -809,7 +809,7 @@ static bool keep_feat(int feat)
        case HEADER_CPU_PMU_CAPS:
        case HEADER_CLOCK_DATA:
        case HEADER_HYBRID_TOPOLOGY:
-       case HEADER_HYBRID_CPU_PMU_CAPS:
+       case HEADER_PMU_CAPS:
                return true;
        /* Information that can be updated */
        case HEADER_BUILD_ID:
index 7d3aeb2..5b8cf6a 100644 (file)
@@ -219,13 +219,13 @@ void perf_env__exit(struct perf_env *env)
        }
        zfree(&env->hybrid_nodes);
 
-       for (i = 0; i < env->nr_hybrid_cpc_nodes; i++) {
-               for (j = 0; j < env->hybrid_cpc_nodes[i].nr_cpu_pmu_caps; j++)
-                       zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps[j]);
-               zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps);
-               zfree(&env->hybrid_cpc_nodes[i].pmu_name);
+       for (i = 0; i < env->nr_pmus_with_caps; i++) {
+               for (j = 0; j < env->pmu_caps[i].nr_caps; j++)
+                       zfree(&env->pmu_caps[i].caps[j]);
+               zfree(&env->pmu_caps[i].caps);
+               zfree(&env->pmu_caps[i].pmu_name);
        }
-       zfree(&env->hybrid_cpc_nodes);
+       zfree(&env->pmu_caps);
 }
 
 void perf_env__init(struct perf_env *env)
@@ -531,3 +531,51 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
 
        return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
 }
+
+char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+                            const char *cap)
+{
+       char *cap_eq;
+       int cap_size;
+       char **ptr;
+       int i, j;
+
+       if (!pmu_name || !cap)
+               return NULL;
+
+       cap_size = strlen(cap);
+       cap_eq = zalloc(cap_size + 2);
+       if (!cap_eq)
+               return NULL;
+
+       memcpy(cap_eq, cap, cap_size);
+       cap_eq[cap_size] = '=';
+
+       if (!strcmp(pmu_name, "cpu")) {
+               for (i = 0; i < env->nr_cpu_pmu_caps; i++) {
+                       if (!strncmp(env->cpu_pmu_caps[i], cap_eq, cap_size + 1)) {
+                               free(cap_eq);
+                               return &env->cpu_pmu_caps[i][cap_size + 1];
+                       }
+               }
+               goto out;
+       }
+
+       for (i = 0; i < env->nr_pmus_with_caps; i++) {
+               if (strcmp(env->pmu_caps[i].pmu_name, pmu_name))
+                       continue;
+
+               ptr = env->pmu_caps[i].caps;
+
+               for (j = 0; j < env->pmu_caps[i].nr_caps; j++) {
+                       if (!strncmp(ptr[j], cap_eq, cap_size + 1)) {
+                               free(cap_eq);
+                               return &ptr[j][cap_size + 1];
+                       }
+               }
+       }
+
+out:
+       free(cap_eq);
+       return NULL;
+}
index 43aab59..4566c51 100644 (file)
@@ -43,10 +43,10 @@ struct hybrid_node {
        char    *cpus;
 };
 
-struct hybrid_cpc_node {
-       int             nr_cpu_pmu_caps;
+struct pmu_caps {
+       int             nr_caps;
        unsigned int    max_branches;
-       char            **cpu_pmu_caps;
+       char            **caps;
        char            *pmu_name;
 };
 
@@ -74,7 +74,7 @@ struct perf_env {
        int                     nr_groups;
        int                     nr_cpu_pmu_caps;
        int                     nr_hybrid_nodes;
-       int                     nr_hybrid_cpc_nodes;
+       int                     nr_pmus_with_caps;
        char                    *cmdline;
        const char              **cmdline_argv;
        char                    *sibling_cores;
@@ -94,7 +94,7 @@ struct perf_env {
        struct memory_node      *memory_nodes;
        unsigned long long       memory_bsize;
        struct hybrid_node      *hybrid_nodes;
-       struct hybrid_cpc_node  *hybrid_cpc_nodes;
+       struct pmu_caps         *pmu_caps;
 #ifdef HAVE_LIBBPF_SUPPORT
        /*
         * bpf_info_lock protects bpf rbtrees. This is needed because the
@@ -172,4 +172,6 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
 
 int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
+char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+                            const char *cap);
 #endif /* __PERF_ENV_H */
index c9d1f76..5114784 100644 (file)
@@ -1512,18 +1512,13 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
        return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
 }
 
-static int write_per_cpu_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
-                                 bool write_pmu)
+static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
+                           bool write_pmu)
 {
        struct perf_pmu_caps *caps = NULL;
-       int nr_caps;
        int ret;
 
-       nr_caps = perf_pmu__caps_parse(pmu);
-       if (nr_caps < 0)
-               return nr_caps;
-
-       ret = do_write(ff, &nr_caps, sizeof(nr_caps));
+       ret = do_write(ff, &pmu->nr_caps, sizeof(pmu->nr_caps));
        if (ret < 0)
                return ret;
 
@@ -1550,33 +1545,60 @@ static int write_cpu_pmu_caps(struct feat_fd *ff,
                              struct evlist *evlist __maybe_unused)
 {
        struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+       int ret;
 
        if (!cpu_pmu)
                return -ENOENT;
 
-       return write_per_cpu_pmu_caps(ff, cpu_pmu, false);
+       ret = perf_pmu__caps_parse(cpu_pmu);
+       if (ret < 0)
+               return ret;
+
+       return __write_pmu_caps(ff, cpu_pmu, false);
 }
 
-static int write_hybrid_cpu_pmu_caps(struct feat_fd *ff,
-                                    struct evlist *evlist __maybe_unused)
+static int write_pmu_caps(struct feat_fd *ff,
+                         struct evlist *evlist __maybe_unused)
 {
-       struct perf_pmu *pmu;
-       u32 nr_pmu = perf_pmu__hybrid_pmu_num();
+       struct perf_pmu *pmu = NULL;
+       int nr_pmu = 0;
        int ret;
 
-       if (nr_pmu == 0)
-               return -ENOENT;
+       while ((pmu = perf_pmu__scan(pmu))) {
+               if (!pmu->name || !strcmp(pmu->name, "cpu") ||
+                   perf_pmu__caps_parse(pmu) <= 0)
+                       continue;
+               nr_pmu++;
+       }
 
        ret = do_write(ff, &nr_pmu, sizeof(nr_pmu));
        if (ret < 0)
                return ret;
 
+       if (!nr_pmu)
+               return 0;
+
+       /*
+        * Write hybrid pmu caps first to maintain compatibility with
+        * older perf tool.
+        */
+       pmu = NULL;
        perf_pmu__for_each_hybrid_pmu(pmu) {
-               ret = write_per_cpu_pmu_caps(ff, pmu, true);
+               ret = __write_pmu_caps(ff, pmu, true);
                if (ret < 0)
                        return ret;
        }
 
+       pmu = NULL;
+       while ((pmu = perf_pmu__scan(pmu))) {
+               if (!pmu->name || !strcmp(pmu->name, "cpu") ||
+                   !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name))
+                       continue;
+
+               ret = __write_pmu_caps(ff, pmu, true);
+               if (ret < 0)
+                       return ret;
+       }
        return 0;
 }
 
@@ -2051,8 +2073,7 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
                ff->ph->env.comp_level, ff->ph->env.comp_ratio);
 }
 
-static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char **cpu_pmu_caps,
-                                  char *pmu_name)
+static void __print_pmu_caps(FILE *fp, int nr_caps, char **caps, char *pmu_name)
 {
        const char *delimiter = "";
        int i;
@@ -2064,7 +2085,7 @@ static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char **cpu_pmu_caps,
 
        fprintf(fp, "# %s pmu capabilities: ", pmu_name);
        for (i = 0; i < nr_caps; i++) {
-               fprintf(fp, "%s%s", delimiter, cpu_pmu_caps[i]);
+               fprintf(fp, "%s%s", delimiter, caps[i]);
                delimiter = ", ";
        }
 
@@ -2073,19 +2094,18 @@ static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char **cpu_pmu_caps,
 
 static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
 {
-       print_per_cpu_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps,
-                              ff->ph->env.cpu_pmu_caps, (char *)"cpu");
+       __print_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps,
+                        ff->ph->env.cpu_pmu_caps, (char *)"cpu");
 }
 
-static void print_hybrid_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
 {
-       struct hybrid_cpc_node *n;
+       struct pmu_caps *pmu_caps;
 
-       for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) {
-               n = &ff->ph->env.hybrid_cpc_nodes[i];
-               print_per_cpu_pmu_caps(fp, n->nr_cpu_pmu_caps,
-                                      n->cpu_pmu_caps,
-                                      n->pmu_name);
+       for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) {
+               pmu_caps = &ff->ph->env.pmu_caps[i];
+               __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
+                                pmu_caps->pmu_name);
        }
 }
 
@@ -3196,27 +3216,26 @@ static int process_compressed(struct feat_fd *ff,
        return 0;
 }
 
-static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
-                                   char ***cpu_pmu_caps,
-                                   unsigned int *max_branches)
+static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
+                             char ***caps, unsigned int *max_branches)
 {
        char *name, *value, *ptr;
-       u32 nr_caps, i;
+       u32 nr_pmu_caps, i;
 
-       *nr_cpu_pmu_caps = 0;
-       *cpu_pmu_caps = NULL;
+       *nr_caps = 0;
+       *caps = NULL;
 
-       if (do_read_u32(ff, &nr_caps))
+       if (do_read_u32(ff, &nr_pmu_caps))
                return -1;
 
-       if (!nr_caps)
+       if (!nr_pmu_caps)
                return 0;
 
-       *cpu_pmu_caps = zalloc(sizeof(char *) * nr_caps);
-       if (!*cpu_pmu_caps)
+       *caps = zalloc(sizeof(char *) * nr_pmu_caps);
+       if (!*caps)
                return -1;
 
-       for (i = 0; i < nr_caps; i++) {
+       for (i = 0; i < nr_pmu_caps; i++) {
                name = do_read_string(ff);
                if (!name)
                        goto error;
@@ -3228,7 +3247,7 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
                if (asprintf(&ptr, "%s=%s", name, value) < 0)
                        goto free_value;
 
-               (*cpu_pmu_caps)[i] = ptr;
+               (*caps)[i] = ptr;
 
                if (!strcmp(name, "branches"))
                        *max_branches = atoi(value);
@@ -3236,7 +3255,7 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
                free(value);
                free(name);
        }
-       *nr_cpu_pmu_caps = nr_caps;
+       *nr_caps = nr_pmu_caps;
        return 0;
 
 free_value:
@@ -3245,29 +3264,28 @@ free_name:
        free(name);
 error:
        for (; i > 0; i--)
-               free((*cpu_pmu_caps)[i - 1]);
-       free(*cpu_pmu_caps);
-       *cpu_pmu_caps = NULL;
-       *nr_cpu_pmu_caps = 0;
+               free((*caps)[i - 1]);
+       free(*caps);
+       *caps = NULL;
+       *nr_caps = 0;
        return -1;
 }
 
 static int process_cpu_pmu_caps(struct feat_fd *ff,
                                void *data __maybe_unused)
 {
-       int ret = process_per_cpu_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
-                                       &ff->ph->env.cpu_pmu_caps,
-                                       &ff->ph->env.max_branches);
+       int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
+                                    &ff->ph->env.cpu_pmu_caps,
+                                    &ff->ph->env.max_branches);
 
        if (!ret && !ff->ph->env.cpu_pmu_caps)
                pr_debug("cpu pmu capabilities not available\n");
        return ret;
 }
 
-static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff,
-                                      void *data __maybe_unused)
+static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
 {
-       struct hybrid_cpc_node *nodes;
+       struct pmu_caps *pmu_caps;
        u32 nr_pmu, i;
        int ret;
        int j;
@@ -3276,45 +3294,45 @@ static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff,
                return -1;
 
        if (!nr_pmu) {
-               pr_debug("hybrid cpu pmu capabilities not available\n");
+               pr_debug("pmu capabilities not available\n");
                return 0;
        }
 
-       nodes = zalloc(sizeof(*nodes) * nr_pmu);
-       if (!nodes)
+       pmu_caps = zalloc(sizeof(*pmu_caps) * nr_pmu);
+       if (!pmu_caps)
                return -ENOMEM;
 
        for (i = 0; i < nr_pmu; i++) {
-               struct hybrid_cpc_node *n = &nodes[i];
-
-               ret = process_per_cpu_pmu_caps(ff, &n->nr_cpu_pmu_caps,
-                                              &n->cpu_pmu_caps,
-                                              &n->max_branches);
+               ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
+                                        &pmu_caps[i].caps,
+                                        &pmu_caps[i].max_branches);
                if (ret)
                        goto err;
 
-               n->pmu_name = do_read_string(ff);
-               if (!n->pmu_name) {
+               pmu_caps[i].pmu_name = do_read_string(ff);
+               if (!pmu_caps[i].pmu_name) {
                        ret = -1;
                        goto err;
                }
-               if (!n->nr_cpu_pmu_caps)
-                       pr_debug("%s pmu capabilities not available\n", n->pmu_name);
+               if (!pmu_caps[i].nr_caps) {
+                       pr_debug("%s pmu capabilities not available\n",
+                                pmu_caps[i].pmu_name);
+               }
        }
 
-       ff->ph->env.nr_hybrid_cpc_nodes = nr_pmu;
-       ff->ph->env.hybrid_cpc_nodes = nodes;
+       ff->ph->env.nr_pmus_with_caps = nr_pmu;
+       ff->ph->env.pmu_caps = pmu_caps;
        return 0;
 
 err:
        for (i = 0; i < nr_pmu; i++) {
-               for (j = 0; j < nodes[i].nr_cpu_pmu_caps; j++)
-                       free(nodes[i].cpu_pmu_caps[j]);
-               free(nodes[i].cpu_pmu_caps);
-               free(nodes[i].pmu_name);
+               for (j = 0; j < pmu_caps[i].nr_caps; j++)
+                       free(pmu_caps[i].caps[j]);
+               free(pmu_caps[i].caps);
+               free(pmu_caps[i].pmu_name);
        }
 
-       free(nodes);
+       free(pmu_caps);
        return ret;
 }
 
@@ -3380,7 +3398,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPR(CPU_PMU_CAPS,  cpu_pmu_caps,   false),
        FEAT_OPR(CLOCK_DATA,    clock_data,     false),
        FEAT_OPN(HYBRID_TOPOLOGY,       hybrid_topology,        true),
-       FEAT_OPR(HYBRID_CPU_PMU_CAPS,   hybrid_cpu_pmu_caps,    false),
+       FEAT_OPR(PMU_CAPS,      pmu_caps,       false),
 };
 
 struct header_print_data {
index 08563c1..5790bf5 100644 (file)
@@ -46,7 +46,7 @@ enum {
        HEADER_CPU_PMU_CAPS,
        HEADER_CLOCK_DATA,
        HEADER_HYBRID_TOPOLOGY,
-       HEADER_HYBRID_CPU_PMU_CAPS,
+       HEADER_PMU_CAPS,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };