perf evlist: Special map propagation for tool events that read on 1 CPU
authorIan Rogers <irogers@google.com>
Tue, 10 Feb 2026 06:03:56 +0000 (22:03 -0800)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Tue, 10 Feb 2026 12:33:28 +0000 (09:33 -0300)
Tool events like duration_time don't need a perf_cpu_map that contains
all online CPUs.

Having such a perf_cpu_map causes overheads when iterating between
events for CPU affinity.

During parsing, mark events that only read on a single CPU map index as
such; then, during map propagation, set up the evsel's CPUs and thereby
the evlist's CPUs accordingly.

The setting cannot be done early in parsing as user CPUs are only fully
known when evlist__create_maps is called.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andres Freund <andres@anarazel.de>
Cc: Dapeng Mi <dapeng1.mi@linux.intel.com>
Cc: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/lib/perf/evlist.c
tools/lib/perf/include/internal/evsel.h
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h

index 3ed023f..1f210da 100644 (file)
@@ -101,6 +101,28 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
                evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
        }
 
+       /*
+        * Tool events may only read on the first CPU index to avoid double
+        * counting things like duration_time. Make the evsel->cpus contain just
+        * that single entry, otherwise we may spend time changing affinity to
+        * CPUs that only have tool events, etc.
+        */
+       if (evsel->reads_only_on_cpu_idx0 && perf_cpu_map__nr(evsel->cpus) > 0) {
+               struct perf_cpu_map *srcs[3] = {
+                       evlist->all_cpus,
+                       evlist->user_requested_cpus,
+                       evsel->pmu_cpus,
+               };
+               for (size_t i = 0; i < ARRAY_SIZE(srcs); i++) {
+                       if (!srcs[i])
+                               continue;
+
+                       perf_cpu_map__put(evsel->cpus);
+                       evsel->cpus = perf_cpu_map__new_int(perf_cpu_map__cpu(srcs[i], 0).cpu);
+                       break;
+               }
+       }
+
        /* Sanity check assert before the evsel is potentially removed. */
        assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus));
 
@@ -133,16 +155,22 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 
 static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
 {
-       struct perf_evsel *evsel, *n;
-
        evlist->needs_map_propagation = true;
 
        /* Clear the all_cpus set which will be merged into during propagation. */
        perf_cpu_map__put(evlist->all_cpus);
        evlist->all_cpus = NULL;
 
-       list_for_each_entry_safe(evsel, n, &evlist->entries, node)
-               __perf_evlist__propagate_maps(evlist, evsel);
+       /* 2 rounds so that reads_only_on_cpu_idx0 events benefit from knowing the other CPU maps. */
+       for (int round = 0; round < 2; round++) {
+               struct perf_evsel *evsel, *n;
+
+               list_for_each_entry_safe(evsel, n, &evlist->entries, node) {
+                       if ((!evsel->reads_only_on_cpu_idx0 && round == 0) ||
+                           (evsel->reads_only_on_cpu_idx0 && round == 1))
+                               __perf_evlist__propagate_maps(evlist, evsel);
+               }
+       }
 }
 
 void perf_evlist__add(struct perf_evlist *evlist,
index fefe64b..b988034 100644 (file)
@@ -128,6 +128,8 @@ struct perf_evsel {
        bool                     requires_cpu;
        /** Is the PMU for the event a core one? Effects the handling of own_cpus. */
        bool                     is_pmu_core;
+       /** Does the evsel only read on the first CPU index, such as tool time events? */
+       bool                     reads_only_on_cpu_idx0;
        int                      idx;
 };
 
index f631bf7..b9efb29 100644 (file)
@@ -269,6 +269,7 @@ __add_event(struct list_head *list, int *idx,
        evsel->core.pmu_cpus = pmu_cpus;
        evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
        evsel->core.is_pmu_core = is_pmu_core;
+       evsel->core.reads_only_on_cpu_idx0 = perf_pmu__reads_only_on_cpu_idx0(attr);
        evsel->pmu = pmu;
        evsel->alternate_hw_config = alternate_hw_config;
        evsel->first_wildcard_match = first_wildcard_match;
index bb399a4..81ab746 100644 (file)
@@ -2718,3 +2718,14 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config)
        }
        return NULL;
 }
+
+bool perf_pmu__reads_only_on_cpu_idx0(const struct perf_event_attr *attr)
+{
+       enum tool_pmu_event event;
+
+       if (attr->type != PERF_PMU_TYPE_TOOL)
+               return false;
+
+       event = (enum tool_pmu_event)attr->config;
+       return event != TOOL_PMU__EVENT_USER_TIME && event != TOOL_PMU__EVENT_SYSTEM_TIME;
+}
index 7ef90b5..41c2138 100644 (file)
@@ -350,6 +350,8 @@ void perf_pmu__delete(struct perf_pmu *pmu);
 const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config);
 bool perf_pmu__is_fake(const struct perf_pmu *pmu);
 
+bool perf_pmu__reads_only_on_cpu_idx0(const struct perf_event_attr *attr);
+
 static inline enum pmu_kind perf_pmu__kind(const struct perf_pmu *pmu)
 {
        __u32 type;