perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
author Namhyung Kim <namhyung@kernel.org>
Wed, 10 Feb 2021 08:33:26 +0000 (17:33 +0900)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 16 Apr 2021 16:58:52 +0000 (18:58 +0200)
This patch adds a new software event to count context switches
that involve a cgroup switch.  It is counted only if the cgroups of
the previous and next tasks are different.  Note that it only checks
the cgroups in the perf_event subsystem.  For cgroup v2, it shouldn't
matter anyway.

One can argue that we can do this by using the existing sched_switch
event with eBPF.  But some systems might not have eBPF for some reason,
so I'd like to add this as a simple alternative.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210210083327.22726-2-namhyung@kernel.org
include/linux/perf_event.h
include/uapi/linux/perf_event.h

index 92d51a7..8989b2b 100644 (file)
@@ -1218,6 +1218,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
        if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
                __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
+#ifdef CONFIG_CGROUP_PERF
+       if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
+           perf_cgroup_from_task(prev, NULL) !=
+           perf_cgroup_from_task(next, NULL))
+               __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
+#endif
+
        if (static_branch_unlikely(&perf_sched_events))
                __perf_event_task_sched_out(prev, next);
 }
index 31b00e3..0b58970 100644 (file)
@@ -112,6 +112,7 @@ enum perf_sw_ids {
        PERF_COUNT_SW_EMULATION_FAULTS          = 8,
        PERF_COUNT_SW_DUMMY                     = 9,
        PERF_COUNT_SW_BPF_OUTPUT                = 10,
+       PERF_COUNT_SW_CGROUP_SWITCHES           = 11,
 
        PERF_COUNT_SW_MAX,                      /* non-ABI */
 };