perf/core: Add PERF_SAMPLE_CGROUP feature
author Namhyung Kim <namhyung@kernel.org>
Wed, 25 Mar 2020 12:45:29 +0000 (21:45 +0900)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 27 Mar 2020 13:41:44 +0000 (10:41 -0300)
The PERF_SAMPLE_CGROUP bit saves (perf_event) cgroup information in the
sample.  It adds a 64-bit id that identifies the current cgroup; the id
is the file handle in the cgroup file system.  Userspace should use this
information together with the PERF_RECORD_CGROUP event to determine
which cgroup a sample belongs to.

I put it before PERF_SAMPLE_AUX for simplicity since it just needs a
64-bit word.  But if we want bigger samples, I can work in that
direction too.

Committer testing:

  $ pahole perf_sample_data | grep -w cgroup -B5 -A5
   /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
   struct perf_regs           regs_intr;            /*   312    16 */
   /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */
   u64                        stack_user_size;      /*   328     8 */
   u64                        phys_addr;            /*   336     8 */
   u64                        cgroup;               /*   344     8 */

   /* size: 384, cachelines: 6, members: 22 */
   /* padding: 32 */
  };
  $

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
include/linux/perf_event.h
include/uapi/linux/perf_event.h
init/Kconfig
kernel/events/core.c

index 8768a39..9c3e761 100644 (file)
@@ -1020,6 +1020,7 @@ struct perf_sample_data {
        u64                             stack_user_size;
 
        u64                             phys_addr;
+       u64                             cgroup;
 } ____cacheline_aligned;
 
 /* default value for data source */
index de95f6c..7b2d6fc 100644 (file)
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
        PERF_SAMPLE_REGS_INTR                   = 1U << 18,
        PERF_SAMPLE_PHYS_ADDR                   = 1U << 19,
        PERF_SAMPLE_AUX                         = 1U << 20,
+       PERF_SAMPLE_CGROUP                      = 1U << 21,
 
-       PERF_SAMPLE_MAX = 1U << 21,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 22,             /* non-ABI */
 
        __PERF_SAMPLE_CALLCHAIN_EARLY           = 1ULL << 63, /* non-ABI; internal use */
 };
index 20a6ac3..7766b06 100644 (file)
@@ -1027,7 +1027,8 @@ config CGROUP_PERF
        help
          This option extends the perf per-cpu mode to restrict monitoring
          to threads which belong to the cgroup specified and run on the
-         designated cpu.
+         designated cpu.  Or this can be used to have cgroup ID in samples
+         so that it can monitor performance events among cgroups.
 
          Say N if unsure.
 
index 994932d..1569979 100644 (file)
@@ -1862,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                size += sizeof(data->phys_addr);
 
+       if (sample_type & PERF_SAMPLE_CGROUP)
+               size += sizeof(data->cgroup);
+
        event->header_size = size;
 }
 
@@ -6867,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle,
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                perf_output_put(handle, data->phys_addr);
 
+       if (sample_type & PERF_SAMPLE_CGROUP)
+               perf_output_put(handle, data->cgroup);
+
        if (sample_type & PERF_SAMPLE_AUX) {
                perf_output_put(handle, data->aux_size);
 
@@ -7066,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header,
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                data->phys_addr = perf_virt_to_phys(data->addr);
 
+#ifdef CONFIG_CGROUP_PERF
+       if (sample_type & PERF_SAMPLE_CGROUP) {
+               struct cgroup *cgrp;
+
+               /* protected by RCU */
+               cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
+               data->cgroup = cgroup_id(cgrp);
+       }
+#endif
+
        if (sample_type & PERF_SAMPLE_AUX) {
                u64 size;
 
@@ -11264,6 +11280,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 
        if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
                ret = perf_reg_validate(attr->sample_regs_intr);
+
+#ifndef CONFIG_CGROUP_PERF
+       if (attr->sample_type & PERF_SAMPLE_CGROUP)
+               return -EINVAL;
+#endif
+
 out:
        return ret;