}
static struct perf_cpu_map *all_cpu_map;
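+/* number of filter entries; set at load time and reused when reading counts */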
+static __u32 filter_entry_cnt;
static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
struct perf_event_attr_map_entry *entry)
return err;
}
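+/*
+ * Attach the follower prog(s): with inherit enabled for PID/TGID
+ * filtering, attach them all so that the task_newtask and
+ * sched_process_exit tracepoints can track the task tree; otherwise
+ * only the fexit prog is needed.
+ */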
+static int bperf_attach_follower_program(struct bperf_follower_bpf *skel,
+ enum bperf_filter_type filter_type,
+ bool inherit)
+{
+ struct bpf_link *link;
+ int err = 0;
+
+ if ((filter_type == BPERF_FILTER_PID ||
+ filter_type == BPERF_FILTER_TGID) && inherit) {
+ /* attach all follower bpf progs to enable event inheritance */
+ err = bperf_follower_bpf__attach(skel);
+ } else {
+ link = bpf_program__attach(skel->progs.fexit_XXX);
+ if (IS_ERR(link))
+ err = PTR_ERR(link);
+ }
+
+ return err;
+}
+
static int bperf__load(struct evsel *evsel, struct target *target)
{
struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
int attr_map_fd, diff_map_fd = -1, err;
enum bperf_filter_type filter_type;
- __u32 filter_entry_cnt, i;
+ __u32 i;
if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
return -1;
/* set up reading map */
bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
filter_entry_cnt);
- /* set up follower filter based on target */
- bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
- filter_entry_cnt);
err = bperf_follower_bpf__load(evsel->follower_skel);
if (err) {
pr_err("Failed to load follower skeleton\n");
for (i = 0; i < filter_entry_cnt; i++) {
int filter_map_fd;
__u32 key;
+ struct bperf_filter_value fval = { i, 0 };
if (filter_type == BPERF_FILTER_PID ||
filter_type == BPERF_FILTER_TGID)
key = perf_thread_map__pid(evsel->core.threads, i);
else if (filter_type == BPERF_FILTER_CPU)
key = perf_cpu_map__cpu(all_cpu_map, i).cpu;
else
break;
filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
- bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+ bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY);
}
evsel->follower_skel->bss->type = filter_type;
+ evsel->follower_skel->bss->inherit = target->inherit;
- err = bperf_follower_bpf__attach(evsel->follower_skel);
+ err = bperf_attach_follower_program(evsel->follower_skel, filter_type,
+ target->inherit);
out:
if (err && evsel->bperf_leader_link_fd >= 0)
bperf_sync_counters(evsel);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
- for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ for (i = 0; i < filter_entry_cnt; i++) {
struct perf_cpu entry;
__u32 cpu;
#include <bpf/bpf_tracing.h>
#include "bperf_u.h"
+#define MAX_ENTRIES 102400
+
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(__u32));
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bperf_filter_value));
+ __uint(max_entries, MAX_ENTRIES);
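+ /* filter entries come and go with task fork/exit; don't preallocate */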
+ __uint(map_flags, BPF_F_NO_PREALLOC);
} filter SEC(".maps");
enum bperf_filter_type type = 0;
int enabled = 0;
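+/* set by userspace from target->inherit at load time */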
+int inherit;
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
struct bpf_perf_event_value *diff_val, *accum_val;
__u32 filter_key, zero = 0;
- __u32 *accum_key;
+ __u32 accum_key;
+ struct bperf_filter_value *fval;
if (!enabled)
return 0;
switch (type) {
case BPERF_FILTER_GLOBAL:
- accum_key = &zero;
+ accum_key = zero;
goto do_add;
case BPERF_FILTER_CPU:
filter_key = bpf_get_smp_processor_id();
break;
case BPERF_FILTER_PID:
filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
break;
case BPERF_FILTER_TGID:
- filter_key = bpf_get_current_pid_tgid() >> 32;
+ /* Use pid as the filter_key to exclude new task counts
+  * when inherit is disabled. Don't worry about existing
+  * children of the TGID losing their counts: bpf_counter has
+  * already added them to the filter map via perf_thread_map
+  * before this bpf prog runs.
+  */
+ filter_key = inherit ?
+ bpf_get_current_pid_tgid() >> 32 :
+ bpf_get_current_pid_tgid() & 0xffffffff;
break;
default:
return 0;
}
- accum_key = bpf_map_lookup_elem(&filter, &filter_key);
- if (!accum_key)
+ fval = bpf_map_lookup_elem(&filter, &filter_key);
+ if (!fval)
return 0;
+ accum_key = fval->accum_key;
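+ /* on_exittask only marks the entry as exited; delete it here, after
+  * accum_key has been saved so the final delta below still gets
+  * accumulated.
+  */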
+ if (fval->exited)
+ bpf_map_delete_elem(&filter, &filter_key);
+
do_add:
diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
if (!diff_val)
return 0;
- accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+ accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key);
if (!accum_val)
return 0;
return 0;
}
+/* This prog is only attached for PID or TGID filter types with inherit enabled. */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags)
+{
+ __u32 parent_key, child_key;
+ struct bperf_filter_value *parent_fval;
+ struct bperf_filter_value child_fval = { 0 };
+
+ if (!enabled)
+ return 0;
+
+ switch (type) {
+ case BPERF_FILTER_PID:
+ parent_key = bpf_get_current_pid_tgid() & 0xffffffff;
+ child_key = task->pid;
+ break;
+ case BPERF_FILTER_TGID:
+ parent_key = bpf_get_current_pid_tgid() >> 32;
+ child_key = task->tgid;
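+ /* a new thread shares the leader's tgid and is already counted */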
+ if (child_key == parent_key)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+
+ /* Check if the current task is one of the target tasks to be counted */
+ parent_fval = bpf_map_lookup_elem(&filter, &parent_key);
+ if (!parent_fval)
+ return 0;
+
+ /* Start counting for the new task by adding it into the filter map.
+  * It inherits the accum_key of its parent task so that they are
+  * counted together.
+  */
+ child_fval.accum_key = parent_fval->accum_key;
+ child_fval.exited = 0;
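+ /* BPF_NOEXIST: don't clobber an entry that is already being counted */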
+ bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST);
+
+ return 0;
+}
+
+/* This prog is only attached for PID or TGID filter types with inherit enabled. */
+SEC("tp_btf/sched_process_exit")
+int BPF_PROG(on_exittask, struct task_struct *task)
+{
+ __u32 pid;
+ struct bperf_filter_value *fval;
+
+ if (!enabled)
+ return 0;
+
+ /* Stop counting for this task: mark it as exited and let the fexit
+  * prog fold in its final delta before removing it from the filter map.
+  * For TGID type, if the pid can be found in the map, it means that
+  * this pid belongs to the leader task. Its orphaned children (if any)
+  * are reparented to init and keep their own filter entries, so the
+  * pid can be safely removed.
+  */
+ pid = task->pid;
+ fval = bpf_map_lookup_elem(&filter, &pid);
+ if (fval)
+ fval->exited = 1;
+
+ return 0;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";