tools/perf/util/bpf_skel/off_cpu.bpf.c
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

/* task->flags for off-cpu analysis */
#define PF_KTHREAD   0x00200000  /* I am a kernel thread */

/* task->state for off-cpu analysis */
#define TASK_INTERRUPTIBLE      0x0001
#define TASK_UNINTERRUPTIBLE    0x0002

#define MAX_STACKS   32
#define MAX_ENTRIES  102400

struct tstamp_data {
        __u32 stack_id;
        __u32 state;
        __u64 timestamp;
};

struct offcpu_key {
        __u32 pid;
        __u32 tgid;
        __u32 stack_id;
        __u32 state;
        __u64 cgroup_id;
};

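/*
 * user stack traces sampled at sched_switch; each entry holds up to
 * MAX_STACKS addresses and is referenced by stack_id
 */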
struct {
        __uint(type, BPF_MAP_TYPE_STACK_TRACE);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, MAX_STACKS * sizeof(__u64));
        __uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

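/* per-task storage: when, with which stack and in which state a task went off-cpu */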
struct {
        __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, struct tstamp_data);
} tstamp SEC(".maps");

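/* accumulated off-cpu time in nanoseconds, keyed by struct offcpu_key */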
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(struct offcpu_key));
        __uint(value_size, sizeof(__u64));
        __uint(max_entries, MAX_ENTRIES);
} off_cpu SEC(".maps");

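/*
 * allow-list filters for CPU, task and cgroup; presumably filled (and
 * resized) from user space by the loading tool before the program runs
 */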
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} cgroup_filter SEC(".maps");

/* old kernel task_struct definition */
struct task_struct___old {
        long state;
} __attribute__((preserve_access_index));

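/* control knobs, expected to be set from user space before enabling */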
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
int has_cgroup = 0;

const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
const volatile bool uses_cgroup_v1 = false;

/*
 * Older kernels call this field task_struct->state; newer kernels renamed it
 * to '__state'.  Use the BPF CO-RE "ignored suffix rule" to handle both, as
 * described here:
 *
 * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
 */
static inline int get_task_state(struct task_struct *t)
{
        if (bpf_core_field_exists(t->__state))
                return BPF_CORE_READ(t, __state);

        /* recast the pointer to capture the task_struct___old type for the compiler */
        struct task_struct___old *t_old = (void *)t;

        /* now use the old "state" name of the field */
        return BPF_CORE_READ(t_old, state);
}

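/*
 * cgroup v1 uses the cgroup of the perf_event subsystem, cgroup v2 the
 * default hierarchy; either way the id is taken from the kernfs node
 */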
static inline __u64 get_cgroup_id(struct task_struct *t)
{
        struct cgroup *cgrp;

        if (uses_cgroup_v1)
                cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
        else
                cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);

        return BPF_CORE_READ(cgrp, kn, id);
}

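/* return 1 if this sched-out should be recorded, after the optional cpu/task/cgroup filters */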
static inline int can_record(struct task_struct *t, int state)
{
        /* kernel threads don't have a user stack */
        if (t->flags & PF_KTHREAD)
                return 0;

        if (state != TASK_INTERRUPTIBLE &&
            state != TASK_UNINTERRUPTIBLE)
                return 0;

        if (has_cpu) {
                __u32 cpu = bpf_get_smp_processor_id();
                __u8 *ok;

                ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
                if (!ok)
                        return 0;
        }

        if (has_task) {
                __u8 *ok;
                __u32 pid = t->pid;

                ok = bpf_map_lookup_elem(&task_filter, &pid);
                if (!ok)
                        return 0;
        }

        if (has_cgroup) {
                __u8 *ok;
                __u64 cgrp_id = get_cgroup_id(t);

                ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
                if (!ok)
                        return 0;
        }

        return 1;
}

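/*
 * Two halves: stamp 'prev' as it goes off-cpu (timestamp, stack, state),
 * then, if 'next' was stamped earlier, account its elapsed off-cpu time.
 */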
static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
                        struct task_struct *next, int state)
{
        __u64 ts;
        __u32 stack_id;
        struct tstamp_data *pelem;

        ts = bpf_ktime_get_ns();

        if (!can_record(prev, state))
                goto next;

        stack_id = bpf_get_stackid(ctx, &stacks,
                                   BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);

        pelem = bpf_task_storage_get(&tstamp, prev, NULL,
                                     BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (!pelem)
                goto next;

        pelem->timestamp = ts;
        pelem->state = state;
        pelem->stack_id = stack_id;

next:
        pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);

        if (pelem && pelem->timestamp) {
                struct offcpu_key key = {
                        .pid = next->pid,
                        .tgid = next->tgid,
                        .stack_id = pelem->stack_id,
                        .state = pelem->state,
                        .cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
                };
                __u64 delta = ts - pelem->timestamp;
                __u64 *total;

                total = bpf_map_lookup_elem(&off_cpu, &key);
                if (total)
                        *total += delta;
                else
                        bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);

                /* prevent the timestamp from being reused later */
                pelem->timestamp = 0;
        }

        return 0;
}

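/*
 * tp_btf/sched_switch arguments: ctx[0] is the preempt flag, ctx[1] is prev
 * and ctx[2] is next; on kernels whose tracepoint also passes prev_state it
 * is available as ctx[3], which has_prev_state signals.
 */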
SEC("tp_btf/sched_switch")
int on_switch(u64 *ctx)
{
        struct task_struct *prev, *next;
        int prev_state;

        if (!enabled)
                return 0;

        prev = (struct task_struct *)ctx[1];
        next = (struct task_struct *)ctx[2];

        if (has_prev_state)
                prev_state = (int)ctx[3];
        else
                prev_state = get_task_state(prev);

        return off_cpu_stat(ctx, prev, next, prev_state);
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";