/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

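/*
 * Read a kernel value through bpf_probe_read_kernel() and yield it as
 * an expression, so e.g. _(p->pid) is a safe pid load from BPF context.
 */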
#define _(P)                                                                   \
	({                                                                     \
		typeof(P) val;                                                 \
		bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
		val;                                                           \
	})

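/* ignore off-CPU periods shorter than this many microseconds */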
#define MINBLOCK_US	1

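/*
 * Aggregation key: waker and target comm names plus the stack ids of
 * the wakeup and sleep paths. The counts map accumulates off-CPU time
 * (in microseconds) per unique waker/target/stack combination.
 */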
struct key_t {
	char waker[TASK_COMM_LEN];
	char target[TASK_COMM_LEN];
	u32 wret;
	u32 tret;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);
	__uint(max_entries, 10000);
} counts SEC(".maps");

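/* timestamp (ns) of when each pid was last switched out */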
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, 10000);
} start SEC(".maps");

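/*
 * Recorded at wakeup time by the try_to_wake_up kprobe: who woke this
 * pid and from which stack. Consumed and deleted in update_counts().
 */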
struct wokeby_t {
	char name[TASK_COMM_LEN];
	u32 ret;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, struct wokeby_t);
	__uint(max_entries, 10000);
} wokeby SEC(".maps");

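/* shared storage for both the wakeup-side and sleep-side stack traces */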
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, 10000);
} stackmap SEC(".maps");

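/* BPF_F_FAST_STACK_CMP: deduplicate stacks by hash only, skipping the
 * full stack comparison */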
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)

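/*
 * Fires on every wakeup. Records the waker's comm and stack id, keyed
 * by the pid of the task being woken, so the sleep-side probe can later
 * attribute the blocked time back to this wakeup path.
 */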
66 SEC("kprobe/try_to_wake_up")
67 int waker(struct pt_regs *ctx)
68 {
69         struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
70         struct wokeby_t woke;
71         u32 pid;
72
73         pid = _(p->pid);
74
75         bpf_get_current_comm(&woke.name, sizeof(woke.name));
76         woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
77
78         bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
79         return 0;
80 }
81
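/*
 * Charge `delta` microseconds of off-CPU time to the (waker, target,
 * wakeup stack, sleep stack) tuple. The waker half of the key is filled
 * in from the wokeby entry when one exists.
 */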
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}
	(*val) += delta;
	return 0;
}

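/*
 * Two interchangeable hook points for the sleep side: the stable
 * sched_switch tracepoint (enabled) or a kprobe on finish_task_switch
 * (disabled). Flip the #if to switch between them.
 */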
#if 1
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
	unsigned long long pad;
	char prev_comm[16];
	int prev_pid;
	int prev_prio;
	long long prev_state;
	char next_comm[16];
	int next_pid;
	int next_prio;
};
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	/* record previous thread sleep time */
	u32 pid = _(p->pid);
#endif
	u64 delta, ts, *tsp;

	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

	/* calculate current thread's delta time */
	pid = bpf_get_current_pid_tgid();	/* lower 32 bits: thread pid */
	tsp = bpf_map_lookup_elem(&start, &pid);
	if (!tsp)
		/* missed start or filtered */
		return 0;

	delta = bpf_ktime_get_ns() - *tsp;
	bpf_map_delete_elem(&start, &pid);
	delta = delta / 1000;	/* ns -> us */
	if (delta < MINBLOCK_US)
		return 0;

	return update_counts(ctx, pid, delta);
}
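/*
 * These programs are attached by a user-space loader (in the kernel tree,
 * the companion samples/bpf/offwaketime_user.c), which walks the counts
 * map, resolves the stored stack ids, and reports each waker/target stack
 * pair with its accumulated off-CPU time.
 */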
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;