// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

void *bpf_jit_alloc_exec_page(void)
{
        void *image;

        image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!image)
                return NULL;

        set_vm_flush_reset_perms(image);
        /* Keep the image writable. The alternative is to keep flipping it
         * between ro and rw every time a new program is attached or detached.
         */
        set_memory_x((long)image, 1);
        return image;
}

void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
        ksym->start = (unsigned long) data;
        ksym->end = ksym->start + PAGE_SIZE;
        bpf_ksym_add(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
        bpf_ksym_del(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, true, ksym->name);
}

static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
        struct bpf_ksym *ksym = &tr->ksym;

        snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
        bpf_image_ksym_add(tr->image, ksym);
}

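/* Find the trampoline for @key, or allocate a new one if none exists yet.
 * Takes a reference on the returned trampoline; the caller drops it with
 * bpf_trampoline_put().
 */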
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
        struct bpf_trampoline *tr;
        struct hlist_head *head;
        void *image;
        int i;

        mutex_lock(&trampoline_mutex);
        head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
        hlist_for_each_entry(tr, head, hlist) {
                if (tr->key == key) {
                        refcount_inc(&tr->refcnt);
                        goto out;
                }
        }
        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
        if (!tr)
                goto out;

        /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
        image = bpf_jit_alloc_exec_page();
        if (!image) {
                kfree(tr);
                tr = NULL;
                goto out;
        }

        tr->key = key;
        INIT_HLIST_NODE(&tr->hlist);
        hlist_add_head(&tr->hlist, head);
        refcount_set(&tr->refcnt, 1);
        mutex_init(&tr->mutex);
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                INIT_HLIST_HEAD(&tr->progs_hlist[i]);
        tr->image = image;
        INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
        bpf_trampoline_ksym_add(tr);
out:
        mutex_unlock(&trampoline_mutex);
        return tr;
}

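/* A trampoline is attached to its target in one of two ways: if the target
 * address is a location managed by ftrace, the ftrace direct-call API is
 * used; otherwise bpf_arch_text_poke() patches the call site directly.
 * is_ftrace_location() decides which path register/modify/unregister_fentry()
 * take.
 */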
static int is_ftrace_location(void *ip)
{
        long addr;

        addr = ftrace_location((long)ip);
        if (!addr)
                return 0;
        if (WARN_ON_ONCE(addr != (long)ip))
                return -EFAULT;
        return 1;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = unregister_ftrace_direct((long)ip, (long)old_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
        return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
        return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        ret = is_ftrace_location(ip);
        if (ret < 0)
                return ret;
        tr->func.ftrace_managed = ret;

        if (tr->func.ftrace_managed)
                ret = register_ftrace_direct((long)ip, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
        return ret;
}

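/* Snapshot the programs currently attached to @tr into a freshly allocated
 * array of bpf_tramp_progs, one entry per BPF_TRAMP_* kind, and report the
 * overall number of programs in *total. The caller must kfree() the array.
 */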
static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
        const struct bpf_prog_aux *aux;
        struct bpf_tramp_progs *tprogs;
        struct bpf_prog **progs;
        int kind;

        *total = 0;
        tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
        if (!tprogs)
                return ERR_PTR(-ENOMEM);

        for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
                tprogs[kind].nr_progs = tr->progs_cnt[kind];
                *total += tr->progs_cnt[kind];
                progs = tprogs[kind].progs;

                hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
                        *progs++ = aux->prog;
        }
        return tprogs;
}

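/* Regenerate the trampoline for @tr after the set of attached programs has
 * changed. The page backing tr->image is treated as two halves of
 * PAGE_SIZE/2 bytes: the low bit of tr->selector picks which half the new
 * trampoline is generated into, while the other half holds the trampoline
 * that tasks may still be running in. Once the new half is prepared, the
 * fentry call site is switched over to it and tr->selector is incremented
 * so that the next update targets the other half.
 */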
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
        void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
        void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
        struct bpf_tramp_progs *tprogs;
        u32 flags = BPF_TRAMP_F_RESTORE_REGS;
        int err, total;

        tprogs = bpf_trampoline_get_progs(tr, &total);
        if (IS_ERR(tprogs))
                return PTR_ERR(tprogs);

        if (total == 0) {
                err = unregister_fentry(tr, old_image);
                tr->selector = 0;
                goto out;
        }

        if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
            tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

        /* Though the second half of the trampoline page is unused, a task
         * could be preempted in the middle of the first half of the
         * trampoline, and two consecutive updates to the trampoline would
         * then change the code underneath the preempted task. Hence wait for
         * tasks to voluntarily schedule or go to userspace.
         */

        synchronize_rcu_tasks();

        err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
                                          &tr->func.model, flags, tprogs,
                                          tr->func.addr);
        if (err < 0)
                goto out;

        if (tr->selector)
                /* progs already running at this address */
                err = modify_fentry(tr, old_image, new_image);
        else
                /* first time registering */
                err = register_fentry(tr, new_image);
        if (err)
                goto out;
        tr->selector++;
out:
        kfree(tprogs);
        return err;
}

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
{
        switch (t) {
        case BPF_TRACE_FENTRY:
                return BPF_TRAMP_FENTRY;
        case BPF_MODIFY_RETURN:
                return BPF_TRAMP_MODIFY_RETURN;
        case BPF_TRACE_FEXIT:
                return BPF_TRAMP_FEXIT;
        default:
                return BPF_TRAMP_REPLACE;
        }
}

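/* Attach @prog to its trampoline (prog->aux->trampoline). Extension
 * (BPF_TRAMP_REPLACE) programs are mutually exclusive with fentry/fexit
 * programs on the same target, and at most BPF_MAX_TRAMP_PROGS fentry/fexit
 * programs can be attached at once.
 */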
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err = 0;
        int cnt;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
        mutex_lock(&tr->mutex);
        if (tr->extension_prog) {
                /* cannot attach fentry/fexit if extension prog is attached.
                 * cannot overwrite extension prog either.
                 */
                err = -EBUSY;
                goto out;
        }
        cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
        if (kind == BPF_TRAMP_REPLACE) {
                /* Cannot attach extension if fentry/fexit are in use. */
                if (cnt) {
                        err = -EBUSY;
                        goto out;
                }
                tr->extension_prog = prog;
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
                                         prog->bpf_func);
                goto out;
        }
        if (cnt >= BPF_MAX_TRAMP_PROGS) {
                err = -E2BIG;
                goto out;
        }
        if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
                /* prog already linked */
                err = -EBUSY;
                goto out;
        }
        hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
        tr->progs_cnt[kind]++;
        err = bpf_trampoline_update(prog->aux->trampoline);
        if (err) {
                hlist_del(&prog->aux->tramp_hlist);
                tr->progs_cnt[kind]--;
        }
out:
        mutex_unlock(&tr->mutex);
        return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
        mutex_lock(&tr->mutex);
        if (kind == BPF_TRAMP_REPLACE) {
                WARN_ON_ONCE(!tr->extension_prog);
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
                                         tr->extension_prog->bpf_func, NULL);
                tr->extension_prog = NULL;
                goto out;
        }
        hlist_del(&prog->aux->tramp_hlist);
        tr->progs_cnt[kind]--;
        err = bpf_trampoline_update(prog->aux->trampoline);
out:
        mutex_unlock(&tr->mutex);
        return err;
}

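/* Drop a reference obtained from bpf_trampoline_lookup(). The trampoline is
 * freed once the last reference goes away and no programs remain attached.
 */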
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
        if (!tr)
                return;
        mutex_lock(&trampoline_mutex);
        if (!refcount_dec_and_test(&tr->refcnt))
                goto out;
        WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
                goto out;
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                goto out;
        bpf_image_ksym_del(&tr->ksym);
        /* wait for tasks to get out of trampoline before freeing it */
        synchronize_rcu_tasks();
        bpf_jit_free_exec(tr->image);
        hlist_del(&tr->hlist);
        kfree(tr);
out:
        mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with explicit
 * rcu_read_lock() and migrate_disable(), which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
{
        u64 start = 0;

        rcu_read_lock();
        migrate_disable();
        if (static_branch_unlikely(&bpf_stats_enabled_key))
                start = sched_clock();
        return start;
}

void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
{
        struct bpf_prog_stats *stats;

        if (static_branch_unlikely(&bpf_stats_enabled_key) &&
            /* The static key could be enabled in __bpf_prog_enter()
             * and disabled in __bpf_prog_exit(), or vice versa.
             * Hence check that 'start' is not zero.
             */
            start) {
                stats = this_cpu_ptr(prog->aux->stats);
                u64_stats_update_begin(&stats->syncp);
                stats->cnt++;
                stats->nsecs += sched_clock() - start;
                u64_stats_update_end(&stats->syncp);
        }
        migrate_enable();
        rcu_read_unlock();
}
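
/* For reference, the code emitted into the trampoline image conceptually
 * wraps each attached program like this (a simplified sketch; register
 * handling and context setup are omitted, and the real sequence is
 * generated per architecture by arch_prepare_bpf_trampoline() below):
 *
 *	start = __bpf_prog_enter();
 *	prog->bpf_func(ctx, prog->insnsi);
 *	__bpf_prog_exit(prog, start);
 */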

int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
                            const struct btf_func_model *m, u32 flags,
                            struct bpf_tramp_progs *tprogs,
                            void *orig_call)
{
        return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
        int i;

        for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&trampoline_table[i]);
        return 0;
}
late_initcall(init_trampolines);