kernel/bpf/trampoline.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

void *bpf_jit_alloc_exec_page(void)
{
	void *image;

	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return NULL;

	set_vm_flush_reset_perms(image);
	/* Keep the image writable. The alternative is to keep flipping it
	 * between ro and rw every time a program is attached or detached.
	 */
	set_memory_x((long)image, 1);
	return image;
}

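/* Register the PAGE_SIZE image at @data under @ksym: record its address
 * range, add it to BPF kallsyms and emit a perf KSYMBOL register event.
 */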
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + PAGE_SIZE;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
	struct bpf_ksym *ksym = &tr->ksym;

	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
	bpf_image_ksym_add(tr->image, ksym);
}

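/* Find the trampoline for @key in trampoline_table, or allocate a new one
 * with a fresh executable image page and hash it. Returns the trampoline
 * with its refcount incremented, or NULL if allocation fails.
 */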
static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	void *image;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	image = bpf_jit_alloc_exec_page();
	if (!image) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
	tr->image = image;
	INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
	bpf_trampoline_ksym_add(tr);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

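/* Returns 1 if @ip is a site ftrace can patch (so the trampoline can be
 * installed through the ftrace direct API), 0 if it is not, and -EFAULT if
 * ftrace reports an address that does not exactly match @ip.
 */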
static int is_ftrace_location(void *ip)
{
	long addr;

	addr = ftrace_location((long)ip);
	if (!addr)
		return 0;
	if (WARN_ON_ONCE(addr != (long)ip))
		return -EFAULT;
	return 1;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	ret = is_ftrace_location(ip);
	if (ret < 0)
		return ret;
	tr->func.ftrace_managed = ret;

	if (tr->func.ftrace_managed)
		ret = register_ftrace_direct((long)ip, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	return ret;
}

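/* Snapshot the programs currently attached to @tr into a freshly allocated
 * array of bpf_tramp_progs, one entry per attach kind. *total is set to the
 * number of programs across all kinds. The caller must kfree() the result.
 */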
static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
	const struct bpf_prog_aux *aux;
	struct bpf_tramp_progs *tprogs;
	struct bpf_prog **progs;
	int kind;

	*total = 0;
	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
	if (!tprogs)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tprogs[kind].nr_progs = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		progs = tprogs[kind].progs;

		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
			*progs++ = aux->prog;
	}
	return tprogs;
}

static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
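	/* The image page is split into two halves. The low bit of tr->selector
	 * picks the half the next generation of the trampoline is built into
	 * (new_image); the other half (old_image) is the one the fentry site
	 * currently points at. Each successful update below increments
	 * tr->selector, swapping the roles of the two halves.
	 */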
	struct bpf_tramp_progs *tprogs;
	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
	int err, total;

	tprogs = bpf_trampoline_get_progs(tr, &total);
	if (IS_ERR(tprogs))
		return PTR_ERR(tprogs);

	if (total == 0) {
		err = unregister_fentry(tr, old_image);
		tr->selector = 0;
		goto out;
	}

	if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
	    tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

	/* Though the second half of the trampoline page is unused, a task
	 * could be preempted in the middle of the first half of the
	 * trampoline, and two updates to the trampoline would change the code
	 * from underneath the preempted task. Hence wait for tasks to
	 * voluntarily schedule or go to userspace.
	 * The same trampoline can hold both sleepable and non-sleepable progs.
	 * synchronize_rcu_tasks_trace() is needed to make sure all sleepable
	 * programs finish executing.
	 * Wait for these two grace periods together.
	 */
	synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace);

	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
					  &tr->func.model, flags, tprogs,
					  tr->func.addr);
	if (err < 0)
		goto out;

	if (tr->selector)
		/* progs already running at this address */
		err = modify_fentry(tr, old_image, new_image);
	else
		/* first time registering */
		err = register_fentry(tr, new_image);
	if (err)
		goto out;
	tr->selector++;
out:
	kfree(tprogs);
	return err;
}

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

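/* Attach @prog to @tr. An extension (BPF_TRAMP_REPLACE) prog is installed by
 * poking a jump to it at the target address and is mutually exclusive with
 * fentry/fexit progs; any other kind is added to the per-kind list and the
 * trampoline image is regenerated via bpf_trampoline_update().
 */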
int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err = 0;
	int cnt;

	kind = bpf_attach_type_to_tramp(prog);
	mutex_lock(&tr->mutex);
	if (tr->extension_prog) {
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		err = -EBUSY;
		goto out;
	}
	cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt) {
			err = -EBUSY;
			goto out;
		}
		tr->extension_prog = prog;
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					 prog->bpf_func);
		goto out;
	}
	if (cnt >= BPF_MAX_TRAMP_PROGS) {
		err = -E2BIG;
		goto out;
	}
	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
		/* prog already linked */
		err = -EBUSY;
		goto out;
	}
	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr);
	if (err) {
		hlist_del(&prog->aux->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
out:
	mutex_unlock(&tr->mutex);
	return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(prog);
	mutex_lock(&tr->mutex);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		goto out;
	}
	hlist_del(&prog->aux->tramp_hlist);
	tr->progs_cnt[kind]--;
	err = bpf_trampoline_update(tr);
out:
	mutex_unlock(&tr->mutex);
	return err;
}

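/* Look up (or create) the trampoline for @key and, on first use, record the
 * target function's btf_func_model and address from @tgt_info.
 */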
struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}

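/* Drop a reference to @tr. The final put unhashes the trampoline and frees
 * its image page after waiting for any task still running in it to leave.
 */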
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
		goto out;
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
		goto out;
	bpf_image_ksym_del(&tr->ksym);
	/* This point is reached only after all bpf progs (both sleepable and
	 * non-sleepable) have gone through
	 * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred().
	 * Hence there is no need for another synchronize_rcu_tasks_trace()
	 * here, but synchronize_rcu_tasks() is still needed, since the
	 * trampoline may not have had any sleepable programs and we need to
	 * wait for tasks to get out of trampoline code before freeing it.
	 */
	synchronize_rcu_tasks();
	bpf_jit_free_exec(tr->image);
	hlist_del(&tr->hlist);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
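/* For example, for a single BPF_TRAMP_FENTRY prog the code emitted by
 * arch_prepare_bpf_trampoline() is roughly equivalent to:
 *
 *	start = __bpf_prog_enter();
 *	prog->bpf_func(ctx);
 *	__bpf_prog_exit(prog, start);
 */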
u64 notrace __bpf_prog_enter(void)
	__acquires(RCU)
{
	u64 start = 0;

	rcu_read_lock();
	migrate_disable();
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		start = sched_clock();
	return start;
}

void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
	__releases(RCU)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter
	     * and disabled in __bpf_prog_exit.
	     * And vice versa.
	     * Hence check that 'start' is not zero.
	     */
	    start) {
		stats = this_cpu_ptr(prog->aux->stats);
		u64_stats_update_begin(&stats->syncp);
		stats->cnt++;
		stats->nsecs += sched_clock() - start;
		u64_stats_update_end(&stats->syncp);
	}
	migrate_enable();
	rcu_read_unlock();
}

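/* Sleepable progs are bracketed by rcu_read_lock_trace()/rcu_read_unlock_trace()
 * rather than plain RCU; the corresponding grace period is waited for via
 * call_rcu_tasks_trace in bpf_trampoline_update().
 */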
void notrace __bpf_prog_enter_sleepable(void)
{
	rcu_read_lock_trace();
	might_fault();
}

void notrace __bpf_prog_exit_sleepable(void)
{
	rcu_read_unlock_trace();
}

int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_progs *tprogs,
			    void *orig_call)
{
	return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);