Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
[linux-2.6-microblaze.git] / kernel / bpf / helpers.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/rcupdate.h>
6 #include <linux/random.h>
7 #include <linux/smp.h>
8 #include <linux/topology.h>
9 #include <linux/ktime.h>
10 #include <linux/sched.h>
11 #include <linux/uidgid.h>
12 #include <linux/filter.h>
13 #include <linux/ctype.h>
14 #include <linux/jiffies.h>
15 #include <linux/pid_namespace.h>
16 #include <linux/proc_ns.h>
17 #include <linux/security.h>
18
19 #include "../../lib/kstrtox.h"
20
21 /* If kernel subsystem is allowing eBPF programs to call this function,
22  * inside its own verifier_ops->get_func_proto() callback it should return
23  * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
24  *
25  * Different map implementations will rely on rcu in map methods
26  * lookup/update/delete, therefore eBPF programs must run under rcu lock
27  * if program is allowed to access maps, so check rcu_read_lock_held in
28  * all three functions.
29  */
30 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
31 {
32         WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
33         return (unsigned long) map->ops->map_lookup_elem(map, key);
34 }
35
36 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
37         .func           = bpf_map_lookup_elem,
38         .gpl_only       = false,
39         .pkt_access     = true,
40         .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
41         .arg1_type      = ARG_CONST_MAP_PTR,
42         .arg2_type      = ARG_PTR_TO_MAP_KEY,
43 };
44
45 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
46            void *, value, u64, flags)
47 {
48         WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
49         return map->ops->map_update_elem(map, key, value, flags);
50 }
51
52 const struct bpf_func_proto bpf_map_update_elem_proto = {
53         .func           = bpf_map_update_elem,
54         .gpl_only       = false,
55         .pkt_access     = true,
56         .ret_type       = RET_INTEGER,
57         .arg1_type      = ARG_CONST_MAP_PTR,
58         .arg2_type      = ARG_PTR_TO_MAP_KEY,
59         .arg3_type      = ARG_PTR_TO_MAP_VALUE,
60         .arg4_type      = ARG_ANYTHING,
61 };
62
63 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
64 {
65         WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
66         return map->ops->map_delete_elem(map, key);
67 }
68
69 const struct bpf_func_proto bpf_map_delete_elem_proto = {
70         .func           = bpf_map_delete_elem,
71         .gpl_only       = false,
72         .pkt_access     = true,
73         .ret_type       = RET_INTEGER,
74         .arg1_type      = ARG_CONST_MAP_PTR,
75         .arg2_type      = ARG_PTR_TO_MAP_KEY,
76 };
77
78 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
79 {
80         return map->ops->map_push_elem(map, value, flags);
81 }
82
83 const struct bpf_func_proto bpf_map_push_elem_proto = {
84         .func           = bpf_map_push_elem,
85         .gpl_only       = false,
86         .pkt_access     = true,
87         .ret_type       = RET_INTEGER,
88         .arg1_type      = ARG_CONST_MAP_PTR,
89         .arg2_type      = ARG_PTR_TO_MAP_VALUE,
90         .arg3_type      = ARG_ANYTHING,
91 };
92
93 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
94 {
95         return map->ops->map_pop_elem(map, value);
96 }
97
98 const struct bpf_func_proto bpf_map_pop_elem_proto = {
99         .func           = bpf_map_pop_elem,
100         .gpl_only       = false,
101         .ret_type       = RET_INTEGER,
102         .arg1_type      = ARG_CONST_MAP_PTR,
103         .arg2_type      = ARG_PTR_TO_UNINIT_MAP_VALUE,
104 };
105
106 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
107 {
108         return map->ops->map_peek_elem(map, value);
109 }
110
111 const struct bpf_func_proto bpf_map_peek_elem_proto = {
112         .func           = bpf_map_peek_elem,
113         .gpl_only       = false,
114         .ret_type       = RET_INTEGER,
115         .arg1_type      = ARG_CONST_MAP_PTR,
116         .arg2_type      = ARG_PTR_TO_UNINIT_MAP_VALUE,
117 };
118
119 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
120         .func           = bpf_user_rnd_u32,
121         .gpl_only       = false,
122         .ret_type       = RET_INTEGER,
123 };
124
125 BPF_CALL_0(bpf_get_smp_processor_id)
126 {
127         return smp_processor_id();
128 }
129
130 const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
131         .func           = bpf_get_smp_processor_id,
132         .gpl_only       = false,
133         .ret_type       = RET_INTEGER,
134 };
135
136 BPF_CALL_0(bpf_get_numa_node_id)
137 {
138         return numa_node_id();
139 }
140
141 const struct bpf_func_proto bpf_get_numa_node_id_proto = {
142         .func           = bpf_get_numa_node_id,
143         .gpl_only       = false,
144         .ret_type       = RET_INTEGER,
145 };
146
147 BPF_CALL_0(bpf_ktime_get_ns)
148 {
149         /* NMI safe access to clock monotonic */
150         return ktime_get_mono_fast_ns();
151 }
152
153 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
154         .func           = bpf_ktime_get_ns,
155         .gpl_only       = false,
156         .ret_type       = RET_INTEGER,
157 };
158
159 BPF_CALL_0(bpf_ktime_get_boot_ns)
160 {
161         /* NMI safe access to clock boottime */
162         return ktime_get_boot_fast_ns();
163 }
164
165 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
166         .func           = bpf_ktime_get_boot_ns,
167         .gpl_only       = false,
168         .ret_type       = RET_INTEGER,
169 };
170
171 BPF_CALL_0(bpf_ktime_get_coarse_ns)
172 {
173         return ktime_get_coarse_ns();
174 }
175
176 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
177         .func           = bpf_ktime_get_coarse_ns,
178         .gpl_only       = false,
179         .ret_type       = RET_INTEGER,
180 };
181
182 BPF_CALL_0(bpf_get_current_pid_tgid)
183 {
184         struct task_struct *task = current;
185
186         if (unlikely(!task))
187                 return -EINVAL;
188
189         return (u64) task->tgid << 32 | task->pid;
190 }
191
192 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
193         .func           = bpf_get_current_pid_tgid,
194         .gpl_only       = false,
195         .ret_type       = RET_INTEGER,
196 };
197
198 BPF_CALL_0(bpf_get_current_uid_gid)
199 {
200         struct task_struct *task = current;
201         kuid_t uid;
202         kgid_t gid;
203
204         if (unlikely(!task))
205                 return -EINVAL;
206
207         current_uid_gid(&uid, &gid);
208         return (u64) from_kgid(&init_user_ns, gid) << 32 |
209                      from_kuid(&init_user_ns, uid);
210 }
211
212 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
213         .func           = bpf_get_current_uid_gid,
214         .gpl_only       = false,
215         .ret_type       = RET_INTEGER,
216 };
217
218 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
219 {
220         struct task_struct *task = current;
221
222         if (unlikely(!task))
223                 goto err_clear;
224
225         strncpy(buf, task->comm, size);
226
227         /* Verifier guarantees that size > 0. For task->comm exceeding
228          * size, guarantee that buf is %NUL-terminated. Unconditionally
229          * done here to save the size test.
230          */
231         buf[size - 1] = 0;
232         return 0;
233 err_clear:
234         memset(buf, 0, size);
235         return -EINVAL;
236 }
237
238 const struct bpf_func_proto bpf_get_current_comm_proto = {
239         .func           = bpf_get_current_comm,
240         .gpl_only       = false,
241         .ret_type       = RET_INTEGER,
242         .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
243         .arg2_type      = ARG_CONST_SIZE,
244 };
245
246 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
247
248 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
249 {
250         arch_spinlock_t *l = (void *)lock;
251         union {
252                 __u32 val;
253                 arch_spinlock_t lock;
254         } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
255
256         compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
257         BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
258         BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
259         arch_spin_lock(l);
260 }
261
262 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
263 {
264         arch_spinlock_t *l = (void *)lock;
265
266         arch_spin_unlock(l);
267 }
268
269 #else
270
271 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
272 {
273         atomic_t *l = (void *)lock;
274
275         BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
276         do {
277                 atomic_cond_read_relaxed(l, !VAL);
278         } while (atomic_xchg(l, 1));
279 }
280
281 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
282 {
283         atomic_t *l = (void *)lock;
284
285         atomic_set_release(l, 0);
286 }
287
288 #endif
289
290 static DEFINE_PER_CPU(unsigned long, irqsave_flags);
291
292 static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
293 {
294         unsigned long flags;
295
296         local_irq_save(flags);
297         __bpf_spin_lock(lock);
298         __this_cpu_write(irqsave_flags, flags);
299 }
300
301 notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
302 {
303         __bpf_spin_lock_irqsave(lock);
304         return 0;
305 }
306
307 const struct bpf_func_proto bpf_spin_lock_proto = {
308         .func           = bpf_spin_lock,
309         .gpl_only       = false,
310         .ret_type       = RET_VOID,
311         .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
312 };
313
314 static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
315 {
316         unsigned long flags;
317
318         flags = __this_cpu_read(irqsave_flags);
319         __bpf_spin_unlock(lock);
320         local_irq_restore(flags);
321 }
322
323 notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
324 {
325         __bpf_spin_unlock_irqrestore(lock);
326         return 0;
327 }
328
329 const struct bpf_func_proto bpf_spin_unlock_proto = {
330         .func           = bpf_spin_unlock,
331         .gpl_only       = false,
332         .ret_type       = RET_VOID,
333         .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
334 };
335
336 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
337                            bool lock_src)
338 {
339         struct bpf_spin_lock *lock;
340
341         if (lock_src)
342                 lock = src + map->spin_lock_off;
343         else
344                 lock = dst + map->spin_lock_off;
345         preempt_disable();
346         __bpf_spin_lock_irqsave(lock);
347         copy_map_value(map, dst, src);
348         __bpf_spin_unlock_irqrestore(lock);
349         preempt_enable();
350 }
351
352 BPF_CALL_0(bpf_jiffies64)
353 {
354         return get_jiffies_64();
355 }
356
357 const struct bpf_func_proto bpf_jiffies64_proto = {
358         .func           = bpf_jiffies64,
359         .gpl_only       = false,
360         .ret_type       = RET_INTEGER,
361 };
362
363 #ifdef CONFIG_CGROUPS
364 BPF_CALL_0(bpf_get_current_cgroup_id)
365 {
366         struct cgroup *cgrp;
367         u64 cgrp_id;
368
369         rcu_read_lock();
370         cgrp = task_dfl_cgroup(current);
371         cgrp_id = cgroup_id(cgrp);
372         rcu_read_unlock();
373
374         return cgrp_id;
375 }
376
377 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
378         .func           = bpf_get_current_cgroup_id,
379         .gpl_only       = false,
380         .ret_type       = RET_INTEGER,
381 };
382
383 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
384 {
385         struct cgroup *cgrp;
386         struct cgroup *ancestor;
387         u64 cgrp_id;
388
389         rcu_read_lock();
390         cgrp = task_dfl_cgroup(current);
391         ancestor = cgroup_ancestor(cgrp, ancestor_level);
392         cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
393         rcu_read_unlock();
394
395         return cgrp_id;
396 }
397
398 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
399         .func           = bpf_get_current_ancestor_cgroup_id,
400         .gpl_only       = false,
401         .ret_type       = RET_INTEGER,
402         .arg1_type      = ARG_ANYTHING,
403 };
404
405 #ifdef CONFIG_CGROUP_BPF
406
407 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
408 {
409         /* flags argument is not used now,
410          * but provides an ability to extend the API.
411          * verifier checks that its value is correct.
412          */
413         enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
414         struct bpf_cgroup_storage *storage;
415         struct bpf_cg_run_ctx *ctx;
416         void *ptr;
417
418         /* get current cgroup storage from BPF run context */
419         ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
420         storage = ctx->prog_item->cgroup_storage[stype];
421
422         if (stype == BPF_CGROUP_STORAGE_SHARED)
423                 ptr = &READ_ONCE(storage->buf)->data[0];
424         else
425                 ptr = this_cpu_ptr(storage->percpu_buf);
426
427         return (unsigned long)ptr;
428 }
429
430 const struct bpf_func_proto bpf_get_local_storage_proto = {
431         .func           = bpf_get_local_storage,
432         .gpl_only       = false,
433         .ret_type       = RET_PTR_TO_MAP_VALUE,
434         .arg1_type      = ARG_CONST_MAP_PTR,
435         .arg2_type      = ARG_ANYTHING,
436 };
437 #endif
438
439 #define BPF_STRTOX_BASE_MASK 0x1F
440
441 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
442                           unsigned long long *res, bool *is_negative)
443 {
444         unsigned int base = flags & BPF_STRTOX_BASE_MASK;
445         const char *cur_buf = buf;
446         size_t cur_len = buf_len;
447         unsigned int consumed;
448         size_t val_len;
449         char str[64];
450
451         if (!buf || !buf_len || !res || !is_negative)
452                 return -EINVAL;
453
454         if (base != 0 && base != 8 && base != 10 && base != 16)
455                 return -EINVAL;
456
457         if (flags & ~BPF_STRTOX_BASE_MASK)
458                 return -EINVAL;
459
460         while (cur_buf < buf + buf_len && isspace(*cur_buf))
461                 ++cur_buf;
462
463         *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
464         if (*is_negative)
465                 ++cur_buf;
466
467         consumed = cur_buf - buf;
468         cur_len -= consumed;
469         if (!cur_len)
470                 return -EINVAL;
471
472         cur_len = min(cur_len, sizeof(str) - 1);
473         memcpy(str, cur_buf, cur_len);
474         str[cur_len] = '\0';
475         cur_buf = str;
476
477         cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
478         val_len = _parse_integer(cur_buf, base, res);
479
480         if (val_len & KSTRTOX_OVERFLOW)
481                 return -ERANGE;
482
483         if (val_len == 0)
484                 return -EINVAL;
485
486         cur_buf += val_len;
487         consumed += cur_buf - str;
488
489         return consumed;
490 }
491
492 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
493                          long long *res)
494 {
495         unsigned long long _res;
496         bool is_negative;
497         int err;
498
499         err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
500         if (err < 0)
501                 return err;
502         if (is_negative) {
503                 if ((long long)-_res > 0)
504                         return -ERANGE;
505                 *res = -_res;
506         } else {
507                 if ((long long)_res < 0)
508                         return -ERANGE;
509                 *res = _res;
510         }
511         return err;
512 }
513
514 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
515            long *, res)
516 {
517         long long _res;
518         int err;
519
520         err = __bpf_strtoll(buf, buf_len, flags, &_res);
521         if (err < 0)
522                 return err;
523         if (_res != (long)_res)
524                 return -ERANGE;
525         *res = _res;
526         return err;
527 }
528
529 const struct bpf_func_proto bpf_strtol_proto = {
530         .func           = bpf_strtol,
531         .gpl_only       = false,
532         .ret_type       = RET_INTEGER,
533         .arg1_type      = ARG_PTR_TO_MEM,
534         .arg2_type      = ARG_CONST_SIZE,
535         .arg3_type      = ARG_ANYTHING,
536         .arg4_type      = ARG_PTR_TO_LONG,
537 };
538
539 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
540            unsigned long *, res)
541 {
542         unsigned long long _res;
543         bool is_negative;
544         int err;
545
546         err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
547         if (err < 0)
548                 return err;
549         if (is_negative)
550                 return -EINVAL;
551         if (_res != (unsigned long)_res)
552                 return -ERANGE;
553         *res = _res;
554         return err;
555 }
556
557 const struct bpf_func_proto bpf_strtoul_proto = {
558         .func           = bpf_strtoul,
559         .gpl_only       = false,
560         .ret_type       = RET_INTEGER,
561         .arg1_type      = ARG_PTR_TO_MEM,
562         .arg2_type      = ARG_CONST_SIZE,
563         .arg3_type      = ARG_ANYTHING,
564         .arg4_type      = ARG_PTR_TO_LONG,
565 };
566 #endif
567
568 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
569            struct bpf_pidns_info *, nsdata, u32, size)
570 {
571         struct task_struct *task = current;
572         struct pid_namespace *pidns;
573         int err = -EINVAL;
574
575         if (unlikely(size != sizeof(struct bpf_pidns_info)))
576                 goto clear;
577
578         if (unlikely((u64)(dev_t)dev != dev))
579                 goto clear;
580
581         if (unlikely(!task))
582                 goto clear;
583
584         pidns = task_active_pid_ns(task);
585         if (unlikely(!pidns)) {
586                 err = -ENOENT;
587                 goto clear;
588         }
589
590         if (!ns_match(&pidns->ns, (dev_t)dev, ino))
591                 goto clear;
592
593         nsdata->pid = task_pid_nr_ns(task, pidns);
594         nsdata->tgid = task_tgid_nr_ns(task, pidns);
595         return 0;
596 clear:
597         memset((void *)nsdata, 0, (size_t) size);
598         return err;
599 }
600
601 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
602         .func           = bpf_get_ns_current_pid_tgid,
603         .gpl_only       = false,
604         .ret_type       = RET_INTEGER,
605         .arg1_type      = ARG_ANYTHING,
606         .arg2_type      = ARG_ANYTHING,
607         .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
608         .arg4_type      = ARG_CONST_SIZE,
609 };
610
611 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
612         .func           = bpf_get_raw_cpu_id,
613         .gpl_only       = false,
614         .ret_type       = RET_INTEGER,
615 };
616
617 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
618            u64, flags, void *, data, u64, size)
619 {
620         if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
621                 return -EINVAL;
622
623         return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
624 }
625
626 const struct bpf_func_proto bpf_event_output_data_proto =  {
627         .func           = bpf_event_output_data,
628         .gpl_only       = true,
629         .ret_type       = RET_INTEGER,
630         .arg1_type      = ARG_PTR_TO_CTX,
631         .arg2_type      = ARG_CONST_MAP_PTR,
632         .arg3_type      = ARG_ANYTHING,
633         .arg4_type      = ARG_PTR_TO_MEM,
634         .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
635 };
636
637 BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
638            const void __user *, user_ptr)
639 {
640         int ret = copy_from_user(dst, user_ptr, size);
641
642         if (unlikely(ret)) {
643                 memset(dst, 0, size);
644                 ret = -EFAULT;
645         }
646
647         return ret;
648 }
649
650 const struct bpf_func_proto bpf_copy_from_user_proto = {
651         .func           = bpf_copy_from_user,
652         .gpl_only       = false,
653         .ret_type       = RET_INTEGER,
654         .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
655         .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
656         .arg3_type      = ARG_ANYTHING,
657 };
658
659 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
660 {
661         if (cpu >= nr_cpu_ids)
662                 return (unsigned long)NULL;
663
664         return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
665 }
666
667 const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
668         .func           = bpf_per_cpu_ptr,
669         .gpl_only       = false,
670         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
671         .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
672         .arg2_type      = ARG_ANYTHING,
673 };
674
675 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
676 {
677         return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
678 }
679
680 const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
681         .func           = bpf_this_cpu_ptr,
682         .gpl_only       = false,
683         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID,
684         .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
685 };
686
687 static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
688                 size_t bufsz)
689 {
690         void __user *user_ptr = (__force void __user *)unsafe_ptr;
691
692         buf[0] = 0;
693
694         switch (fmt_ptype) {
695         case 's':
696 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
697                 if ((unsigned long)unsafe_ptr < TASK_SIZE)
698                         return strncpy_from_user_nofault(buf, user_ptr, bufsz);
699                 fallthrough;
700 #endif
701         case 'k':
702                 return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
703         case 'u':
704                 return strncpy_from_user_nofault(buf, user_ptr, bufsz);
705         }
706
707         return -EINVAL;
708 }
709
710 /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
711  * arguments representation.
712  */
713 #define MAX_BPRINTF_BUF_LEN     512
714
715 /* Support executing three nested bprintf helper calls on a given CPU */
716 #define MAX_BPRINTF_NEST_LEVEL  3
717 struct bpf_bprintf_buffers {
718         char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
719 };
720 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
721 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
722
723 static int try_get_fmt_tmp_buf(char **tmp_buf)
724 {
725         struct bpf_bprintf_buffers *bufs;
726         int nest_level;
727
728         preempt_disable();
729         nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
730         if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
731                 this_cpu_dec(bpf_bprintf_nest_level);
732                 preempt_enable();
733                 return -EBUSY;
734         }
735         bufs = this_cpu_ptr(&bpf_bprintf_bufs);
736         *tmp_buf = bufs->tmp_bufs[nest_level - 1];
737
738         return 0;
739 }
740
741 void bpf_bprintf_cleanup(void)
742 {
743         if (this_cpu_read(bpf_bprintf_nest_level)) {
744                 this_cpu_dec(bpf_bprintf_nest_level);
745                 preempt_enable();
746         }
747 }
748
749 /*
750  * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
751  *
752  * Returns a negative value if fmt is an invalid format string or 0 otherwise.
753  *
754  * This can be used in two ways:
755  * - Format string verification only: when bin_args is NULL
756  * - Arguments preparation: in addition to the above verification, it writes in
757  *   bin_args a binary representation of arguments usable by bstr_printf where
758  *   pointers from BPF have been sanitized.
759  *
760  * In argument preparation mode, if 0 is returned, safe temporary buffers are
761  * allocated and bpf_bprintf_cleanup should be called to free them after use.
762  */
763 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
764                         u32 **bin_args, u32 num_args)
765 {
766         char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
767         size_t sizeof_cur_arg, sizeof_cur_ip;
768         int err, i, num_spec = 0;
769         u64 cur_arg;
770         char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
771
772         fmt_end = strnchr(fmt, fmt_size, 0);
773         if (!fmt_end)
774                 return -EINVAL;
775         fmt_size = fmt_end - fmt;
776
777         if (bin_args) {
778                 if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
779                         return -EBUSY;
780
781                 tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
782                 *bin_args = (u32 *)tmp_buf;
783         }
784
785         for (i = 0; i < fmt_size; i++) {
786                 if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
787                         err = -EINVAL;
788                         goto out;
789                 }
790
791                 if (fmt[i] != '%')
792                         continue;
793
794                 if (fmt[i + 1] == '%') {
795                         i++;
796                         continue;
797                 }
798
799                 if (num_spec >= num_args) {
800                         err = -EINVAL;
801                         goto out;
802                 }
803
804                 /* The string is zero-terminated so if fmt[i] != 0, we can
805                  * always access fmt[i + 1], in the worst case it will be a 0
806                  */
807                 i++;
808
809                 /* skip optional "[0 +-][num]" width formatting field */
810                 while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
811                        fmt[i] == ' ')
812                         i++;
813                 if (fmt[i] >= '1' && fmt[i] <= '9') {
814                         i++;
815                         while (fmt[i] >= '0' && fmt[i] <= '9')
816                                 i++;
817                 }
818
819                 if (fmt[i] == 'p') {
820                         sizeof_cur_arg = sizeof(long);
821
822                         if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
823                             fmt[i + 2] == 's') {
824                                 fmt_ptype = fmt[i + 1];
825                                 i += 2;
826                                 goto fmt_str;
827                         }
828
829                         if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
830                             ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
831                             fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
832                             fmt[i + 1] == 'S') {
833                                 /* just kernel pointers */
834                                 if (tmp_buf)
835                                         cur_arg = raw_args[num_spec];
836                                 i++;
837                                 goto nocopy_fmt;
838                         }
839
840                         if (fmt[i + 1] == 'B') {
841                                 if (tmp_buf)  {
842                                         err = snprintf(tmp_buf,
843                                                        (tmp_buf_end - tmp_buf),
844                                                        "%pB",
845                                                        (void *)(long)raw_args[num_spec]);
846                                         tmp_buf += (err + 1);
847                                 }
848
849                                 i++;
850                                 num_spec++;
851                                 continue;
852                         }
853
854                         /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
855                         if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
856                             (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
857                                 err = -EINVAL;
858                                 goto out;
859                         }
860
861                         i += 2;
862                         if (!tmp_buf)
863                                 goto nocopy_fmt;
864
865                         sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
866                         if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
867                                 err = -ENOSPC;
868                                 goto out;
869                         }
870
871                         unsafe_ptr = (char *)(long)raw_args[num_spec];
872                         err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
873                                                        sizeof_cur_ip);
874                         if (err < 0)
875                                 memset(cur_ip, 0, sizeof_cur_ip);
876
877                         /* hack: bstr_printf expects IP addresses to be
878                          * pre-formatted as strings, ironically, the easiest way
879                          * to do that is to call snprintf.
880                          */
881                         ip_spec[2] = fmt[i - 1];
882                         ip_spec[3] = fmt[i];
883                         err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
884                                        ip_spec, &cur_ip);
885
886                         tmp_buf += err + 1;
887                         num_spec++;
888
889                         continue;
890                 } else if (fmt[i] == 's') {
891                         fmt_ptype = fmt[i];
892 fmt_str:
893                         if (fmt[i + 1] != 0 &&
894                             !isspace(fmt[i + 1]) &&
895                             !ispunct(fmt[i + 1])) {
896                                 err = -EINVAL;
897                                 goto out;
898                         }
899
900                         if (!tmp_buf)
901                                 goto nocopy_fmt;
902
903                         if (tmp_buf_end == tmp_buf) {
904                                 err = -ENOSPC;
905                                 goto out;
906                         }
907
908                         unsafe_ptr = (char *)(long)raw_args[num_spec];
909                         err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
910                                                     fmt_ptype,
911                                                     tmp_buf_end - tmp_buf);
912                         if (err < 0) {
913                                 tmp_buf[0] = '\0';
914                                 err = 1;
915                         }
916
917                         tmp_buf += err;
918                         num_spec++;
919
920                         continue;
921                 } else if (fmt[i] == 'c') {
922                         if (!tmp_buf)
923                                 goto nocopy_fmt;
924
925                         if (tmp_buf_end == tmp_buf) {
926                                 err = -ENOSPC;
927                                 goto out;
928                         }
929
930                         *tmp_buf = raw_args[num_spec];
931                         tmp_buf++;
932                         num_spec++;
933
934                         continue;
935                 }
936
937                 sizeof_cur_arg = sizeof(int);
938
939                 if (fmt[i] == 'l') {
940                         sizeof_cur_arg = sizeof(long);
941                         i++;
942                 }
943                 if (fmt[i] == 'l') {
944                         sizeof_cur_arg = sizeof(long long);
945                         i++;
946                 }
947
948                 if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
949                     fmt[i] != 'x' && fmt[i] != 'X') {
950                         err = -EINVAL;
951                         goto out;
952                 }
953
954                 if (tmp_buf)
955                         cur_arg = raw_args[num_spec];
956 nocopy_fmt:
957                 if (tmp_buf) {
958                         tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
959                         if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
960                                 err = -ENOSPC;
961                                 goto out;
962                         }
963
964                         if (sizeof_cur_arg == 8) {
965                                 *(u32 *)tmp_buf = *(u32 *)&cur_arg;
966                                 *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
967                         } else {
968                                 *(u32 *)tmp_buf = (u32)(long)cur_arg;
969                         }
970                         tmp_buf += sizeof_cur_arg;
971                 }
972                 num_spec++;
973         }
974
975         err = 0;
976 out:
977         if (err)
978                 bpf_bprintf_cleanup();
979         return err;
980 }
981
982 #define MAX_SNPRINTF_VARARGS            12
983
984 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
985            const void *, data, u32, data_len)
986 {
987         int err, num_args;
988         u32 *bin_args;
989
990         if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
991             (data_len && !data))
992                 return -EINVAL;
993         num_args = data_len / 8;
994
995         /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
996          * can safely give an unbounded size.
997          */
998         err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
999         if (err < 0)
1000                 return err;
1001
1002         err = bstr_printf(str, str_size, fmt, bin_args);
1003
1004         bpf_bprintf_cleanup();
1005
1006         return err + 1;
1007 }
1008
1009 const struct bpf_func_proto bpf_snprintf_proto = {
1010         .func           = bpf_snprintf,
1011         .gpl_only       = true,
1012         .ret_type       = RET_INTEGER,
1013         .arg1_type      = ARG_PTR_TO_MEM_OR_NULL,
1014         .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
1015         .arg3_type      = ARG_PTR_TO_CONST_STR,
1016         .arg4_type      = ARG_PTR_TO_MEM_OR_NULL,
1017         .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
1018 };
1019
1020 /* BPF map elements can contain 'struct bpf_timer'.
1021  * Such map owns all of its BPF timers.
1022  * 'struct bpf_timer' is allocated as part of map element allocation
1023  * and it's zero initialized.
1024  * That space is used to keep 'struct bpf_timer_kern'.
1025  * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
1026  * remembers 'struct bpf_map *' pointer it's part of.
1027  * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn.
1028  * bpf_timer_start() arms the timer.
1029  * If user space reference to a map goes to zero at this point
1030  * ops->map_release_uref callback is responsible for cancelling the timers,
1031  * freeing their memory, and decrementing prog's refcnts.
1032  * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
1033  * Inner maps can contain bpf timers as well. ops->map_release_uref is
1034  * freeing the timers when inner map is replaced or deleted by user space.
1035  */
1036 struct bpf_hrtimer {
1037         struct hrtimer timer;
1038         struct bpf_map *map;
1039         struct bpf_prog *prog;
1040         void __rcu *callback_fn;
1041         void *value;
1042 };
1043
1044 /* the actual struct hidden inside uapi struct bpf_timer */
1045 struct bpf_timer_kern {
1046         struct bpf_hrtimer *timer;
1047         /* bpf_spin_lock is used here instead of spinlock_t to make
1048          * sure that it always fits into space resereved by struct bpf_timer
1049          * regardless of LOCKDEP and spinlock debug flags.
1050          */
1051         struct bpf_spin_lock lock;
1052 } __attribute__((aligned(8)));
1053
1054 static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1055
1056 static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1057 {
1058         struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1059         struct bpf_map *map = t->map;
1060         void *value = t->value;
1061         void *callback_fn;
1062         void *key;
1063         u32 idx;
1064
1065         callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
1066         if (!callback_fn)
1067                 goto out;
1068
1069         /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1070          * cannot be preempted by another bpf_timer_cb() on the same cpu.
1071          * Remember the timer this callback is servicing to prevent
1072          * deadlock if callback_fn() calls bpf_timer_cancel() or
1073          * bpf_map_delete_elem() on the same timer.
1074          */
1075         this_cpu_write(hrtimer_running, t);
1076         if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1077                 struct bpf_array *array = container_of(map, struct bpf_array, map);
1078
1079                 /* compute the key */
1080                 idx = ((char *)value - array->value) / array->elem_size;
1081                 key = &idx;
1082         } else { /* hash or lru */
1083                 key = value - round_up(map->key_size, 8);
1084         }
1085
1086         BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
1087                                    (u64)(long)value, 0, 0);
1088         /* The verifier checked that return value is zero. */
1089
1090         this_cpu_write(hrtimer_running, NULL);
1091 out:
1092         return HRTIMER_NORESTART;
1093 }
1094
1095 BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
1096            u64, flags)
1097 {
1098         clockid_t clockid = flags & (MAX_CLOCKS - 1);
1099         struct bpf_hrtimer *t;
1100         int ret = 0;
1101
1102         BUILD_BUG_ON(MAX_CLOCKS != 16);
1103         BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
1104         BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
1105
1106         if (in_nmi())
1107                 return -EOPNOTSUPP;
1108
1109         if (flags >= MAX_CLOCKS ||
1110             /* similar to timerfd except _ALARM variants are not supported */
1111             (clockid != CLOCK_MONOTONIC &&
1112              clockid != CLOCK_REALTIME &&
1113              clockid != CLOCK_BOOTTIME))
1114                 return -EINVAL;
1115         __bpf_spin_lock_irqsave(&timer->lock);
1116         t = timer->timer;
1117         if (t) {
1118                 ret = -EBUSY;
1119                 goto out;
1120         }
1121         if (!atomic64_read(&map->usercnt)) {
1122                 /* maps with timers must be either held by user space
1123                  * or pinned in bpffs.
1124                  */
1125                 ret = -EPERM;
1126                 goto out;
1127         }
1128         /* allocate hrtimer via map_kmalloc to use memcg accounting */
1129         t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
1130         if (!t) {
1131                 ret = -ENOMEM;
1132                 goto out;
1133         }
1134         t->value = (void *)timer - map->timer_off;
1135         t->map = map;
1136         t->prog = NULL;
1137         rcu_assign_pointer(t->callback_fn, NULL);
1138         hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1139         t->timer.function = bpf_timer_cb;
1140         timer->timer = t;
1141 out:
1142         __bpf_spin_unlock_irqrestore(&timer->lock);
1143         return ret;
1144 }
1145
1146 static const struct bpf_func_proto bpf_timer_init_proto = {
1147         .func           = bpf_timer_init,
1148         .gpl_only       = true,
1149         .ret_type       = RET_INTEGER,
1150         .arg1_type      = ARG_PTR_TO_TIMER,
1151         .arg2_type      = ARG_CONST_MAP_PTR,
1152         .arg3_type      = ARG_ANYTHING,
1153 };
1154
1155 BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
1156            struct bpf_prog_aux *, aux)
1157 {
1158         struct bpf_prog *prev, *prog = aux->prog;
1159         struct bpf_hrtimer *t;
1160         int ret = 0;
1161
1162         if (in_nmi())
1163                 return -EOPNOTSUPP;
1164         __bpf_spin_lock_irqsave(&timer->lock);
1165         t = timer->timer;
1166         if (!t) {
1167                 ret = -EINVAL;
1168                 goto out;
1169         }
1170         if (!atomic64_read(&t->map->usercnt)) {
1171                 /* maps with timers must be either held by user space
1172                  * or pinned in bpffs. Otherwise timer might still be
1173                  * running even when bpf prog is detached and user space
1174                  * is gone, since map_release_uref won't ever be called.
1175                  */
1176                 ret = -EPERM;
1177                 goto out;
1178         }
1179         prev = t->prog;
1180         if (prev != prog) {
1181                 /* Bump prog refcnt once. Every bpf_timer_set_callback()
1182                  * can pick different callback_fn-s within the same prog.
1183                  */
1184                 prog = bpf_prog_inc_not_zero(prog);
1185                 if (IS_ERR(prog)) {
1186                         ret = PTR_ERR(prog);
1187                         goto out;
1188                 }
1189                 if (prev)
1190                         /* Drop prev prog refcnt when swapping with new prog */
1191                         bpf_prog_put(prev);
1192                 t->prog = prog;
1193         }
1194         rcu_assign_pointer(t->callback_fn, callback_fn);
1195 out:
1196         __bpf_spin_unlock_irqrestore(&timer->lock);
1197         return ret;
1198 }
1199
1200 static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1201         .func           = bpf_timer_set_callback,
1202         .gpl_only       = true,
1203         .ret_type       = RET_INTEGER,
1204         .arg1_type      = ARG_PTR_TO_TIMER,
1205         .arg2_type      = ARG_PTR_TO_FUNC,
1206 };
1207
1208 BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
1209 {
1210         struct bpf_hrtimer *t;
1211         int ret = 0;
1212
1213         if (in_nmi())
1214                 return -EOPNOTSUPP;
1215         if (flags)
1216                 return -EINVAL;
1217         __bpf_spin_lock_irqsave(&timer->lock);
1218         t = timer->timer;
1219         if (!t || !t->prog) {
1220                 ret = -EINVAL;
1221                 goto out;
1222         }
1223         hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
1224 out:
1225         __bpf_spin_unlock_irqrestore(&timer->lock);
1226         return ret;
1227 }
1228
1229 static const struct bpf_func_proto bpf_timer_start_proto = {
1230         .func           = bpf_timer_start,
1231         .gpl_only       = true,
1232         .ret_type       = RET_INTEGER,
1233         .arg1_type      = ARG_PTR_TO_TIMER,
1234         .arg2_type      = ARG_ANYTHING,
1235         .arg3_type      = ARG_ANYTHING,
1236 };
1237
1238 static void drop_prog_refcnt(struct bpf_hrtimer *t)
1239 {
1240         struct bpf_prog *prog = t->prog;
1241
1242         if (prog) {
1243                 bpf_prog_put(prog);
1244                 t->prog = NULL;
1245                 rcu_assign_pointer(t->callback_fn, NULL);
1246         }
1247 }
1248
1249 BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
1250 {
1251         struct bpf_hrtimer *t;
1252         int ret = 0;
1253
1254         if (in_nmi())
1255                 return -EOPNOTSUPP;
1256         __bpf_spin_lock_irqsave(&timer->lock);
1257         t = timer->timer;
1258         if (!t) {
1259                 ret = -EINVAL;
1260                 goto out;
1261         }
1262         if (this_cpu_read(hrtimer_running) == t) {
1263                 /* If bpf callback_fn is trying to bpf_timer_cancel()
1264                  * its own timer the hrtimer_cancel() will deadlock
1265                  * since it waits for callback_fn to finish
1266                  */
1267                 ret = -EDEADLK;
1268                 goto out;
1269         }
1270         drop_prog_refcnt(t);
1271 out:
1272         __bpf_spin_unlock_irqrestore(&timer->lock);
1273         /* Cancel the timer and wait for associated callback to finish
1274          * if it was running.
1275          */
1276         ret = ret ?: hrtimer_cancel(&t->timer);
1277         return ret;
1278 }
1279
1280 static const struct bpf_func_proto bpf_timer_cancel_proto = {
1281         .func           = bpf_timer_cancel,
1282         .gpl_only       = true,
1283         .ret_type       = RET_INTEGER,
1284         .arg1_type      = ARG_PTR_TO_TIMER,
1285 };
1286
1287 /* This function is called by map_delete/update_elem for individual element and
1288  * by ops->map_release_uref when the user space reference to a map reaches zero.
1289  */
1290 void bpf_timer_cancel_and_free(void *val)
1291 {
1292         struct bpf_timer_kern *timer = val;
1293         struct bpf_hrtimer *t;
1294
1295         /* Performance optimization: read timer->timer without lock first. */
1296         if (!READ_ONCE(timer->timer))
1297                 return;
1298
1299         __bpf_spin_lock_irqsave(&timer->lock);
1300         /* re-read it under lock */
1301         t = timer->timer;
1302         if (!t)
1303                 goto out;
1304         drop_prog_refcnt(t);
1305         /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1306          * this timer, since it won't be initialized.
1307          */
1308         timer->timer = NULL;
1309 out:
1310         __bpf_spin_unlock_irqrestore(&timer->lock);
1311         if (!t)
1312                 return;
1313         /* Cancel the timer and wait for callback to complete if it was running.
1314          * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1315          * right after for both preallocated and non-preallocated maps.
1316          * The timer->timer = NULL was already done and no code path can
1317          * see address 't' anymore.
1318          *
1319          * Check that bpf_map_delete/update_elem() wasn't called from timer
1320          * callback_fn. In such case don't call hrtimer_cancel() (since it will
1321          * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1322          * return -1). Though callback_fn is still running on this cpu it's
1323          * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1324          * from 't'. The bpf subprog callback_fn won't be able to access 't',
1325          * since timer->timer = NULL was already done. The timer will be
1326          * effectively cancelled because bpf_timer_cb() will return
1327          * HRTIMER_NORESTART.
1328          */
1329         if (this_cpu_read(hrtimer_running) != t)
1330                 hrtimer_cancel(&t->timer);
1331         kfree(t);
1332 }
1333
1334 const struct bpf_func_proto bpf_get_current_task_proto __weak;
1335 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1336 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1337 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1338 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1339 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1340 const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1341
1342 const struct bpf_func_proto *
1343 bpf_base_func_proto(enum bpf_func_id func_id)
1344 {
1345         switch (func_id) {
1346         case BPF_FUNC_map_lookup_elem:
1347                 return &bpf_map_lookup_elem_proto;
1348         case BPF_FUNC_map_update_elem:
1349                 return &bpf_map_update_elem_proto;
1350         case BPF_FUNC_map_delete_elem:
1351                 return &bpf_map_delete_elem_proto;
1352         case BPF_FUNC_map_push_elem:
1353                 return &bpf_map_push_elem_proto;
1354         case BPF_FUNC_map_pop_elem:
1355                 return &bpf_map_pop_elem_proto;
1356         case BPF_FUNC_map_peek_elem:
1357                 return &bpf_map_peek_elem_proto;
1358         case BPF_FUNC_get_prandom_u32:
1359                 return &bpf_get_prandom_u32_proto;
1360         case BPF_FUNC_get_smp_processor_id:
1361                 return &bpf_get_raw_smp_processor_id_proto;
1362         case BPF_FUNC_get_numa_node_id:
1363                 return &bpf_get_numa_node_id_proto;
1364         case BPF_FUNC_tail_call:
1365                 return &bpf_tail_call_proto;
1366         case BPF_FUNC_ktime_get_ns:
1367                 return &bpf_ktime_get_ns_proto;
1368         case BPF_FUNC_ktime_get_boot_ns:
1369                 return &bpf_ktime_get_boot_ns_proto;
1370         case BPF_FUNC_ktime_get_coarse_ns:
1371                 return &bpf_ktime_get_coarse_ns_proto;
1372         case BPF_FUNC_ringbuf_output:
1373                 return &bpf_ringbuf_output_proto;
1374         case BPF_FUNC_ringbuf_reserve:
1375                 return &bpf_ringbuf_reserve_proto;
1376         case BPF_FUNC_ringbuf_submit:
1377                 return &bpf_ringbuf_submit_proto;
1378         case BPF_FUNC_ringbuf_discard:
1379                 return &bpf_ringbuf_discard_proto;
1380         case BPF_FUNC_ringbuf_query:
1381                 return &bpf_ringbuf_query_proto;
1382         case BPF_FUNC_for_each_map_elem:
1383                 return &bpf_for_each_map_elem_proto;
1384         default:
1385                 break;
1386         }
1387
1388         if (!bpf_capable())
1389                 return NULL;
1390
1391         switch (func_id) {
1392         case BPF_FUNC_spin_lock:
1393                 return &bpf_spin_lock_proto;
1394         case BPF_FUNC_spin_unlock:
1395                 return &bpf_spin_unlock_proto;
1396         case BPF_FUNC_jiffies64:
1397                 return &bpf_jiffies64_proto;
1398         case BPF_FUNC_per_cpu_ptr:
1399                 return &bpf_per_cpu_ptr_proto;
1400         case BPF_FUNC_this_cpu_ptr:
1401                 return &bpf_this_cpu_ptr_proto;
1402         case BPF_FUNC_timer_init:
1403                 return &bpf_timer_init_proto;
1404         case BPF_FUNC_timer_set_callback:
1405                 return &bpf_timer_set_callback_proto;
1406         case BPF_FUNC_timer_start:
1407                 return &bpf_timer_start_proto;
1408         case BPF_FUNC_timer_cancel:
1409                 return &bpf_timer_cancel_proto;
1410         default:
1411                 break;
1412         }
1413
1414         if (!perfmon_capable())
1415                 return NULL;
1416
1417         switch (func_id) {
1418         case BPF_FUNC_trace_printk:
1419                 return bpf_get_trace_printk_proto();
1420         case BPF_FUNC_get_current_task:
1421                 return &bpf_get_current_task_proto;
1422         case BPF_FUNC_get_current_task_btf:
1423                 return &bpf_get_current_task_btf_proto;
1424         case BPF_FUNC_probe_read_user:
1425                 return &bpf_probe_read_user_proto;
1426         case BPF_FUNC_probe_read_kernel:
1427                 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1428                        NULL : &bpf_probe_read_kernel_proto;
1429         case BPF_FUNC_probe_read_user_str:
1430                 return &bpf_probe_read_user_str_proto;
1431         case BPF_FUNC_probe_read_kernel_str:
1432                 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1433                        NULL : &bpf_probe_read_kernel_str_proto;
1434         case BPF_FUNC_snprintf_btf:
1435                 return &bpf_snprintf_btf_proto;
1436         case BPF_FUNC_snprintf:
1437                 return &bpf_snprintf_proto;
1438         case BPF_FUNC_task_pt_regs:
1439                 return &bpf_task_pt_regs_proto;
1440         default:
1441                 return NULL;
1442         }
1443 }