1 /* SPDX-License-Identifier: GPL-2.0 */
6 #include <linux/errno.h>
7 #include <linux/jump_label.h>
8 #include <linux/percpu.h>
9 #include <linux/percpu-refcount.h>
10 #include <linux/rbtree.h>
11 #include <uapi/linux/bpf.h>
19 struct bpf_sock_ops_kern;
20 struct bpf_cgroup_storage;
22 struct ctl_table_header;
25 #ifdef CONFIG_CGROUP_BPF
27 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
28 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
30 #define BPF_CGROUP_STORAGE_NEST_MAX 8
32 struct bpf_cgroup_storage_info {
33 struct task_struct *task;
34 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
37 /* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
38 * to use bpf cgroup storage simultaneously.
40 DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
41 bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
/* Iterate @stype over 0 .. MAX_BPF_CGROUP_STORAGE_TYPE - 1. */
#define for_each_cgroup_storage_type(stype) \
	for ((stype) = 0; (stype) < MAX_BPF_CGROUP_STORAGE_TYPE; (stype)++)
46 struct bpf_cgroup_storage_map;
48 struct bpf_storage_buffer {
53 struct bpf_cgroup_storage {
55 struct bpf_storage_buffer *buf;
56 void __percpu *percpu_buf;
58 struct bpf_cgroup_storage_map *map;
59 struct bpf_cgroup_storage_key key;
60 struct list_head list_map;
61 struct list_head list_cg;
66 struct bpf_cgroup_link {
68 struct cgroup *cgroup;
69 enum bpf_attach_type type;
72 struct bpf_prog_list {
73 struct list_head node;
74 struct bpf_prog *prog;
75 struct bpf_cgroup_link *link;
76 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
79 struct bpf_prog_array;
82 /* array of effective progs in this cgroup */
83 struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
/* attached progs to this cgroup and attach flags
 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
 * have either zero or one element
 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
 */
90 struct list_head progs[MAX_BPF_ATTACH_TYPE];
91 u32 flags[MAX_BPF_ATTACH_TYPE];
93 /* list of cgroup shared storages */
94 struct list_head storages;
96 /* temp storage for effective prog array used by prog_attach/detach */
97 struct bpf_prog_array *inactive;
99 /* reference counter used to detach bpf programs after cgroup removal */
100 struct percpu_ref refcnt;
102 /* cgroup_bpf is released using a work queue */
103 struct work_struct release_work;
106 int cgroup_bpf_inherit(struct cgroup *cgrp);
107 void cgroup_bpf_offline(struct cgroup *cgrp);
109 int __cgroup_bpf_attach(struct cgroup *cgrp,
110 struct bpf_prog *prog, struct bpf_prog *replace_prog,
111 struct bpf_cgroup_link *link,
112 enum bpf_attach_type type, u32 flags);
113 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
114 struct bpf_cgroup_link *link,
115 enum bpf_attach_type type);
116 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
117 union bpf_attr __user *uattr);
119 /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
120 int cgroup_bpf_attach(struct cgroup *cgrp,
121 struct bpf_prog *prog, struct bpf_prog *replace_prog,
122 struct bpf_cgroup_link *link, enum bpf_attach_type type,
124 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
125 enum bpf_attach_type type);
126 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
127 union bpf_attr __user *uattr);
/* Hook entry points. Within this header they are only called from the
 * BPF_CGROUP_RUN_* macros, after cgroup_bpf_enabled() has been checked for
 * the relevant attach type.
 */
129 int __cgroup_bpf_run_filter_skb(struct sock *sk,
131 enum bpf_attach_type type);
133 int __cgroup_bpf_run_filter_sk(struct sock *sk,
134 enum bpf_attach_type type);
136 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
137 struct sockaddr *uaddr,
138 enum bpf_attach_type type,
142 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
143 struct bpf_sock_ops_kern *sock_ops,
144 enum bpf_attach_type type);
146 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
147 short access, enum bpf_attach_type type);
149 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
150 struct ctl_table *table, int write,
151 char **buf, size_t *pcount, loff_t *ppos,
152 enum bpf_attach_type type);
/* setsockopt/getsockopt hooks: @optval is a __user pointer here. */
154 int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
155 int *optname, char __user *optval,
156 int *optlen, char **kernel_optval);
157 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
158 int optname, char __user *optval,
159 int __user *optlen, int max_optlen,
/* _kern variant: @optval and @optlen are plain (non-__user) pointers. */
162 int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
163 int optname, void *optval,
164 int *optlen, int retval);
/* Classify a cgroup storage map: BPF_CGROUP_STORAGE_PERCPU when
 * map->map_type is BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, otherwise
 * BPF_CGROUP_STORAGE_SHARED.
 */
166 static inline enum bpf_cgroup_storage_type cgroup_storage_type(
169 if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
170 return BPF_CGROUP_STORAGE_PERCPU;
172 return BPF_CGROUP_STORAGE_SHARED;
/* Claim one of this cpu's BPF_CGROUP_STORAGE_NEST_MAX bpf_cgroup_storage_info
 * slots for the current task and fill in its per-type storage pointers.
 * NOTE(review): return value and the handling of "no free slot" are decided
 * on lines not visible here - confirm.
 */
175 static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
176 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
178 enum bpf_cgroup_storage_type stype;
182 for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
/* a slot whose .task is already non-NULL is treated as the unlikely case */
183 if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
/* mark the slot as owned by current, then set one storage pointer per type */
186 this_cpu_write(bpf_cgroup_storage_info[i].task, current);
187 for_each_cgroup_storage_type(stype)
188 this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
/* Release the current task's slot on this cpu: the slots are scanned from
 * the highest index down and .task is reset to NULL on a slot whose .task
 * is current.
 */
200 static inline void bpf_cgroup_storage_unset(void)
204 for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) {
/* slots belonging to some other task (or to none) are the likely case */
205 if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
208 this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
213 struct bpf_cgroup_storage *
214 cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
215 void *key, bool locked);
216 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
217 enum bpf_cgroup_storage_type stype);
218 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
219 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
220 struct cgroup *cgroup,
221 enum bpf_attach_type type);
222 void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
223 int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
225 int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
226 int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
227 void *value, u64 flags);
229 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
/* Ingress: once BPF_CGROUP_INET_INGRESS is enabled the skb filter is run on
 * @sk as passed in.
 */
230 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
233 if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \
234 __ret = __cgroup_bpf_run_filter_skb(sk, skb, \
235 BPF_CGROUP_INET_INGRESS); \
/* Egress: runs only when @sk is non-NULL and is the socket attached to @skb
 * (sk == skb->sk). The filter is invoked on the socket returned by
 * sk_to_full_sk(), and only if sk_fullsock() accepts it.
 * NOTE(review): @sk and @skb are expanded more than once and without
 * parentheses - avoid arguments with side effects.
 */
240 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
243 if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
244 typeof(sk) __sk = sk_to_full_sk(sk); \
245 if (sk_fullsock(__sk)) \
246 __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
247 BPF_CGROUP_INET_EGRESS); \
/* Generic helper: run the sock programs of attach type @type on @sk when
 * that attach type is enabled.
 */
252 #define BPF_CGROUP_RUN_SK_PROG(sk, type) \
255 if (cgroup_bpf_enabled(type)) { \
256 __ret = __cgroup_bpf_run_filter_sk(sk, type); \
/* Shorthands for BPF_CGROUP_RUN_SK_PROG(), one per sock attach type. */
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)

#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)

#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)

#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
/* Run the sock_addr programs of attach type @type for @sk/@uaddr when that
 * type is enabled.
 * NOTE(review): __unused_flags is presumably handed to the filter as a flags
 * output that this macro then ignores; the argument line is not visible
 * here - confirm.
 */
273 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
275 u32 __unused_flags; \
277 if (cgroup_bpf_enabled(type)) \
278 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
/* Same as above with an additional @t_ctx argument.
 * NOTE(review): the _LOCK suffix suggests the socket is locked around the
 * call; the locking statements are not visible here - confirm.
 */
284 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
286 u32 __unused_flags; \
288 if (cgroup_bpf_enabled(type)) { \
290 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
 * via upper bits of return code. The only flag that is supported
 * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
 * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
 */
/* Bind hook: when attach type @type is enabled, runs the sock_addr programs
 * and, if the resulting flags contain BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE,
 * ORs BIND_NO_CAP_NET_BIND_SERVICE into *bind_flags.
 * NOTE(review): @bind_flags is dereferenced without parentheses, so pass a
 * plain pointer expression.
 */
303 #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
307 if (cgroup_bpf_enabled(type)) { \
309 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
312 if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
313 *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
/*
 * Non-zero when either the INET4 or the INET6 connect attach type is enabled
 * and the protocol of @sk provides a ->pre_connect callback.
 */
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
	  cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \
	 (sk)->sk_prot->pre_connect)
/* Shorthands for BPF_CGROUP_RUN_SA_PROG() / BPF_CGROUP_RUN_SA_PROG_LOCK(),
 * one per sock_addr attach type. Only the UDP sendmsg variants forward a
 * caller-supplied @t_ctx; the other _LOCK variants pass NULL.
 */
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)

#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)

#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)

#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)

#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)

#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)

#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)

#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
 * fullsock and its parent fullsock cannot be traced by
 * sk_to_full_sk().
 *
 * e.g. sock_ops->sk is a request_sock and it is under syncookie mode.
 * Its listener-sk is not attached to the rsk_listener.
 * In this case, the caller holds the listener-sk (unlocked),
 * sets its sock_ops->sk to req_sk, and calls this SOCK_OPS"_SK" with
 * the listener-sk such that the cgroup-bpf-progs of the
 * listener-sk will be run.
 *
 * Regardless of syncookie mode or not,
 * calling bpf_setsockopt on listener-sk will not make sense anyway,
 * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here.
 */
/* Runs the sock_ops programs on the caller-supplied @sk as-is (no
 * sk_to_full_sk() conversion) when BPF_CGROUP_SOCK_OPS is enabled.
 */
362 #define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
365 if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \
366 __ret = __cgroup_bpf_run_filter_sock_ops(sk, \
368 BPF_CGROUP_SOCK_OPS); \
/* Runs the sock_ops programs on the full socket derived from
 * (sock_ops)->sk via sk_to_full_sk(). Nothing is run when (sock_ops)->sk is
 * NULL, or when the derived socket is NULL or not a full socket.
 * NOTE(review): typeof(sk) refers to an identifier that is not a parameter
 * of this macro, so the expansion only compiles where a variable named `sk`
 * is in scope at the call site; typeof((sock_ops)->sk) would remove that
 * hidden dependency.
 */
372 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
375 if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
376 typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
377 if (__sk && sk_fullsock(__sk)) \
378 __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
380 BPF_CGROUP_SOCK_OPS); \
/* Device access check: __cgroup_bpf_check_dev_permission() is called only
 * when BPF_CGROUP_DEVICE is enabled.
 */
385 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
388 if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \
389 __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
391 BPF_CGROUP_DEVICE); \
/* sysctl hook: __cgroup_bpf_run_filter_sysctl() is called only when
 * BPF_CGROUP_SYSCTL is enabled.
 */
397 #define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
400 if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \
401 __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
403 BPF_CGROUP_SYSCTL); \
/* setsockopt hook: __cgroup_bpf_run_filter_setsockopt() is called only when
 * BPF_CGROUP_SETSOCKOPT is enabled.
 */
407 #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
411 if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \
412 __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
/* When BPF_CGROUP_GETSOCKOPT is enabled, fetches the value behind the user
 * pointer @optlen into the macro's result with get_user().
 * NOTE(review): the return value of get_user() is not checked on the
 * visible line.
 */
419 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
422 if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
423 get_user(__ret, optlen); \
/* getsockopt hook. The result starts out as @retval. The programs run only
 * when BPF_CGROUP_GETSOCKOPT is enabled and either the protocol has no
 * ->bpf_bypass_getsockopt callback or that callback (invoked through
 * INDIRECT_CALL_INET_1 with tcp_bpf_bypass_getsockopt as the expected
 * target) returns false.
 */
427 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
428 max_optlen, retval) \
430 int __ret = retval; \
431 if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
432 if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
433 !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
434 tcp_bpf_bypass_getsockopt, \
436 __ret = __cgroup_bpf_run_filter_getsockopt( \
437 sock, level, optname, optval, optlen, \
438 max_optlen, retval); \
/* Kernel-buffer getsockopt hook. The result starts out as @retval and is
 * replaced by __cgroup_bpf_run_filter_getsockopt_kern() when
 * BPF_CGROUP_GETSOCKOPT is enabled.
 */
442 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
445 int __ret = retval; \
446 if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
447 __ret = __cgroup_bpf_run_filter_getsockopt_kern( \
448 sock, level, optname, optval, optlen, retval); \
452 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
453 enum bpf_prog_type ptype, struct bpf_prog *prog);
454 int cgroup_bpf_prog_detach(const union bpf_attr *attr,
455 enum bpf_prog_type ptype);
456 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
457 int cgroup_bpf_prog_query(const union bpf_attr *attr,
458 union bpf_attr __user *uattr);
/* Fallback definitions: an empty cgroup_bpf state and no-op hooks.
 * cgroup_bpf_inherit() always reports success (0).
 */
struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
465 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
466 enum bpf_prog_type ptype,
467 struct bpf_prog *prog)
472 static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
473 enum bpf_prog_type ptype)
478 static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
479 struct bpf_prog *prog)
484 static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
485 union bpf_attr __user *uattr)
490 static inline int bpf_cgroup_storage_set(
491 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
492 static inline void bpf_cgroup_storage_unset(void) {}
493 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
494 struct bpf_map *map) { return 0; }
495 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
496 struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
497 static inline void bpf_cgroup_storage_free(
498 struct bpf_cgroup_storage *storage) {}
499 static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
503 static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
504 void *key, void *value, u64 flags) {
/* Fallback macros: every hook collapses to a constant without evaluating its
 * arguments. The getsockopt variants yield @retval unchanged; all others
 * yield 0.
 */
#define cgroup_bpf_enabled(type) (0)
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
				       optlen, max_optlen, retval) ({ (retval); })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
					    optlen, retval) ({ (retval); })
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
				       kernel_optval) ({ 0; })

/* With no storage types to visit, the loop body never executes. */
#define for_each_cgroup_storage_type(stype) for (; false; )
539 #endif /* CONFIG_CGROUP_BPF */
541 #endif /* _BPF_CGROUP_H */