Merge tag 'i2c-for-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa...
[linux-2.6-microblaze.git] / net / netfilter / nf_conntrack_bpf.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Unstable Conntrack Helpers for XDP and TC-BPF hook
3  *
4  * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
5  * allowed to break compatibility for these functions since the interface they
6  * are exposed through to BPF programs is explicitly unstable.
7  */
8
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/build_bug.h>
#include <linux/filter.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
20
21 /* bpf_ct_opts - Options for CT lookup helpers
22  *
23  * Members:
24  * @netns_id   - Specify the network namespace for lookup
25  *               Values:
26  *                 BPF_F_CURRENT_NETNS (-1)
27  *                   Use namespace associated with ctx (xdp_md, __sk_buff)
28  *                 [0, S32_MAX]
29  *                   Network Namespace ID
30  * @error      - Out parameter, set for any errors encountered
31  *               Values:
32  *                 -EINVAL - Passed NULL for bpf_tuple pointer
33  *                 -EINVAL - opts->reserved is not 0
34  *                 -EINVAL - netns_id is less than -1
35  *                 -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
36  *                 -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
37  *                 -ENONET - No network namespace found for netns_id
38  *                 -ENOENT - Conntrack lookup could not find entry for tuple
39  *                 -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
40  *                                 or sizeof(tuple->ipv6)
41  * @l4proto    - Layer 4 protocol
42  *               Values:
43  *                 IPPROTO_TCP, IPPROTO_UDP
44  * @dir:       - connection tracking tuple direction.
45  * @reserved   - Reserved member, will be reused for more options in future
46  *               Values:
47  *                 0
48  */
49 struct bpf_ct_opts {
50         s32 netns_id;
51         s32 error;
52         u8 l4proto;
53         u8 dir;
54         u8 reserved[2];
55 };
56
57 enum {
58         NF_BPF_CT_OPTS_SZ = 12,
59 };
60
61 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
62                                  u32 tuple_len, u8 protonum, u8 dir,
63                                  struct nf_conntrack_tuple *tuple)
64 {
65         union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
66         union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
67         union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
68                                                   : &tuple->src.u;
69         union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
70                                                   : (void *)&tuple->dst.u;
71
72         if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
73                 return -EPROTO;
74
75         memset(tuple, 0, sizeof(*tuple));
76
77         switch (tuple_len) {
78         case sizeof(bpf_tuple->ipv4):
79                 tuple->src.l3num = AF_INET;
80                 src->ip = bpf_tuple->ipv4.saddr;
81                 sport->tcp.port = bpf_tuple->ipv4.sport;
82                 dst->ip = bpf_tuple->ipv4.daddr;
83                 dport->tcp.port = bpf_tuple->ipv4.dport;
84                 break;
85         case sizeof(bpf_tuple->ipv6):
86                 tuple->src.l3num = AF_INET6;
87                 memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
88                 sport->tcp.port = bpf_tuple->ipv6.sport;
89                 memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
90                 dport->tcp.port = bpf_tuple->ipv6.dport;
91                 break;
92         default:
93                 return -EAFNOSUPPORT;
94         }
95         tuple->dst.protonum = protonum;
96         tuple->dst.dir = dir;
97
98         return 0;
99 }
100
101 static struct nf_conn *
102 __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
103                         u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
104                         u32 timeout)
105 {
106         struct nf_conntrack_tuple otuple, rtuple;
107         struct nf_conn *ct;
108         int err;
109
110         if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
111             opts_len != NF_BPF_CT_OPTS_SZ)
112                 return ERR_PTR(-EINVAL);
113
114         if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
115                 return ERR_PTR(-EINVAL);
116
117         err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
118                                     IP_CT_DIR_ORIGINAL, &otuple);
119         if (err < 0)
120                 return ERR_PTR(err);
121
122         err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
123                                     IP_CT_DIR_REPLY, &rtuple);
124         if (err < 0)
125                 return ERR_PTR(err);
126
127         if (opts->netns_id >= 0) {
128                 net = get_net_ns_by_id(net, opts->netns_id);
129                 if (unlikely(!net))
130                         return ERR_PTR(-ENONET);
131         }
132
133         ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
134                                 GFP_ATOMIC);
135         if (IS_ERR(ct))
136                 goto out;
137
138         memset(&ct->proto, 0, sizeof(ct->proto));
139         __nf_ct_set_timeout(ct, timeout * HZ);
140
141 out:
142         if (opts->netns_id >= 0)
143                 put_net(net);
144
145         return ct;
146 }
147
148 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
149                                           struct bpf_sock_tuple *bpf_tuple,
150                                           u32 tuple_len, struct bpf_ct_opts *opts,
151                                           u32 opts_len)
152 {
153         struct nf_conntrack_tuple_hash *hash;
154         struct nf_conntrack_tuple tuple;
155         struct nf_conn *ct;
156         int err;
157
158         if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
159             opts_len != NF_BPF_CT_OPTS_SZ)
160                 return ERR_PTR(-EINVAL);
161         if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
162                 return ERR_PTR(-EPROTO);
163         if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
164                 return ERR_PTR(-EINVAL);
165
166         err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
167                                     IP_CT_DIR_ORIGINAL, &tuple);
168         if (err < 0)
169                 return ERR_PTR(err);
170
171         if (opts->netns_id >= 0) {
172                 net = get_net_ns_by_id(net, opts->netns_id);
173                 if (unlikely(!net))
174                         return ERR_PTR(-ENONET);
175         }
176
177         hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
178         if (opts->netns_id >= 0)
179                 put_net(net);
180         if (!hash)
181                 return ERR_PTR(-ENOENT);
182
183         ct = nf_ct_tuplehash_to_ctrack(hash);
184         opts->dir = NF_CT_DIRECTION(hash);
185
186         return ct;
187 }
188
189 BTF_ID_LIST(btf_nf_conn_ids)
190 BTF_ID(struct, nf_conn)
191 BTF_ID(struct, nf_conn___init)
192
193 /* Check writes into `struct nf_conn` */
194 static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
195                                            const struct bpf_reg_state *reg,
196                                            int off, int size)
197 {
198         const struct btf_type *ncit, *nct, *t;
199         size_t end;
200
201         ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
202         nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
203         t = btf_type_by_id(reg->btf, reg->btf_id);
204         if (t != nct && t != ncit) {
205                 bpf_log(log, "only read is supported\n");
206                 return -EACCES;
207         }
208
209         /* `struct nf_conn` and `struct nf_conn___init` have the same layout
210          * so we are safe to simply merge offset checks here
211          */
212         switch (off) {
213 #if defined(CONFIG_NF_CONNTRACK_MARK)
214         case offsetof(struct nf_conn, mark):
215                 end = offsetofend(struct nf_conn, mark);
216                 break;
217 #endif
218         default:
219                 bpf_log(log, "no write support to nf_conn at off %d\n", off);
220                 return -EACCES;
221         }
222
223         if (off + size > end) {
224                 bpf_log(log,
225                         "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
226                         off, size, end);
227                 return -EACCES;
228         }
229
230         return 0;
231 }
232
233 __diag_push();
234 __diag_ignore_all("-Wmissing-prototypes",
235                   "Global functions as their definitions will be in nf_conntrack BTF");
236
237 /* bpf_xdp_ct_alloc - Allocate a new CT entry
238  *
239  * Parameters:
240  * @xdp_ctx     - Pointer to ctx (xdp_md) in XDP program
241  *                  Cannot be NULL
242  * @bpf_tuple   - Pointer to memory representing the tuple to look up
243  *                  Cannot be NULL
244  * @tuple__sz   - Length of the tuple structure
245  *                  Must be one of sizeof(bpf_tuple->ipv4) or
246  *                  sizeof(bpf_tuple->ipv6)
247  * @opts        - Additional options for allocation (documented above)
248  *                  Cannot be NULL
249  * @opts__sz    - Length of the bpf_ct_opts structure
250  *                  Must be NF_BPF_CT_OPTS_SZ (12)
251  */
252 __bpf_kfunc struct nf_conn___init *
253 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
254                  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
255 {
256         struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
257         struct nf_conn *nfct;
258
259         nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
260                                        opts, opts__sz, 10);
261         if (IS_ERR(nfct)) {
262                 if (opts)
263                         opts->error = PTR_ERR(nfct);
264                 return NULL;
265         }
266
267         return (struct nf_conn___init *)nfct;
268 }
269
270 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
271  *                     reference to it
272  *
273  * Parameters:
274  * @xdp_ctx     - Pointer to ctx (xdp_md) in XDP program
275  *                  Cannot be NULL
276  * @bpf_tuple   - Pointer to memory representing the tuple to look up
277  *                  Cannot be NULL
278  * @tuple__sz   - Length of the tuple structure
279  *                  Must be one of sizeof(bpf_tuple->ipv4) or
280  *                  sizeof(bpf_tuple->ipv6)
281  * @opts        - Additional options for lookup (documented above)
282  *                  Cannot be NULL
283  * @opts__sz    - Length of the bpf_ct_opts structure
284  *                  Must be NF_BPF_CT_OPTS_SZ (12)
285  */
286 __bpf_kfunc struct nf_conn *
287 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
288                   u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
289 {
290         struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
291         struct net *caller_net;
292         struct nf_conn *nfct;
293
294         caller_net = dev_net(ctx->rxq->dev);
295         nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
296         if (IS_ERR(nfct)) {
297                 if (opts)
298                         opts->error = PTR_ERR(nfct);
299                 return NULL;
300         }
301         return nfct;
302 }
303
304 /* bpf_skb_ct_alloc - Allocate a new CT entry
305  *
306  * Parameters:
307  * @skb_ctx     - Pointer to ctx (__sk_buff) in TC program
308  *                  Cannot be NULL
309  * @bpf_tuple   - Pointer to memory representing the tuple to look up
310  *                  Cannot be NULL
311  * @tuple__sz   - Length of the tuple structure
312  *                  Must be one of sizeof(bpf_tuple->ipv4) or
313  *                  sizeof(bpf_tuple->ipv6)
314  * @opts        - Additional options for allocation (documented above)
315  *                  Cannot be NULL
316  * @opts__sz    - Length of the bpf_ct_opts structure
317  *                  Must be NF_BPF_CT_OPTS_SZ (12)
318  */
319 __bpf_kfunc struct nf_conn___init *
320 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
321                  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
322 {
323         struct sk_buff *skb = (struct sk_buff *)skb_ctx;
324         struct nf_conn *nfct;
325         struct net *net;
326
327         net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
328         nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
329         if (IS_ERR(nfct)) {
330                 if (opts)
331                         opts->error = PTR_ERR(nfct);
332                 return NULL;
333         }
334
335         return (struct nf_conn___init *)nfct;
336 }
337
338 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
339  *                     reference to it
340  *
341  * Parameters:
342  * @skb_ctx     - Pointer to ctx (__sk_buff) in TC program
343  *                  Cannot be NULL
344  * @bpf_tuple   - Pointer to memory representing the tuple to look up
345  *                  Cannot be NULL
346  * @tuple__sz   - Length of the tuple structure
347  *                  Must be one of sizeof(bpf_tuple->ipv4) or
348  *                  sizeof(bpf_tuple->ipv6)
349  * @opts        - Additional options for lookup (documented above)
350  *                  Cannot be NULL
351  * @opts__sz    - Length of the bpf_ct_opts structure
352  *                  Must be NF_BPF_CT_OPTS_SZ (12)
353  */
354 __bpf_kfunc struct nf_conn *
355 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
356                   u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
357 {
358         struct sk_buff *skb = (struct sk_buff *)skb_ctx;
359         struct net *caller_net;
360         struct nf_conn *nfct;
361
362         caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
363         nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
364         if (IS_ERR(nfct)) {
365                 if (opts)
366                         opts->error = PTR_ERR(nfct);
367                 return NULL;
368         }
369         return nfct;
370 }
371
372 /* bpf_ct_insert_entry - Add the provided entry into a CT map
373  *
374  * This must be invoked for referenced PTR_TO_BTF_ID.
375  *
376  * @nfct         - Pointer to referenced nf_conn___init object, obtained
377  *                 using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
378  */
379 __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
380 {
381         struct nf_conn *nfct = (struct nf_conn *)nfct_i;
382         int err;
383
384         if (!nf_ct_is_confirmed(nfct))
385                 nfct->timeout += nfct_time_stamp;
386         nfct->status |= IPS_CONFIRMED;
387         err = nf_conntrack_hash_check_insert(nfct);
388         if (err < 0) {
389                 nf_conntrack_free(nfct);
390                 return NULL;
391         }
392         return nfct;
393 }
394
395 /* bpf_ct_release - Release acquired nf_conn object
396  *
397  * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
398  * the program if any references remain in the program in all of the explored
399  * states.
400  *
401  * Parameters:
402  * @nf_conn      - Pointer to referenced nf_conn object, obtained using
403  *                 bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
404  */
405 __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
406 {
407         nf_ct_put(nfct);
408 }
409
410 /* bpf_ct_set_timeout - Set timeout of allocated nf_conn
411  *
412  * Sets the default timeout of newly allocated nf_conn before insertion.
413  * This helper must be invoked for refcounted pointer to nf_conn___init.
414  *
415  * Parameters:
416  * @nfct         - Pointer to referenced nf_conn object, obtained using
417  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
418  * @timeout      - Timeout in msecs.
419  */
420 __bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
421 {
422         __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
423 }
424
425 /* bpf_ct_change_timeout - Change timeout of inserted nf_conn
426  *
427  * Change timeout associated of the inserted or looked up nf_conn.
428  * This helper must be invoked for refcounted pointer to nf_conn.
429  *
430  * Parameters:
431  * @nfct         - Pointer to referenced nf_conn object, obtained using
432  *                 bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
433  * @timeout      - New timeout in msecs.
434  */
435 __bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
436 {
437         return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
438 }
439
440 /* bpf_ct_set_status - Set status field of allocated nf_conn
441  *
442  * Set the status field of the newly allocated nf_conn before insertion.
443  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
444  *
445  * Parameters:
446  * @nfct         - Pointer to referenced nf_conn object, obtained using
447  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
448  * @status       - New status value.
449  */
450 __bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
451 {
452         return nf_ct_change_status_common((struct nf_conn *)nfct, status);
453 }
454
455 /* bpf_ct_change_status - Change status of inserted nf_conn
456  *
457  * Change the status field of the provided connection tracking entry.
458  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
459  *
460  * Parameters:
461  * @nfct         - Pointer to referenced nf_conn object, obtained using
462  *                 bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
463  * @status       - New status value.
464  */
465 __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
466 {
467         return nf_ct_change_status_common(nfct, status);
468 }
469
470 __diag_pop()
471
472 BTF_SET8_START(nf_ct_kfunc_set)
473 BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
474 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
475 BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
476 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
477 BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
478 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
479 BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
480 BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
481 BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
482 BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
483 BTF_SET8_END(nf_ct_kfunc_set)
484
485 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
486         .owner = THIS_MODULE,
487         .set   = &nf_ct_kfunc_set,
488 };
489
490 int register_nf_conntrack_bpf(void)
491 {
492         int ret;
493
494         ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
495         ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
496         if (!ret) {
497                 mutex_lock(&nf_conn_btf_access_lock);
498                 nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
499                 mutex_unlock(&nf_conn_btf_access_lock);
500         }
501
502         return ret;
503 }
504
505 void cleanup_nf_conntrack_bpf(void)
506 {
507         mutex_lock(&nf_conn_btf_access_lock);
508         nfct_btf_struct_access = NULL;
509         mutex_unlock(&nf_conn_btf_access_lock);
510 }