2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
/*
 * Private expression state, reached through `struct nft_ct *priv` in the
 * handlers of this file. Every field is stored as an 8-bit bit-field.
 * NOTE(review): the enclosing struct's opening and closing lines are not
 * visible in this excerpt.
 */
/* key: selects which conntrack attribute the expression works on. */
27 enum nft_ct_keys key:8;
/* dir: tuple direction; IP_CT_DIR_MAX is the "no direction given" default. */
28 enum ip_conntrack_dir dir:8;
/* dreg: register the get expression writes its result to. */
30 enum nft_registers dreg:8;
/* sreg: register the set expressions load their input from. */
31 enum nft_registers sreg:8;
/*
 * Zone support only: one conntrack template pointer per CPU, plus a count
 * of the expressions using them. The templates are released when that
 * count drops to zero in __nft_ct_set_destroy().
 */
35 #ifdef CONFIG_NF_CONNTRACK_ZONES
36 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
37 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
/*
 * Read a conntrack accounting counter from the per-direction array @c.
 *
 * For a concrete direction (d < IP_CT_DIR_MAX) the bytes counter is
 * returned when k == NFT_CT_BYTES and the packets counter otherwise.
 * For any other @d the function calls itself for the original and the
 * reply direction and returns the sum of both.
 * NOTE(review): the declaration of parameter k is on a line missing from
 * this excerpt.
 */
40 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
42 enum ip_conntrack_dir d)
44 if (d < IP_CT_DIR_MAX)
45 return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
46 atomic64_read(&c[d].packets);
/* No single direction requested: add up both directions. */
48 return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
49 nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
/*
 * nft_ct_get_eval - evaluate a "ct" get expression.
 * @expr: expression whose private data (struct nft_ct) selects key/dir/dreg
 * @regs: register file; the result is written at regs->data[priv->dreg]
 * @pkt:  packet info; the conntrack entry is looked up from pkt->skb
 *
 * NOTE(review): this excerpt is missing lines of the function (braces,
 * several case labels and guards), so the comments below describe only
 * what the visible statements do.
 */
52 static void nft_ct_get_eval(const struct nft_expr *expr,
53 struct nft_regs *regs,
54 const struct nft_pktinfo *pkt)
56 const struct nft_ct *priv = nft_expr_priv(expr);
/* Destination register slot; restored from the mis-encoded "(R)s". */
57 u32 *dest = &regs->data[priv->dreg];
58 enum ip_conntrack_info ctinfo;
59 const struct nf_conn *ct;
60 const struct nf_conn_help *help;
61 const struct nf_conntrack_tuple *tuple;
62 const struct nf_conntrack_helper *helper;
65 ct = nf_ct_get(pkt->skb, &ctinfo);
/* Connection state: invalid, untracked, or the bit derived from ctinfo. */
70 state = NF_CT_STATE_INVALID_BIT;
71 else if (nf_ct_is_untracked(ct))
72 state = NF_CT_STATE_UNTRACKED_BIT;
74 state = NF_CT_STATE_BIT(ctinfo);
85 case NFT_CT_DIRECTION:
86 nft_reg_store8(dest, CTINFO2DIR(ctinfo));
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
101 case NFT_CT_EXPIRATION:
/* Remaining lifetime, converted from jiffies to milliseconds. */
102 *dest = jiffies_to_msecs(nf_ct_expires(ct));
/* Helper name comes from the helper extension of the master connection. */
105 if (ct->master == NULL)
107 help = nfct_help(ct->master);
110 helper = rcu_dereference(help->helper);
113 strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
116 case NFT_CT_LABELS: {
117 struct nf_conn_labels *labels = nf_ct_labels_find(ct);
/* Copy the label bits, or zero the register area (presumably when the
 * connection has no labels; the condition is not visible here). */
120 memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
122 memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
126 case NFT_CT_BYTES: /* fallthrough */
128 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
/* 64-bit counter for the requested key/direction, copied into the register. */
132 count = nft_ct_get_eval_counter(acct->counter,
133 priv->key, priv->dir);
134 memcpy(dest, &count, sizeof(count));
137 case NFT_CT_AVGPKT: {
138 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
139 u64 avgcnt = 0, bcnt = 0, pcnt = 0;
/* Average packet size = bytes / packets for the requested direction. */
142 pcnt = nft_ct_get_eval_counter(acct->counter,
143 NFT_CT_PKTS, priv->dir);
144 bcnt = nft_ct_get_eval_counter(acct->counter,
145 NFT_CT_BYTES, priv->dir);
147 avgcnt = div64_u64(bcnt, pcnt);
150 memcpy(dest, &avgcnt, sizeof(avgcnt));
153 case NFT_CT_L3PROTOCOL:
154 nft_reg_store8(dest, nf_ct_l3num(ct));
156 case NFT_CT_PROTOCOL:
157 nft_reg_store8(dest, nf_ct_protonum(ct));
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
161 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
/* With an explicit direction the zone id is resolved for that direction. */
164 if (priv->dir < IP_CT_DIR_MAX)
165 zoneid = nf_ct_zone_id(zone, priv->dir);
169 nft_reg_store16(dest, zoneid);
/* The remaining keys read from the tuple of the requested direction. */
177 tuple = &ct->tuplehash[priv->dir].tuple;
/* Address copies: 4 bytes for IPv4 connections, 16 bytes otherwise. */
180 memcpy(dest, tuple->src.u3.all,
181 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
184 memcpy(dest, tuple->dst.u3.all,
185 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
187 case NFT_CT_PROTO_SRC:
188 nft_reg_store16(dest, (__force u16)tuple->src.u.all);
190 case NFT_CT_PROTO_DST:
191 nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
/* NOTE(review): presumably the shared failure exit - stop rule evaluation. */
198 regs->verdict.code = NFT_BREAK;
201 #ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * nft_ct_set_zone_eval - evaluate a "ct zone set" expression.
 * @expr: expression whose private data supplies sreg and dir
 * @regs: register file; a 16-bit value is loaded from regs->data[priv->sreg]
 * @pkt:  packet info; the template is attached to pkt->skb
 *
 * Attaches a conntrack template carrying the zone to the skb with state
 * IP_CT_NEW. The per-CPU template is reused while this code holds its only
 * reference; otherwise a fresh template is allocated.
 * NOTE(review): lines are missing from this excerpt (braces, the early
 * return for tracked packets, the zone id assignment, the allocation
 * failure check); the comments cover only the visible statements.
 */
202 static void nft_ct_set_zone_eval(const struct nft_expr *expr,
203 struct nft_regs *regs,
204 const struct nft_pktinfo *pkt)
206 struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
207 const struct nft_ct *priv = nft_expr_priv(expr);
208 struct sk_buff *skb = pkt->skb;
209 enum ip_conntrack_info ctinfo;
/* Source register slot; restored from the mis-encoded "(R)s". */
210 u16 value = nft_reg_load16(&regs->data[priv->sreg]);
213 ct = nf_ct_get(skb, &ctinfo);
214 if (ct) /* already tracked */
/* An explicit direction narrows the zone to that direction only. */
220 case IP_CT_DIR_ORIGINAL:
221 zone.dir = NF_CT_ZONE_DIR_ORIG;
223 case IP_CT_DIR_REPLY:
224 zone.dir = NF_CT_ZONE_DIR_REPL;
230 ct = this_cpu_read(nft_ct_pcpu_template);
/* Use count of 1 means nobody else still holds this CPU's template. */
232 if (likely(atomic_read(&ct->ct_general.use) == 1)) {
233 nf_ct_zone_add(ct, &zone);
235 /* previous skb got queued to userspace */
236 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
/* NOTE(review): presumably taken only when the allocation failed. */
238 regs->verdict.code = NF_DROP;
/* Take a reference for the skb before attaching the template. */
243 atomic_inc(&ct->ct_general.use);
244 nf_ct_set(skb, ct, IP_CT_NEW);
/*
 * nft_ct_set_eval - evaluate a "ct" set expression (mark / labels).
 * @expr: expression whose private data supplies key and sreg
 * @regs: register file; input is read from regs->data[priv->sreg]
 * @pkt:  packet info; the conntrack entry is looked up from pkt->skb
 *
 * NOTE(review): lines are missing from this excerpt (braces, case labels,
 * the NULL check on ct, the mark assignment); the comments cover only the
 * visible statements.
 */
248 static void nft_ct_set_eval(const struct nft_expr *expr,
249 struct nft_regs *regs,
250 const struct nft_pktinfo *pkt)
252 const struct nft_ct *priv = nft_expr_priv(expr);
253 struct sk_buff *skb = pkt->skb;
254 #ifdef CONFIG_NF_CONNTRACK_MARK
255 u32 value = regs->data[priv->sreg];
257 enum ip_conntrack_info ctinfo;
260 ct = nf_ct_get(skb, &ctinfo);
265 #ifdef CONFIG_NF_CONNTRACK_MARK
/* Only a changed mark is followed by an IPCT_MARK event. */
267 if (ct->mark != value) {
269 nf_conntrack_event_cache(IPCT_MARK, ct);
273 #ifdef CONFIG_NF_CONNTRACK_LABELS
/*
 * The same register area is passed as both data and mask; the length is
 * given in 32-bit words. Both "(R)s" occurrences restored to "&regs".
 */
275 nf_connlabels_replace(ct,
276 &regs->data[priv->sreg],
277 &regs->data[priv->sreg],
278 NF_CT_LABELS_MAX_SIZE / sizeof(u32));
/*
 * Netlink attribute policy for the "ct" expression: the destination
 * register, key and source register are 32-bit values, the direction is
 * a single byte.
 */
286 static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
287 [NFTA_CT_DREG] = { .type = NLA_U32 },
288 [NFTA_CT_KEY] = { .type = NLA_U32 },
289 [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
290 [NFTA_CT_SREG] = { .type = NLA_U32 },
/*
 * Take conntrack netns references for @family in @net. NFPROTO_INET takes
 * one reference for IPv4 and one for IPv6; any other family is passed
 * straight to nf_ct_netns_get().
 * NOTE(review): the error checks and return statements are on lines
 * missing from this excerpt.
 */
293 static int nft_ct_netns_get(struct net *net, uint8_t family)
297 if (family == NFPROTO_INET) {
298 err = nf_ct_netns_get(net, NFPROTO_IPV4);
301 err = nf_ct_netns_get(net, NFPROTO_IPV6);
305 err = nf_ct_netns_get(net, family);
/* NOTE(review): presumably the rollback taken when the IPv6 get fails. */
312 nf_ct_netns_put(net, NFPROTO_IPV4);
/*
 * Counterpart of nft_ct_netns_get(): for NFPROTO_INET both the IPv4 and
 * the IPv6 reference are released, otherwise the one for @family.
 */
317 static void nft_ct_netns_put(struct net *net, uint8_t family)
319 if (family == NFPROTO_INET) {
320 nf_ct_netns_put(net, NFPROTO_IPV4);
321 nf_ct_netns_put(net, NFPROTO_IPV6);
323 nf_ct_netns_put(net, family);
326 #ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Walk every possible CPU, fetch its template pointer and reset the
 * per-CPU slot to NULL.
 * NOTE(review): the statements between the fetch and the reset (presumably
 * the release of the template) are missing from this excerpt.
 */
327 static void nft_ct_tmpl_put_pcpu(void)
332 for_each_possible_cpu(cpu) {
333 ct = per_cpu(nft_ct_pcpu_template, cpu);
337 per_cpu(nft_ct_pcpu_template, cpu) = NULL;
/*
 * Allocate one conntrack template (zone id 0, in init_net, GFP_KERNEL) for
 * every possible CPU and store it in the per-CPU slot with a use count of 1.
 * A non-zero nft_ct_pcpu_template_refcnt is checked first (presumably
 * because the templates already exist; the statement it guards is not
 * visible). nft_ct_tmpl_put_pcpu() is called from inside the loop,
 * presumably to undo a partial allocation when one CPU's allocation fails.
 * NOTE(review): return statements and the failure check are on lines
 * missing from this excerpt.
 */
341 static bool nft_ct_tmpl_alloc_pcpu(void)
343 struct nf_conntrack_zone zone = { .id = 0 };
347 if (nft_ct_pcpu_template_refcnt)
350 for_each_possible_cpu(cpu) {
351 tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
353 nft_ct_tmpl_put_pcpu();
357 atomic_set(&tmp->ct_general.use, 1);
358 per_cpu(nft_ct_pcpu_template, cpu) = tmp;
/*
 * nft_ct_get_init - set up a "ct" get expression from netlink attributes.
 * @ctx:  nf_tables context (supplies the netns and the address family)
 * @expr: expression being initialised; its private data is a struct nft_ct
 * @tb:   parsed NFTA_CT_* attributes
 *
 * Visible steps: read the key, default the direction to IP_CT_DIR_MAX,
 * derive the result length per key while checking whether a direction
 * attribute is present, read the direction attribute when given, parse
 * and validate the destination register for that length, take the
 * conntrack netns reference(s) via nft_ct_netns_get(), and switch
 * conntrack accounting on for the bytes/packets/average-packet keys.
 * Unsupported families for the address keys yield -EAFNOSUPPORT.
 * NOTE(review): many lines (braces, case labels, return statements) are
 * missing from this excerpt, and the original comment further down has
 * lost its closing line, so no comments are added past that point.
 */
365 static int nft_ct_get_init(const struct nft_ctx *ctx,
366 const struct nft_expr *expr,
367 const struct nlattr * const tb[])
369 struct nft_ct *priv = nft_expr_priv(expr);
/* The key arrives as a big-endian 32-bit attribute. */
373 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
/* IP_CT_DIR_MAX stands for "no direction attribute given". */
374 priv->dir = IP_CT_DIR_MAX;
376 case NFT_CT_DIRECTION:
377 if (tb[NFTA_CT_DIRECTION] != NULL)
383 #ifdef CONFIG_NF_CONNTRACK_MARK
386 #ifdef CONFIG_NF_CONNTRACK_SECMARK
389 case NFT_CT_EXPIRATION:
390 if (tb[NFTA_CT_DIRECTION] != NULL)
394 #ifdef CONFIG_NF_CONNTRACK_LABELS
396 if (tb[NFTA_CT_DIRECTION] != NULL)
398 len = NF_CT_LABELS_MAX_SIZE;
402 if (tb[NFTA_CT_DIRECTION] != NULL)
404 len = NF_CT_HELPER_NAME_LEN;
407 case NFT_CT_L3PROTOCOL:
408 case NFT_CT_PROTOCOL:
409 /* For compatibility, do not report error if NFTA_CT_DIRECTION
410 * attribute is specified.
416 if (tb[NFTA_CT_DIRECTION] == NULL)
419 switch (ctx->afi->family) {
421 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
426 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
430 return -EAFNOSUPPORT;
433 case NFT_CT_PROTO_SRC:
434 case NFT_CT_PROTO_DST:
435 if (tb[NFTA_CT_DIRECTION] == NULL)
437 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
444 #ifdef CONFIG_NF_CONNTRACK_ZONES
453 if (tb[NFTA_CT_DIRECTION] != NULL) {
454 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
456 case IP_CT_DIR_ORIGINAL:
457 case IP_CT_DIR_REPLY:
464 priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
465 err = nft_validate_register_store(ctx, priv->dreg, NULL,
466 NFT_DATA_VALUE, len);
470 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
474 if (priv->key == NFT_CT_BYTES ||
475 priv->key == NFT_CT_PKTS ||
476 priv->key == NFT_CT_AVGPKT)
477 nf_ct_set_acct(ctx->net, true);
/*
 * Release the key-specific resources of a "ct" set expression: the
 * connlabels reference (labels support) and, with zone support, the
 * per-CPU templates once the last user is gone.
 * NOTE(review): the switch and case labels selecting these branches are on
 * lines missing from this excerpt.
 */
482 static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
485 #ifdef CONFIG_NF_CONNTRACK_LABELS
487 nf_connlabels_put(ctx->net);
490 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* Free the templates only when the user count reaches zero. */
492 if (--nft_ct_pcpu_template_refcnt == 0)
493 nft_ct_tmpl_put_pcpu();
/*
 * nft_ct_set_init - set up a "ct" set expression from netlink attributes.
 * @ctx:  nf_tables context (supplies the netns and the address family)
 * @expr: expression being initialised; its private data is a struct nft_ct
 * @tb:   parsed NFTA_CT_* attributes
 *
 * Visible steps: default the direction to IP_CT_DIR_MAX, read the key,
 * derive the input length per key (mark field size, label area size),
 * acquire key-specific resources (connlabels reference, per-CPU zone
 * templates), read the direction attribute when given, parse and validate
 * the source register, and take the conntrack netns reference(s).
 * NOTE(review): braces, case labels, error checks and return statements
 * are on lines missing from this excerpt.
 */
500 static int nft_ct_set_init(const struct nft_ctx *ctx,
501 const struct nft_expr *expr,
502 const struct nlattr * const tb[])
504 struct nft_ct *priv = nft_expr_priv(expr);
508 priv->dir = IP_CT_DIR_MAX;
509 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
511 #ifdef CONFIG_NF_CONNTRACK_MARK
513 if (tb[NFTA_CT_DIRECTION])
515 len = FIELD_SIZEOF(struct nf_conn, mark);
518 #ifdef CONFIG_NF_CONNTRACK_LABELS
520 if (tb[NFTA_CT_DIRECTION])
522 len = NF_CT_LABELS_MAX_SIZE;
/* Request label support up to the highest bit index of the label area. */
523 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
528 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* Make sure the per-CPU templates exist, then count this user. */
530 if (!nft_ct_tmpl_alloc_pcpu())
532 nft_ct_pcpu_template_refcnt++;
540 if (tb[NFTA_CT_DIRECTION]) {
541 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
543 case IP_CT_DIR_ORIGINAL:
544 case IP_CT_DIR_REPLY:
551 priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
552 err = nft_validate_register_load(priv->sreg, len);
556 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
/* NOTE(review): presumably the error path - undo key-specific setup. */
563 __nft_ct_set_destroy(ctx, priv);
/*
 * nft_ct_get_destroy - tear down a "ct" get expression.
 * @ctx:  nf_tables context (supplies the netns and the address family)
 * @expr: expression being destroyed (unused)
 *
 * Drops the conntrack netns reference(s) taken in nft_ct_get_init().
 * Init acquires them through nft_ct_netns_get(), which takes both an IPv4
 * and an IPv6 reference for NFPROTO_INET, so the release must go through
 * the matching nft_ct_netns_put() wrapper rather than the plain
 * nf_ct_netns_put().
 */
567 static void nft_ct_get_destroy(const struct nft_ctx *ctx,
568 const struct nft_expr *expr)
570 nft_ct_netns_put(ctx->net, ctx->afi->family);
/*
 * Tear down a "ct" set expression: release its key-specific resources,
 * then drop the conntrack netns reference(s) for the context's family.
 */
573 static void nft_ct_set_destroy(const struct nft_ctx *ctx,
574 const struct nft_expr *expr)
576 struct nft_ct *priv = nft_expr_priv(expr);
578 __nft_ct_set_destroy(ctx, priv);
579 nft_ct_netns_put(ctx->net, ctx->afi->family);
/*
 * Dump a "ct" get expression to a netlink message: destination register,
 * key (big-endian 32-bit) and, depending on the key, the direction.
 * Any failed put jumps to the nla_put_failure label.
 * NOTE(review): the switch, some case labels, the label itself and the
 * return statements are on lines missing from this excerpt.
 */
582 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
584 const struct nft_ct *priv = nft_expr_priv(expr);
586 if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
587 goto nla_put_failure;
588 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
589 goto nla_put_failure;
/* These keys always emit the direction attribute. */
594 case NFT_CT_PROTO_SRC:
595 case NFT_CT_PROTO_DST:
596 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
597 goto nla_put_failure;
/* Here the direction is emitted only when one was configured. */
603 if (priv->dir < IP_CT_DIR_MAX &&
604 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
605 goto nla_put_failure;
/*
 * Dump a "ct" set expression to a netlink message: source register, key
 * (big-endian 32-bit) and the direction when one was configured
 * (priv->dir below IP_CT_DIR_MAX). Any failed put jumps to nla_put_failure.
 * NOTE(review): the label and the return statements are on lines missing
 * from this excerpt.
 */
617 static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
619 const struct nft_ct *priv = nft_expr_priv(expr);
621 if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
622 goto nla_put_failure;
623 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
624 goto nla_put_failure;
628 if (priv->dir < IP_CT_DIR_MAX &&
629 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
630 goto nla_put_failure;
/* Forward declaration: the ops tables below point back at the type. */
642 static struct nft_expr_type nft_ct_type;
/* Operations used when the expression reads a conntrack key into a register. */
643 static const struct nft_expr_ops nft_ct_get_ops = {
644 .type = &nft_ct_type,
645 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
646 .eval = nft_ct_get_eval,
647 .init = nft_ct_get_init,
648 .destroy = nft_ct_get_destroy,
649 .dump = nft_ct_get_dump,
/* Operations used when the expression writes a register value to conntrack. */
652 static const struct nft_expr_ops nft_ct_set_ops = {
653 .type = &nft_ct_type,
654 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
655 .eval = nft_ct_set_eval,
656 .init = nft_ct_set_init,
657 .destroy = nft_ct_set_destroy,
658 .dump = nft_ct_set_dump,
661 #ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Set operations for the zone key: same init/destroy/dump as the generic
 * set ops, but with the dedicated zone evaluation routine.
 */
662 static const struct nft_expr_ops nft_ct_set_zone_ops = {
663 .type = &nft_ct_type,
664 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
665 .eval = nft_ct_set_zone_eval,
666 .init = nft_ct_set_init,
667 .destroy = nft_ct_set_destroy,
668 .dump = nft_ct_set_dump,
/*
 * Choose the ops table for a new "ct" expression from its attributes.
 *
 * Returns ERR_PTR(-EINVAL) when the key is missing, when both a
 * destination and a source register are given, or when neither is given.
 * A destination register selects the get ops. A source register selects
 * the set ops, or the zone set ops when zones are enabled and the key is
 * NFT_CT_ZONE.
 */
672 static const struct nft_expr_ops *
673 nft_ct_select_ops(const struct nft_ctx *ctx,
674 const struct nlattr * const tb[])
676 if (tb[NFTA_CT_KEY] == NULL)
677 return ERR_PTR(-EINVAL);
/* An expression is either a getter or a setter, never both. */
679 if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
680 return ERR_PTR(-EINVAL);
682 if (tb[NFTA_CT_DREG])
683 return &nft_ct_get_ops;
685 if (tb[NFTA_CT_SREG]) {
686 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* Compared in network byte order, so the attribute needs no conversion. */
687 if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
688 return &nft_ct_set_zone_ops;
690 return &nft_ct_set_ops;
693 return ERR_PTR(-EINVAL);
/*
 * Expression type for "ct": the ops table is picked per expression by
 * nft_ct_select_ops(), and attributes are checked against nft_ct_policy.
 * NOTE(review): the .name initializer is on a line missing from this excerpt.
 */
696 static struct nft_expr_type nft_ct_type __read_mostly = {
698 .select_ops = &nft_ct_select_ops,
699 .policy = nft_ct_policy,
700 .maxattr = NFTA_CT_MAX,
701 .owner = THIS_MODULE,
/*
 * Evaluate a "notrack" expression: attach the untracked conntrack object
 * to the skb (state IP_CT_NEW) after taking a reference on it.
 * NOTE(review): the check that skips packets which already carry a
 * conntrack entry is on a line missing from this excerpt.
 */
704 static void nft_notrack_eval(const struct nft_expr *expr,
705 struct nft_regs *regs,
706 const struct nft_pktinfo *pkt)
708 struct sk_buff *skb = pkt->skb;
709 enum ip_conntrack_info ctinfo;
712 ct = nf_ct_get(pkt->skb, &ctinfo);
713 /* Previously seen (loopback or untracked)? Ignore. */
717 ct = nf_ct_untracked_get();
/* The skb holds its own reference to the untracked object. */
718 atomic_inc(&ct->ct_general.use);
719 nf_ct_set(skb, ct, IP_CT_NEW);
/* Forward declaration: the ops table below points back at the type. */
722 static struct nft_expr_type nft_notrack_type;
/* "notrack" carries no private data (size 0) and only needs an eval hook. */
723 static const struct nft_expr_ops nft_notrack_ops = {
724 .type = &nft_notrack_type,
725 .size = NFT_EXPR_SIZE(0),
726 .eval = nft_notrack_eval,
/*
 * Expression type for "notrack", using a single fixed ops table.
 * NOTE(review): the .name initializer is on a line missing from this excerpt.
 */
729 static struct nft_expr_type nft_notrack_type __read_mostly = {
731 .ops = &nft_notrack_ops,
732 .owner = THIS_MODULE,
/*
 * Module init: register the "ct" expression type, then the "notrack" type.
 * NOTE(review): the error checks and return statements are on lines
 * missing from this excerpt.
 */
735 static int __init nft_ct_module_init(void)
/* Compile-time check: the label area must fit into the register file. */
739 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
741 err = nft_register_expr(&nft_ct_type);
745 err = nft_register_expr(&nft_notrack_type);
/* NOTE(review): presumably the rollback when the notrack registration fails. */
751 nft_unregister_expr(&nft_ct_type);
/* Module exit: unregister both expression types, in reverse order of init. */
755 static void __exit nft_ct_module_exit(void)
757 nft_unregister_expr(&nft_notrack_type);
758 nft_unregister_expr(&nft_ct_type);
/* Hook the init/exit routines into the module loader. */
761 module_init(nft_ct_module_init);
762 module_exit(nft_ct_module_exit);
/* Module metadata, plus one expression alias for each of "ct" and "notrack". */
764 MODULE_LICENSE("GPL");
765 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
766 MODULE_ALIAS_NFT_EXPR("ct");
767 MODULE_ALIAS_NFT_EXPR("notrack");