1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Expectation handling for nf_conntrack. */
/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
 */
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/skbuff.h>
13 #include <linux/proc_fs.h>
14 #include <linux/seq_file.h>
15 #include <linux/stddef.h>
16 #include <linux/slab.h>
17 #include <linux/err.h>
18 #include <linux/percpu.h>
19 #include <linux/kernel.h>
20 #include <linux/jhash.h>
21 #include <linux/moduleparam.h>
22 #include <linux/export.h>
23 #include <net/net_namespace.h>
24 #include <net/netns/hash.h>
26 #include <net/netfilter/nf_conntrack.h>
27 #include <net/netfilter/nf_conntrack_core.h>
28 #include <net/netfilter/nf_conntrack_ecache.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_l4proto.h>
32 #include <net/netfilter/nf_conntrack_tuple.h>
33 #include <net/netfilter/nf_conntrack_zones.h>
35 unsigned int nf_ct_expect_hsize __read_mostly;
36 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
38 struct hlist_head *nf_ct_expect_hash __read_mostly;
39 EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
41 unsigned int nf_ct_expect_max __read_mostly;
43 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
44 static unsigned int nf_ct_expect_hashrnd __read_mostly;
46 /* nf_conntrack_expect helper functions */
47 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
48 u32 portid, int report)
50 struct nf_conn_help *master_help = nfct_help(exp->master);
51 struct net *net = nf_ct_exp_net(exp);
52 struct nf_conntrack_net *cnet;
54 WARN_ON(!master_help);
55 WARN_ON(timer_pending(&exp->timeout));
57 hlist_del_rcu(&exp->hnode);
59 cnet = nf_ct_pernet(net);
62 hlist_del_rcu(&exp->lnode);
63 master_help->expecting[exp->class]--;
65 nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
66 nf_ct_expect_put(exp);
68 NF_CT_STAT_INC(net, expect_delete);
70 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
72 static void nf_ct_expectation_timed_out(struct timer_list *t)
74 struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);
76 spin_lock_bh(&nf_conntrack_expect_lock);
77 nf_ct_unlink_expect(exp);
78 spin_unlock_bh(&nf_conntrack_expect_lock);
79 nf_ct_expect_put(exp);
82 static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
84 unsigned int hash, seed;
86 get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
88 seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
90 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
91 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
92 (__force __u16)tuple->dst.u.all) ^ seed);
94 return reciprocal_scale(hash, nf_ct_expect_hsize);
98 nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
99 const struct nf_conntrack_expect *i,
100 const struct nf_conntrack_zone *zone,
101 const struct net *net)
103 return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
104 net_eq(net, nf_ct_net(i->master)) &&
105 nf_ct_zone_equal_any(i->master, zone);
108 bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
110 if (del_timer(&exp->timeout)) {
111 nf_ct_unlink_expect(exp);
112 nf_ct_expect_put(exp);
117 EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
119 struct nf_conntrack_expect *
120 __nf_ct_expect_find(struct net *net,
121 const struct nf_conntrack_zone *zone,
122 const struct nf_conntrack_tuple *tuple)
124 struct nf_conntrack_net *cnet = nf_ct_pernet(net);
125 struct nf_conntrack_expect *i;
128 if (!cnet->expect_count)
131 h = nf_ct_expect_dst_hash(net, tuple);
132 hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
133 if (nf_ct_exp_equal(tuple, i, zone, net))
138 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
140 /* Just find a expectation corresponding to a tuple. */
141 struct nf_conntrack_expect *
142 nf_ct_expect_find_get(struct net *net,
143 const struct nf_conntrack_zone *zone,
144 const struct nf_conntrack_tuple *tuple)
146 struct nf_conntrack_expect *i;
149 i = __nf_ct_expect_find(net, zone, tuple);
150 if (i && !refcount_inc_not_zero(&i->use))
156 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
158 /* If an expectation for this connection is found, it gets delete from
159 * global list then returned. */
160 struct nf_conntrack_expect *
161 nf_ct_find_expectation(struct net *net,
162 const struct nf_conntrack_zone *zone,
163 const struct nf_conntrack_tuple *tuple)
165 struct nf_conntrack_net *cnet = nf_ct_pernet(net);
166 struct nf_conntrack_expect *i, *exp = NULL;
169 if (!cnet->expect_count)
172 h = nf_ct_expect_dst_hash(net, tuple);
173 hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
174 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
175 nf_ct_exp_equal(tuple, i, zone, net)) {
183 /* If master is not in hash table yet (ie. packet hasn't left
184 this machine yet), how can other end know about expected?
185 Hence these are not the droids you are looking for (if
186 master ct never got confirmed, we'd hold a reference to it
187 and weird things would happen to future packets). */
188 if (!nf_ct_is_confirmed(exp->master))
191 /* Avoid race with other CPUs, that for exp->master ct, is
192 * about to invoke ->destroy(), or nf_ct_delete() via timeout
195 * The atomic_inc_not_zero() check tells: If that fails, we
196 * know that the ct is being destroyed. If it succeeds, we
197 * can be sure the ct cannot disappear underneath.
199 if (unlikely(nf_ct_is_dying(exp->master) ||
200 !atomic_inc_not_zero(&exp->master->ct_general.use)))
203 if (exp->flags & NF_CT_EXPECT_PERMANENT) {
204 refcount_inc(&exp->use);
206 } else if (del_timer(&exp->timeout)) {
207 nf_ct_unlink_expect(exp);
210 /* Undo exp->master refcnt increase, if del_timer() failed */
211 nf_ct_put(exp->master);
216 /* delete all expectations for this conntrack */
217 void nf_ct_remove_expectations(struct nf_conn *ct)
219 struct nf_conn_help *help = nfct_help(ct);
220 struct nf_conntrack_expect *exp;
221 struct hlist_node *next;
223 /* Optimization: most connection never expect any others. */
227 spin_lock_bh(&nf_conntrack_expect_lock);
228 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
229 nf_ct_remove_expect(exp);
231 spin_unlock_bh(&nf_conntrack_expect_lock);
233 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
235 /* Would two expected things clash? */
236 static inline int expect_clash(const struct nf_conntrack_expect *a,
237 const struct nf_conntrack_expect *b)
239 /* Part covered by intersection of masks must be unequal,
240 otherwise they clash */
241 struct nf_conntrack_tuple_mask intersect_mask;
244 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
246 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
247 intersect_mask.src.u3.all[count] =
248 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
251 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
252 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
253 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
256 static inline int expect_matches(const struct nf_conntrack_expect *a,
257 const struct nf_conntrack_expect *b)
259 return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
260 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
261 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
262 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
265 static bool master_matches(const struct nf_conntrack_expect *a,
266 const struct nf_conntrack_expect *b,
269 if (flags & NF_CT_EXP_F_SKIP_MASTER)
272 return a->master == b->master;
275 /* Generally a bad idea to call this: could have matched already. */
276 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
278 spin_lock_bh(&nf_conntrack_expect_lock);
279 nf_ct_remove_expect(exp);
280 spin_unlock_bh(&nf_conntrack_expect_lock);
282 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
284 /* We don't increase the master conntrack refcount for non-fulfilled
285 * conntracks. During the conntrack destruction, the expectations are
286 * always killed before the conntrack itself */
287 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
289 struct nf_conntrack_expect *new;
291 new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
296 refcount_set(&new->use, 1);
299 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
301 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
303 const union nf_inet_addr *saddr,
304 const union nf_inet_addr *daddr,
305 u_int8_t proto, const __be16 *src, const __be16 *dst)
309 if (family == AF_INET)
316 exp->expectfn = NULL;
318 exp->tuple.src.l3num = family;
319 exp->tuple.dst.protonum = proto;
322 memcpy(&exp->tuple.src.u3, saddr, len);
323 if (sizeof(exp->tuple.src.u3) > len)
324 /* address needs to be cleared for nf_ct_tuple_equal */
325 memset((void *)&exp->tuple.src.u3 + len, 0x00,
326 sizeof(exp->tuple.src.u3) - len);
327 memset(&exp->mask.src.u3, 0xFF, len);
328 if (sizeof(exp->mask.src.u3) > len)
329 memset((void *)&exp->mask.src.u3 + len, 0x00,
330 sizeof(exp->mask.src.u3) - len);
332 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
333 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
337 exp->tuple.src.u.all = *src;
338 exp->mask.src.u.all = htons(0xFFFF);
340 exp->tuple.src.u.all = 0;
341 exp->mask.src.u.all = 0;
344 memcpy(&exp->tuple.dst.u3, daddr, len);
345 if (sizeof(exp->tuple.dst.u3) > len)
346 /* address needs to be cleared for nf_ct_tuple_equal */
347 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
348 sizeof(exp->tuple.dst.u3) - len);
350 exp->tuple.dst.u.all = *dst;
352 #if IS_ENABLED(CONFIG_NF_NAT)
353 memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
354 memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
357 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
359 static void nf_ct_expect_free_rcu(struct rcu_head *head)
361 struct nf_conntrack_expect *exp;
363 exp = container_of(head, struct nf_conntrack_expect, rcu);
364 kmem_cache_free(nf_ct_expect_cachep, exp);
367 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
369 if (refcount_dec_and_test(&exp->use))
370 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
372 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
374 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
376 struct nf_conntrack_net *cnet;
377 struct nf_conn_help *master_help = nfct_help(exp->master);
378 struct nf_conntrack_helper *helper;
379 struct net *net = nf_ct_exp_net(exp);
380 unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
382 /* two references : one for hash insert, one for the timer */
383 refcount_add(2, &exp->use);
385 timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
386 helper = rcu_dereference_protected(master_help->helper,
387 lockdep_is_held(&nf_conntrack_expect_lock));
389 exp->timeout.expires = jiffies +
390 helper->expect_policy[exp->class].timeout * HZ;
392 add_timer(&exp->timeout);
394 hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
395 master_help->expecting[exp->class]++;
397 hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
398 cnet = nf_ct_pernet(net);
399 cnet->expect_count++;
401 NF_CT_STAT_INC(net, expect_create);
404 /* Race with expectations being used means we could have none to find; OK. */
405 static void evict_oldest_expect(struct nf_conn *master,
406 struct nf_conntrack_expect *new)
408 struct nf_conn_help *master_help = nfct_help(master);
409 struct nf_conntrack_expect *exp, *last = NULL;
411 hlist_for_each_entry(exp, &master_help->expectations, lnode) {
412 if (exp->class == new->class)
417 nf_ct_remove_expect(last);
420 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
423 const struct nf_conntrack_expect_policy *p;
424 struct nf_conntrack_expect *i;
425 struct nf_conntrack_net *cnet;
426 struct nf_conn *master = expect->master;
427 struct nf_conn_help *master_help = nfct_help(master);
428 struct nf_conntrack_helper *helper;
429 struct net *net = nf_ct_exp_net(expect);
430 struct hlist_node *next;
438 h = nf_ct_expect_dst_hash(net, &expect->tuple);
439 hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
440 if (master_matches(i, expect, flags) &&
441 expect_matches(i, expect)) {
442 if (i->class != expect->class ||
443 i->master != expect->master)
446 if (nf_ct_remove_expect(i))
448 } else if (expect_clash(i, expect)) {
453 /* Will be over limit? */
454 helper = rcu_dereference_protected(master_help->helper,
455 lockdep_is_held(&nf_conntrack_expect_lock));
457 p = &helper->expect_policy[expect->class];
458 if (p->max_expected &&
459 master_help->expecting[expect->class] >= p->max_expected) {
460 evict_oldest_expect(master, expect);
461 if (master_help->expecting[expect->class]
462 >= p->max_expected) {
469 cnet = nf_ct_pernet(net);
470 if (cnet->expect_count >= nf_ct_expect_max) {
471 net_warn_ratelimited("nf_conntrack: expectation table full\n");
478 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
479 u32 portid, int report, unsigned int flags)
483 spin_lock_bh(&nf_conntrack_expect_lock);
484 ret = __nf_ct_expect_check(expect, flags);
488 nf_ct_expect_insert(expect);
490 spin_unlock_bh(&nf_conntrack_expect_lock);
491 nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
494 spin_unlock_bh(&nf_conntrack_expect_lock);
497 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
499 void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
502 struct nf_conntrack_expect *exp;
503 const struct hlist_node *next;
506 spin_lock_bh(&nf_conntrack_expect_lock);
508 for (i = 0; i < nf_ct_expect_hsize; i++) {
509 hlist_for_each_entry_safe(exp, next,
510 &nf_ct_expect_hash[i],
512 if (iter(exp, data) && del_timer(&exp->timeout)) {
513 nf_ct_unlink_expect(exp);
514 nf_ct_expect_put(exp);
519 spin_unlock_bh(&nf_conntrack_expect_lock);
521 EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
523 void nf_ct_expect_iterate_net(struct net *net,
524 bool (*iter)(struct nf_conntrack_expect *e, void *data),
526 u32 portid, int report)
528 struct nf_conntrack_expect *exp;
529 const struct hlist_node *next;
532 spin_lock_bh(&nf_conntrack_expect_lock);
534 for (i = 0; i < nf_ct_expect_hsize; i++) {
535 hlist_for_each_entry_safe(exp, next,
536 &nf_ct_expect_hash[i],
539 if (!net_eq(nf_ct_exp_net(exp), net))
542 if (iter(exp, data) && del_timer(&exp->timeout)) {
543 nf_ct_unlink_expect_report(exp, portid, report);
544 nf_ct_expect_put(exp);
549 spin_unlock_bh(&nf_conntrack_expect_lock);
551 EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
553 #ifdef CONFIG_NF_CONNTRACK_PROCFS
554 struct ct_expect_iter_state {
555 struct seq_net_private p;
559 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
561 struct ct_expect_iter_state *st = seq->private;
562 struct hlist_node *n;
564 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
565 n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
572 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
573 struct hlist_node *head)
575 struct ct_expect_iter_state *st = seq->private;
577 head = rcu_dereference(hlist_next_rcu(head));
578 while (head == NULL) {
579 if (++st->bucket >= nf_ct_expect_hsize)
581 head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
586 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
588 struct hlist_node *head = ct_expect_get_first(seq);
591 while (pos && (head = ct_expect_get_next(seq, head)))
593 return pos ? NULL : head;
596 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
600 return ct_expect_get_idx(seq, *pos);
603 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
606 return ct_expect_get_next(seq, v);
609 static void exp_seq_stop(struct seq_file *seq, void *v)
615 static int exp_seq_show(struct seq_file *s, void *v)
617 struct nf_conntrack_expect *expect;
618 struct nf_conntrack_helper *helper;
619 struct hlist_node *n = v;
622 expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
624 if (expect->timeout.function)
625 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
626 ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
629 seq_printf(s, "l3proto = %u proto=%u ",
630 expect->tuple.src.l3num,
631 expect->tuple.dst.protonum);
632 print_tuple(s, &expect->tuple,
633 nf_ct_l4proto_find(expect->tuple.dst.protonum));
635 if (expect->flags & NF_CT_EXPECT_PERMANENT) {
636 seq_puts(s, "PERMANENT");
639 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
640 seq_printf(s, "%sINACTIVE", delim);
643 if (expect->flags & NF_CT_EXPECT_USERSPACE)
644 seq_printf(s, "%sUSERSPACE", delim);
646 helper = rcu_dereference(nfct_help(expect->master)->helper);
648 seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
649 if (helper->expect_policy[expect->class].name[0])
651 helper->expect_policy[expect->class].name);
659 static const struct seq_operations exp_seq_ops = {
660 .start = exp_seq_start,
661 .next = exp_seq_next,
662 .stop = exp_seq_stop,
665 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Create the per-netns /proc/net/nf_conntrack_expect entry, owned by
 * the netns root user so unprivileged containers can read it.
 * Returns 0 on success (always 0 when procfs support is compiled out).
 */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;
	kuid_t root_uid;
	kgid_t root_gid;

	proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
			       &exp_seq_ops, sizeof(struct ct_expect_iter_state));
	if (!proc)
		return -ENOMEM;

	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}
/* Remove the per-netns proc entry created by exp_proc_init(). */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}
694 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
/* Per-netns init: currently only the proc interface. */
int nf_conntrack_expect_pernet_init(struct net *net)
{
	return exp_proc_init(net);
}
/* Per-netns teardown counterpart of nf_conntrack_expect_pernet_init(). */
void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
}
706 int nf_conntrack_expect_init(void)
708 if (!nf_ct_expect_hsize) {
709 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
710 if (!nf_ct_expect_hsize)
711 nf_ct_expect_hsize = 1;
713 nf_ct_expect_max = nf_ct_expect_hsize * 4;
714 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
715 sizeof(struct nf_conntrack_expect),
717 if (!nf_ct_expect_cachep)
720 nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
721 if (!nf_ct_expect_hash) {
722 kmem_cache_destroy(nf_ct_expect_cachep);
729 void nf_conntrack_expect_fini(void)
731 rcu_barrier(); /* Wait for call_rcu() before destroy */
732 kmem_cache_destroy(nf_ct_expect_cachep);
733 kvfree(nf_ct_expect_hash);