// SPDX-License-Identifier: GPL-2.0-only
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/netns/hash.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
static unsigned int nf_ct_expect_hashrnd __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 portid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	WARN_ON(!master_help);
	WARN_ON(timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del_rcu(&exp->lnode);
	master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
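
/* Descriptive note (not in the original file): unlinking must happen with
 * nf_conntrack_expect_lock held and the timeout timer already stopped; the
 * WARN_ON()s above document that contract for all callers, including the
 * nf_ct_unlink_expect() shorthand from the header.
 */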

static void nf_ct_expectation_timed_out(struct timer_list *t)
{
	struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);

	spin_lock_bh(&nf_conntrack_expect_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_expect_lock);
	nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash, seed;

	get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));

	seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ seed);

	return reciprocal_scale(hash, nf_ct_expect_hsize);
}
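
/* Descriptive note (not in the original file): only the destination half of
 * the tuple (address, port, protocol) feeds the hash. An expectation's source
 * half may be wildcarded by its mask, so the destination is the only part
 * guaranteed to be fully specified at lookup time.
 */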

static bool
nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_expect *i,
		const struct nf_conntrack_zone *zone,
		const struct net *net)
{
	return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
	       net_eq(net, nf_ct_net(i->master)) &&
	       nf_ct_zone_equal_any(i->master, zone);
}

bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
{
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
		return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
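
/* Descriptive note (not in the original file): del_timer() doubles as the
 * ownership check here. Whoever stops the pending timer first is the one that
 * unlinks the expectation and drops its reference, so concurrent removers and
 * the timeout handler cannot free the same entry twice.
 */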

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net,
		    const struct nf_conntrack_zone *zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(net, tuple);
	hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
		if (nf_ct_exp_equal(tuple, i, zone, net))
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net,
		      const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, zone, tuple);
	if (i && !refcount_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
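
/* Descriptive note (not in the original file): __nf_ct_expect_find() returns
 * an unreferenced entry and is only safe under rcu_read_lock();
 * nf_ct_expect_find_get() wraps it and takes a reference, so its result may
 * outlive the RCU section (release with nf_ct_expect_put()).
 */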

/* If an expectation for this connection is found, it is deleted from
 * the global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
		       const struct nf_conntrack_zone *zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(net, tuple);
	hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_exp_equal(tuple, i, zone, net)) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	/* Avoid race with other CPUs, that for exp->master ct, is
	 * about to invoke ->destroy(), or nf_ct_delete() via timeout
	 * or early_drop().
	 *
	 * The atomic_inc_not_zero() check tells: If that fails, we
	 * know that the ct is being destroyed. If it succeeds, we
	 * can be sure the ct cannot disappear underneath.
	 */
	if (unlikely(nf_ct_is_dying(exp->master) ||
		     !atomic_inc_not_zero(&exp->master->ct_general.use)))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		refcount_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}
	/* Undo exp->master refcnt increase, if del_timer() failed */
	nf_ct_put(exp->master);

	return NULL;
}
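
/* Descriptive note (not in the original file): on success the caller ends up
 * holding two references: one on the returned expectation (freshly taken for
 * PERMANENT entries, inherited from the stopped timer otherwise) and one on
 * exp->master taken by atomic_inc_not_zero() above.
 */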

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	spin_lock_bh(&nf_conntrack_expect_lock);
	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
		nf_ct_remove_expect(exp);
	}
	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
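
/* Illustrative reading (not from the original file): two expectations in the
 * same netns and zone with identical, fully-specified destinations clash when
 * their tuples agree on every source field covered by *both* masks; the more
 * either side wildcards its source, the smaller the intersection mask and the
 * easier the clash.
 */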

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}

static bool master_matches(const struct nf_conntrack_expect *a,
			   const struct nf_conntrack_expect *b,
			   unsigned int flags)
{
	if (flags & NF_CT_EXP_F_SKIP_MASTER)
		return true;

	return a->master == b->master;
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_expect_lock);
	nf_ct_remove_expect(exp);
	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	refcount_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;

#if IS_ENABLED(CONFIG_NF_NAT)
	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
#endif
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
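
/* A minimal usage sketch (modeled on helpers such as the FTP one; not part of
 * the original file). A conntrack helper typically does:
 *
 *	exp = nf_ct_expect_alloc(ct);
 *	if (!exp)
 *		return NF_DROP;
 *	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *			  &src_addr, &dst_addr, IPPROTO_TCP, NULL, &port);
 *	ret = nf_ct_expect_related(exp, 0);
 *	nf_ct_expect_put(exp);
 *
 * i.e. the helper always drops its own reference; once registered, the
 * expectation table holds its own references (see nf_ct_expect_insert()).
 */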

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (refcount_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);

	/* two references : one for hash insert, one for the timer */
	refcount_add(2, &exp->use);

	timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_expect_lock));
	if (helper) {
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	add_timer(&exp->timeout);

	hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
	net->ct.expect_count++;

	NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;

	hlist_for_each_entry(exp, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last)
		nf_ct_remove_expect(last);
}
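
/* Descriptive note (not in the original file): nf_ct_expect_insert() adds at
 * the list head, so walking to the last entry of the matching class finds the
 * oldest expectation of that class.
 */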

static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
				       unsigned int flags)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *next;
	unsigned int h;
	int ret = 0;

	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(net, &expect->tuple);
	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
		if (master_matches(i, expect, flags) &&
		    expect_matches(i, expect)) {
			if (i->class != expect->class ||
			    i->master != expect->master)
				return -EALREADY;

			if (nf_ct_remove_expect(i))
				break;
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will be over limit? */
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_expect_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
						>= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}
	if (net->ct.expect_count >= nf_ct_expect_max) {
		net_warn_ratelimited("nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}
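
/* Descriptive note (not in the original file): return values are 0 when the
 * expectation can be inserted, -ESHUTDOWN when the master has no helper
 * extension, -EALREADY for an equivalent expectation with a different class
 * or master, -EBUSY on a clash, and -EMFILE when a per-helper or global limit
 * would be exceeded.
 */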

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 portid, int report, unsigned int flags)
{
	int ret;

	spin_lock_bh(&nf_conntrack_expect_lock);
	ret = __nf_ct_expect_check(expect, flags);
	if (ret < 0)
		goto out;

	nf_ct_expect_insert(expect);

	spin_unlock_bh(&nf_conntrack_expect_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
	return 0;
out:
	spin_unlock_bh(&nf_conntrack_expect_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
				  void *data)
{
	struct nf_conntrack_expect *exp;
	const struct hlist_node *next;
	unsigned int i;

	spin_lock_bh(&nf_conntrack_expect_lock);

	for (i = 0; i < nf_ct_expect_hsize; i++) {
		hlist_for_each_entry_safe(exp, next,
					  &nf_ct_expect_hash[i],
					  hnode) {
			if (iter(exp, data) && del_timer(&exp->timeout)) {
				nf_ct_unlink_expect(exp);
				nf_ct_expect_put(exp);
			}
		}
	}

	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);

void nf_ct_expect_iterate_net(struct net *net,
			      bool (*iter)(struct nf_conntrack_expect *e, void *data),
			      void *data,
			      u32 portid, int report)
{
	struct nf_conntrack_expect *exp;
	const struct hlist_node *next;
	unsigned int i;

	spin_lock_bh(&nf_conntrack_expect_lock);

	for (i = 0; i < nf_ct_expect_hsize; i++) {
		hlist_for_each_entry_safe(exp, next,
					  &nf_ct_expect_hash[i],
					  hnode) {
			if (!net_eq(nf_ct_exp_net(exp), net))
				continue;

			if (iter(exp, data) && del_timer(&exp->timeout)) {
				nf_ct_unlink_expect_report(exp, portid, report);
				nf_ct_expect_put(exp);
			}
		}
	}

	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
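
/* Descriptive note (not in the original file): the two iterators differ in
 * scope and reporting. _iterate_destroy() walks every namespace and unlinks
 * silently, while _iterate_net() restricts the walk to one netns and emits
 * IPEXP_DESTROY events via nf_ct_unlink_expect_report().
 */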

#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(hlist_next_rcu(head));
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_puts(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    nf_ct_l4proto_find(expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_puts(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name[0])
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	seq_putc(s, '\n');

	return 0;
}
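
/* Roughly what one /proc/net/nf_conntrack_expect line looks like (values are
 * illustrative, not from the original source):
 *
 *	297 l3proto = 2 proto=6 src=0.0.0.0 dst=192.168.1.10 sport=0 dport=35121 ftp
 *
 * i.e. remaining timeout, layer 3/4 protocols, the (possibly wildcarded)
 * tuple, flags if any, and the helper name.
 */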

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;
	kgid_t root_gid;
	kuid_t root_uid;

	proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
			&exp_seq_ops, sizeof(struct ct_expect_iter_state));
	if (!proc)
		return -ENOMEM;

	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

int nf_conntrack_expect_pernet_init(struct net *net)
{
	net->ct.expect_count = 0;
	return exp_proc_init(net);
}

void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
}

int nf_conntrack_expect_init(void)
{
	if (!nf_ct_expect_hsize) {
		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
		if (!nf_ct_expect_hsize)
			nf_ct_expect_hsize = 1;
	}
	nf_ct_expect_max = nf_ct_expect_hsize * 4;
	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
				sizeof(struct nf_conntrack_expect),
				0, 0, NULL);
	if (!nf_ct_expect_cachep)
		return -ENOMEM;

	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
	if (!nf_ct_expect_hash) {
		kmem_cache_destroy(nf_ct_expect_cachep);
		return -ENOMEM;
	}

	return 0;
}
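
/* Sizing example (illustrative, not from the original source): with an
 * nf_conntrack_htable_size of 65536 buckets, the defaults come out to
 * nf_ct_expect_hsize = 256 and nf_ct_expect_max = 1024 pending expectations,
 * tunable via the expect_hashsize module parameter above.
 */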

void nf_conntrack_expect_fini(void)
{
	rcu_barrier(); /* Wait for call_rcu() before destroy */
	kmem_cache_destroy(nf_ct_expect_cachep);
	kvfree(nf_ct_expect_hash);
}