1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Expectation handling for nf_conntrack. */
4 /* (C) 1999-2001 Paul `Rusty' Russell
5 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
6 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
7 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/skbuff.h>
13 #include <linux/proc_fs.h>
14 #include <linux/seq_file.h>
15 #include <linux/stddef.h>
16 #include <linux/slab.h>
17 #include <linux/err.h>
18 #include <linux/percpu.h>
19 #include <linux/kernel.h>
20 #include <linux/jhash.h>
21 #include <linux/moduleparam.h>
22 #include <linux/export.h>
23 #include <net/net_namespace.h>
24 #include <net/netns/hash.h>
26 #include <net/netfilter/nf_conntrack.h>
27 #include <net/netfilter/nf_conntrack_core.h>
28 #include <net/netfilter/nf_conntrack_ecache.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_l4proto.h>
32 #include <net/netfilter/nf_conntrack_tuple.h>
33 #include <net/netfilter/nf_conntrack_zones.h>
35 unsigned int nf_ct_expect_hsize __read_mostly;
36 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
38 struct hlist_head *nf_ct_expect_hash __read_mostly;
39 EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
41 unsigned int nf_ct_expect_max __read_mostly;
43 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
44 static unsigned int nf_ct_expect_hashrnd __read_mostly;
46 extern unsigned int nf_conntrack_net_id;
48 /* nf_conntrack_expect helper functions */
49 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
50 u32 portid, int report)
52 struct nf_conn_help *master_help = nfct_help(exp->master);
53 struct net *net = nf_ct_exp_net(exp);
54 struct nf_conntrack_net *cnet;
56 WARN_ON(!master_help);
57 WARN_ON(timer_pending(&exp->timeout));
59 hlist_del_rcu(&exp->hnode);
61 cnet = net_generic(net, nf_conntrack_net_id);
64 hlist_del_rcu(&exp->lnode);
65 master_help->expecting[exp->class]--;
67 nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
68 nf_ct_expect_put(exp);
70 NF_CT_STAT_INC(net, expect_delete);
72 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
74 static void nf_ct_expectation_timed_out(struct timer_list *t)
76 struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);
78 spin_lock_bh(&nf_conntrack_expect_lock);
79 nf_ct_unlink_expect(exp);
80 spin_unlock_bh(&nf_conntrack_expect_lock);
81 nf_ct_expect_put(exp);
84 static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
86 unsigned int hash, seed;
88 get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
90 seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
92 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
93 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
94 (__force __u16)tuple->dst.u.all) ^ seed);
96 return reciprocal_scale(hash, nf_ct_expect_hsize);
100 nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
101 const struct nf_conntrack_expect *i,
102 const struct nf_conntrack_zone *zone,
103 const struct net *net)
105 return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
106 net_eq(net, nf_ct_net(i->master)) &&
107 nf_ct_zone_equal_any(i->master, zone);
110 bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
112 if (del_timer(&exp->timeout)) {
113 nf_ct_unlink_expect(exp);
114 nf_ct_expect_put(exp);
119 EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
121 struct nf_conntrack_expect *
122 __nf_ct_expect_find(struct net *net,
123 const struct nf_conntrack_zone *zone,
124 const struct nf_conntrack_tuple *tuple)
126 struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
127 struct nf_conntrack_expect *i;
130 if (!cnet->expect_count)
133 h = nf_ct_expect_dst_hash(net, tuple);
134 hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
135 if (nf_ct_exp_equal(tuple, i, zone, net))
140 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
142 /* Just find an expectation corresponding to a tuple. */
143 struct nf_conntrack_expect *
144 nf_ct_expect_find_get(struct net *net,
145 const struct nf_conntrack_zone *zone,
146 const struct nf_conntrack_tuple *tuple)
148 struct nf_conntrack_expect *i;
151 i = __nf_ct_expect_find(net, zone, tuple);
152 if (i && !refcount_inc_not_zero(&i->use))
158 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
160 /* If an expectation for this connection is found, it gets deleted from
161 * the global list and is then returned. */
162 struct nf_conntrack_expect *
163 nf_ct_find_expectation(struct net *net,
164 const struct nf_conntrack_zone *zone,
165 const struct nf_conntrack_tuple *tuple)
167 struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
168 struct nf_conntrack_expect *i, *exp = NULL;
171 if (!cnet->expect_count)
174 h = nf_ct_expect_dst_hash(net, tuple);
175 hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
176 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
177 nf_ct_exp_equal(tuple, i, zone, net)) {
185 /* If master is not in hash table yet (ie. packet hasn't left
186 this machine yet), how can other end know about expected?
187 Hence these are not the droids you are looking for (if
188 master ct never got confirmed, we'd hold a reference to it
189 and weird things would happen to future packets). */
190 if (!nf_ct_is_confirmed(exp->master))
193 /* Avoid race with other CPUs, that for exp->master ct, is
194 * about to invoke ->destroy(), or nf_ct_delete() via timeout
197 * The atomic_inc_not_zero() check tells: If that fails, we
198 * know that the ct is being destroyed. If it succeeds, we
199 * can be sure the ct cannot disappear underneath.
201 if (unlikely(nf_ct_is_dying(exp->master) ||
202 !atomic_inc_not_zero(&exp->master->ct_general.use)))
205 if (exp->flags & NF_CT_EXPECT_PERMANENT) {
206 refcount_inc(&exp->use);
208 } else if (del_timer(&exp->timeout)) {
209 nf_ct_unlink_expect(exp);
212 /* Undo exp->master refcnt increase, if del_timer() failed */
213 nf_ct_put(exp->master);
218 /* delete all expectations for this conntrack */
219 void nf_ct_remove_expectations(struct nf_conn *ct)
221 struct nf_conn_help *help = nfct_help(ct);
222 struct nf_conntrack_expect *exp;
223 struct hlist_node *next;
225 /* Optimization: most connections never expect any others. */
229 spin_lock_bh(&nf_conntrack_expect_lock);
230 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
231 nf_ct_remove_expect(exp);
233 spin_unlock_bh(&nf_conntrack_expect_lock);
235 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
237 /* Would two expected things clash? */
238 static inline int expect_clash(const struct nf_conntrack_expect *a,
239 const struct nf_conntrack_expect *b)
241 /* Part covered by intersection of masks must be unequal,
242 otherwise they clash */
243 struct nf_conntrack_tuple_mask intersect_mask;
246 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
248 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
249 intersect_mask.src.u3.all[count] =
250 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
253 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
254 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
255 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
258 static inline int expect_matches(const struct nf_conntrack_expect *a,
259 const struct nf_conntrack_expect *b)
261 return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
262 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
263 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
264 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
267 static bool master_matches(const struct nf_conntrack_expect *a,
268 const struct nf_conntrack_expect *b,
271 if (flags & NF_CT_EXP_F_SKIP_MASTER)
274 return a->master == b->master;
277 /* Generally a bad idea to call this: could have matched already. */
278 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
280 spin_lock_bh(&nf_conntrack_expect_lock);
281 nf_ct_remove_expect(exp);
282 spin_unlock_bh(&nf_conntrack_expect_lock);
284 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
286 /* We don't increase the master conntrack refcount for non-fulfilled
287 * conntracks. During the conntrack destruction, the expectations are
288 * always killed before the conntrack itself */
289 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
291 struct nf_conntrack_expect *new;
293 new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
298 refcount_set(&new->use, 1);
301 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
303 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
305 const union nf_inet_addr *saddr,
306 const union nf_inet_addr *daddr,
307 u_int8_t proto, const __be16 *src, const __be16 *dst)
311 if (family == AF_INET)
318 exp->expectfn = NULL;
320 exp->tuple.src.l3num = family;
321 exp->tuple.dst.protonum = proto;
324 memcpy(&exp->tuple.src.u3, saddr, len);
325 if (sizeof(exp->tuple.src.u3) > len)
326 /* address needs to be cleared for nf_ct_tuple_equal */
327 memset((void *)&exp->tuple.src.u3 + len, 0x00,
328 sizeof(exp->tuple.src.u3) - len);
329 memset(&exp->mask.src.u3, 0xFF, len);
330 if (sizeof(exp->mask.src.u3) > len)
331 memset((void *)&exp->mask.src.u3 + len, 0x00,
332 sizeof(exp->mask.src.u3) - len);
334 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
335 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
339 exp->tuple.src.u.all = *src;
340 exp->mask.src.u.all = htons(0xFFFF);
342 exp->tuple.src.u.all = 0;
343 exp->mask.src.u.all = 0;
346 memcpy(&exp->tuple.dst.u3, daddr, len);
347 if (sizeof(exp->tuple.dst.u3) > len)
348 /* address needs to be cleared for nf_ct_tuple_equal */
349 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
350 sizeof(exp->tuple.dst.u3) - len);
352 exp->tuple.dst.u.all = *dst;
354 #if IS_ENABLED(CONFIG_NF_NAT)
355 memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
356 memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
359 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
361 static void nf_ct_expect_free_rcu(struct rcu_head *head)
363 struct nf_conntrack_expect *exp;
365 exp = container_of(head, struct nf_conntrack_expect, rcu);
366 kmem_cache_free(nf_ct_expect_cachep, exp);
369 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
371 if (refcount_dec_and_test(&exp->use))
372 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
374 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
376 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
378 struct nf_conntrack_net *cnet;
379 struct nf_conn_help *master_help = nfct_help(exp->master);
380 struct nf_conntrack_helper *helper;
381 struct net *net = nf_ct_exp_net(exp);
382 unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
384 /* two references : one for hash insert, one for the timer */
385 refcount_add(2, &exp->use);
387 timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
388 helper = rcu_dereference_protected(master_help->helper,
389 lockdep_is_held(&nf_conntrack_expect_lock));
391 exp->timeout.expires = jiffies +
392 helper->expect_policy[exp->class].timeout * HZ;
394 add_timer(&exp->timeout);
396 hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
397 master_help->expecting[exp->class]++;
399 hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
400 cnet = net_generic(net, nf_conntrack_net_id);
401 cnet->expect_count++;
403 NF_CT_STAT_INC(net, expect_create);
406 /* Race with expectations being used means we could have none to find; OK. */
407 static void evict_oldest_expect(struct nf_conn *master,
408 struct nf_conntrack_expect *new)
410 struct nf_conn_help *master_help = nfct_help(master);
411 struct nf_conntrack_expect *exp, *last = NULL;
413 hlist_for_each_entry(exp, &master_help->expectations, lnode) {
414 if (exp->class == new->class)
419 nf_ct_remove_expect(last);
422 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
425 const struct nf_conntrack_expect_policy *p;
426 struct nf_conntrack_expect *i;
427 struct nf_conntrack_net *cnet;
428 struct nf_conn *master = expect->master;
429 struct nf_conn_help *master_help = nfct_help(master);
430 struct nf_conntrack_helper *helper;
431 struct net *net = nf_ct_exp_net(expect);
432 struct hlist_node *next;
440 h = nf_ct_expect_dst_hash(net, &expect->tuple);
441 hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
442 if (master_matches(i, expect, flags) &&
443 expect_matches(i, expect)) {
444 if (i->class != expect->class ||
445 i->master != expect->master)
448 if (nf_ct_remove_expect(i))
450 } else if (expect_clash(i, expect)) {
455 /* Will be over limit? */
456 helper = rcu_dereference_protected(master_help->helper,
457 lockdep_is_held(&nf_conntrack_expect_lock));
459 p = &helper->expect_policy[expect->class];
460 if (p->max_expected &&
461 master_help->expecting[expect->class] >= p->max_expected) {
462 evict_oldest_expect(master, expect);
463 if (master_help->expecting[expect->class]
464 >= p->max_expected) {
471 cnet = net_generic(net, nf_conntrack_net_id);
472 if (cnet->expect_count >= nf_ct_expect_max) {
473 net_warn_ratelimited("nf_conntrack: expectation table full\n");
480 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
481 u32 portid, int report, unsigned int flags)
485 spin_lock_bh(&nf_conntrack_expect_lock);
486 ret = __nf_ct_expect_check(expect, flags);
490 nf_ct_expect_insert(expect);
492 spin_unlock_bh(&nf_conntrack_expect_lock);
493 nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
496 spin_unlock_bh(&nf_conntrack_expect_lock);
499 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
501 void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
504 struct nf_conntrack_expect *exp;
505 const struct hlist_node *next;
508 spin_lock_bh(&nf_conntrack_expect_lock);
510 for (i = 0; i < nf_ct_expect_hsize; i++) {
511 hlist_for_each_entry_safe(exp, next,
512 &nf_ct_expect_hash[i],
514 if (iter(exp, data) && del_timer(&exp->timeout)) {
515 nf_ct_unlink_expect(exp);
516 nf_ct_expect_put(exp);
521 spin_unlock_bh(&nf_conntrack_expect_lock);
523 EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
525 void nf_ct_expect_iterate_net(struct net *net,
526 bool (*iter)(struct nf_conntrack_expect *e, void *data),
528 u32 portid, int report)
530 struct nf_conntrack_expect *exp;
531 const struct hlist_node *next;
534 spin_lock_bh(&nf_conntrack_expect_lock);
536 for (i = 0; i < nf_ct_expect_hsize; i++) {
537 hlist_for_each_entry_safe(exp, next,
538 &nf_ct_expect_hash[i],
541 if (!net_eq(nf_ct_exp_net(exp), net))
544 if (iter(exp, data) && del_timer(&exp->timeout)) {
545 nf_ct_unlink_expect_report(exp, portid, report);
546 nf_ct_expect_put(exp);
551 spin_unlock_bh(&nf_conntrack_expect_lock);
553 EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
555 #ifdef CONFIG_NF_CONNTRACK_PROCFS
556 struct ct_expect_iter_state {
557 struct seq_net_private p;
561 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
563 struct ct_expect_iter_state *st = seq->private;
564 struct hlist_node *n;
566 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
567 n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
574 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
575 struct hlist_node *head)
577 struct ct_expect_iter_state *st = seq->private;
579 head = rcu_dereference(hlist_next_rcu(head));
580 while (head == NULL) {
581 if (++st->bucket >= nf_ct_expect_hsize)
583 head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
588 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
590 struct hlist_node *head = ct_expect_get_first(seq);
593 while (pos && (head = ct_expect_get_next(seq, head)))
595 return pos ? NULL : head;
598 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
602 return ct_expect_get_idx(seq, *pos);
605 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
608 return ct_expect_get_next(seq, v);
611 static void exp_seq_stop(struct seq_file *seq, void *v)
617 static int exp_seq_show(struct seq_file *s, void *v)
619 struct nf_conntrack_expect *expect;
620 struct nf_conntrack_helper *helper;
621 struct hlist_node *n = v;
624 expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
626 if (expect->timeout.function)
627 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
628 ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
631 seq_printf(s, "l3proto = %u proto=%u ",
632 expect->tuple.src.l3num,
633 expect->tuple.dst.protonum);
634 print_tuple(s, &expect->tuple,
635 nf_ct_l4proto_find(expect->tuple.dst.protonum));
637 if (expect->flags & NF_CT_EXPECT_PERMANENT) {
638 seq_puts(s, "PERMANENT");
641 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
642 seq_printf(s, "%sINACTIVE", delim);
645 if (expect->flags & NF_CT_EXPECT_USERSPACE)
646 seq_printf(s, "%sUSERSPACE", delim);
648 helper = rcu_dereference(nfct_help(expect->master)->helper);
650 seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
651 if (helper->expect_policy[expect->class].name[0])
653 helper->expect_policy[expect->class].name);
661 static const struct seq_operations exp_seq_ops = {
662 .start = exp_seq_start,
663 .next = exp_seq_next,
664 .stop = exp_seq_stop,
667 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
669 static int exp_proc_init(struct net *net)
671 #ifdef CONFIG_NF_CONNTRACK_PROCFS
672 struct proc_dir_entry *proc;
676 proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
677 &exp_seq_ops, sizeof(struct ct_expect_iter_state));
681 root_uid = make_kuid(net->user_ns, 0);
682 root_gid = make_kgid(net->user_ns, 0);
683 if (uid_valid(root_uid) && gid_valid(root_gid))
684 proc_set_user(proc, root_uid, root_gid);
685 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
689 static void exp_proc_remove(struct net *net)
691 #ifdef CONFIG_NF_CONNTRACK_PROCFS
692 remove_proc_entry("nf_conntrack_expect", net->proc_net);
693 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
696 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
698 int nf_conntrack_expect_pernet_init(struct net *net)
700 return exp_proc_init(net);
703 void nf_conntrack_expect_pernet_fini(struct net *net)
705 exp_proc_remove(net);
708 int nf_conntrack_expect_init(void)
710 if (!nf_ct_expect_hsize) {
711 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
712 if (!nf_ct_expect_hsize)
713 nf_ct_expect_hsize = 1;
715 nf_ct_expect_max = nf_ct_expect_hsize * 4;
716 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
717 sizeof(struct nf_conntrack_expect),
719 if (!nf_ct_expect_cachep)
722 nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
723 if (!nf_ct_expect_hash) {
724 kmem_cache_destroy(nf_ct_expect_cachep);
731 void nf_conntrack_expect_fini(void)
733 rcu_barrier(); /* Wait for call_rcu() before destroy */
734 kmem_cache_destroy(nf_ct_expect_cachep);
735 kvfree(nf_ct_expect_hash);