net/netfilter/nf_flow_table_core.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);

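/*
 * Copy the tuple for @dir out of the conntrack entry: L3 addresses
 * (IPv4 or IPv6), L3/L4 protocol numbers and, for TCP and UDP, the
 * port pair. Reading the ports through the .tcp member of the union
 * works for UDP too, since both layouts place the port first.
 */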
static void
flow_offload_fill_dir(struct flow_offload *flow,
                      enum flow_offload_tuple_dir dir)
{
        struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
        struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;

        ft->dir = dir;

        switch (ctt->src.l3num) {
        case NFPROTO_IPV4:
                ft->src_v4 = ctt->src.u3.in;
                ft->dst_v4 = ctt->dst.u3.in;
                break;
        case NFPROTO_IPV6:
                ft->src_v6 = ctt->src.u3.in6;
                ft->dst_v6 = ctt->dst.u3.in6;
                break;
        }

        ft->l3proto = ctt->src.l3num;
        ft->l4proto = ctt->dst.protonum;

        switch (ctt->dst.protonum) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                ft->src_port = ctt->src.u.tcp.port;
                ft->dst_port = ctt->dst.u.tcp.port;
                break;
        }
}

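/*
 * Allocate a flow entry for the conntrack entry @ct. A reference on
 * @ct is taken and held for the lifetime of the flow; it is dropped
 * again from flow_offload_free(). Returns NULL if @ct is already
 * dying, its refcount has hit zero, or the allocation fails.
 */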
struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
        struct flow_offload *flow;

        if (unlikely(nf_ct_is_dying(ct) ||
            !refcount_inc_not_zero(&ct->ct_general.use)))
                return NULL;

        flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
        if (!flow)
                goto err_ct_refcnt;

        flow->ct = ct;

        flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
        flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);

        if (ct->status & IPS_SRC_NAT)
                __set_bit(NF_FLOW_SNAT, &flow->flags);
        if (ct->status & IPS_DST_NAT)
                __set_bit(NF_FLOW_DNAT, &flow->flags);

        return flow;

err_ct_refcnt:
        nf_ct_put(ct);

        return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
        const struct rt6_info *rt;

        if (flow_tuple->l3proto == NFPROTO_IPV6) {
                rt = (const struct rt6_info *)flow_tuple->dst_cache;
                return rt6_get_cookie(rt);
        }

        return 0;
}

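/*
 * Record the routing state for one direction in the flow tuple: path
 * MTU, ingress interface and encapsulation headers (copied in reverse
 * so index 0 is the outermost one), plus the transmit method. Direct
 * transmission caches the L2 addresses and egress interfaces; the
 * neighbour and xfrm paths hold a dst reference instead, together
 * with a cookie used to detect stale IPv6 routes.
 */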
static int flow_offload_fill_route(struct flow_offload *flow,
                                   const struct nf_flow_route *route,
                                   enum flow_offload_tuple_dir dir)
{
        struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
        struct dst_entry *dst = route->tuple[dir].dst;
        int i, j = 0;

        switch (flow_tuple->l3proto) {
        case NFPROTO_IPV4:
                flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
                break;
        case NFPROTO_IPV6:
                flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
                break;
        }

        flow_tuple->iifidx = route->tuple[dir].in.ifindex;
        for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
                flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
                flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
                if (route->tuple[dir].in.ingress_vlans & BIT(i))
                        flow_tuple->in_vlan_ingress |= BIT(j);
                j++;
        }
        flow_tuple->encap_num = route->tuple[dir].in.num_encaps;

        switch (route->tuple[dir].xmit_type) {
        case FLOW_OFFLOAD_XMIT_DIRECT:
                memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
                       ETH_ALEN);
                memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
                       ETH_ALEN);
                flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
                flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
                break;
        case FLOW_OFFLOAD_XMIT_XFRM:
        case FLOW_OFFLOAD_XMIT_NEIGH:
                if (!dst_hold_safe(route->tuple[dir].dst))
                        return -1;

                flow_tuple->dst_cache = dst;
                flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
                break;
        default:
                WARN_ON_ONCE(1);
                break;
        }
        flow_tuple->xmit_type = route->tuple[dir].xmit_type;

        return 0;
}

static void nft_flow_dst_release(struct flow_offload *flow,
                                 enum flow_offload_tuple_dir dir)
{
        if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
            flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
                dst_release(flow->tuplehash[dir].tuple.dst_cache);
}

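/*
 * Initialise the routing state for both directions of @flow and mark
 * it as a route-type entry. If filling the reply direction fails, the
 * dst reference taken for the original direction is released again.
 */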
int flow_offload_route_init(struct flow_offload *flow,
                            const struct nf_flow_route *route)
{
        int err;

        err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
        if (err < 0)
                return err;

        err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
        if (err < 0)
                goto err_route_reply;

        flow->type = NF_FLOW_OFFLOAD_ROUTE;

        return 0;

err_route_reply:
        nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);

        return err;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);

static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
        tcp->seen[0].td_maxwin = 0;
        tcp->seen[1].td_maxwin = 0;
}

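/*
 * Restore a sensible conntrack timeout once a flow leaves the offload
 * path: take the protocol's normal timeout for the current state,
 * subtract the offload extension, and only ever lower ct->timeout,
 * never extend it. TCP window tracking is reset as well, since the
 * stack did not see the packets that were offloaded.
 */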
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
        struct net *net = nf_ct_net(ct);
        int l4num = nf_ct_protonum(ct);
        s32 timeout;

        if (l4num == IPPROTO_TCP) {
                struct nf_tcp_net *tn = nf_tcp_pernet(net);

                flow_offload_fixup_tcp(&ct->proto.tcp);

                timeout = tn->timeouts[ct->proto.tcp.state];
                timeout -= tn->offload_timeout;
        } else if (l4num == IPPROTO_UDP) {
                struct nf_udp_net *tn = nf_udp_pernet(net);

                timeout = tn->timeouts[UDP_CT_REPLIED];
                timeout -= tn->offload_timeout;
        } else {
                return;
        }

        if (timeout < 0)
                timeout = 0;

        if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
                WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}

static void flow_offload_route_release(struct flow_offload *flow)
{
        nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
        nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
}

void flow_offload_free(struct flow_offload *flow)
{
        switch (flow->type) {
        case NF_FLOW_OFFLOAD_ROUTE:
                flow_offload_route_release(flow);
                break;
        default:
                break;
        }
        nf_ct_put(flow->ct);
        kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

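/*
 * rhashtable glue: flows are keyed on the flow_offload_tuple fields
 * up to the __hash marker; everything after it (dst state, xmit info)
 * takes no part in hashing or comparison. Each flow is inserted
 * twice, once per direction, so a lookup matches packets travelling
 * either way.
 */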
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple *tuple = data;

        return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple_rhash *tuplehash = data;

        return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
                                 const void *ptr)
{
        const struct flow_offload_tuple *tuple = arg->key;
        const struct flow_offload_tuple_rhash *x = ptr;

        if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
                return 1;

        return 0;
}

static const struct rhashtable_params nf_flow_offload_rhash_params = {
        .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
        .hashfn                 = flow_offload_hash,
        .obj_hashfn             = flow_offload_hash_obj,
        .obj_cmpfn              = flow_offload_hash_cmp,
        .automatic_shrinking    = true,
};

unsigned long flow_offload_get_timeout(struct flow_offload *flow)
{
        unsigned long timeout = NF_FLOW_TIMEOUT;
        struct net *net = nf_ct_net(flow->ct);
        int l4num = nf_ct_protonum(flow->ct);

        if (l4num == IPPROTO_TCP) {
                struct nf_tcp_net *tn = nf_tcp_pernet(net);

                timeout = tn->offload_timeout;
        } else if (l4num == IPPROTO_UDP) {
                struct nf_udp_net *tn = nf_udp_pernet(net);

                timeout = tn->offload_timeout;
        }

        return timeout;
}

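/*
 * Insert @flow into @flow_table, one hash table entry per direction.
 * If the second insertion fails, the first one is rolled back. On
 * success the conntrack timeout is extended for offload and, if the
 * table supports it, hardware offload is requested.
 */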
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
        int err;

        flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);

        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[0].node,
                                     nf_flow_offload_rhash_params);
        if (err < 0)
                return err;

        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[1].node,
                                     nf_flow_offload_rhash_params);
        if (err < 0) {
                rhashtable_remove_fast(&flow_table->rhashtable,
                                       &flow->tuplehash[0].node,
                                       nf_flow_offload_rhash_params);
                return err;
        }

        nf_ct_offload_timeout(flow->ct);

        if (nf_flowtable_hw_offload(flow_table)) {
                __set_bit(NF_FLOW_HW, &flow->flags);
                nf_flow_offload_add(flow_table, flow);
        }

        return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

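/*
 * Called from the packet path to push the flow timeout forward. The
 * timestamp is only written back when it advances by more than HZ
 * jiffies, which avoids dirtying the cache line on every packet, and
 * a refreshed flow is handed to the hardware offload work again when
 * the table offloads to hardware.
 */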
void flow_offload_refresh(struct nf_flowtable *flow_table,
                          struct flow_offload *flow)
{
        u32 timeout;

        timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
        if (timeout - READ_ONCE(flow->timeout) > HZ)
                WRITE_ONCE(flow->timeout, timeout);
        else
                return;

        if (likely(!nf_flowtable_hw_offload(flow_table)))
                return;

        nf_flow_offload_add(flow_table, flow);
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);

static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
        return nf_flow_timeout_delta(flow->timeout) <= 0;
}

static void flow_offload_del(struct nf_flowtable *flow_table,
                             struct flow_offload *flow)
{
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                               nf_flow_offload_rhash_params);
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               nf_flow_offload_rhash_params);
        flow_offload_free(flow);
}

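/*
 * Hand the flow back to classic conntrack: clear IPS_OFFLOAD so
 * conntrack ages the entry normally again, flag the flow so the
 * garbage collector releases it, and fix up the conntrack timeout.
 */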
void flow_offload_teardown(struct flow_offload *flow)
{
        clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
        set_bit(NF_FLOW_TEARDOWN, &flow->flags);
        flow_offload_fixup_ct(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

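/*
 * Look up a flow by tuple. Returns the matching tuplehash entry, or
 * NULL when there is no match, the flow is being torn down, or the
 * backing conntrack entry is dying.
 */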
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
                    struct flow_offload_tuple *tuple)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct flow_offload *flow;
        int dir;

        tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
                                      nf_flow_offload_rhash_params);
        if (!tuplehash)
                return NULL;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
                return NULL;

        if (unlikely(nf_ct_is_dying(flow->ct)))
                return NULL;

        return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

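/*
 * Walk the flowtable and call @iter for every flow. The rhashtable
 * walker may restart on resize (-EAGAIN), so entries can be visited
 * more than once; @iter has to tolerate that. Only original-direction
 * tuples are passed on, giving one callback per flow rather than two.
 */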
static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
                      void (*iter)(struct nf_flowtable *flowtable,
                                   struct flow_offload *flow, void *data),
                      void *data)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
        int err = 0;

        rhashtable_walk_enter(&flow_table->rhashtable, &hti);
        rhashtable_walk_start(&hti);

        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
                        if (PTR_ERR(tuplehash) != -EAGAIN) {
                                err = PTR_ERR(tuplehash);
                                break;
                        }
                        continue;
                }
                if (tuplehash->tuple.dir)
                        continue;

                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

                iter(flow_table, flow, data);
        }
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);

        return err;
}

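/*
 * One garbage collector step: expired flows and flows whose conntrack
 * entry is dying are torn down. A torn-down flow that is in hardware
 * is removed in two phases, deletion is requested first and the entry
 * is freed once the driver reports it dead, while a software flow is
 * freed immediately. Live hardware flows have their stats synced
 * instead.
 */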
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
                                    struct flow_offload *flow, void *data)
{
        if (nf_flow_has_expired(flow) ||
            nf_ct_is_dying(flow->ct))
                flow_offload_teardown(flow);

        if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
                if (test_bit(NF_FLOW_HW, &flow->flags)) {
                        if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
                                nf_flow_offload_del(flow_table, flow);
                        else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
                                flow_offload_del(flow_table, flow);
                } else {
                        flow_offload_del(flow_table, flow);
                }
        } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
                nf_flow_offload_stats(flow_table, flow);
        }
}

static void nf_flow_offload_work_gc(struct work_struct *work)
{
        struct nf_flowtable *flow_table;

        flow_table = container_of(work, struct nf_flowtable, gc_work.work);
        nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
        queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}

static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
                                 __be16 port, __be16 new_port)
{
        struct tcphdr *tcph;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
}

static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
                                 __be16 port, __be16 new_port)
{
        struct udphdr *udph;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace2(&udph->check, skb, port,
                                         new_port, false);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }
}

static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
                             u8 protocol, __be16 port, __be16 new_port)
{
        switch (protocol) {
        case IPPROTO_TCP:
                nf_flow_nat_port_tcp(skb, thoff, port, new_port);
                break;
        case IPPROTO_UDP:
                nf_flow_nat_port_udp(skb, thoff, port, new_port);
                break;
        }
}

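/*
 * Apply source NAT to the ports of an offloaded packet: in the
 * original direction the source port is rewritten to the reply
 * tuple's destination port, and in the reply direction the
 * destination port is mapped back to the original tuple's source
 * port. The L4 checksum is then fixed up via nf_flow_nat_port().
 */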
void nf_flow_snat_port(const struct flow_offload *flow,
                       struct sk_buff *skb, unsigned int thoff,
                       u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
                hdr->source = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
                hdr->dest = new_port;
                break;
        }

        nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
                       unsigned int thoff, u8 protocol,
                       enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
                hdr->dest = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
                hdr->source = new_port;
                break;
        }

        nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

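/*
 * Initialise a flowtable: set up the flow block and its lock and the
 * rhashtable, start the periodic garbage collection (every HZ jiffies
 * on the power-efficient workqueue), and add the table to the global
 * list used by device cleanup.
 */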
int nf_flow_table_init(struct nf_flowtable *flowtable)
{
        int err;

        INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
        flow_block_init(&flowtable->flow_block);
        init_rwsem(&flowtable->flow_block_lock);

        err = rhashtable_init(&flowtable->rhashtable,
                              &nf_flow_offload_rhash_params);
        if (err < 0)
                return err;

        queue_delayed_work(system_power_efficient_wq,
                           &flowtable->gc_work, HZ);

        mutex_lock(&flowtable_lock);
        list_add(&flowtable->list, &flowtables);
        mutex_unlock(&flowtable_lock);

        return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);

static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
                                     struct flow_offload *flow, void *data)
{
        struct net_device *dev = data;

        if (!dev) {
                flow_offload_teardown(flow);
                return;
        }

        if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
            (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
             flow->tuplehash[1].tuple.iifidx == dev->ifindex))
                flow_offload_teardown(flow);
}

void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
                              struct net_device *dev)
{
        nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
        flush_delayed_work(&flowtable->gc_work);
        nf_flow_table_offload_flush(flowtable);
}

void nf_flow_table_cleanup(struct net_device *dev)
{
        struct nf_flowtable *flowtable;

        mutex_lock(&flowtable_lock);
        list_for_each_entry(flowtable, &flowtables, list)
                nf_flow_table_gc_cleanup(flowtable, dev);
        mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

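/*
 * Release a flowtable: unlink it from the global list, stop the
 * garbage collection work, tear down all remaining flows and run
 * extra GC passes so both software and hardware state are freed
 * before the hash table itself is destroyed.
 */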
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
        mutex_lock(&flowtable_lock);
        list_del(&flow_table->list);
        mutex_unlock(&flowtable_lock);

        cancel_delayed_work_sync(&flow_table->gc_work);
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
        nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
        nf_flow_table_offload_flush(flow_table);
        if (nf_flowtable_hw_offload(flow_table))
                nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
        rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

static int __init nf_flow_table_module_init(void)
{
        return nf_flow_table_offload_init();
}

static void __exit nf_flow_table_module_exit(void)
{
        nf_flow_table_offload_exit();
}

module_init(nf_flow_table_module_init);
module_exit(nf_flow_table_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("Netfilter flow table module");