#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

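/* Offload requests are queued to one of three dedicated workqueues, one each
 * for adding, deleting and refreshing (stats) hardware flow entries.
 */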
static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;

struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
	struct work_struct	work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

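/* Set up the tunnel metadata match (key id plus outer IPv4/IPv6 addresses)
 * when the other direction transmits through a lightweight tunnel.
 */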
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
}

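/* Build the flow dissector key/mask for one direction of the flow: ingress
 * interface, optional VLAN/QinQ tags, L3 addresses and L4 protocol/ports.
 */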
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;
	bool vlan_encap = false;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
		key->meta.ingress_ifindex = tuple->tc.iifidx;
	else
		key->meta.ingress_ifindex = tuple->iifidx;

	mask->meta.ingress_ifindex = 0xffffffff;

	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
					tuple->encap[0].id,
					tuple->encap[0].proto);
		vlan_encap = true;
	}

	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
		if (vlan_encap) {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
					  cvlan);
			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		} else {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
					  vlan);
			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		}
	}

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC);

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		key->tp.src = tuple->src_port;
		mask->tp.src = 0xffff;
		key->tp.dst = tuple->dst_port;
		mask->tp.dst = 0xffff;

		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
		break;
	}

	return 0;
}

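/* Append a 32-bit header rewrite (mangle) action at the given offset. */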
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

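/* Rewrite the Ethernet source address with two 32-bit mangle actions. */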
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;
		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	dev_put(dev);

	return 0;
}

static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;
		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);
		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}

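/* IPv4 NAT rewrites: the original direction mangles saddr for SNAT and daddr
 * for DNAT; the reply direction does the opposite.
 */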
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i, j;

	for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i, &addr[j], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

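/* Pick the mangle header type (TCP or UDP) used for the port rewrites below. */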
static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

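/* Append a redirect action towards the egress device of this direction. */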
static void flow_offload_redirect(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple, *other_tuple;
	struct flow_action_entry *entry;
	struct net_device *dev;
	int ifindex;

	this_tuple = &flow->tuplehash[dir].tuple;
	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		this_tuple = &flow->tuplehash[dir].tuple;
		ifindex = this_tuple->out.hw_ifidx;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		ifindex = other_tuple->iifidx;
		break;
	default:
		return;
	}

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		return;

	entry = flow_action_entry_next(flow_rule);
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = dev;
}

static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	this_tuple = &flow->tuplehash[dir].tuple;
	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = this_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = other_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

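/* Actions shared by the IPv4 and IPv6 paths: tunnel decap/encap, Ethernet
 * header rewrite, VLAN pop on ingress and VLAN/PPPoE push towards egress.
 */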
static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}

int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

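/* Maximum number of actions in a single offloaded rule. */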
#define NF_FLOW_RULE_ACTION_MAX	16

static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	const struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

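/* Fill the common part of a classifier offload request handed to drivers. */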
static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

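/* Run one command (replace, destroy or stats) for a single flow direction
 * through all registered block callbacks; returns how many callbacks took it.
 */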
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir,
				     offload->flowtable->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->flowtable->priority,
			      FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

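/* Workqueue handlers: translate a flow entry into flow rules and push the
 * add, delete or stats request down to the hardware block callbacks.
 */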
static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		goto out;

	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

out:
	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->flowtable->priority,
			      FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + flow_offload_get_timeout(offload->flow));

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		nf_ct_acct_add(offload->flow->ct,
			       FLOW_OFFLOAD_DIR_ORIGINAL,
			       stats[0].pkts, stats[0].bytes);
		nf_ct_acct_add(offload->flow->ct,
			       FLOW_OFFLOAD_DIR_REPLY,
			       stats[1].pkts, stats[1].bytes);
	}
}

static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	if (offload->cmd == FLOW_CLS_REPLACE)
		queue_work(nf_flow_offload_add_wq, &offload->work);
	else if (offload->cmd == FLOW_CLS_DESTROY)
		queue_work(nf_flow_offload_del_wq, &offload->work);
	else
		queue_work(nf_flow_offload_stats_wq, &offload->work);
}

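/* Allocate a work item for this flow; NF_FLOW_HW_PENDING ensures that only
 * one offload request per flow is in flight at a time.
 */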
static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_add_wq);
		flush_workqueue(nf_flow_offload_del_wq);
		flush_workqueue(nf_flow_offload_stats_wq);
	}
}

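/* Attach or detach the driver block callbacks on the flowtable. */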
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net		= net;
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	bo->cb_list_head = &flowtable->flow_block.cb_list;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

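/* Module init/exit: create and destroy the offload workqueues. */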
int nf_flow_table_offload_init(void)
{
	nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_add_wq)
		return -ENOMEM;

	nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_del_wq)
		goto err_del_wq;

	nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
						   WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_stats_wq)
		goto err_stats_wq;

	return 0;

err_stats_wq:
	destroy_workqueue(nf_flow_offload_del_wq);
err_del_wq:
	destroy_workqueue(nf_flow_offload_add_wq);
	return -ENOMEM;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_add_wq);
	destroy_workqueue(nf_flow_offload_del_wq);
	destroy_workqueue(nf_flow_offload_stats_wq);
}