#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;

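/*
 * One unit of offload work, queued on one of the workqueues above.
 * @cmd selects whether @flow is added to, removed from or polled for
 * statistics on the hardware flow block.
 */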
struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	int			priority;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
	struct work_struct	work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

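/*
 * Populate tunnel match fields (key id, outer addresses) from the
 * lightweight tunnel state attached to the other direction's route,
 * so encapsulated packets of this flow can be matched before decap.
 */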
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
}

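/*
 * Build the flow dissector key/mask pair for one tuple: ingress device,
 * optional VLAN/CVLAN tags, layer 3 addresses and layer 4 ports.
 */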
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;
	bool vlan_encap = false;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
		key->meta.ingress_ifindex = tuple->tc.iifidx;
	else
		key->meta.ingress_ifindex = tuple->iifidx;

	mask->meta.ingress_ifindex = 0xffffffff;

	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
					tuple->encap[0].id,
					tuple->encap[0].proto);
		vlan_encap = true;
	}

	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
		if (vlan_encap) {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
					  cvlan);
			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		} else {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
					  vlan);
			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		}
	}

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

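/* Fill in one mangle action: rewrite 32 bits at @offset within header @htype. */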
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

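/*
 * Ethernet header rewrite: the new source MAC comes either from the cached
 * output device (XMIT_DIRECT) or from the device behind the other
 * direction's ingress ifindex (XMIT_NEIGH).
 */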
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;

		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	dev_put(dev);

	return 0;
}

static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;

		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);

		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}

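/*
 * The helpers below translate the conntrack NAT mappings into address
 * and port mangle actions on the IPv4/IPv6 and TCP/UDP headers.
 */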
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i, j;

	for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i, &addr[j], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

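/* Redirect the packet to the device it leaves through, taken from the tuple. */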
static void flow_offload_redirect(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple, *other_tuple;
	struct flow_action_entry *entry;
	struct net_device *dev;
	int ifindex;

	this_tuple = &flow->tuplehash[dir].tuple;
	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		this_tuple = &flow->tuplehash[dir].tuple;
		ifindex = this_tuple->out.hw_ifidx;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		ifindex = other_tuple->iifidx;
		break;
	default:
		return;
	}

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		return;

	entry = flow_action_entry_next(flow_rule);
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = dev;
}

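/*
 * Add tunnel encap/decap actions when the cached route carries lightweight
 * tunnel metadata and the flow is transmitted via a neighbour.
 */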
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	this_tuple = &flow->tuplehash[dir].tuple;
	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = this_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = other_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

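/*
 * Actions shared by the IPv4 and IPv6 rules: tunnel decap/encap,
 * Ethernet header rewrite and VLAN/PPPoE pop and push.
 */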
static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}

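/*
 * Translate one direction of a flow into a complete flow_rule: common
 * route actions, then NAT mangling, checksum fixup (IPv4 only) and the
 * final redirect.
 */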
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX	16

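/*
 * Allocate and fill a flow_rule for one direction: build the match from
 * the tuple and let the flowtable type callback emit the actions.
 */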
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	const struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

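/*
 * Run one classifier command (replace, destroy or stats) against every
 * callback registered on the flowtable's flow block; returns the number
 * of callbacks that accepted it.
 */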
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		goto out;

	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

out:
	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + flow_offload_get_timeout(offload->flow));

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

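/* Workqueue callback: dispatch the queued command and drop the work item. */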
static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	if (offload->cmd == FLOW_CLS_REPLACE)
		queue_work(nf_flow_offload_add_wq, &offload->work);
	else if (offload->cmd == FLOW_CLS_DESTROY)
		queue_work(nf_flow_offload_del_wq, &offload->work);
	else
		queue_work(nf_flow_offload_stats_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

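/*
 * Entry points called by the flowtable core: schedule hardware add,
 * removal and stats refresh work for a software flow entry.
 */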
void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_add_wq);
		flush_workqueue(nf_flow_offload_del_wq);
		flush_workqueue(nf_flow_offload_stats_wq);
	}
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net		= net;
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	bo->cb_list_head = &flowtable->flow_block.cb_list;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

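/*
 * Bind or unbind the flowtable to a device's flow block, preferring the
 * device's own ndo_setup_tc() and falling back to indirect block offload.
 */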
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

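/* Module init/exit: create and destroy the three offload workqueues. */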
int nf_flow_table_offload_init(void)
{
	nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_add_wq)
		return -ENOMEM;

	nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_del_wq)
		goto err_del_wq;

	nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
						   WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_stats_wq)
		goto err_stats_wq;

	return 0;

err_stats_wq:
	destroy_workqueue(nf_flow_offload_del_wq);
err_del_wq:
	destroy_workqueue(nf_flow_offload_add_wq);
	return -ENOMEM;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_add_wq);
	destroy_workqueue(nf_flow_offload_del_wq);
	destroy_workqueue(nf_flow_offload_stats_wq);
}