net/netfilter/nf_flow_table_offload.c
1 #include <linux/kernel.h>
2 #include <linux/init.h>
3 #include <linux/module.h>
4 #include <linux/netfilter.h>
5 #include <linux/rhashtable.h>
6 #include <linux/netdevice.h>
7 #include <linux/tc_act/tc_csum.h>
8 #include <net/flow_offload.h>
9 #include <net/netfilter/nf_flow_table.h>
10 #include <net/netfilter/nf_tables.h>
11 #include <net/netfilter/nf_conntrack.h>
12 #include <net/netfilter/nf_conntrack_acct.h>
13 #include <net/netfilter/nf_conntrack_core.h>
14 #include <net/netfilter/nf_conntrack_tuple.h>
15
16 static struct workqueue_struct *nf_flow_offload_add_wq;
17 static struct workqueue_struct *nf_flow_offload_del_wq;
18 static struct workqueue_struct *nf_flow_offload_stats_wq;
19
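/* A single hardware offload request (add, delete or stats) for one flow,
 * queued to one of the offload workqueues below.
 */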
20 struct flow_offload_work {
21         struct list_head        list;
22         enum flow_cls_command   cmd;
23         int                     priority;
24         struct nf_flowtable     *flowtable;
25         struct flow_offload     *flow;
26         struct work_struct      work;
27 };
28
29 #define NF_FLOW_DISSECTOR(__match, __type, __field)     \
30         (__match)->dissector.offset[__type] =           \
31                 offsetof(struct nf_flow_key, __field)
32
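/* Populate the outer tunnel match (ENC_* keys) from the lwtunnel metadata
 * on the opposite direction's route: tunnel key id plus outer IPv4/IPv6
 * addresses, with source and destination swapped because this direction
 * receives the encapsulated packets.
 */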
33 static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
34                                    struct ip_tunnel_info *tun_info)
35 {
36         struct nf_flow_key *mask = &match->mask;
37         struct nf_flow_key *key = &match->key;
38         unsigned int enc_keys;
39
40         if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
41                 return;
42
43         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
44         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
45         key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
46         mask->enc_key_id.keyid = 0xffffffff;
47         enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
48                    BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
49
50         if (ip_tunnel_info_af(tun_info) == AF_INET) {
51                 NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
52                                   enc_ipv4);
53                 key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
54                 key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
55                 if (key->enc_ipv4.src)
56                         mask->enc_ipv4.src = 0xffffffff;
57                 if (key->enc_ipv4.dst)
58                         mask->enc_ipv4.dst = 0xffffffff;
59                 enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
60                 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
61         } else {
62                 memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
63                        sizeof(struct in6_addr));
64                 memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
65                        sizeof(struct in6_addr));
66                 if (memcmp(&key->enc_ipv6.src, &in6addr_any,
67                            sizeof(struct in6_addr)))
68                         memset(&key->enc_ipv6.src, 0xff,
69                                sizeof(struct in6_addr));
70                 if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
71                            sizeof(struct in6_addr)))
72                         memset(&key->enc_ipv6.dst, 0xff,
73                                sizeof(struct in6_addr));
74                 enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
75                 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
76         }
77
78         match->dissector.used_keys |= enc_keys;
79 }
80
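/* Translate a flow tuple into the flow_rule match: ingress ifindex,
 * n_proto/ip_proto, IPv4 or IPv6 addresses and L4 ports. The TCP RST/FIN
 * flags are masked in with a zero key so that teardown packets do not hit
 * the hardware rule and are handled by the software path instead.
 */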
81 static int nf_flow_rule_match(struct nf_flow_match *match,
82                               const struct flow_offload_tuple *tuple,
83                               struct dst_entry *other_dst)
84 {
85         struct nf_flow_key *mask = &match->mask;
86         struct nf_flow_key *key = &match->key;
87         struct ip_tunnel_info *tun_info;
88
89         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
90         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
91         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
92         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
93         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
94         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
95         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
96
97         if (other_dst && other_dst->lwtstate) {
98                 tun_info = lwt_tun_info(other_dst->lwtstate);
99                 nf_flow_rule_lwt_match(match, tun_info);
100         }
101
102         key->meta.ingress_ifindex = tuple->iifidx;
103         mask->meta.ingress_ifindex = 0xffffffff;
104
105         switch (tuple->l3proto) {
106         case AF_INET:
107                 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
108                 key->basic.n_proto = htons(ETH_P_IP);
109                 key->ipv4.src = tuple->src_v4.s_addr;
110                 mask->ipv4.src = 0xffffffff;
111                 key->ipv4.dst = tuple->dst_v4.s_addr;
112                 mask->ipv4.dst = 0xffffffff;
113                 break;
114         case AF_INET6:
115                 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
116                 key->basic.n_proto = htons(ETH_P_IPV6);
117                 key->ipv6.src = tuple->src_v6;
118                 memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
119                 key->ipv6.dst = tuple->dst_v6;
120                 memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
121                 break;
122         default:
123                 return -EOPNOTSUPP;
124         }
125         mask->control.addr_type = 0xffff;
126         match->dissector.used_keys |= BIT(key->control.addr_type);
127         mask->basic.n_proto = 0xffff;
128
129         switch (tuple->l4proto) {
130         case IPPROTO_TCP:
131                 key->tcp.flags = 0;
132                 mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
133                 match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
134                 break;
135         case IPPROTO_UDP:
136                 break;
137         default:
138                 return -EOPNOTSUPP;
139         }
140
141         key->basic.ip_proto = tuple->l4proto;
142         mask->basic.ip_proto = 0xff;
143
144         key->tp.src = tuple->src_port;
145         mask->tp.src = 0xffff;
146         key->tp.dst = tuple->dst_port;
147         mask->tp.dst = 0xffff;
148
149         match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
150                                       BIT(FLOW_DISSECTOR_KEY_CONTROL) |
151                                       BIT(FLOW_DISSECTOR_KEY_BASIC) |
152                                       BIT(FLOW_DISSECTOR_KEY_PORTS);
153         return 0;
154 }
155
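/* Encode a 32-bit header rewrite at @offset as a FLOW_ACTION_MANGLE entry. */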
156 static void flow_offload_mangle(struct flow_action_entry *entry,
157                                 enum flow_action_mangle_base htype, u32 offset,
158                                 const __be32 *value, const __be32 *mask)
159 {
160         entry->id = FLOW_ACTION_MANGLE;
161         entry->mangle.htype = htype;
162         entry->mangle.offset = offset;
163         memcpy(&entry->mangle.mask, mask, sizeof(u32));
164         memcpy(&entry->mangle.val, value, sizeof(u32));
165 }
166
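/* Claim the next free action entry in the rule being built. */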
167 static inline struct flow_action_entry *
168 flow_action_entry_next(struct nf_flow_rule *flow_rule)
169 {
170         int i = flow_rule->rule->action.num_entries++;
171
172         return &flow_rule->rule->action.entries[i];
173 }
174
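/* Rewrite the Ethernet source address with two mangle actions: its first
 * two bytes share the 32-bit word at offset 4 with the tail of the
 * destination MAC, the remaining four bytes sit at offset 8.
 */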
175 static int flow_offload_eth_src(struct net *net,
176                                 const struct flow_offload *flow,
177                                 enum flow_offload_tuple_dir dir,
178                                 struct nf_flow_rule *flow_rule)
179 {
180         struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
181         struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
182         const struct flow_offload_tuple *other_tuple, *this_tuple;
183         struct net_device *dev = NULL;
184         const unsigned char *addr;
185         u32 mask, val;
186         u16 val16;
187
188         this_tuple = &flow->tuplehash[dir].tuple;
189
190         switch (this_tuple->xmit_type) {
191         case FLOW_OFFLOAD_XMIT_DIRECT:
192                 addr = this_tuple->out.h_source;
193                 break;
194         case FLOW_OFFLOAD_XMIT_NEIGH:
195                 other_tuple = &flow->tuplehash[!dir].tuple;
196                 dev = dev_get_by_index(net, other_tuple->iifidx);
197                 if (!dev)
198                         return -ENOENT;
199
200                 addr = dev->dev_addr;
201                 break;
202         default:
203                 return -EOPNOTSUPP;
204         }
205
206         mask = ~0xffff0000;
207         memcpy(&val16, addr, 2);
208         val = val16 << 16;
209         flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
210                             &val, &mask);
211
212         mask = ~0xffffffff;
213         memcpy(&val, addr + 2, 4);
214         flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
215                             &val, &mask);
216
217         if (dev)
218                 dev_put(dev);
219
220         return 0;
221 }
222
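/* Rewrite the Ethernet destination address. For the neigh xmit path the
 * address is resolved through a neighbour lookup on the cached route, and
 * the rule is rejected if the entry is not NUD_VALID.
 */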
223 static int flow_offload_eth_dst(struct net *net,
224                                 const struct flow_offload *flow,
225                                 enum flow_offload_tuple_dir dir,
226                                 struct nf_flow_rule *flow_rule)
227 {
228         struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
229         struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
230         const struct flow_offload_tuple *other_tuple, *this_tuple;
231         const struct dst_entry *dst_cache;
232         unsigned char ha[ETH_ALEN];
233         struct neighbour *n;
234         const void *daddr;
235         u32 mask, val;
236         u8 nud_state;
237         u16 val16;
238
239         this_tuple = &flow->tuplehash[dir].tuple;
240
241         switch (this_tuple->xmit_type) {
242         case FLOW_OFFLOAD_XMIT_DIRECT:
243                 ether_addr_copy(ha, this_tuple->out.h_dest);
244                 break;
245         case FLOW_OFFLOAD_XMIT_NEIGH:
246                 other_tuple = &flow->tuplehash[!dir].tuple;
247                 daddr = &other_tuple->src_v4;
248                 dst_cache = this_tuple->dst_cache;
249                 n = dst_neigh_lookup(dst_cache, daddr);
250                 if (!n)
251                         return -ENOENT;
252
253                 read_lock_bh(&n->lock);
254                 nud_state = n->nud_state;
255                 ether_addr_copy(ha, n->ha);
256                 read_unlock_bh(&n->lock);
257                 neigh_release(n);
258
259                 if (!(nud_state & NUD_VALID))
260                         return -ENOENT;
261                 break;
262         default:
263                 return -EOPNOTSUPP;
264         }
265
266         mask = ~0xffffffff;
267         memcpy(&val, ha, 4);
268         flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
269                             &val, &mask);
270
271         mask = ~0x0000ffff;
272         memcpy(&val16, ha + 4, 2);
273         val = val16;
274         flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
275                             &val, &mask);
276
277         return 0;
278 }
279
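/* IPv4 source NAT: in the original direction rewrite the source address to
 * the reply tuple's destination; in the reply direction rewrite the
 * destination address back.
 */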
280 static void flow_offload_ipv4_snat(struct net *net,
281                                    const struct flow_offload *flow,
282                                    enum flow_offload_tuple_dir dir,
283                                    struct nf_flow_rule *flow_rule)
284 {
285         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
286         u32 mask = ~htonl(0xffffffff);
287         __be32 addr;
288         u32 offset;
289
290         switch (dir) {
291         case FLOW_OFFLOAD_DIR_ORIGINAL:
292                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
293                 offset = offsetof(struct iphdr, saddr);
294                 break;
295         case FLOW_OFFLOAD_DIR_REPLY:
296                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
297                 offset = offsetof(struct iphdr, daddr);
298                 break;
299         default:
300                 return;
301         }
302
303         flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
304                             &addr, &mask);
305 }
306
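/* IPv4 destination NAT: the mirror image of flow_offload_ipv4_snat(). */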
307 static void flow_offload_ipv4_dnat(struct net *net,
308                                    const struct flow_offload *flow,
309                                    enum flow_offload_tuple_dir dir,
310                                    struct nf_flow_rule *flow_rule)
311 {
312         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
313         u32 mask = ~htonl(0xffffffff);
314         __be32 addr;
315         u32 offset;
316
317         switch (dir) {
318         case FLOW_OFFLOAD_DIR_ORIGINAL:
319                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
320                 offset = offsetof(struct iphdr, daddr);
321                 break;
322         case FLOW_OFFLOAD_DIR_REPLY:
323                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
324                 offset = offsetof(struct iphdr, saddr);
325                 break;
326         default:
327                 return;
328         }
329
330         flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
331                             &addr, &mask);
332 }
333
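/* IPv6 addresses are rewritten one 32-bit word at a time, one mangle
 * action per word.
 */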
334 static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
335                                      unsigned int offset,
336                                      const __be32 *addr, const __be32 *mask)
337 {
338         struct flow_action_entry *entry;
339         int i, j;
340
341         for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
342                 entry = flow_action_entry_next(flow_rule);
343                 flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
344                                     offset + i, &addr[j], mask);
345         }
346 }
347
348 static void flow_offload_ipv6_snat(struct net *net,
349                                    const struct flow_offload *flow,
350                                    enum flow_offload_tuple_dir dir,
351                                    struct nf_flow_rule *flow_rule)
352 {
353         u32 mask = ~htonl(0xffffffff);
354         const __be32 *addr;
355         u32 offset;
356
357         switch (dir) {
358         case FLOW_OFFLOAD_DIR_ORIGINAL:
359                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
360                 offset = offsetof(struct ipv6hdr, saddr);
361                 break;
362         case FLOW_OFFLOAD_DIR_REPLY:
363                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
364                 offset = offsetof(struct ipv6hdr, daddr);
365                 break;
366         default:
367                 return;
368         }
369
370         flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
371 }
372
373 static void flow_offload_ipv6_dnat(struct net *net,
374                                    const struct flow_offload *flow,
375                                    enum flow_offload_tuple_dir dir,
376                                    struct nf_flow_rule *flow_rule)
377 {
378         u32 mask = ~htonl(0xffffffff);
379         const __be32 *addr;
380         u32 offset;
381
382         switch (dir) {
383         case FLOW_OFFLOAD_DIR_ORIGINAL:
384                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
385                 offset = offsetof(struct ipv6hdr, daddr);
386                 break;
387         case FLOW_OFFLOAD_DIR_REPLY:
388                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
389                 offset = offsetof(struct ipv6hdr, saddr);
390                 break;
391         default:
392                 return;
393         }
394
395         flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
396 }
397
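/* Pick the mangle header type (TCP or UDP) matching the flow's L4
 * protocol, used by the port rewrites below.
 */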
398 static int flow_offload_l4proto(const struct flow_offload *flow)
399 {
400         u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
401         u8 type = 0;
402
403         switch (protonum) {
404         case IPPROTO_TCP:
405                 type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
406                 break;
407         case IPPROTO_UDP:
408                 type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
409                 break;
410         default:
411                 break;
412         }
413
414         return type;
415 }
416
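/* Source port NAT: the 16-bit port is written into its half of the first
 * 32-bit word of the transport header, selected by the mask and the
 * direction.
 */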
417 static void flow_offload_port_snat(struct net *net,
418                                    const struct flow_offload *flow,
419                                    enum flow_offload_tuple_dir dir,
420                                    struct nf_flow_rule *flow_rule)
421 {
422         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
423         u32 mask, port;
424         u32 offset;
425
426         switch (dir) {
427         case FLOW_OFFLOAD_DIR_ORIGINAL:
428                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
429                 offset = 0; /* offsetof(struct tcphdr, source); */
430                 port = htonl(port << 16);
431                 mask = ~htonl(0xffff0000);
432                 break;
433         case FLOW_OFFLOAD_DIR_REPLY:
434                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
435                 offset = 0; /* offsetof(struct tcphdr, dest); */
436                 port = htonl(port);
437                 mask = ~htonl(0xffff);
438                 break;
439         default:
440                 return;
441         }
442
443         flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
444                             &port, &mask);
445 }
446
447 static void flow_offload_port_dnat(struct net *net,
448                                    const struct flow_offload *flow,
449                                    enum flow_offload_tuple_dir dir,
450                                    struct nf_flow_rule *flow_rule)
451 {
452         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
453         u32 mask, port;
454         u32 offset;
455
456         switch (dir) {
457         case FLOW_OFFLOAD_DIR_ORIGINAL:
458                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
459                 offset = 0; /* offsetof(struct tcphdr, dest); */
460                 port = htonl(port);
461                 mask = ~htonl(0xffff);
462                 break;
463         case FLOW_OFFLOAD_DIR_REPLY:
464                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
465                 offset = 0; /* offsetof(struct tcphdr, source); */
466                 port = htonl(port << 16);
467                 mask = ~htonl(0xffff0000);
468                 break;
469         default:
470                 return;
471         }
472
473         flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
474                             &port, &mask);
475 }
476
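/* Recompute the IPv4 header checksum, and the TCP/UDP checksum, after the
 * address and port rewrites.
 */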
477 static void flow_offload_ipv4_checksum(struct net *net,
478                                        const struct flow_offload *flow,
479                                        struct nf_flow_rule *flow_rule)
480 {
481         u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
482         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
483
484         entry->id = FLOW_ACTION_CSUM;
485         entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
486
487         switch (protonum) {
488         case IPPROTO_TCP:
489                 entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
490                 break;
491         case IPPROTO_UDP:
492                 entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
493                 break;
494         }
495 }
496
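/* Final action: redirect the packet to the output device. The device
 * reference taken here is released in __nf_flow_offload_destroy().
 */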
497 static void flow_offload_redirect(struct net *net,
498                                   const struct flow_offload *flow,
499                                   enum flow_offload_tuple_dir dir,
500                                   struct nf_flow_rule *flow_rule)
501 {
502         const struct flow_offload_tuple *this_tuple, *other_tuple;
503         struct flow_action_entry *entry;
504         struct net_device *dev;
505         int ifindex;
506
507         this_tuple = &flow->tuplehash[dir].tuple;
508         switch (this_tuple->xmit_type) {
509         case FLOW_OFFLOAD_XMIT_DIRECT:
510                 this_tuple = &flow->tuplehash[dir].tuple;
511                 ifindex = this_tuple->out.hw_ifidx;
512                 break;
513         case FLOW_OFFLOAD_XMIT_NEIGH:
514                 other_tuple = &flow->tuplehash[!dir].tuple;
515                 ifindex = other_tuple->iifidx;
516                 break;
517         default:
518                 return;
519         }
520
521         dev = dev_get_by_index(net, ifindex);
522         if (!dev)
523                 return;
524
525         entry = flow_action_entry_next(flow_rule);
526         entry->id = FLOW_ACTION_REDIRECT;
527         entry->dev = dev;
528 }
529
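/* Add a tunnel encap action when the egress route of this direction
 * carries TX tunnel metadata.
 */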
530 static void flow_offload_encap_tunnel(const struct flow_offload *flow,
531                                       enum flow_offload_tuple_dir dir,
532                                       struct nf_flow_rule *flow_rule)
533 {
534         const struct flow_offload_tuple *this_tuple;
535         struct flow_action_entry *entry;
536         struct dst_entry *dst;
537
538         this_tuple = &flow->tuplehash[dir].tuple;
539         if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
540                 return;
541
542         dst = this_tuple->dst_cache;
543         if (dst && dst->lwtstate) {
544                 struct ip_tunnel_info *tun_info;
545
546                 tun_info = lwt_tun_info(dst->lwtstate);
547                 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
548                         entry = flow_action_entry_next(flow_rule);
549                         entry->id = FLOW_ACTION_TUNNEL_ENCAP;
550                         entry->tunnel = tun_info;
551                 }
552         }
553 }
554
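/* Add a tunnel decap action when the opposite direction transmits through
 * a tunnel, i.e. packets in this direction arrive encapsulated.
 */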
555 static void flow_offload_decap_tunnel(const struct flow_offload *flow,
556                                       enum flow_offload_tuple_dir dir,
557                                       struct nf_flow_rule *flow_rule)
558 {
559         const struct flow_offload_tuple *other_tuple;
560         struct flow_action_entry *entry;
561         struct dst_entry *dst;
562
563         other_tuple = &flow->tuplehash[!dir].tuple;
564         if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
565                 return;
566
567         dst = other_tuple->dst_cache;
568         if (dst && dst->lwtstate) {
569                 struct ip_tunnel_info *tun_info;
570
571                 tun_info = lwt_tun_info(dst->lwtstate);
572                 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
573                         entry = flow_action_entry_next(flow_rule);
574                         entry->id = FLOW_ACTION_TUNNEL_DECAP;
575                 }
576         }
577 }
578
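/* Actions shared by the IPv4 and IPv6 paths: tunnel decap/encap, Ethernet
 * source/destination rewrite, and re-adding the VLAN/PPPoE encapsulation
 * seen on the reverse path's ingress.
 */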
579 static int
580 nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
581                           enum flow_offload_tuple_dir dir,
582                           struct nf_flow_rule *flow_rule)
583 {
584         const struct flow_offload_tuple *other_tuple;
585         int i;
586
587         flow_offload_decap_tunnel(flow, dir, flow_rule);
588         flow_offload_encap_tunnel(flow, dir, flow_rule);
589
590         if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
591             flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
592                 return -1;
593
594         other_tuple = &flow->tuplehash[!dir].tuple;
595
596         for (i = 0; i < other_tuple->encap_num; i++) {
597                 struct flow_action_entry *entry;
598
599                 if (other_tuple->in_vlan_ingress & BIT(i))
600                         continue;
601
602                 entry = flow_action_entry_next(flow_rule);
603
604                 switch (other_tuple->encap[i].proto) {
605                 case htons(ETH_P_PPP_SES):
606                         entry->id = FLOW_ACTION_PPPOE_PUSH;
607                         entry->pppoe.sid = other_tuple->encap[i].id;
608                         break;
609                 case htons(ETH_P_8021Q):
610                         entry->id = FLOW_ACTION_VLAN_PUSH;
611                         entry->vlan.vid = other_tuple->encap[i].id;
612                         entry->vlan.proto = other_tuple->encap[i].proto;
613                         break;
614                 }
615         }
616
617         return 0;
618 }
619
620 int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
621                             enum flow_offload_tuple_dir dir,
622                             struct nf_flow_rule *flow_rule)
623 {
624         if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
625                 return -1;
626
627         if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
628                 flow_offload_ipv4_snat(net, flow, dir, flow_rule);
629                 flow_offload_port_snat(net, flow, dir, flow_rule);
630         }
631         if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
632                 flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
633                 flow_offload_port_dnat(net, flow, dir, flow_rule);
634         }
635         if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
636             test_bit(NF_FLOW_DNAT, &flow->flags))
637                 flow_offload_ipv4_checksum(net, flow, flow_rule);
638
639         flow_offload_redirect(net, flow, dir, flow_rule);
640
641         return 0;
642 }
643 EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
644
645 int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
646                             enum flow_offload_tuple_dir dir,
647                             struct nf_flow_rule *flow_rule)
648 {
649         if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
650                 return -1;
651
652         if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
653                 flow_offload_ipv6_snat(net, flow, dir, flow_rule);
654                 flow_offload_port_snat(net, flow, dir, flow_rule);
655         }
656         if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
657                 flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
658                 flow_offload_port_dnat(net, flow, dir, flow_rule);
659         }
660
661         flow_offload_redirect(net, flow, dir, flow_rule);
662
663         return 0;
664 }
665 EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
666
667 #define NF_FLOW_RULE_ACTION_MAX 16
668
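/* Allocate and populate the flow_rule for one direction: build the match
 * from the tuple and let the flowtable type's ->action callback fill in
 * the actions.
 */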
669 static struct nf_flow_rule *
670 nf_flow_offload_rule_alloc(struct net *net,
671                            const struct flow_offload_work *offload,
672                            enum flow_offload_tuple_dir dir)
673 {
674         const struct nf_flowtable *flowtable = offload->flowtable;
675         const struct flow_offload_tuple *tuple, *other_tuple;
676         const struct flow_offload *flow = offload->flow;
677         struct dst_entry *other_dst = NULL;
678         struct nf_flow_rule *flow_rule;
679         int err = -ENOMEM;
680
681         flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
682         if (!flow_rule)
683                 goto err_flow;
684
685         flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
686         if (!flow_rule->rule)
687                 goto err_flow_rule;
688
689         flow_rule->rule->match.dissector = &flow_rule->match.dissector;
690         flow_rule->rule->match.mask = &flow_rule->match.mask;
691         flow_rule->rule->match.key = &flow_rule->match.key;
692
693         tuple = &flow->tuplehash[dir].tuple;
694         other_tuple = &flow->tuplehash[!dir].tuple;
695         if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
696                 other_dst = other_tuple->dst_cache;
697
698         err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
699         if (err < 0)
700                 goto err_flow_match;
701
702         flow_rule->rule->action.num_entries = 0;
703         if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
704                 goto err_flow_match;
705
706         return flow_rule;
707
708 err_flow_match:
709         kfree(flow_rule->rule);
710 err_flow_rule:
711         kfree(flow_rule);
712 err_flow:
713         return NULL;
714 }
715
716 static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
717 {
718         struct flow_action_entry *entry;
719         int i;
720
721         for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
722                 entry = &flow_rule->rule->action.entries[i];
723                 if (entry->id != FLOW_ACTION_REDIRECT)
724                         continue;
725
726                 dev_put(entry->dev);
727         }
728         kfree(flow_rule->rule);
729         kfree(flow_rule);
730 }
731
732 static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
733 {
734         int i;
735
736         for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
737                 __nf_flow_offload_destroy(flow_rule[i]);
738 }
739
740 static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
741                                  struct nf_flow_rule *flow_rule[])
742 {
743         struct net *net = read_pnet(&offload->flowtable->net);
744
745         flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
746                                                   FLOW_OFFLOAD_DIR_ORIGINAL);
747         if (!flow_rule[0])
748                 return -ENOMEM;
749
750         flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
751                                                   FLOW_OFFLOAD_DIR_REPLY);
752         if (!flow_rule[1]) {
753                 __nf_flow_offload_destroy(flow_rule[0]);
754                 return -ENOMEM;
755         }
756
757         return 0;
758 }
759
760 static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
761                                  __be16 proto, int priority,
762                                  enum flow_cls_command cmd,
763                                  const struct flow_offload_tuple *tuple,
764                                  struct netlink_ext_ack *extack)
765 {
766         cls_flow->common.protocol = proto;
767         cls_flow->common.prio = priority;
768         cls_flow->common.extack = extack;
769         cls_flow->command = cmd;
770         cls_flow->cookie = (unsigned long)tuple;
771 }
772
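/* Send one FLOW_CLS_* command for a single direction to every callback
 * registered on the flowtable block. Returns how many callbacks accepted
 * it; for FLOW_CLS_STATS the counters are copied into @stats.
 */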
773 static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
774                                  struct flow_offload *flow,
775                                  struct nf_flow_rule *flow_rule,
776                                  enum flow_offload_tuple_dir dir,
777                                  int priority, int cmd,
778                                  struct flow_stats *stats,
779                                  struct list_head *block_cb_list)
780 {
781         struct flow_cls_offload cls_flow = {};
782         struct flow_block_cb *block_cb;
783         struct netlink_ext_ack extack;
784         __be16 proto = ETH_P_ALL;
785         int err, i = 0;
786
787         nf_flow_offload_init(&cls_flow, proto, priority, cmd,
788                              &flow->tuplehash[dir].tuple, &extack);
789         if (cmd == FLOW_CLS_REPLACE)
790                 cls_flow.rule = flow_rule->rule;
791
792         down_read(&flowtable->flow_block_lock);
793         list_for_each_entry(block_cb, block_cb_list, list) {
794                 err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
795                                    block_cb->cb_priv);
796                 if (err < 0)
797                         continue;
798
799                 i++;
800         }
801         up_read(&flowtable->flow_block_lock);
802
803         if (cmd == FLOW_CLS_STATS)
804                 memcpy(stats, &cls_flow.stats, sizeof(*stats));
805
806         return i;
807 }
808
809 static int flow_offload_tuple_add(struct flow_offload_work *offload,
810                                   struct nf_flow_rule *flow_rule,
811                                   enum flow_offload_tuple_dir dir)
812 {
813         return nf_flow_offload_tuple(offload->flowtable, offload->flow,
814                                      flow_rule, dir, offload->priority,
815                                      FLOW_CLS_REPLACE, NULL,
816                                      &offload->flowtable->flow_block.cb_list);
817 }
818
819 static void flow_offload_tuple_del(struct flow_offload_work *offload,
820                                    enum flow_offload_tuple_dir dir)
821 {
822         nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
823                               offload->priority, FLOW_CLS_DESTROY, NULL,
824                               &offload->flowtable->flow_block.cb_list);
825 }
826
827 static int flow_offload_rule_add(struct flow_offload_work *offload,
828                                  struct nf_flow_rule *flow_rule[])
829 {
830         int ok_count = 0;
831
832         ok_count += flow_offload_tuple_add(offload, flow_rule[0],
833                                            FLOW_OFFLOAD_DIR_ORIGINAL);
834         ok_count += flow_offload_tuple_add(offload, flow_rule[1],
835                                            FLOW_OFFLOAD_DIR_REPLY);
836         if (ok_count == 0)
837                 return -ENOENT;
838
839         return 0;
840 }
841
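/* Install both directions in hardware. If no callback accepts the rules,
 * flag the flow for a software refresh; otherwise mark the conntrack entry
 * as hardware offloaded.
 */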
842 static void flow_offload_work_add(struct flow_offload_work *offload)
843 {
844         struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
845         int err;
846
847         err = nf_flow_offload_alloc(offload, flow_rule);
848         if (err < 0)
849                 return;
850
851         err = flow_offload_rule_add(offload, flow_rule);
852         if (err < 0)
853                 set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
854         else
855                 set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
856
857         nf_flow_offload_destroy(flow_rule);
858 }
859
860 static void flow_offload_work_del(struct flow_offload_work *offload)
861 {
862         clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
863         flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
864         flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
865         set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
866 }
867
868 static void flow_offload_tuple_stats(struct flow_offload_work *offload,
869                                      enum flow_offload_tuple_dir dir,
870                                      struct flow_stats *stats)
871 {
872         nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
873                               offload->priority, FLOW_CLS_STATS, stats,
874                               &offload->flowtable->flow_block.cb_list);
875 }
876
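/* Pull per-direction hardware counters, extend the flow timeout based on
 * the most recent use, and feed packets/bytes into conntrack accounting
 * when the flowtable has counters enabled.
 */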
877 static void flow_offload_work_stats(struct flow_offload_work *offload)
878 {
879         struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
880         u64 lastused;
881
882         flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
883         flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
884
885         lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
886         offload->flow->timeout = max_t(u64, offload->flow->timeout,
887                                        lastused + NF_FLOW_TIMEOUT);
888
889         if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
890                 if (stats[0].pkts)
891                         nf_ct_acct_add(offload->flow->ct,
892                                        FLOW_OFFLOAD_DIR_ORIGINAL,
893                                        stats[0].pkts, stats[0].bytes);
894                 if (stats[1].pkts)
895                         nf_ct_acct_add(offload->flow->ct,
896                                        FLOW_OFFLOAD_DIR_REPLY,
897                                        stats[1].pkts, stats[1].bytes);
898         }
899 }
900
901 static void flow_offload_work_handler(struct work_struct *work)
902 {
903         struct flow_offload_work *offload;
904
905         offload = container_of(work, struct flow_offload_work, work);
906         switch (offload->cmd) {
907         case FLOW_CLS_REPLACE:
908                 flow_offload_work_add(offload);
909                 break;
910         case FLOW_CLS_DESTROY:
911                 flow_offload_work_del(offload);
912                 break;
913         case FLOW_CLS_STATS:
914                 flow_offload_work_stats(offload);
915                 break;
916         default:
917                 WARN_ON_ONCE(1);
918         }
919
920         clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
921         kfree(offload);
922 }
923
924 static void flow_offload_queue_work(struct flow_offload_work *offload)
925 {
926         if (offload->cmd == FLOW_CLS_REPLACE)
927                 queue_work(nf_flow_offload_add_wq, &offload->work);
928         else if (offload->cmd == FLOW_CLS_DESTROY)
929                 queue_work(nf_flow_offload_del_wq, &offload->work);
930         else
931                 queue_work(nf_flow_offload_stats_wq, &offload->work);
932 }
933
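/* Allocate a work item for a flow; NF_FLOW_HW_PENDING ensures only one
 * offload request per flow is in flight at a time.
 */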
934 static struct flow_offload_work *
935 nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
936                            struct flow_offload *flow, unsigned int cmd)
937 {
938         struct flow_offload_work *offload;
939
940         if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
941                 return NULL;
942
943         offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
944         if (!offload) {
945                 clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
946                 return NULL;
947         }
948
949         offload->cmd = cmd;
950         offload->flow = flow;
951         offload->priority = flowtable->priority;
952         offload->flowtable = flowtable;
953         INIT_WORK(&offload->work, flow_offload_work_handler);
954
955         return offload;
956 }
957
958
959 void nf_flow_offload_add(struct nf_flowtable *flowtable,
960                          struct flow_offload *flow)
961 {
962         struct flow_offload_work *offload;
963
964         offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
965         if (!offload)
966                 return;
967
968         flow_offload_queue_work(offload);
969 }
970
971 void nf_flow_offload_del(struct nf_flowtable *flowtable,
972                          struct flow_offload *flow)
973 {
974         struct flow_offload_work *offload;
975
976         offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
977         if (!offload)
978                 return;
979
980         set_bit(NF_FLOW_HW_DYING, &flow->flags);
981         flow_offload_queue_work(offload);
982 }
983
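/* Skip the hardware stats query while more than 90% of NF_FLOW_TIMEOUT is
 * still remaining on the flow.
 */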
984 void nf_flow_offload_stats(struct nf_flowtable *flowtable,
985                            struct flow_offload *flow)
986 {
987         struct flow_offload_work *offload;
988         __s32 delta;
989
990         delta = nf_flow_timeout_delta(flow->timeout);
991         if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
992                 return;
993
994         offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
995         if (!offload)
996                 return;
997
998         flow_offload_queue_work(offload);
999 }
1000
1001 void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
1002 {
1003         if (nf_flowtable_hw_offload(flowtable)) {
1004                 flush_workqueue(nf_flow_offload_add_wq);
1005                 flush_workqueue(nf_flow_offload_del_wq);
1006                 flush_workqueue(nf_flow_offload_stats_wq);
1007         }
1008 }
1009
1010 static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
1011                                      struct flow_block_offload *bo,
1012                                      enum flow_block_command cmd)
1013 {
1014         struct flow_block_cb *block_cb, *next;
1015         int err = 0;
1016
1017         switch (cmd) {
1018         case FLOW_BLOCK_BIND:
1019                 list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
1020                 break;
1021         case FLOW_BLOCK_UNBIND:
1022                 list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
1023                         list_del(&block_cb->list);
1024                         flow_block_cb_free(block_cb);
1025                 }
1026                 break;
1027         default:
1028                 WARN_ON_ONCE(1);
1029                 err = -EOPNOTSUPP;
1030         }
1031
1032         return err;
1033 }
1034
1035 static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
1036                                              struct net *net,
1037                                              enum flow_block_command cmd,
1038                                              struct nf_flowtable *flowtable,
1039                                              struct netlink_ext_ack *extack)
1040 {
1041         memset(bo, 0, sizeof(*bo));
1042         bo->net         = net;
1043         bo->block       = &flowtable->flow_block;
1044         bo->command     = cmd;
1045         bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
1046         bo->extack      = extack;
1047         INIT_LIST_HEAD(&bo->cb_list);
1048 }
1049
1050 static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
1051 {
1052         struct nf_flowtable *flowtable = block_cb->indr.data;
1053         struct net_device *dev = block_cb->indr.dev;
1054
1055         nf_flow_table_gc_cleanup(flowtable, dev);
1056         down_write(&flowtable->flow_block_lock);
1057         list_del(&block_cb->list);
1058         list_del(&block_cb->driver_list);
1059         flow_block_cb_free(block_cb);
1060         up_write(&flowtable->flow_block_lock);
1061 }
1062
1063 static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
1064                                           struct nf_flowtable *flowtable,
1065                                           struct net_device *dev,
1066                                           enum flow_block_command cmd,
1067                                           struct netlink_ext_ack *extack)
1068 {
1069         nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
1070                                          extack);
1071
1072         return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
1073                                            nf_flow_table_indr_cleanup);
1074 }
1075
1076 static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
1077                                      struct nf_flowtable *flowtable,
1078                                      struct net_device *dev,
1079                                      enum flow_block_command cmd,
1080                                      struct netlink_ext_ack *extack)
1081 {
1082         int err;
1083
1084         nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
1085                                          extack);
1086         err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
1087         if (err < 0)
1088                 return err;
1089
1090         return 0;
1091 }
1092
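/* Bind or unbind the flowtable's flow block on a device, via ndo_setup_tc
 * when the driver implements it, or via the indirect block infrastructure
 * otherwise.
 */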
1093 int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
1094                                 struct net_device *dev,
1095                                 enum flow_block_command cmd)
1096 {
1097         struct netlink_ext_ack extack = {};
1098         struct flow_block_offload bo;
1099         int err;
1100
1101         if (!nf_flowtable_hw_offload(flowtable))
1102                 return 0;
1103
1104         if (dev->netdev_ops->ndo_setup_tc)
1105                 err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
1106                                                 &extack);
1107         else
1108                 err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
1109                                                      &extack);
1110         if (err < 0)
1111                 return err;
1112
1113         return nf_flow_table_block_setup(flowtable, &bo, cmd);
1114 }
1115 EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
1116
1117 int nf_flow_table_offload_init(void)
1118 {
1119         nf_flow_offload_add_wq  = alloc_workqueue("nf_ft_offload_add",
1120                                                   WQ_UNBOUND | WQ_SYSFS, 0);
1121         if (!nf_flow_offload_add_wq)
1122                 return -ENOMEM;
1123
1124         nf_flow_offload_del_wq  = alloc_workqueue("nf_ft_offload_del",
1125                                                   WQ_UNBOUND | WQ_SYSFS, 0);
1126         if (!nf_flow_offload_del_wq)
1127                 goto err_del_wq;
1128
1129         nf_flow_offload_stats_wq  = alloc_workqueue("nf_ft_offload_stats",
1130                                                     WQ_UNBOUND | WQ_SYSFS, 0);
1131         if (!nf_flow_offload_stats_wq)
1132                 goto err_stats_wq;
1133
1134         return 0;
1135
1136 err_stats_wq:
1137         destroy_workqueue(nf_flow_offload_del_wq);
1138 err_del_wq:
1139         destroy_workqueue(nf_flow_offload_add_wq);
1140         return -ENOMEM;
1141 }
1142
1143 void nf_flow_table_offload_exit(void)
1144 {
1145         destroy_workqueue(nf_flow_offload_add_wq);
1146         destroy_workqueue(nf_flow_offload_del_wq);
1147         destroy_workqueue(nf_flow_offload_stats_wq);
1148 }