/* net/netfilter/nf_flow_table_core.c */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

struct flow_offload_entry {
	struct flow_offload	flow;
	struct nf_conn		*ct;
	struct rcu_head		rcu_head;
};

static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);

static void
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
		      struct nf_flow_route *route,
		      enum flow_offload_tuple_dir dir)
{
	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
	struct dst_entry *other_dst = route->tuple[!dir].dst;
	struct dst_entry *dst = route->tuple[dir].dst;

	ft->dir = dir;

	switch (ctt->src.l3num) {
	case NFPROTO_IPV4:
		ft->src_v4 = ctt->src.u3.in;
		ft->dst_v4 = ctt->dst.u3.in;
		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
		break;
	case NFPROTO_IPV6:
		ft->src_v6 = ctt->src.u3.in6;
		ft->dst_v6 = ctt->dst.u3.in6;
		ft->mtu = ip6_dst_mtu_forward(dst);
		break;
	}

	ft->l3proto = ctt->src.l3num;
	ft->l4proto = ctt->dst.protonum;
	ft->src_port = ctt->src.u.tcp.port;
	ft->dst_port = ctt->dst.u.tcp.port;

	ft->iifidx = other_dst->dev->ifindex;
	ft->oifidx = dst->dev->ifindex;
	ft->dst_cache = dst;
}

struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
	struct flow_offload_entry *entry;
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct) ||
	    !atomic_inc_not_zero(&ct->ct_general.use)))
		return NULL;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		goto err_ct_refcnt;

	flow = &entry->flow;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
		goto err_dst_cache_original;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
		goto err_dst_cache_reply;

	entry->ct = ct;

	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);

	if (ct->status & IPS_SRC_NAT)
		flow->flags |= FLOW_OFFLOAD_SNAT;
	if (ct->status & IPS_DST_NAT)
		flow->flags |= FLOW_OFFLOAD_DNAT;

	return flow;

err_dst_cache_reply:
	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
	kfree(entry);
err_ct_refcnt:
	nf_ct_put(ct);

	return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
	/* Window tracking saw none of the offloaded traffic: force the
	 * conntrack entry back to ESTABLISHED and disable window checks
	 * (td_maxwin == 0) so the slow path accepts the connection again.
	 */
	tcp->state = TCP_CONNTRACK_ESTABLISHED;
	tcp->seen[0].td_maxwin = 0;
	tcp->seen[1].td_maxwin = 0;
}

/* Grace period granted to the conntrack entry to pick the connection
 * back up in the classic path once a flow is torn down.
 */
#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)

static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
	const struct nf_conntrack_l4proto *l4proto;
	unsigned int timeout;
	int l4num;

	l4num = nf_ct_protonum(ct);
	if (l4num == IPPROTO_TCP)
		flow_offload_fixup_tcp(&ct->proto.tcp);

	l4proto = nf_ct_l4proto_find(l4num);
	if (!l4proto)
		return;

	if (l4num == IPPROTO_TCP)
		timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
	else if (l4num == IPPROTO_UDP)
		timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
	else
		return;

	ct->timeout = nfct_time_stamp + timeout;
}

void flow_offload_free(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
	e = container_of(flow, struct flow_offload_entry, flow);
	if (flow->flags & FLOW_OFFLOAD_DYING)
		nf_ct_delete(e->ct, 0, 0);
	nf_ct_put(e->ct);
	kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
		return 1;

	return 0;
}

static const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};
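
/*
 * Both tuplehash entries of a flow live in the same rhashtable, keyed on
 * the tuple bytes up to (but not including) 'dir'.  That is why the hash
 * and compare callbacks above stop at offsetof(struct flow_offload_tuple,
 * dir): a tuple built from a packet matches whichever direction it
 * belongs to, and tuple.dir then tells the caller which side it hit.
 */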

int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	int err;

	err = rhashtable_insert_fast(&flow_table->rhashtable,
				     &flow->tuplehash[0].node,
				     nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	err = rhashtable_insert_fast(&flow_table->rhashtable,
				     &flow->tuplehash[1].node,
				     nf_flow_offload_rhash_params);
	if (err < 0) {
		rhashtable_remove_fast(&flow_table->rhashtable,
				       &flow->tuplehash[0].node,
				       nf_flow_offload_rhash_params);
		return err;
	}

	flow->timeout = (u32)jiffies;
	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
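
/*
 * Sketch of the usual caller sequence (in mainline this is driven by the
 * nft_flow_offload expression; 'ft', 'ct' and 'route' stand for a
 * flowtable, a confirmed conntrack entry and a filled-in nf_flow_route):
 *
 *	struct flow_offload *flow;
 *
 *	flow = flow_offload_alloc(ct, &route);
 *	if (!flow)
 *		goto err;
 *	if (flow_offload_add(ft, flow) < 0) {
 *		flow_offload_free(flow);
 *		goto err;
 *	}
 *
 * flow_offload_alloc() takes its own references on the conntrack entry
 * and on both dst entries, so the caller may drop its route references
 * once the flow has been added.
 */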

static void flow_offload_del(struct nf_flowtable *flow_table,
			     struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       nf_flow_offload_rhash_params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       nf_flow_offload_rhash_params);

	e = container_of(flow, struct flow_offload_entry, flow);
	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);

	flow_offload_free(flow);
}

void flow_offload_teardown(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	flow->flags |= FLOW_OFFLOAD_TEARDOWN;

	e = container_of(flow, struct flow_offload_entry, flow);
	flow_offload_fixup_ct_state(e->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct flow_offload *flow;
	struct flow_offload_entry *e;
	int dir;

	tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
				      nf_flow_offload_rhash_params);
	if (!tuplehash)
		return NULL;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
		return NULL;

	e = container_of(flow, struct flow_offload_entry, flow);
	if (unlikely(nf_ct_is_dying(e->ct)))
		return NULL;

	return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
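
/*
 * Illustrative lookup from the fast path (the real users are the IPv4 and
 * IPv6 hooks in nf_flow_table_ip.c, which fill the tuple from the packet
 * headers before calling in here):
 *
 *	struct flow_offload_tuple tuple = {};
 *	struct flow_offload_tuple_rhash *th;
 *
 *	// fill tuple: addresses, ports, l3proto/l4proto, iifidx ...
 *	th = flow_offload_lookup(flow_table, &tuple);
 *	if (!th)
 *		return NF_ACCEPT;	// no offloaded flow: classic path
 *
 * th->tuple.dir says which direction matched, and container_of() on the
 * tuplehash yields the owning flow_offload.
 */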

static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
		      void (*iter)(struct flow_offload *flow, void *data),
		      void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err = 0;

	rhashtable_walk_enter(&flow_table->rhashtable, &hti);
	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			if (PTR_ERR(tuplehash) != -EAGAIN) {
				err = PTR_ERR(tuplehash);
				break;
			}
			continue;
		}
		/* Each flow is hashed twice; visit it only via the
		 * original-direction entry.
		 */
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow, data);
	}
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}

/* Timeouts are absolute jiffies; the signed subtraction keeps the
 * comparison correct across jiffies wraparound.
 */
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
	struct nf_flowtable *flow_table = data;
	struct flow_offload_entry *e;

	e = container_of(flow, struct flow_offload_entry, flow);
	if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
	    (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
		flow_offload_del(flow_table, flow);
}

static void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct nf_flowtable *flow_table;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}

static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

	return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			    u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

int nf_flow_snat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

int nf_flow_dnat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

int nf_flow_table_init(struct nf_flowtable *flowtable)
{
	int err;

	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);

	err = rhashtable_init(&flowtable->rhashtable,
			      &nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	queue_delayed_work(system_power_efficient_wq,
			   &flowtable->gc_work, HZ);

	mutex_lock(&flowtable_lock);
	list_add(&flowtable->list, &flowtables);
	mutex_unlock(&flowtable_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);
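
/*
 * Lifecycle sketch, assuming a hypothetical owner of a struct nf_flowtable
 * 'ft' (in mainline, nf_tables drives this from its flowtable object):
 *
 *	nf_flow_table_init(&ft);	// rhashtable + periodic gc work
 *	...				// datapath adds and looks up flows
 *	nf_flow_table_cleanup(dev);	// e.g. on NETDEV_DOWN: tear down
 *					// flows bound to that device
 *	nf_flow_table_free(&ft);	// cancel gc, flush remaining flows
 *
 * nf_flow_table_free() must run only once no packets can reach the table
 * anymore, since it destroys the rhashtable.
 */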

static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
	struct net_device *dev = data;
	struct flow_offload_entry *e;

	e = container_of(flow, struct flow_offload_entry, flow);

	if (!dev) {
		flow_offload_teardown(flow);
		return;
	}
	if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
		flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
					  struct net_device *dev)
{
	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
	flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net_device *dev)
{
	struct nf_flowtable *flowtable;

	mutex_lock(&flowtable_lock);
	list_for_each_entry(flowtable, &flowtables, list)
		nf_flow_table_iterate_cleanup(flowtable, dev);
	mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);


void nf_flow_table_free(struct nf_flowtable *flow_table)
{
	mutex_lock(&flowtable_lock);
	list_del(&flow_table->list);
	mutex_unlock(&flowtable_lock);
	cancel_delayed_work_sync(&flow_table->gc_work);
	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
	rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");