clk: mediatek: using CLK_MUX_ROUND_CLOSEST for the clock of dpi1_sel
[linux-2.6-microblaze.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57 #include <net/udp.h>
58 #include <net/dst_metadata.h>
59
60 #if IS_ENABLED(CONFIG_IPV6)
61 #include <net/ipv6.h>
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
64 #endif
65
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67 {
68         return hash_32((__force u32)key ^ (__force u32)remote,
69                          IP_TNL_HASH_BITS);
70 }
71
72 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
73                                 __be16 flags, __be32 key)
74 {
75         if (p->i_flags & TUNNEL_KEY) {
76                 if (flags & TUNNEL_KEY)
77                         return key == p->i_key;
78                 else
79                         /* key expected, none present */
80                         return false;
81         } else
82                 return !(flags & TUNNEL_KEY);
83 }
84
85 /* Fallback tunnel: no source, no destination, no key, no options
86
87    Tunnel hash table:
88    We require exact key match i.e. if a key is present in packet
89    it will match only tunnel with the same key; if it is not present,
90    it will match only keyless tunnel.
91
92    All keysless packets, if not matched configured keyless tunnels
93    will match fallback tunnel.
94    Given src, dst and key, find appropriate for input tunnel.
95 */
96 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
97                                    int link, __be16 flags,
98                                    __be32 remote, __be32 local,
99                                    __be32 key)
100 {
101         unsigned int hash;
102         struct ip_tunnel *t, *cand = NULL;
103         struct hlist_head *head;
104
105         hash = ip_tunnel_hash(key, remote);
106         head = &itn->tunnels[hash];
107
108         hlist_for_each_entry_rcu(t, head, hash_node) {
109                 if (local != t->parms.iph.saddr ||
110                     remote != t->parms.iph.daddr ||
111                     !(t->dev->flags & IFF_UP))
112                         continue;
113
114                 if (!ip_tunnel_key_match(&t->parms, flags, key))
115                         continue;
116
117                 if (t->parms.link == link)
118                         return t;
119                 else
120                         cand = t;
121         }
122
123         hlist_for_each_entry_rcu(t, head, hash_node) {
124                 if (remote != t->parms.iph.daddr ||
125                     t->parms.iph.saddr != 0 ||
126                     !(t->dev->flags & IFF_UP))
127                         continue;
128
129                 if (!ip_tunnel_key_match(&t->parms, flags, key))
130                         continue;
131
132                 if (t->parms.link == link)
133                         return t;
134                 else if (!cand)
135                         cand = t;
136         }
137
138         hash = ip_tunnel_hash(key, 0);
139         head = &itn->tunnels[hash];
140
141         hlist_for_each_entry_rcu(t, head, hash_node) {
142                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
143                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
144                         continue;
145
146                 if (!(t->dev->flags & IFF_UP))
147                         continue;
148
149                 if (!ip_tunnel_key_match(&t->parms, flags, key))
150                         continue;
151
152                 if (t->parms.link == link)
153                         return t;
154                 else if (!cand)
155                         cand = t;
156         }
157
158         if (flags & TUNNEL_NO_KEY)
159                 goto skip_key_lookup;
160
161         hlist_for_each_entry_rcu(t, head, hash_node) {
162                 if (t->parms.i_key != key ||
163                     t->parms.iph.saddr != 0 ||
164                     t->parms.iph.daddr != 0 ||
165                     !(t->dev->flags & IFF_UP))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else if (!cand)
171                         cand = t;
172         }
173
174 skip_key_lookup:
175         if (cand)
176                 return cand;
177
178         t = rcu_dereference(itn->collect_md_tun);
179         if (t && t->dev->flags & IFF_UP)
180                 return t;
181
182         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
183                 return netdev_priv(itn->fb_tunnel_dev);
184
185         return NULL;
186 }
187 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
188
189 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
190                                     struct ip_tunnel_parm *parms)
191 {
192         unsigned int h;
193         __be32 remote;
194         __be32 i_key = parms->i_key;
195
196         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
197                 remote = parms->iph.daddr;
198         else
199                 remote = 0;
200
201         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
202                 i_key = 0;
203
204         h = ip_tunnel_hash(i_key, remote);
205         return &itn->tunnels[h];
206 }
207
208 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
209 {
210         struct hlist_head *head = ip_bucket(itn, &t->parms);
211
212         if (t->collect_md)
213                 rcu_assign_pointer(itn->collect_md_tun, t);
214         hlist_add_head_rcu(&t->hash_node, head);
215 }
216
217 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
218 {
219         if (t->collect_md)
220                 rcu_assign_pointer(itn->collect_md_tun, NULL);
221         hlist_del_init_rcu(&t->hash_node);
222 }
223
224 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
225                                         struct ip_tunnel_parm *parms,
226                                         int type)
227 {
228         __be32 remote = parms->iph.daddr;
229         __be32 local = parms->iph.saddr;
230         __be32 key = parms->i_key;
231         __be16 flags = parms->i_flags;
232         int link = parms->link;
233         struct ip_tunnel *t = NULL;
234         struct hlist_head *head = ip_bucket(itn, parms);
235
236         hlist_for_each_entry_rcu(t, head, hash_node) {
237                 if (local == t->parms.iph.saddr &&
238                     remote == t->parms.iph.daddr &&
239                     link == t->parms.link &&
240                     type == t->dev->type &&
241                     ip_tunnel_key_match(&t->parms, flags, key))
242                         break;
243         }
244         return t;
245 }
246
247 static struct net_device *__ip_tunnel_create(struct net *net,
248                                              const struct rtnl_link_ops *ops,
249                                              struct ip_tunnel_parm *parms)
250 {
251         int err;
252         struct ip_tunnel *tunnel;
253         struct net_device *dev;
254         char name[IFNAMSIZ];
255
256         err = -E2BIG;
257         if (parms->name[0]) {
258                 if (!dev_valid_name(parms->name))
259                         goto failed;
260                 strlcpy(name, parms->name, IFNAMSIZ);
261         } else {
262                 if (strlen(ops->kind) > (IFNAMSIZ - 3))
263                         goto failed;
264                 strcpy(name, ops->kind);
265                 strcat(name, "%d");
266         }
267
268         ASSERT_RTNL();
269         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
270         if (!dev) {
271                 err = -ENOMEM;
272                 goto failed;
273         }
274         dev_net_set(dev, net);
275
276         dev->rtnl_link_ops = ops;
277
278         tunnel = netdev_priv(dev);
279         tunnel->parms = *parms;
280         tunnel->net = net;
281
282         err = register_netdevice(dev);
283         if (err)
284                 goto failed_free;
285
286         return dev;
287
288 failed_free:
289         free_netdev(dev);
290 failed:
291         return ERR_PTR(err);
292 }
293
294 static int ip_tunnel_bind_dev(struct net_device *dev)
295 {
296         struct net_device *tdev = NULL;
297         struct ip_tunnel *tunnel = netdev_priv(dev);
298         const struct iphdr *iph;
299         int hlen = LL_MAX_HEADER;
300         int mtu = ETH_DATA_LEN;
301         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
302
303         iph = &tunnel->parms.iph;
304
305         /* Guess output device to choose reasonable mtu and needed_headroom */
306         if (iph->daddr) {
307                 struct flowi4 fl4;
308                 struct rtable *rt;
309
310                 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
311                                     iph->saddr, tunnel->parms.o_key,
312                                     RT_TOS(iph->tos), tunnel->parms.link,
313                                     tunnel->fwmark);
314                 rt = ip_route_output_key(tunnel->net, &fl4);
315
316                 if (!IS_ERR(rt)) {
317                         tdev = rt->dst.dev;
318                         ip_rt_put(rt);
319                 }
320                 if (dev->type != ARPHRD_ETHER)
321                         dev->flags |= IFF_POINTOPOINT;
322
323                 dst_cache_reset(&tunnel->dst_cache);
324         }
325
326         if (!tdev && tunnel->parms.link)
327                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
328
329         if (tdev) {
330                 hlen = tdev->hard_header_len + tdev->needed_headroom;
331                 mtu = min(tdev->mtu, IP_MAX_MTU);
332         }
333
334         dev->needed_headroom = t_hlen + hlen;
335         mtu -= (dev->hard_header_len + t_hlen);
336
337         if (mtu < IPV4_MIN_MTU)
338                 mtu = IPV4_MIN_MTU;
339
340         return mtu;
341 }
342
343 static struct ip_tunnel *ip_tunnel_create(struct net *net,
344                                           struct ip_tunnel_net *itn,
345                                           struct ip_tunnel_parm *parms)
346 {
347         struct ip_tunnel *nt;
348         struct net_device *dev;
349         int t_hlen;
350         int mtu;
351         int err;
352
353         dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
354         if (IS_ERR(dev))
355                 return ERR_CAST(dev);
356
357         mtu = ip_tunnel_bind_dev(dev);
358         err = dev_set_mtu(dev, mtu);
359         if (err)
360                 goto err_dev_set_mtu;
361
362         nt = netdev_priv(dev);
363         t_hlen = nt->hlen + sizeof(struct iphdr);
364         dev->min_mtu = ETH_MIN_MTU;
365         dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
366         ip_tunnel_add(itn, nt);
367         return nt;
368
369 err_dev_set_mtu:
370         unregister_netdevice(dev);
371         return ERR_PTR(err);
372 }
373
374 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
375                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
376                   bool log_ecn_error)
377 {
378         struct pcpu_sw_netstats *tstats;
379         const struct iphdr *iph = ip_hdr(skb);
380         int err;
381
382 #ifdef CONFIG_NET_IPGRE_BROADCAST
383         if (ipv4_is_multicast(iph->daddr)) {
384                 tunnel->dev->stats.multicast++;
385                 skb->pkt_type = PACKET_BROADCAST;
386         }
387 #endif
388
389         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
390              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
391                 tunnel->dev->stats.rx_crc_errors++;
392                 tunnel->dev->stats.rx_errors++;
393                 goto drop;
394         }
395
396         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
397                 if (!(tpi->flags&TUNNEL_SEQ) ||
398                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
399                         tunnel->dev->stats.rx_fifo_errors++;
400                         tunnel->dev->stats.rx_errors++;
401                         goto drop;
402                 }
403                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
404         }
405
406         skb_reset_network_header(skb);
407
408         err = IP_ECN_decapsulate(iph, skb);
409         if (unlikely(err)) {
410                 if (log_ecn_error)
411                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
412                                         &iph->saddr, iph->tos);
413                 if (err > 1) {
414                         ++tunnel->dev->stats.rx_frame_errors;
415                         ++tunnel->dev->stats.rx_errors;
416                         goto drop;
417                 }
418         }
419
420         tstats = this_cpu_ptr(tunnel->dev->tstats);
421         u64_stats_update_begin(&tstats->syncp);
422         tstats->rx_packets++;
423         tstats->rx_bytes += skb->len;
424         u64_stats_update_end(&tstats->syncp);
425
426         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
427
428         if (tunnel->dev->type == ARPHRD_ETHER) {
429                 skb->protocol = eth_type_trans(skb, tunnel->dev);
430                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
431         } else {
432                 skb->dev = tunnel->dev;
433         }
434
435         if (tun_dst)
436                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
437
438         gro_cells_receive(&tunnel->gro_cells, skb);
439         return 0;
440
441 drop:
442         if (tun_dst)
443                 dst_release((struct dst_entry *)tun_dst);
444         kfree_skb(skb);
445         return 0;
446 }
447 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
448
449 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
450                             unsigned int num)
451 {
452         if (num >= MAX_IPTUN_ENCAP_OPS)
453                 return -ERANGE;
454
455         return !cmpxchg((const struct ip_tunnel_encap_ops **)
456                         &iptun_encaps[num],
457                         NULL, ops) ? 0 : -1;
458 }
459 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
460
461 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
462                             unsigned int num)
463 {
464         int ret;
465
466         if (num >= MAX_IPTUN_ENCAP_OPS)
467                 return -ERANGE;
468
469         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
470                        &iptun_encaps[num],
471                        ops, NULL) == ops) ? 0 : -1;
472
473         synchronize_net();
474
475         return ret;
476 }
477 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
478
479 int ip_tunnel_encap_setup(struct ip_tunnel *t,
480                           struct ip_tunnel_encap *ipencap)
481 {
482         int hlen;
483
484         memset(&t->encap, 0, sizeof(t->encap));
485
486         hlen = ip_encap_hlen(ipencap);
487         if (hlen < 0)
488                 return hlen;
489
490         t->encap.type = ipencap->type;
491         t->encap.sport = ipencap->sport;
492         t->encap.dport = ipencap->dport;
493         t->encap.flags = ipencap->flags;
494
495         t->encap_hlen = hlen;
496         t->hlen = t->encap_hlen + t->tun_hlen;
497
498         return 0;
499 }
500 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
501
502 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
503                             struct rtable *rt, __be16 df,
504                             const struct iphdr *inner_iph)
505 {
506         struct ip_tunnel *tunnel = netdev_priv(dev);
507         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
508         int mtu;
509
510         if (df)
511                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
512                                         - sizeof(struct iphdr) - tunnel->hlen;
513         else
514                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
515
516         skb_dst_update_pmtu(skb, mtu);
517
518         if (skb->protocol == htons(ETH_P_IP)) {
519                 if (!skb_is_gso(skb) &&
520                     (inner_iph->frag_off & htons(IP_DF)) &&
521                     mtu < pkt_size) {
522                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
523                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
524                         return -E2BIG;
525                 }
526         }
527 #if IS_ENABLED(CONFIG_IPV6)
528         else if (skb->protocol == htons(ETH_P_IPV6)) {
529                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
530
531                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
532                            mtu >= IPV6_MIN_MTU) {
533                         if ((tunnel->parms.iph.daddr &&
534                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
535                             rt6->rt6i_dst.plen == 128) {
536                                 rt6->rt6i_flags |= RTF_MODIFIED;
537                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
538                         }
539                 }
540
541                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
542                                         mtu < pkt_size) {
543                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
544                         return -E2BIG;
545                 }
546         }
547 #endif
548         return 0;
549 }
550
551 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
552 {
553         struct ip_tunnel *tunnel = netdev_priv(dev);
554         u32 headroom = sizeof(struct iphdr);
555         struct ip_tunnel_info *tun_info;
556         const struct ip_tunnel_key *key;
557         const struct iphdr *inner_iph;
558         struct rtable *rt;
559         struct flowi4 fl4;
560         __be16 df = 0;
561         u8 tos, ttl;
562
563         tun_info = skb_tunnel_info(skb);
564         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
565                      ip_tunnel_info_af(tun_info) != AF_INET))
566                 goto tx_error;
567         key = &tun_info->key;
568         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
569         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
570         tos = key->tos;
571         if (tos == 1) {
572                 if (skb->protocol == htons(ETH_P_IP))
573                         tos = inner_iph->tos;
574                 else if (skb->protocol == htons(ETH_P_IPV6))
575                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
576         }
577         ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
578                             RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
579         if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
580                 goto tx_error;
581         rt = ip_route_output_key(tunnel->net, &fl4);
582         if (IS_ERR(rt)) {
583                 dev->stats.tx_carrier_errors++;
584                 goto tx_error;
585         }
586         if (rt->dst.dev == dev) {
587                 ip_rt_put(rt);
588                 dev->stats.collisions++;
589                 goto tx_error;
590         }
591         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
592         ttl = key->ttl;
593         if (ttl == 0) {
594                 if (skb->protocol == htons(ETH_P_IP))
595                         ttl = inner_iph->ttl;
596                 else if (skb->protocol == htons(ETH_P_IPV6))
597                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
598                 else
599                         ttl = ip4_dst_hoplimit(&rt->dst);
600         }
601         if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
602                 df = htons(IP_DF);
603         else if (skb->protocol == htons(ETH_P_IP))
604                 df = inner_iph->frag_off & htons(IP_DF);
605         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
606         if (headroom > dev->needed_headroom)
607                 dev->needed_headroom = headroom;
608
609         if (skb_cow_head(skb, dev->needed_headroom)) {
610                 ip_rt_put(rt);
611                 goto tx_dropped;
612         }
613         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
614                       df, !net_eq(tunnel->net, dev_net(dev)));
615         return;
616 tx_error:
617         dev->stats.tx_errors++;
618         goto kfree;
619 tx_dropped:
620         dev->stats.tx_dropped++;
621 kfree:
622         kfree_skb(skb);
623 }
624 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
625
626 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
627                     const struct iphdr *tnl_params, u8 protocol)
628 {
629         struct ip_tunnel *tunnel = netdev_priv(dev);
630         const struct iphdr *inner_iph;
631         struct flowi4 fl4;
632         u8     tos, ttl;
633         __be16 df;
634         struct rtable *rt;              /* Route to the other host */
635         unsigned int max_headroom;      /* The extra header space needed */
636         __be32 dst;
637         bool connected;
638
639         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
640         connected = (tunnel->parms.iph.daddr != 0);
641
642         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
643
644         dst = tnl_params->daddr;
645         if (dst == 0) {
646                 /* NBMA tunnel */
647
648                 if (!skb_dst(skb)) {
649                         dev->stats.tx_fifo_errors++;
650                         goto tx_error;
651                 }
652
653                 if (skb->protocol == htons(ETH_P_IP)) {
654                         rt = skb_rtable(skb);
655                         dst = rt_nexthop(rt, inner_iph->daddr);
656                 }
657 #if IS_ENABLED(CONFIG_IPV6)
658                 else if (skb->protocol == htons(ETH_P_IPV6)) {
659                         const struct in6_addr *addr6;
660                         struct neighbour *neigh;
661                         bool do_tx_error_icmp;
662                         int addr_type;
663
664                         neigh = dst_neigh_lookup(skb_dst(skb),
665                                                  &ipv6_hdr(skb)->daddr);
666                         if (!neigh)
667                                 goto tx_error;
668
669                         addr6 = (const struct in6_addr *)&neigh->primary_key;
670                         addr_type = ipv6_addr_type(addr6);
671
672                         if (addr_type == IPV6_ADDR_ANY) {
673                                 addr6 = &ipv6_hdr(skb)->daddr;
674                                 addr_type = ipv6_addr_type(addr6);
675                         }
676
677                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
678                                 do_tx_error_icmp = true;
679                         else {
680                                 do_tx_error_icmp = false;
681                                 dst = addr6->s6_addr32[3];
682                         }
683                         neigh_release(neigh);
684                         if (do_tx_error_icmp)
685                                 goto tx_error_icmp;
686                 }
687 #endif
688                 else
689                         goto tx_error;
690
691                 connected = false;
692         }
693
694         tos = tnl_params->tos;
695         if (tos & 0x1) {
696                 tos &= ~0x1;
697                 if (skb->protocol == htons(ETH_P_IP)) {
698                         tos = inner_iph->tos;
699                         connected = false;
700                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
701                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
702                         connected = false;
703                 }
704         }
705
706         ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
707                             tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
708                             tunnel->fwmark);
709
710         if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
711                 goto tx_error;
712
713         rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
714                          NULL;
715
716         if (!rt) {
717                 rt = ip_route_output_key(tunnel->net, &fl4);
718
719                 if (IS_ERR(rt)) {
720                         dev->stats.tx_carrier_errors++;
721                         goto tx_error;
722                 }
723                 if (connected)
724                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
725                                           fl4.saddr);
726         }
727
728         if (rt->dst.dev == dev) {
729                 ip_rt_put(rt);
730                 dev->stats.collisions++;
731                 goto tx_error;
732         }
733
734         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
735                 ip_rt_put(rt);
736                 goto tx_error;
737         }
738
739         if (tunnel->err_count > 0) {
740                 if (time_before(jiffies,
741                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
742                         tunnel->err_count--;
743
744                         dst_link_failure(skb);
745                 } else
746                         tunnel->err_count = 0;
747         }
748
749         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
750         ttl = tnl_params->ttl;
751         if (ttl == 0) {
752                 if (skb->protocol == htons(ETH_P_IP))
753                         ttl = inner_iph->ttl;
754 #if IS_ENABLED(CONFIG_IPV6)
755                 else if (skb->protocol == htons(ETH_P_IPV6))
756                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
757 #endif
758                 else
759                         ttl = ip4_dst_hoplimit(&rt->dst);
760         }
761
762         df = tnl_params->frag_off;
763         if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
764                 df |= (inner_iph->frag_off&htons(IP_DF));
765
766         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
767                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
768         if (max_headroom > dev->needed_headroom)
769                 dev->needed_headroom = max_headroom;
770
771         if (skb_cow_head(skb, dev->needed_headroom)) {
772                 ip_rt_put(rt);
773                 dev->stats.tx_dropped++;
774                 kfree_skb(skb);
775                 return;
776         }
777
778         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
779                       df, !net_eq(tunnel->net, dev_net(dev)));
780         return;
781
782 #if IS_ENABLED(CONFIG_IPV6)
783 tx_error_icmp:
784         dst_link_failure(skb);
785 #endif
786 tx_error:
787         dev->stats.tx_errors++;
788         kfree_skb(skb);
789 }
790 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
791
792 static void ip_tunnel_update(struct ip_tunnel_net *itn,
793                              struct ip_tunnel *t,
794                              struct net_device *dev,
795                              struct ip_tunnel_parm *p,
796                              bool set_mtu,
797                              __u32 fwmark)
798 {
799         ip_tunnel_del(itn, t);
800         t->parms.iph.saddr = p->iph.saddr;
801         t->parms.iph.daddr = p->iph.daddr;
802         t->parms.i_key = p->i_key;
803         t->parms.o_key = p->o_key;
804         if (dev->type != ARPHRD_ETHER) {
805                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
806                 memcpy(dev->broadcast, &p->iph.daddr, 4);
807         }
808         ip_tunnel_add(itn, t);
809
810         t->parms.iph.ttl = p->iph.ttl;
811         t->parms.iph.tos = p->iph.tos;
812         t->parms.iph.frag_off = p->iph.frag_off;
813
814         if (t->parms.link != p->link || t->fwmark != fwmark) {
815                 int mtu;
816
817                 t->parms.link = p->link;
818                 t->fwmark = fwmark;
819                 mtu = ip_tunnel_bind_dev(dev);
820                 if (set_mtu)
821                         dev->mtu = mtu;
822         }
823         dst_cache_reset(&t->dst_cache);
824         netdev_state_change(dev);
825 }
826
827 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
828 {
829         int err = 0;
830         struct ip_tunnel *t = netdev_priv(dev);
831         struct net *net = t->net;
832         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
833
834         switch (cmd) {
835         case SIOCGETTUNNEL:
836                 if (dev == itn->fb_tunnel_dev) {
837                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
838                         if (!t)
839                                 t = netdev_priv(dev);
840                 }
841                 memcpy(p, &t->parms, sizeof(*p));
842                 break;
843
844         case SIOCADDTUNNEL:
845         case SIOCCHGTUNNEL:
846                 err = -EPERM;
847                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
848                         goto done;
849                 if (p->iph.ttl)
850                         p->iph.frag_off |= htons(IP_DF);
851                 if (!(p->i_flags & VTI_ISVTI)) {
852                         if (!(p->i_flags & TUNNEL_KEY))
853                                 p->i_key = 0;
854                         if (!(p->o_flags & TUNNEL_KEY))
855                                 p->o_key = 0;
856                 }
857
858                 t = ip_tunnel_find(itn, p, itn->type);
859
860                 if (cmd == SIOCADDTUNNEL) {
861                         if (!t) {
862                                 t = ip_tunnel_create(net, itn, p);
863                                 err = PTR_ERR_OR_ZERO(t);
864                                 break;
865                         }
866
867                         err = -EEXIST;
868                         break;
869                 }
870                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
871                         if (t) {
872                                 if (t->dev != dev) {
873                                         err = -EEXIST;
874                                         break;
875                                 }
876                         } else {
877                                 unsigned int nflags = 0;
878
879                                 if (ipv4_is_multicast(p->iph.daddr))
880                                         nflags = IFF_BROADCAST;
881                                 else if (p->iph.daddr)
882                                         nflags = IFF_POINTOPOINT;
883
884                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
885                                         err = -EINVAL;
886                                         break;
887                                 }
888
889                                 t = netdev_priv(dev);
890                         }
891                 }
892
893                 if (t) {
894                         err = 0;
895                         ip_tunnel_update(itn, t, dev, p, true, 0);
896                 } else {
897                         err = -ENOENT;
898                 }
899                 break;
900
901         case SIOCDELTUNNEL:
902                 err = -EPERM;
903                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
904                         goto done;
905
906                 if (dev == itn->fb_tunnel_dev) {
907                         err = -ENOENT;
908                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
909                         if (!t)
910                                 goto done;
911                         err = -EPERM;
912                         if (t == netdev_priv(itn->fb_tunnel_dev))
913                                 goto done;
914                         dev = t->dev;
915                 }
916                 unregister_netdevice(dev);
917                 err = 0;
918                 break;
919
920         default:
921                 err = -EINVAL;
922         }
923
924 done:
925         return err;
926 }
927 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
928
929 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
930 {
931         struct ip_tunnel *tunnel = netdev_priv(dev);
932         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
933         int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
934
935         if (new_mtu < ETH_MIN_MTU)
936                 return -EINVAL;
937
938         if (new_mtu > max_mtu) {
939                 if (strict)
940                         return -EINVAL;
941
942                 new_mtu = max_mtu;
943         }
944
945         dev->mtu = new_mtu;
946         return 0;
947 }
948 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
949
950 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
951 {
952         return __ip_tunnel_change_mtu(dev, new_mtu, true);
953 }
954 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
955
956 static void ip_tunnel_dev_free(struct net_device *dev)
957 {
958         struct ip_tunnel *tunnel = netdev_priv(dev);
959
960         gro_cells_destroy(&tunnel->gro_cells);
961         dst_cache_destroy(&tunnel->dst_cache);
962         free_percpu(dev->tstats);
963 }
964
965 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
966 {
967         struct ip_tunnel *tunnel = netdev_priv(dev);
968         struct ip_tunnel_net *itn;
969
970         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
971
972         if (itn->fb_tunnel_dev != dev) {
973                 ip_tunnel_del(itn, netdev_priv(dev));
974                 unregister_netdevice_queue(dev, head);
975         }
976 }
977 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
978
979 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
980 {
981         struct ip_tunnel *tunnel = netdev_priv(dev);
982
983         return tunnel->net;
984 }
985 EXPORT_SYMBOL(ip_tunnel_get_link_net);
986
987 int ip_tunnel_get_iflink(const struct net_device *dev)
988 {
989         struct ip_tunnel *tunnel = netdev_priv(dev);
990
991         return tunnel->parms.link;
992 }
993 EXPORT_SYMBOL(ip_tunnel_get_iflink);
994
995 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
996                                   struct rtnl_link_ops *ops, char *devname)
997 {
998         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
999         struct ip_tunnel_parm parms;
1000         unsigned int i;
1001
1002         itn->rtnl_link_ops = ops;
1003         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1004                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1005
1006         if (!ops || !net_has_fallback_tunnels(net)) {
1007                 struct ip_tunnel_net *it_init_net;
1008
1009                 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1010                 itn->type = it_init_net->type;
1011                 itn->fb_tunnel_dev = NULL;
1012                 return 0;
1013         }
1014
1015         memset(&parms, 0, sizeof(parms));
1016         if (devname)
1017                 strlcpy(parms.name, devname, IFNAMSIZ);
1018
1019         rtnl_lock();
1020         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1021         /* FB netdevice is special: we have one, and only one per netns.
1022          * Allowing to move it to another netns is clearly unsafe.
1023          */
1024         if (!IS_ERR(itn->fb_tunnel_dev)) {
1025                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1026                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1027                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1028                 itn->type = itn->fb_tunnel_dev->type;
1029         }
1030         rtnl_unlock();
1031
1032         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1033 }
1034 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1035
1036 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1037                               struct list_head *head,
1038                               struct rtnl_link_ops *ops)
1039 {
1040         struct net_device *dev, *aux;
1041         int h;
1042
1043         for_each_netdev_safe(net, dev, aux)
1044                 if (dev->rtnl_link_ops == ops)
1045                         unregister_netdevice_queue(dev, head);
1046
1047         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1048                 struct ip_tunnel *t;
1049                 struct hlist_node *n;
1050                 struct hlist_head *thead = &itn->tunnels[h];
1051
1052                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1053                         /* If dev is in the same netns, it has already
1054                          * been added to the list by the previous loop.
1055                          */
1056                         if (!net_eq(dev_net(t->dev), net))
1057                                 unregister_netdevice_queue(t->dev, head);
1058         }
1059 }
1060
1061 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1062                            struct rtnl_link_ops *ops)
1063 {
1064         struct ip_tunnel_net *itn;
1065         struct net *net;
1066         LIST_HEAD(list);
1067
1068         rtnl_lock();
1069         list_for_each_entry(net, net_list, exit_list) {
1070                 itn = net_generic(net, id);
1071                 ip_tunnel_destroy(net, itn, &list, ops);
1072         }
1073         unregister_netdevice_many(&list);
1074         rtnl_unlock();
1075 }
1076 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1077
1078 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1079                       struct ip_tunnel_parm *p, __u32 fwmark)
1080 {
1081         struct ip_tunnel *nt;
1082         struct net *net = dev_net(dev);
1083         struct ip_tunnel_net *itn;
1084         int mtu;
1085         int err;
1086
1087         nt = netdev_priv(dev);
1088         itn = net_generic(net, nt->ip_tnl_net_id);
1089
1090         if (nt->collect_md) {
1091                 if (rtnl_dereference(itn->collect_md_tun))
1092                         return -EEXIST;
1093         } else {
1094                 if (ip_tunnel_find(itn, p, dev->type))
1095                         return -EEXIST;
1096         }
1097
1098         nt->net = net;
1099         nt->parms = *p;
1100         nt->fwmark = fwmark;
1101         err = register_netdevice(dev);
1102         if (err)
1103                 goto err_register_netdevice;
1104
1105         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1106                 eth_hw_addr_random(dev);
1107
1108         mtu = ip_tunnel_bind_dev(dev);
1109         if (tb[IFLA_MTU]) {
1110                 unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
1111
1112                 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
1113                             (unsigned int)(max - sizeof(struct iphdr)));
1114         }
1115
1116         err = dev_set_mtu(dev, mtu);
1117         if (err)
1118                 goto err_dev_set_mtu;
1119
1120         ip_tunnel_add(itn, nt);
1121         return 0;
1122
1123 err_dev_set_mtu:
1124         unregister_netdevice(dev);
1125 err_register_netdevice:
1126         return err;
1127 }
1128 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1129
1130 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1131                          struct ip_tunnel_parm *p, __u32 fwmark)
1132 {
1133         struct ip_tunnel *t;
1134         struct ip_tunnel *tunnel = netdev_priv(dev);
1135         struct net *net = tunnel->net;
1136         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1137
1138         if (dev == itn->fb_tunnel_dev)
1139                 return -EINVAL;
1140
1141         t = ip_tunnel_find(itn, p, dev->type);
1142
1143         if (t) {
1144                 if (t->dev != dev)
1145                         return -EEXIST;
1146         } else {
1147                 t = tunnel;
1148
1149                 if (dev->type != ARPHRD_ETHER) {
1150                         unsigned int nflags = 0;
1151
1152                         if (ipv4_is_multicast(p->iph.daddr))
1153                                 nflags = IFF_BROADCAST;
1154                         else if (p->iph.daddr)
1155                                 nflags = IFF_POINTOPOINT;
1156
1157                         if ((dev->flags ^ nflags) &
1158                             (IFF_POINTOPOINT | IFF_BROADCAST))
1159                                 return -EINVAL;
1160                 }
1161         }
1162
1163         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1164         return 0;
1165 }
1166 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1167
1168 int ip_tunnel_init(struct net_device *dev)
1169 {
1170         struct ip_tunnel *tunnel = netdev_priv(dev);
1171         struct iphdr *iph = &tunnel->parms.iph;
1172         int err;
1173
1174         dev->needs_free_netdev = true;
1175         dev->priv_destructor = ip_tunnel_dev_free;
1176         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1177         if (!dev->tstats)
1178                 return -ENOMEM;
1179
1180         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1181         if (err) {
1182                 free_percpu(dev->tstats);
1183                 return err;
1184         }
1185
1186         err = gro_cells_init(&tunnel->gro_cells, dev);
1187         if (err) {
1188                 dst_cache_destroy(&tunnel->dst_cache);
1189                 free_percpu(dev->tstats);
1190                 return err;
1191         }
1192
1193         tunnel->dev = dev;
1194         tunnel->net = dev_net(dev);
1195         strcpy(tunnel->parms.name, dev->name);
1196         iph->version            = 4;
1197         iph->ihl                = 5;
1198
1199         if (tunnel->collect_md) {
1200                 dev->features |= NETIF_F_NETNS_LOCAL;
1201                 netif_keep_dst(dev);
1202         }
1203         return 0;
1204 }
1205 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1206
1207 void ip_tunnel_uninit(struct net_device *dev)
1208 {
1209         struct ip_tunnel *tunnel = netdev_priv(dev);
1210         struct net *net = tunnel->net;
1211         struct ip_tunnel_net *itn;
1212
1213         itn = net_generic(net, tunnel->ip_tnl_net_id);
1214         /* fb_tunnel_dev will be unregisted in net-exit call. */
1215         if (itn->fb_tunnel_dev != dev)
1216                 ip_tunnel_del(itn, netdev_priv(dev));
1217
1218         dst_cache_reset(&tunnel->dst_cache);
1219 }
1220 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1221
1222 /* Do least required initialization, rest of init is done in tunnel_init call */
1223 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1224 {
1225         struct ip_tunnel *tunnel = netdev_priv(dev);
1226         tunnel->ip_tnl_net_id = net_id;
1227 }
1228 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1229
1230 MODULE_LICENSE("GPL");