/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

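/* Fetch this CPU's cached route for the tunnel, taking a reference and
 * re-validating it against the routing layer; a stale entry is dropped
 * and the cache slot is reset.
 */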
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = this_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a keyed tunnel with the same key; if no key is
   present, it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}

	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

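/* Common receive path for IP tunnels: validate checksum and sequence
 * flags against the tunnel configuration, perform ECN decapsulation,
 * update per-CPU stats and hand the packet to the tunnel device's GRO
 * cell.
 */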
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

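/* Check the inner packet against the path MTU of the tunnel route and,
 * if it does not fit, update the cached route MTU and signal the sender
 * with ICMP "fragmentation needed" (or ICMPv6 "packet too big").
 */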
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

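/* Common transmit path: resolve the tunnel destination (using the
 * per-CPU route cache when the tunnel has a fixed peer), enforce path
 * MTU, derive the outer TOS/TTL/DF fields and hand the packet to
 * iptunnel_xmit().
 */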
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

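/* Handle the tunnel configuration ioctls (get/add/change/delete) shared
 * by the IP tunnel drivers.  Changing or deleting a tunnel requires
 * CAP_NET_ADMIN in the tunnel's user namespace.
 */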
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL)) {
			t = ip_tunnel_create(net, itn, p);
			err = PTR_ERR_OR_ZERO(t);
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

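/* Validate a user-requested MTU change against the encapsulation
 * overhead of this tunnel.
 */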
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

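/* Per-netns initialisation: set up the tunnel hash table and, if an ops
 * is given, create the fallback device (named after @devname) that
 * catches otherwise unmatched packets.
 */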
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

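/* rtnl link creation: refuse duplicate tunnels, register the device,
 * then bind it to an underlying device to pick MTU and headroom.
 */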
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization here; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");