2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
50 #include <net/dst_metadata.h>
51 #include <net/erspan.h>
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is a good
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: xmit_recursion breaks dead loops. This is a percpu
70 counter, since when we enter the first ndo_xmit(), cpu migration is
71 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would be even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, ttl is not a solution at all.
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 rapidly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
/* Module parameter: when true, log received packets whose ECN bits are
 * corrupted (writable at runtime, mode 0644).
 */
110 static bool log_ecn_error = true;
111 module_param(log_ecn_error, bool, 0644);
112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* Forward declarations for symbols defined later in this file. */
114 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
115 static int ipgre_tunnel_init(struct net_device *dev);
116 static void erspan_build_header(struct sk_buff *skb,
117 __be32 id, u32 index, bool truncate);
/* Per-network-namespace ids for the three tunnel flavours: plain gre,
 * gretap (Ethernet over GRE), and erspan.
 */
119 static unsigned int ipgre_net_id __read_mostly;
120 static unsigned int gre_tap_net_id __read_mostly;
121 static unsigned int erspan_net_id __read_mostly;
/* ipgre_err() - act on an ICMP error received for a transmitted GRE packet.
 * @skb: the ICMP error packet (points at the ICMP header region)
 * @info: ICMP auxiliary info word
 * @tpi: GRE header of the embedded (original) packet, already parsed
 *
 * Looks up the tunnel the original packet belonged to and records the
 * error on it (t->err_time below), rate-limited by IPTUNNEL_ERR_TIMEO.
 *
 * NOTE(review): the embedded numbering jumps (145->150, 157->160, ...);
 * this extract is missing the switch statement, braces, and early-return
 * lines of the original function.
 */
123 static void ipgre_err(struct sk_buff *skb, u32 info,
124 const struct tnl_ptk_info *tpi)
127 /* All the routers (except for Linux) return only
128 8 bytes of packet payload. It means, that precise relaying of
129 ICMP in the real Internet is absolutely infeasible.
131 Moreover, Cisco "wise men" put GRE key to the third word
132 in GRE header. It makes impossible maintaining even soft
133 state for keyed GRE tunnels with enabled checksum. Tell
136 Well, I wonder, rfc1812 was written by Cisco employee,
137 what the hell these idiots break standards established
140 struct net *net = dev_net(skb->dev);
141 struct ip_tunnel_net *itn;
142 const struct iphdr *iph;
143 const int type = icmp_hdr(skb)->type;
144 const int code = icmp_hdr(skb)->code;
145 unsigned int data_len = 0;
/* Classify the ICMP type; only a subset is acted upon. */
150 case ICMP_PARAMETERPROB:
153 case ICMP_DEST_UNREACH:
156 case ICMP_PORT_UNREACH:
157 /* Impossible event. */
160 /* All others are translated to HOST_UNREACH.
161 rfc2003 contains "deep thoughts" about NET_UNREACH,
162 I believe they are just ether pollution. --ANK
168 case ICMP_TIME_EXCEEDED:
169 if (code != ICMP_EXC_TTL)
/* Length of the extended ICMP payload, per RFC 4884 (units of 32 bits). */
171 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
/* ETH_P_TEB means Ethernet-in-GRE, so look in the gretap namespace. */
178 if (tpi->proto == htons(ETH_P_TEB))
179 itn = net_generic(net, gre_tap_net_id);
181 itn = net_generic(net, ipgre_net_id);
/* The offending packet's outer IP header follows the ICMP header;
 * note daddr/saddr are swapped for the lookup because the ICMP error
 * quotes the packet we sent.
 */
183 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
184 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
185 iph->daddr, iph->saddr, tpi->key);
190 #if IS_ENABLED(CONFIG_IPV6)
/* Relay the error to the inner IPv6 packet as ICMPv6, if applicable. */
191 if (tpi->proto == htons(ETH_P_IPV6) &&
192 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
/* Ignore errors for unconfigured or multicast destinations. */
197 if (t->parms.iph.daddr == 0 ||
198 ipv4_is_multicast(t->parms.iph.daddr))
201 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
/* Rate-limit error bookkeeping to one update per IPTUNNEL_ERR_TIMEO. */
204 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
208 t->err_time = jiffies;
/* gre_err() - entry point for ICMP errors on IPPROTO_GRE packets.
 * Parses the quoted GRE header, handles PMTU updates and redirects
 * directly, and delegates everything else to ipgre_err().
 *
 * NOTE(review): braces/returns are missing from this extract
 * (embedded numbering jumps).
 */
211 static void gre_err(struct sk_buff *skb, u32 info)
213 /* All the routers (except for Linux) return only
214 * 8 bytes of packet payload. It means, that precise relaying of
215 * ICMP in the real Internet is absolutely infeasible.
217 * Moreover, Cisco "wise men" put GRE key to the third word
218 * in GRE header. It makes impossible maintaining even soft
220 * GRE tunnels with enabled checksum. Tell them "thank you".
222 * Well, I wonder, rfc1812 was written by Cisco employee,
223 * what the hell these idiots break standards established
227 const struct iphdr *iph = (struct iphdr *)skb->data;
228 const int type = icmp_hdr(skb)->type;
229 const int code = icmp_hdr(skb)->code;
230 struct tnl_ptk_info tpi;
231 bool csum_err = false;
/* Re-parse the GRE header quoted inside the ICMP payload. */
233 if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
235 if (!csum_err) /* ignore csum errors. */
/* Fragmentation needed: propagate the new path MTU to the route. */
239 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
240 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
241 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
/* Redirect: update the cached next hop for this destination. */
244 if (type == ICMP_REDIRECT) {
245 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
/* Everything else is handled per-tunnel. */
250 ipgre_err(skb, info, &tpi);
/* erspan_rcv() - receive path for ERSPAN-in-GRE packets.
 * @skb: received packet, still carrying the GRE header
 * @tpi: parsed GRE header info (modified: key/flags are synthesized)
 *
 * Pulls the ERSPAN header, uses the 10-bit session ID as the tunnel key,
 * looks up the tunnel and hands the inner frame to ip_tunnel_rcv().
 * Returns PACKET_RCVD/PACKET_REJECT (visible reject paths below).
 *
 * NOTE(review): extract has gaps; error gotos, some locals and the
 * function tail are missing.
 */
253 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
256 struct net *net = dev_net(skb->dev);
257 struct metadata_dst *tun_dst = NULL;
258 struct ip_tunnel_net *itn;
259 struct ip_tunnel *tunnel;
260 struct erspanhdr *ershdr;
261 const struct iphdr *iph;
265 itn = net_generic(net, erspan_net_id);
266 len = gre_hdr_len + sizeof(*ershdr);
/* Make sure the full GRE+ERSPAN header is in linear data. */
268 if (unlikely(!pskb_may_pull(skb, len)))
272 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
274 /* The original GRE header does not have key field,
275 * Use ERSPAN 10-bit session ID as key.
277 tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
278 index = ershdr->md.index;
279 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
280 tpi->flags | TUNNEL_KEY,
281 iph->saddr, iph->daddr, tpi->key);
/* Strip the outer GRE + ERSPAN headers before delivery. */
284 if (__iptunnel_pull_header(skb,
285 gre_hdr_len + sizeof(*ershdr),
/* collect_md mode: attach tunnel metadata (including the ERSPAN
 * option) to the skb instead of using per-device config.
 */
290 if (tunnel->collect_md) {
291 struct ip_tunnel_info *info;
292 struct erspan_metadata *md;
296 tpi->flags |= TUNNEL_KEY;
298 tun_id = key32_to_tunnel_id(tpi->key);
300 tun_dst = ip_tun_rx_dst(skb, flags,
301 tun_id, sizeof(*md));
303 return PACKET_REJECT;
305 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
307 return PACKET_REJECT;
310 info = &tun_dst->u.tun_info;
311 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
312 info->options_len = sizeof(*md);
/* Non-metadata mode: remember the mirrored-port index on the tunnel. */
314 tunnel->index = ntohl(index);
317 skb_reset_mac_header(skb);
318 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
/* __ipgre_rcv() - common GRE receive helper.
 * @itn: tunnel namespace to search (gre or gretap)
 * @hdr_len: length of the GRE header to strip
 * @raw_proto: true when falling back to raw ETH_P_TEB delivery
 *
 * Looks up the tunnel by outer addresses/flags/key, strips the header
 * and delivers via ip_tunnel_rcv(); builds metadata dst in collect_md
 * mode. NOTE(review): braces and the PACKET_NEXT fallthrough path are
 * missing from this extract.
 */
326 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
327 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
329 struct metadata_dst *tun_dst = NULL;
330 const struct iphdr *iph;
331 struct ip_tunnel *tunnel;
334 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
335 iph->saddr, iph->daddr, tpi->key);
338 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
339 raw_proto, false) < 0)
/* ARPHRD_NONE devices have no MAC header to pop. */
342 if (tunnel->dev->type != ARPHRD_NONE)
343 skb_pop_mac_header(skb);
345 skb_reset_mac_header(skb);
346 if (tunnel->collect_md) {
/* Only CSUM and KEY flags are propagated into the metadata dst. */
350 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
351 tun_id = key32_to_tunnel_id(tpi->key);
352 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
354 return PACKET_REJECT;
357 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
/* ipgre_rcv() - dispatch a GRE packet to the right tunnel namespace.
 * Tries the natural namespace first (gretap for ETH_P_TEB, else gre);
 * on PACKET_NEXT retries ETH_P_TEB traffic against plain ipgre tunnels
 * in raw mode, so collect-metadata ipgre devices also see TEB frames.
 */
367 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
370 struct net *net = dev_net(skb->dev);
371 struct ip_tunnel_net *itn;
374 if (tpi->proto == htons(ETH_P_TEB))
375 itn = net_generic(net, gre_tap_net_id);
377 itn = net_generic(net, ipgre_net_id);
379 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
380 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
381 /* ipgre tunnels in collect metadata mode should receive
382 * also ETH_P_TEB traffic.
384 itn = net_generic(net, ipgre_net_id);
385 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
/* gre_rcv() - top-level receive handler registered for IPPROTO_GRE.
 * Drops looped-back multicast, parses the GRE header, routes ERSPAN
 * packets to erspan_rcv() and the rest to ipgre_rcv(); unclaimed
 * packets are answered with ICMP port unreachable.
 */
390 static int gre_rcv(struct sk_buff *skb)
392 struct tnl_ptk_info tpi;
393 bool csum_err = false;
396 #ifdef CONFIG_NET_IPGRE_BROADCAST
397 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
398 /* Looped back packet, drop it! */
399 if (rt_is_output_route(skb_rtable(skb)))
404 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
408 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
409 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
413 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
/* Nobody claimed the packet: tell the sender, per GRE practice. */
416 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* __gre_xmit() - push the GRE header and hand off to the generic
 * IP tunnel transmit path.
 * @tnl_params: outer IP header template (device config or inner header)
 */
422 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
423 const struct iphdr *tnl_params,
426 struct ip_tunnel *tunnel = netdev_priv(dev);
/* Sequence numbers, when enabled, are bumped per packet (the
 * increment line is not visible in this extract).
 */
428 if (tunnel->parms.o_flags & TUNNEL_SEQ)
431 /* Push GRE header. */
432 gre_build_header(skb, tunnel->tun_hlen,
433 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
434 htonl(tunnel->o_seqno));
436 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
/* gre_handle_offloads() - prepare skb GSO/checksum offload state for
 * GRE encapsulation; picks the GSO type based on whether the GRE
 * checksum is in use. Returns 0 or a negative errno from the helper.
 */
439 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
441 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
/* gre_get_rt() - build a flow from the tunnel key and resolve the
 * output route for the outer IPv4 header.
 * @fl: flow structure filled in here (also used by callers afterwards)
 */
444 static struct rtable *gre_get_rt(struct sk_buff *skb,
445 struct net_device *dev,
447 const struct ip_tunnel_key *key)
449 struct net *net = dev_net(dev);
451 memset(fl, 0, sizeof(*fl));
452 fl->daddr = key->u.ipv4.dst;
453 fl->saddr = key->u.ipv4.src;
454 fl->flowi4_tos = RT_TOS(key->tos);
455 fl->flowi4_mark = skb->mark;
456 fl->flowi4_proto = IPPROTO_GRE;
458 return ip_route_output_key(net, fl);
/* prepare_fb_xmit() - common setup for flow-based (collect_md) transmit:
 * resolve (and cache) the route, then guarantee enough headroom for the
 * outer IP + tunnel headers. Returns the route, or an error path ending
 * in the tx_dropped accounting below.
 *
 * NOTE(review): extract has gaps; error handling/labels are missing.
 */
461 static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
462 struct net_device *dev,
466 struct ip_tunnel_info *tun_info;
467 const struct ip_tunnel_key *key;
468 struct rtable *rt = NULL;
473 tun_info = skb_tunnel_info(skb);
474 key = &tun_info->key;
475 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
/* Try the per-tunnel-info dst cache first, fall back to a lookup. */
478 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
480 rt = gre_get_rt(skb, dev, fl, key);
484 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
/* Expand the head if the device needs more headroom or the header
 * area is shared with a clone.
 */
488 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
489 + tunnel_hlen + sizeof(struct iphdr);
490 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
491 int head_delta = SKB_DATA_ALIGN(min_headroom -
494 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
505 dev->stats.tx_dropped++;
/* gre_fb_xmit() - flow-based (collect_md) GRE transmit: all tunnel
 * parameters come from the skb's attached ip_tunnel_info rather than
 * device configuration.
 *
 * NOTE(review): error-path gotos and kfree_skb lines are missing from
 * this extract (numbering gaps).
 */
509 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
512 struct ip_tunnel_info *tun_info;
513 const struct ip_tunnel_key *key;
514 struct rtable *rt = NULL;
/* Require valid IPv4 TX metadata on the skb. */
519 tun_info = skb_tunnel_info(skb);
520 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
521 ip_tunnel_info_af(tun_info) != AF_INET))
524 key = &tun_info->key;
525 tunnel_hlen = gre_calc_hlen(key->tun_flags);
527 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
531 /* Push Tunnel header. */
532 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
535 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
536 gre_build_header(skb, tunnel_hlen, flags, proto,
537 tunnel_id_to_key32(tun_info->key.tun_id), 0);
/* Honor the per-flow don't-fragment request on the outer header. */
539 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
541 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
542 key->tos, key->ttl, df, false);
549 dev->stats.tx_dropped++;
/* erspan_fb_xmit() - flow-based transmit for ERSPAN: builds the ERSPAN
 * header (session id from tun_id, index from the metadata option) plus
 * an 8-byte GRE header with the SEQ bit, then emits the outer IPv4.
 * Oversized frames are trimmed and flagged as truncated.
 *
 * NOTE(review): error-path lines are missing from this extract.
 */
552 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
555 struct ip_tunnel *tunnel = netdev_priv(dev);
556 struct ip_tunnel_info *tun_info;
557 const struct ip_tunnel_key *key;
558 struct erspan_metadata *md;
559 struct rtable *rt = NULL;
560 bool truncate = false;
565 tun_info = skb_tunnel_info(skb);
566 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
567 ip_tunnel_info_af(tun_info) != AF_INET))
570 key = &tun_info->key;
572 /* ERSPAN has fixed 8 byte GRE header */
573 tunnel_hlen = 8 + sizeof(struct erspanhdr);
575 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
/* ERSPAN never uses the GRE checksum. */
579 if (gre_handle_offloads(skb, false))
/* Trim mirrored frames that exceed the device MTU; the (elided)
 * truncate flag is set in this branch.
 */
582 if (skb->len > dev->mtu + dev->hard_header_len) {
583 pskb_trim(skb, dev->mtu + dev->hard_header_len);
587 md = ip_tunnel_info_opts(tun_info);
591 erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
592 ntohl(md->index), truncate);
594 gre_build_header(skb, 8, TUNNEL_SEQ,
595 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
597 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
599 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
600 key->tos, key->ttl, df, false);
607 dev->stats.tx_dropped++;
/* gre_fill_metadata_dst() - ndo_fill_metadata_dst: resolve the route
 * for the skb's tunnel metadata and record the chosen source address
 * back into the metadata (used by lwtunnel/OVS callers).
 */
610 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
612 struct ip_tunnel_info *info = skb_tunnel_info(skb);
/* Only IPv4 metadata is supported here. */
616 if (ip_tunnel_info_af(info) != AF_INET)
619 rt = gre_get_rt(skb, dev, &fl4, &info->key);
624 info->key.u.ipv4.src = fl4.saddr;
/* ipgre_xmit() - ndo_start_xmit for plain ipgre devices.
 * collect_md devices go through the flow-based path; devices with
 * header_ops already carry a prebuilt outer IP header in the skb and
 * use it as the template, others use the configured tunnel parms.
 *
 * NOTE(review): free_skb error path and return statements are missing
 * from this extract.
 */
628 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
629 struct net_device *dev)
631 struct ip_tunnel *tunnel = netdev_priv(dev);
632 const struct iphdr *tnl_params;
634 if (tunnel->collect_md) {
635 gre_fb_xmit(skb, dev, skb->protocol);
639 if (dev->header_ops) {
640 /* Need space for new headers */
641 if (skb_cow_head(skb, dev->needed_headroom -
642 (tunnel->hlen + sizeof(struct iphdr))))
/* The outer header built by ipgre_header() sits at skb->data. */
645 tnl_params = (const struct iphdr *)skb->data;
647 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
650 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
651 skb_reset_mac_header(skb);
653 if (skb_cow_head(skb, dev->needed_headroom))
656 tnl_params = &tunnel->parms.iph;
659 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
662 __gre_xmit(skb, dev, tnl_params, skb->protocol);
667 dev->stats.tx_dropped++;
/* tos_to_cos() - map an IP TOS value to an ERSPAN CoS value.
 * NOTE(review): the function body is missing from this extract
 * (numbering jumps from 671 to 680).
 */
671 static inline u8 tos_to_cos(u8 tos)
/* erspan_build_header() - prepend an ERSPAN type II header to skb.
 * @id: tunnel key used as the 10-bit session ID (network order)
 * @index: ERSPAN port/index field
 * @truncate: set the T bit when the mirrored frame was trimmed
 *
 * Extracts the VLAN TCI when the mirrored frame is 802.1Q tagged and
 * encodes version, CoS (from the inner TOS), encap type, and index.
 */
680 static void erspan_build_header(struct sk_buff *skb,
681 __be32 id, u32 index, bool truncate)
683 struct iphdr *iphdr = ip_hdr(skb);
684 struct ethhdr *eth = eth_hdr(skb);
685 enum erspan_encap_type enc_type;
686 struct erspanhdr *ershdr;
693 enc_type = ERSPAN_ENCAP_NOVLAN;
695 /* If mirrored packet has vlan tag, extract tci and
696 * perserve vlan header in the mirrored frame.
698 if (eth->h_proto == htons(ETH_P_8021Q)) {
699 qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
700 vlan_tci = ntohs(qp->tci);
701 enc_type = ERSPAN_ENCAP_INFRAME;
704 skb_push(skb, sizeof(*ershdr));
705 ershdr = (struct erspanhdr *)skb->data;
706 memset(ershdr, 0, sizeof(*ershdr));
708 ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
709 (ERSPAN_VERSION << VER_OFFSET));
710 ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
711 ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
712 (enc_type << EN_OFFSET & EN_MASK) |
713 ((truncate << T_OFFSET) & T_MASK));
714 ershdr->md.index = htonl(index & INDEX_MASK);
/* erspan_xmit() - ndo_start_xmit for erspan devices (non-flow path).
 * Trims oversized frames, builds the ERSPAN header from the configured
 * o_key/index, clears TUNNEL_KEY (the key lives in the ERSPAN header,
 * not the GRE header) and transmits via __gre_xmit().
 *
 * NOTE(review): free_skb path and returns are missing from this extract.
 */
717 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
718 struct net_device *dev)
720 struct ip_tunnel *tunnel = netdev_priv(dev);
721 bool truncate = false;
723 if (tunnel->collect_md) {
724 erspan_fb_xmit(skb, dev, skb->protocol);
728 if (gre_handle_offloads(skb, false))
731 if (skb_cow_head(skb, dev->needed_headroom))
734 if (skb->len > dev->mtu + dev->hard_header_len) {
735 pskb_trim(skb, dev->mtu + dev->hard_header_len);
739 /* Push ERSPAN header */
740 erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
741 tunnel->parms.o_flags &= ~TUNNEL_KEY;
742 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
747 dev->stats.tx_dropped++;
/* gre_tap_xmit() - ndo_start_xmit for gretap (Ethernet over GRE);
 * always encapsulates as ETH_P_TEB.
 */
751 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
752 struct net_device *dev)
754 struct ip_tunnel *tunnel = netdev_priv(dev);
756 if (tunnel->collect_md) {
757 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
761 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
764 if (skb_cow_head(skb, dev->needed_headroom))
767 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
772 dev->stats.tx_dropped++;
/* ipgre_link_update() - recompute header length, headroom, MTU and
 * offload feature flags after the tunnel's o_flags changed (e.g. via
 * ioctl or netlink). 68 is the minimum IPv4 MTU floor.
 */
776 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
778 struct ip_tunnel *tunnel = netdev_priv(dev);
/* len becomes the delta between old and new GRE header lengths. */
781 len = tunnel->tun_hlen;
782 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
783 len = tunnel->tun_hlen - len;
784 tunnel->hlen = tunnel->hlen + len;
786 dev->needed_headroom = dev->needed_headroom + len;
788 dev->mtu = max_t(int, dev->mtu - len, 68);
/* GSO cannot be offered with GRE sequence numbers; checksum is ok
 * only without an extra encapsulation layer.
 */
790 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
791 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
792 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
793 dev->features |= NETIF_F_GSO_SOFTWARE;
794 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
796 dev->features |= NETIF_F_LLTX;
/* ipgre_tunnel_ioctl() - legacy SIOC{GET,ADD,CHG,DEL}TUNNEL interface.
 * Validates the user-supplied parameters for add/change, converts
 * between wire-format GRE flags and internal TUNNEL_* flags around the
 * generic ip_tunnel_ioctl(), and refreshes derived state on change.
 */
800 static int ipgre_tunnel_ioctl(struct net_device *dev,
801 struct ifreq *ifr, int cmd)
803 struct ip_tunnel_parm p;
806 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
/* Reject anything that is not plain GRE-in-IPv4 without routing. */
809 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
810 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
811 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
812 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
816 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
817 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
819 err = ip_tunnel_ioctl(dev, &p, cmd);
823 if (cmd == SIOCCHGTUNNEL) {
824 struct ip_tunnel *t = netdev_priv(dev);
826 t->parms.i_flags = p.i_flags;
827 t->parms.o_flags = p.o_flags;
/* erspan devices have a fixed GRE header; only plain gre/gretap
 * need their derived lengths recomputed.
 */
829 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
830 ipgre_link_update(dev, true);
/* Convert back to wire format before copying out to userspace. */
833 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
834 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
836 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
842 /* Nice toy. Unfortunately, useless in real life :-)
843 It allows to construct virtual multiprotocol broadcast "LAN"
844 over the Internet, provided multicast routing is tuned.
847 I have no idea whether this bicycle was invented before me,
848 so that I had to set ARPHRD_IPGRE to a random value.
849 I have an impression, that Cisco could make something similar,
850 but this feature is apparently missing in IOS<=11.2(8).
852 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
853 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
855 ping -t 255 224.66.66.66
857 If nobody answers, mbone does not work.
859 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
860 ip addr add 10.66.66.<somewhat>/24 dev Universe
862 ifconfig Universe add fe80::<Your_real_addr>/10
863 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
866 ftp fec0:6666:6666::193.233.7.65
/* ipgre_header() - header_ops->create: prebuild outer IP + GRE headers
 * on transmit for broadcast-style GRE devices. saddr/daddr are 4-byte
 * IPv4 addresses; returns the pushed length (negative form on the
 * partial-header path, per header_ops convention).
 */
869 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
871 const void *daddr, const void *saddr, unsigned int len)
873 struct ip_tunnel *t = netdev_priv(dev);
875 struct gre_base_hdr *greh;
877 iph = skb_push(skb, t->hlen + sizeof(*iph));
878 greh = (struct gre_base_hdr *)(iph+1);
879 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
880 greh->protocol = htons(type);
882 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
884 /* Set the source hardware address. */
886 memcpy(&iph->saddr, saddr, 4);
888 memcpy(&iph->daddr, daddr, 4);
890 return t->hlen + sizeof(*iph);
892 return -(t->hlen + sizeof(*iph));
/* ipgre_header_parse() - header_ops->parse: report the outer IPv4
 * source address as the "hardware" address of the sender.
 */
895 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
897 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
898 memcpy(haddr, &iph->saddr, 4);
/* Link-layer header operations for broadcast/NOARP GRE devices. */
902 static const struct header_ops ipgre_header_ops = {
903 .create = ipgre_header,
904 .parse = ipgre_header_parse,
907 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* ipgre_open() - ndo_open for broadcast GRE: if the remote is a
 * multicast group, resolve the route and join the group on the
 * underlying device (leaving again in ipgre_close()).
 */
908 static int ipgre_open(struct net_device *dev)
910 struct ip_tunnel *t = netdev_priv(dev);
912 if (ipv4_is_multicast(t->parms.iph.daddr)) {
916 rt = ip_route_output_gre(t->net, &fl4,
920 RT_TOS(t->parms.iph.tos),
923 return -EADDRNOTAVAIL;
926 if (!__in_dev_get_rtnl(dev))
927 return -EADDRNOTAVAIL;
/* Remember which link we joined on so close can leave it. */
928 t->mlink = dev->ifindex;
929 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ipgre_close() - ndo_stop: leave the multicast group joined in
 * ipgre_open(), looked up via the remembered mlink ifindex.
 */
934 static int ipgre_close(struct net_device *dev)
936 struct ip_tunnel *t = netdev_priv(dev);
938 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
939 struct in_device *in_dev;
940 in_dev = inetdev_by_index(t->net, t->mlink);
942 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* netdev operations for plain ipgre devices; open/stop handle the
 * multicast-broadcast mode only when it is compiled in.
 */
948 static const struct net_device_ops ipgre_netdev_ops = {
949 .ndo_init = ipgre_tunnel_init,
950 .ndo_uninit = ip_tunnel_uninit,
951 #ifdef CONFIG_NET_IPGRE_BROADCAST
952 .ndo_open = ipgre_open,
953 .ndo_stop = ipgre_close,
955 .ndo_start_xmit = ipgre_xmit,
956 .ndo_do_ioctl = ipgre_tunnel_ioctl,
957 .ndo_change_mtu = ip_tunnel_change_mtu,
958 .ndo_get_stats64 = ip_tunnel_get_stats64,
959 .ndo_get_iflink = ip_tunnel_get_iflink,
/* Device feature flags common to all GRE device flavours.
 * NOTE(review): continuation lines of this macro are missing from this
 * extract (numbering jumps from 962 to 967).
 */
962 #define GRE_FEATURES (NETIF_F_SG | \
/* ipgre_tunnel_setup() - rtnl_link_ops->setup for plain ipgre devices. */
967 static void ipgre_tunnel_setup(struct net_device *dev)
969 dev->netdev_ops = &ipgre_netdev_ops;
970 dev->type = ARPHRD_IPGRE;
971 ip_tunnel_setup(dev, ipgre_net_id);
/* __gre_tunnel_init() - shared init for gre and gretap devices: derive
 * header lengths from o_flags, size headroom/MTU (the extra 4 bytes
 * leave room for an optional VLAN tag), and advertise offloads.
 */
974 static void __gre_tunnel_init(struct net_device *dev)
976 struct ip_tunnel *tunnel;
979 tunnel = netdev_priv(dev);
980 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
981 tunnel->parms.iph.protocol = IPPROTO_GRE;
983 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
985 t_hlen = tunnel->hlen + sizeof(struct iphdr);
987 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
988 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
990 dev->features |= GRE_FEATURES;
991 dev->hw_features |= GRE_FEATURES;
993 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
994 /* TCP offload with GRE SEQ is not supported, nor
995 * can we support 2 levels of outer headers requiring
998 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
999 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
1000 dev->features |= NETIF_F_GSO_SOFTWARE;
1001 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1004 /* Can use a lockless transmit, unless we generate
1007 dev->features |= NETIF_F_LLTX;
/* ipgre_tunnel_init() - ndo_init for plain ipgre: NOARP device whose
 * dev_addr/broadcast mirror the configured tunnel endpoints; installs
 * header_ops for point-to-point and (when enabled) multicast modes.
 */
1011 static int ipgre_tunnel_init(struct net_device *dev)
1013 struct ip_tunnel *tunnel = netdev_priv(dev);
1014 struct iphdr *iph = &tunnel->parms.iph;
1016 __gre_tunnel_init(dev);
1018 memcpy(dev->dev_addr, &iph->saddr, 4);
1019 memcpy(dev->broadcast, &iph->daddr, 4);
1021 dev->flags = IFF_NOARP;
1022 netif_keep_dst(dev);
1025 if (iph->daddr && !tunnel->collect_md) {
1026 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* Multicast remote: broadcast-LAN-over-GRE mode. */
1027 if (ipv4_is_multicast(iph->daddr)) {
1030 dev->flags = IFF_BROADCAST;
1031 dev->header_ops = &ipgre_header_ops;
1034 } else if (!tunnel->collect_md) {
1035 dev->header_ops = &ipgre_header_ops;
1038 return ip_tunnel_init(dev);
/* GRE protocol hooks registered with the gre demux (handler line for
 * gre_rcv is missing from this extract).
 */
1041 static const struct gre_protocol ipgre_protocol = {
1043 .err_handler = gre_err,
/* Per-netns init for plain ipgre (creates the fallback device too). */
1046 static int __net_init ipgre_init_net(struct net *net)
1048 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
/* Batched per-netns teardown for plain ipgre devices. */
1051 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1053 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
/* pernet operations for the ipgre namespace state. */
1056 static struct pernet_operations ipgre_net_ops = {
1057 .init = ipgre_init_net,
1058 .exit_batch = ipgre_exit_batch_net,
1059 .id = &ipgre_net_id,
1060 .size = sizeof(struct ip_tunnel_net),
/* ipgre_tunnel_validate() - netlink validation for plain gre links:
 * reject deprecated GRE version/routing bits, and forbid combining
 * collect_md with an FOU/GUE encap type.
 */
1063 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1064 struct netlink_ext_ack *extack)
1072 if (data[IFLA_GRE_IFLAGS])
1073 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1074 if (data[IFLA_GRE_OFLAGS])
1075 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1076 if (flags & (GRE_VERSION|GRE_ROUTING))
1079 if (data[IFLA_GRE_COLLECT_METADATA] &&
1080 data[IFLA_GRE_ENCAP_TYPE] &&
1081 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
/* ipgre_tap_validate() - netlink validation for gretap links: checks
 * the Ethernet address attribute and remote address (multicast check
 * elided in this extract), then applies the common gre validation.
 */
1087 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1088 struct netlink_ext_ack *extack)
1092 if (tb[IFLA_ADDRESS]) {
1093 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1095 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1096 return -EADDRNOTAVAIL;
1102 if (data[IFLA_GRE_REMOTE]) {
1103 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1109 return ipgre_tunnel_validate(tb, data, extack);
/* erspan_validate() - netlink validation for erspan links: on top of
 * the gretap checks, require exactly GRE_SEQ|GRE_KEY in non-metadata
 * mode and constrain i/o keys to the 10-bit ERSPAN session ID space.
 */
1112 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1113 struct netlink_ext_ack *extack)
1121 ret = ipgre_tap_validate(tb, data, extack);
1125 /* ERSPAN should only have GRE sequence and key flag */
1126 if (data[IFLA_GRE_OFLAGS])
1127 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1128 if (data[IFLA_GRE_IFLAGS])
1129 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1130 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1131 flags != (GRE_SEQ | GRE_KEY))
1134 /* ERSPAN Session ID only has 10-bit. Since we reuse
1135 * 32-bit key field as ID, check it's range.
1137 if (data[IFLA_GRE_IKEY] &&
1138 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1141 if (data[IFLA_GRE_OKEY] &&
1142 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
/* ipgre_netlink_parms() - translate IFLA_GRE_* netlink attributes into
 * ip_tunnel_parm fields plus per-tunnel extras (collect_md, ignore_df,
 * fwmark, erspan index). Absent PMTUDISC, or a nonzero value, forces
 * DF on the outer header (see the dead-loop discussion at file top).
 */
1148 static int ipgre_netlink_parms(struct net_device *dev,
1149 struct nlattr *data[],
1150 struct nlattr *tb[],
1151 struct ip_tunnel_parm *parms,
1154 struct ip_tunnel *t = netdev_priv(dev);
1156 memset(parms, 0, sizeof(*parms));
1158 parms->iph.protocol = IPPROTO_GRE;
1163 if (data[IFLA_GRE_LINK])
1164 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1166 if (data[IFLA_GRE_IFLAGS])
1167 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1169 if (data[IFLA_GRE_OFLAGS])
1170 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1172 if (data[IFLA_GRE_IKEY])
1173 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1175 if (data[IFLA_GRE_OKEY])
1176 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1178 if (data[IFLA_GRE_LOCAL])
1179 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1181 if (data[IFLA_GRE_REMOTE])
1182 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1184 if (data[IFLA_GRE_TTL])
1185 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1187 if (data[IFLA_GRE_TOS])
1188 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1190 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1193 parms->iph.frag_off = htons(IP_DF);
/* collect_md devices carry no link-layer header of their own. */
1196 if (data[IFLA_GRE_COLLECT_METADATA]) {
1197 t->collect_md = true;
1198 if (dev->type == ARPHRD_IPGRE)
1199 dev->type = ARPHRD_NONE;
/* ignore_df is incompatible with forcing DF on the outer header. */
1202 if (data[IFLA_GRE_IGNORE_DF]) {
1203 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1204 && (parms->iph.frag_off & htons(IP_DF)))
1206 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1209 if (data[IFLA_GRE_FWMARK])
1210 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1212 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1213 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1215 if (t->index & ~INDEX_MASK)
1222 /* This function returns true when ENCAP attributes are present in the nl msg */
1223 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1224 struct ip_tunnel_encap *ipencap)
1228 memset(ipencap, 0, sizeof(*ipencap));
/* Each attribute both sets its field and (in elided lines) marks the
 * message as containing encap configuration.
 */
1233 if (data[IFLA_GRE_ENCAP_TYPE]) {
1235 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1238 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1240 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1243 if (data[IFLA_GRE_ENCAP_SPORT]) {
1245 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1248 if (data[IFLA_GRE_ENCAP_DPORT]) {
1250 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
/* gre_tap_init() - ndo_init for gretap devices. */
1256 static int gre_tap_init(struct net_device *dev)
1258 __gre_tunnel_init(dev);
1259 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1260 netif_keep_dst(dev);
1262 return ip_tunnel_init(dev);
/* netdev operations for gretap (Ethernet over GRE) devices. */
1265 static const struct net_device_ops gre_tap_netdev_ops = {
1266 .ndo_init = gre_tap_init,
1267 .ndo_uninit = ip_tunnel_uninit,
1268 .ndo_start_xmit = gre_tap_xmit,
1269 .ndo_set_mac_address = eth_mac_addr,
1270 .ndo_validate_addr = eth_validate_addr,
1271 .ndo_change_mtu = ip_tunnel_change_mtu,
1272 .ndo_get_stats64 = ip_tunnel_get_stats64,
1273 .ndo_get_iflink = ip_tunnel_get_iflink,
1274 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* erspan_tunnel_init() - ndo_init for erspan devices: fixed 8-byte GRE
 * header plus the ERSPAN header; headroom/MTU sized accordingly (the
 * extra 4 bytes leave room for a VLAN tag).
 */
1277 static int erspan_tunnel_init(struct net_device *dev)
1279 struct ip_tunnel *tunnel = netdev_priv(dev);
1282 tunnel->tun_hlen = 8;
1283 tunnel->parms.iph.protocol = IPPROTO_GRE;
1284 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1285 sizeof(struct erspanhdr);
1286 t_hlen = tunnel->hlen + sizeof(struct iphdr);
1288 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
1289 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
1290 dev->features |= GRE_FEATURES;
1291 dev->hw_features |= GRE_FEATURES;
1292 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1293 netif_keep_dst(dev);
1295 return ip_tunnel_init(dev);
/* netdev operations for erspan devices. */
1298 static const struct net_device_ops erspan_netdev_ops = {
1299 .ndo_init = erspan_tunnel_init,
1300 .ndo_uninit = ip_tunnel_uninit,
1301 .ndo_start_xmit = erspan_xmit,
1302 .ndo_set_mac_address = eth_mac_addr,
1303 .ndo_validate_addr = eth_validate_addr,
1304 .ndo_change_mtu = ip_tunnel_change_mtu,
1305 .ndo_get_stats64 = ip_tunnel_get_stats64,
1306 .ndo_get_iflink = ip_tunnel_get_iflink,
1307 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* ipgre_tap_setup() - rtnl_link_ops->setup for gretap devices. */
1310 static void ipgre_tap_setup(struct net_device *dev)
1313 dev->netdev_ops = &gre_tap_netdev_ops;
1314 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1316 ip_tunnel_setup(dev, gre_tap_net_id);
/* ipgre_newlink() - rtnl_link_ops->newlink: apply optional encap
 * settings, parse IFLA_GRE_* attributes, and create the tunnel.
 */
1319 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1320 struct nlattr *tb[], struct nlattr *data[],
1321 struct netlink_ext_ack *extack)
1323 struct ip_tunnel_parm p;
1324 struct ip_tunnel_encap ipencap;
1328 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1329 struct ip_tunnel *t = netdev_priv(dev);
1330 err = ip_tunnel_encap_setup(t, &ipencap);
1336 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1339 return ip_tunnel_newlink(dev, tb, &p, fwmark);
/* ipgre_changelink() - rtnl_link_ops->changelink: re-parse attributes,
 * apply them via ip_tunnel_changelink(), then refresh flags and (for
 * non-erspan kinds) derived header/MTU state.
 */
1342 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1343 struct nlattr *data[],
1344 struct netlink_ext_ack *extack)
1346 struct ip_tunnel *t = netdev_priv(dev);
1347 struct ip_tunnel_encap ipencap;
1348 __u32 fwmark = t->fwmark;
1349 struct ip_tunnel_parm p;
1352 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1353 err = ip_tunnel_encap_setup(t, &ipencap);
1359 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1363 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1367 t->parms.i_flags = p.i_flags;
1368 t->parms.o_flags = p.o_flags;
/* erspan has a fixed GRE header; skip the length recomputation. */
1370 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1371 ipgre_link_update(dev, !tb[IFLA_MTU]);
1376 static size_t ipgre_get_size(const struct net_device *dev)
1381 /* IFLA_GRE_IFLAGS */
1383 /* IFLA_GRE_OFLAGS */
1389 /* IFLA_GRE_LOCAL */
1391 /* IFLA_GRE_REMOTE */
1397 /* IFLA_GRE_PMTUDISC */
1399 /* IFLA_GRE_ENCAP_TYPE */
1401 /* IFLA_GRE_ENCAP_FLAGS */
1403 /* IFLA_GRE_ENCAP_SPORT */
1405 /* IFLA_GRE_ENCAP_DPORT */
1407 /* IFLA_GRE_COLLECT_METADATA */
1409 /* IFLA_GRE_IGNORE_DF */
1411 /* IFLA_GRE_FWMARK */
1413 /* IFLA_GRE_ERSPAN_INDEX */
1418 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1420 struct ip_tunnel *t = netdev_priv(dev);
1421 struct ip_tunnel_parm *p = &t->parms;
1423 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1424 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1425 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1426 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1427 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1428 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1429 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1430 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1431 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1432 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1433 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1434 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1435 !!(p->iph.frag_off & htons(IP_DF))) ||
1436 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1437 goto nla_put_failure;
1439 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1441 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1443 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1445 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1447 goto nla_put_failure;
1449 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1450 goto nla_put_failure;
1452 if (t->collect_md) {
1453 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1454 goto nla_put_failure;
1458 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1459 goto nla_put_failure;
1467 static void erspan_setup(struct net_device *dev)
1470 dev->netdev_ops = &erspan_netdev_ops;
1471 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1472 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1473 ip_tunnel_setup(dev, erspan_net_id);
1476 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1477 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1478 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1479 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1480 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1481 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1482 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1483 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1484 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1485 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1486 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1487 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1488 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1489 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1490 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
1491 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1492 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1493 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
1494 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1497 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1499 .maxtype = IFLA_GRE_MAX,
1500 .policy = ipgre_policy,
1501 .priv_size = sizeof(struct ip_tunnel),
1502 .setup = ipgre_tunnel_setup,
1503 .validate = ipgre_tunnel_validate,
1504 .newlink = ipgre_newlink,
1505 .changelink = ipgre_changelink,
1506 .dellink = ip_tunnel_dellink,
1507 .get_size = ipgre_get_size,
1508 .fill_info = ipgre_fill_info,
1509 .get_link_net = ip_tunnel_get_link_net,
1512 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1514 .maxtype = IFLA_GRE_MAX,
1515 .policy = ipgre_policy,
1516 .priv_size = sizeof(struct ip_tunnel),
1517 .setup = ipgre_tap_setup,
1518 .validate = ipgre_tap_validate,
1519 .newlink = ipgre_newlink,
1520 .changelink = ipgre_changelink,
1521 .dellink = ip_tunnel_dellink,
1522 .get_size = ipgre_get_size,
1523 .fill_info = ipgre_fill_info,
1524 .get_link_net = ip_tunnel_get_link_net,
1527 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1529 .maxtype = IFLA_GRE_MAX,
1530 .policy = ipgre_policy,
1531 .priv_size = sizeof(struct ip_tunnel),
1532 .setup = erspan_setup,
1533 .validate = erspan_validate,
1534 .newlink = ipgre_newlink,
1535 .changelink = ipgre_changelink,
1536 .dellink = ip_tunnel_dellink,
1537 .get_size = ipgre_get_size,
1538 .fill_info = ipgre_fill_info,
1539 .get_link_net = ip_tunnel_get_link_net,
1542 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1543 u8 name_assign_type)
1545 struct nlattr *tb[IFLA_MAX + 1];
1546 struct net_device *dev;
1547 LIST_HEAD(list_kill);
1548 struct ip_tunnel *t;
1551 memset(&tb, 0, sizeof(tb));
1553 dev = rtnl_create_link(net, name, name_assign_type,
1554 &ipgre_tap_ops, tb);
1558 /* Configure flow based GRE device. */
1559 t = netdev_priv(dev);
1560 t->collect_md = true;
1562 err = ipgre_newlink(net, dev, tb, NULL, NULL);
1565 return ERR_PTR(err);
1568 /* openvswitch users expect packet sizes to be unrestricted,
1569 * so set the largest MTU we can.
1571 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1575 err = rtnl_configure_link(dev, NULL);
1581 ip_tunnel_dellink(dev, &list_kill);
1582 unregister_netdevice_many(&list_kill);
1583 return ERR_PTR(err);
1585 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1587 static int __net_init ipgre_tap_init_net(struct net *net)
1589 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1592 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1594 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1597 static struct pernet_operations ipgre_tap_net_ops = {
1598 .init = ipgre_tap_init_net,
1599 .exit_batch = ipgre_tap_exit_batch_net,
1600 .id = &gre_tap_net_id,
1601 .size = sizeof(struct ip_tunnel_net),
1604 static int __net_init erspan_init_net(struct net *net)
1606 return ip_tunnel_init_net(net, erspan_net_id,
1607 &erspan_link_ops, "erspan0");
1610 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1612 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1615 static struct pernet_operations erspan_net_ops = {
1616 .init = erspan_init_net,
1617 .exit_batch = erspan_exit_batch_net,
1618 .id = &erspan_net_id,
1619 .size = sizeof(struct ip_tunnel_net),
1622 static int __init ipgre_init(void)
1626 pr_info("GRE over IPv4 tunneling driver\n");
1628 err = register_pernet_device(&ipgre_net_ops);
1632 err = register_pernet_device(&ipgre_tap_net_ops);
1634 goto pnet_tap_failed;
1636 err = register_pernet_device(&erspan_net_ops);
1638 goto pnet_erspan_failed;
1640 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1642 pr_info("%s: can't add protocol\n", __func__);
1643 goto add_proto_failed;
1646 err = rtnl_link_register(&ipgre_link_ops);
1648 goto rtnl_link_failed;
1650 err = rtnl_link_register(&ipgre_tap_ops);
1652 goto tap_ops_failed;
1654 err = rtnl_link_register(&erspan_link_ops);
1656 goto erspan_link_failed;
1661 rtnl_link_unregister(&ipgre_tap_ops);
1663 rtnl_link_unregister(&ipgre_link_ops);
1665 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 unregister_pernet_device(&erspan_net_ops);
1669 unregister_pernet_device(&ipgre_tap_net_ops);
1671 unregister_pernet_device(&ipgre_net_ops);
1675 static void __exit ipgre_fini(void)
1677 rtnl_link_unregister(&ipgre_tap_ops);
1678 rtnl_link_unregister(&ipgre_link_ops);
1679 rtnl_link_unregister(&erspan_link_ops);
1680 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1681 unregister_pernet_device(&ipgre_tap_net_ops);
1682 unregister_pernet_device(&ipgre_net_ops);
1683 unregister_pernet_device(&erspan_net_ops);
1686 module_init(ipgre_init);
1687 module_exit(ipgre_fini);
1688 MODULE_LICENSE("GPL");
1689 MODULE_ALIAS_RTNL_LINK("gre");
1690 MODULE_ALIAS_RTNL_LINK("gretap");
1691 MODULE_ALIAS_RTNL_LINK("erspan");
1692 MODULE_ALIAS_NETDEV("gre0");
1693 MODULE_ALIAS_NETDEV("gretap0");
1694 MODULE_ALIAS_NETDEV("erspan0");