lkdtm/heap: Hide allocation size from -Warray-bounds
[linux-2.6-microblaze.git] / net / ipv4 / ip_gre.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      Linux NET3:     GRE over IP protocol decoder.
4  *
5  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47
48 /*
49    Problems & solutions
50    --------------------
51
52    1. The most important issue is detecting local dead loops.
53    They would cause complete host lockup in transmit, which
54    would be "resolved" by stack overflow or, if queueing is enabled,
55    with infinite looping in net_bh.
56
57    We cannot track such dead loops during route installation,
58    it is infeasible task. The most general solutions would be
59    to keep skb->encapsulation counter (sort of local ttl),
60    and silently drop packet when it expires. It is a good
61    solution, but it supposes maintaining new variable in ALL
62    skb, even if no tunneling is used.
63
64    Current solution: xmit_recursion breaks dead loops. This is a percpu
65    counter, since when we enter the first ndo_xmit(), cpu migration is
66    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
67
68    2. Networking dead loops would not kill routers, but would really
69    kill network. IP hop limit plays role of "t->recursion" in this case,
70    if we copy it from packet being encapsulated to upper header.
71    It is very good solution, but it introduces two problems:
72
73    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74      do not work over tunnels.
75    - traceroute does not work. I planned to relay ICMP from tunnel,
76      so that this problem would be solved and traceroute output
77      would be even more informative. This idea appeared to be wrong:
78      only Linux complies to rfc1812 now (yes, guys, Linux is the only
79      true router now :-)), all routers (at least, in neighbourhood of mine)
80      return only 8 bytes of payload. It is the end.
81
82    Hence, if we want that OSPF worked or traceroute said something reasonable,
83    we should search for another solution.
84
85    One of them is to parse packet trying to detect inner encapsulation
86    made by our node. It is difficult or even impossible, especially,
87    taking into account fragmentation. To be short, ttl is not a solution at all.
88
89    Current solution: The solution was UNEXPECTEDLY SIMPLE.
90    We force DF flag on tunnels with preconfigured hop limit,
91    that is ALL. :-) Well, it does not remove the problem completely,
92    but exponential growth of network traffic is changed to linear
93    (branches, that exceed pmtu are pruned) and tunnel mtu
94    rapidly degrades to value <68, where looping stops.
95    Yes, it is not good if there exists a router in the loop,
96    which does not force DF, even when encapsulating packets have DF set.
97    But it is not our problem! Nobody could accuse us, we made
98    all that we could make. Even if it is your gated who injected
99    fatal route to network, even if it were you who configured
100    fatal static route: you are innocent. :-)
101
102    Alexey Kuznetsov.
103  */
104
105 static bool log_ecn_error = true;
106 module_param(log_ecn_error, bool, 0644);
107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
108
109 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
110 static const struct header_ops ipgre_header_ops;
111
112 static int ipgre_tunnel_init(struct net_device *dev);
113 static void erspan_build_header(struct sk_buff *skb,
114                                 u32 id, u32 index,
115                                 bool truncate, bool is_ipv4);
116
117 static unsigned int ipgre_net_id __read_mostly;
118 static unsigned int gre_tap_net_id __read_mostly;
119 static unsigned int erspan_net_id __read_mostly;
120
121 static int ipgre_err(struct sk_buff *skb, u32 info,
122                      const struct tnl_ptk_info *tpi)
123 {
124
125         /* All the routers (except for Linux) return only
126            8 bytes of packet payload. It means, that precise relaying of
127            ICMP in the real Internet is absolutely infeasible.
128
129            Moreover, Cisco "wise men" put GRE key to the third word
130            in GRE header. It makes impossible maintaining even soft
131            state for keyed GRE tunnels with enabled checksum. Tell
132            them "thank you".
133
134            Well, I wonder, rfc1812 was written by Cisco employee,
135            what the hell these idiots break standards established
136            by themselves???
137            */
138         struct net *net = dev_net(skb->dev);
139         struct ip_tunnel_net *itn;
140         const struct iphdr *iph;
141         const int type = icmp_hdr(skb)->type;
142         const int code = icmp_hdr(skb)->code;
143         unsigned int data_len = 0;
144         struct ip_tunnel *t;
145
146         if (tpi->proto == htons(ETH_P_TEB))
147                 itn = net_generic(net, gre_tap_net_id);
148         else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149                  tpi->proto == htons(ETH_P_ERSPAN2))
150                 itn = net_generic(net, erspan_net_id);
151         else
152                 itn = net_generic(net, ipgre_net_id);
153
154         iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155         t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156                              iph->daddr, iph->saddr, tpi->key);
157
158         if (!t)
159                 return -ENOENT;
160
161         switch (type) {
162         default:
163         case ICMP_PARAMETERPROB:
164                 return 0;
165
166         case ICMP_DEST_UNREACH:
167                 switch (code) {
168                 case ICMP_SR_FAILED:
169                 case ICMP_PORT_UNREACH:
170                         /* Impossible event. */
171                         return 0;
172                 default:
173                         /* All others are translated to HOST_UNREACH.
174                            rfc2003 contains "deep thoughts" about NET_UNREACH,
175                            I believe they are just ether pollution. --ANK
176                          */
177                         break;
178                 }
179                 break;
180
181         case ICMP_TIME_EXCEEDED:
182                 if (code != ICMP_EXC_TTL)
183                         return 0;
184                 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
185                 break;
186
187         case ICMP_REDIRECT:
188                 break;
189         }
190
191 #if IS_ENABLED(CONFIG_IPV6)
192        if (tpi->proto == htons(ETH_P_IPV6) &&
193            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
194                                        type, data_len))
195                return 0;
196 #endif
197
198         if (t->parms.iph.daddr == 0 ||
199             ipv4_is_multicast(t->parms.iph.daddr))
200                 return 0;
201
202         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
203                 return 0;
204
205         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
206                 t->err_count++;
207         else
208                 t->err_count = 1;
209         t->err_time = jiffies;
210
211         return 0;
212 }
213
214 static void gre_err(struct sk_buff *skb, u32 info)
215 {
216         /* All the routers (except for Linux) return only
217          * 8 bytes of packet payload. It means, that precise relaying of
218          * ICMP in the real Internet is absolutely infeasible.
219          *
220          * Moreover, Cisco "wise men" put GRE key to the third word
221          * in GRE header. It makes impossible maintaining even soft
222          * state for keyed
223          * GRE tunnels with enabled checksum. Tell them "thank you".
224          *
225          * Well, I wonder, rfc1812 was written by Cisco employee,
226          * what the hell these idiots break standards established
227          * by themselves???
228          */
229
230         const struct iphdr *iph = (struct iphdr *)skb->data;
231         const int type = icmp_hdr(skb)->type;
232         const int code = icmp_hdr(skb)->code;
233         struct tnl_ptk_info tpi;
234
235         if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
236                              iph->ihl * 4) < 0)
237                 return;
238
239         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
240                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
241                                  skb->dev->ifindex, IPPROTO_GRE);
242                 return;
243         }
244         if (type == ICMP_REDIRECT) {
245                 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
246                               IPPROTO_GRE);
247                 return;
248         }
249
250         ipgre_err(skb, info, &tpi);
251 }
252
/* Tell ERSPAN type I apart from type II by GRE header length.
 *
 * Both type I (version 0) and type II (version 1) use protocol 0x88BE;
 * type I has a 4-byte GRE header, type II an 8-byte one.
 *
 * Returns true when @gre_hdr_len is the type I length.
 */
static bool is_erspan_type1(int gre_hdr_len)
{
	const int type1_gre_hdr_len = 4;

	return gre_hdr_len == type1_gre_hdr_len;
}
261
262 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
263                       int gre_hdr_len)
264 {
265         struct net *net = dev_net(skb->dev);
266         struct metadata_dst *tun_dst = NULL;
267         struct erspan_base_hdr *ershdr;
268         struct ip_tunnel_net *itn;
269         struct ip_tunnel *tunnel;
270         const struct iphdr *iph;
271         struct erspan_md2 *md2;
272         int ver;
273         int len;
274
275         itn = net_generic(net, erspan_net_id);
276         iph = ip_hdr(skb);
277         if (is_erspan_type1(gre_hdr_len)) {
278                 ver = 0;
279                 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
280                                           tpi->flags | TUNNEL_NO_KEY,
281                                           iph->saddr, iph->daddr, 0);
282         } else {
283                 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
284                 ver = ershdr->ver;
285                 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
286                                           tpi->flags | TUNNEL_KEY,
287                                           iph->saddr, iph->daddr, tpi->key);
288         }
289
290         if (tunnel) {
291                 if (is_erspan_type1(gre_hdr_len))
292                         len = gre_hdr_len;
293                 else
294                         len = gre_hdr_len + erspan_hdr_len(ver);
295
296                 if (unlikely(!pskb_may_pull(skb, len)))
297                         return PACKET_REJECT;
298
299                 if (__iptunnel_pull_header(skb,
300                                            len,
301                                            htons(ETH_P_TEB),
302                                            false, false) < 0)
303                         goto drop;
304
305                 if (tunnel->collect_md) {
306                         struct erspan_metadata *pkt_md, *md;
307                         struct ip_tunnel_info *info;
308                         unsigned char *gh;
309                         __be64 tun_id;
310                         __be16 flags;
311
312                         tpi->flags |= TUNNEL_KEY;
313                         flags = tpi->flags;
314                         tun_id = key32_to_tunnel_id(tpi->key);
315
316                         tun_dst = ip_tun_rx_dst(skb, flags,
317                                                 tun_id, sizeof(*md));
318                         if (!tun_dst)
319                                 return PACKET_REJECT;
320
321                         /* skb can be uncloned in __iptunnel_pull_header, so
322                          * old pkt_md is no longer valid and we need to reset
323                          * it
324                          */
325                         gh = skb_network_header(skb) +
326                              skb_network_header_len(skb);
327                         pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
328                                                             sizeof(*ershdr));
329                         md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
330                         md->version = ver;
331                         md2 = &md->u.md2;
332                         memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
333                                                        ERSPAN_V2_MDSIZE);
334
335                         info = &tun_dst->u.tun_info;
336                         info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
337                         info->options_len = sizeof(*md);
338                 }
339
340                 skb_reset_mac_header(skb);
341                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
342                 return PACKET_RCVD;
343         }
344         return PACKET_REJECT;
345
346 drop:
347         kfree_skb(skb);
348         return PACKET_RCVD;
349 }
350
351 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
352                        struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
353 {
354         struct metadata_dst *tun_dst = NULL;
355         const struct iphdr *iph;
356         struct ip_tunnel *tunnel;
357
358         iph = ip_hdr(skb);
359         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
360                                   iph->saddr, iph->daddr, tpi->key);
361
362         if (tunnel) {
363                 const struct iphdr *tnl_params;
364
365                 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
366                                            raw_proto, false) < 0)
367                         goto drop;
368
369                 /* Special case for ipgre_header_parse(), which expects the
370                  * mac_header to point to the outer IP header.
371                  */
372                 if (tunnel->dev->header_ops == &ipgre_header_ops)
373                         skb_pop_mac_header(skb);
374                 else
375                         skb_reset_mac_header(skb);
376
377                 tnl_params = &tunnel->parms.iph;
378                 if (tunnel->collect_md || tnl_params->daddr == 0) {
379                         __be16 flags;
380                         __be64 tun_id;
381
382                         flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
383                         tun_id = key32_to_tunnel_id(tpi->key);
384                         tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
385                         if (!tun_dst)
386                                 return PACKET_REJECT;
387                 }
388
389                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
390                 return PACKET_RCVD;
391         }
392         return PACKET_NEXT;
393
394 drop:
395         kfree_skb(skb);
396         return PACKET_RCVD;
397 }
398
399 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
400                      int hdr_len)
401 {
402         struct net *net = dev_net(skb->dev);
403         struct ip_tunnel_net *itn;
404         int res;
405
406         if (tpi->proto == htons(ETH_P_TEB))
407                 itn = net_generic(net, gre_tap_net_id);
408         else
409                 itn = net_generic(net, ipgre_net_id);
410
411         res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
412         if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
413                 /* ipgre tunnels in collect metadata mode should receive
414                  * also ETH_P_TEB traffic.
415                  */
416                 itn = net_generic(net, ipgre_net_id);
417                 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
418         }
419         return res;
420 }
421
422 static int gre_rcv(struct sk_buff *skb)
423 {
424         struct tnl_ptk_info tpi;
425         bool csum_err = false;
426         int hdr_len;
427
428 #ifdef CONFIG_NET_IPGRE_BROADCAST
429         if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
430                 /* Looped back packet, drop it! */
431                 if (rt_is_output_route(skb_rtable(skb)))
432                         goto drop;
433         }
434 #endif
435
436         hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
437         if (hdr_len < 0)
438                 goto drop;
439
440         if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
441                      tpi.proto == htons(ETH_P_ERSPAN2))) {
442                 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
443                         return 0;
444                 goto out;
445         }
446
447         if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
448                 return 0;
449
450 out:
451         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
452 drop:
453         kfree_skb(skb);
454         return 0;
455 }
456
457 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
458                        const struct iphdr *tnl_params,
459                        __be16 proto)
460 {
461         struct ip_tunnel *tunnel = netdev_priv(dev);
462
463         if (tunnel->parms.o_flags & TUNNEL_SEQ)
464                 tunnel->o_seqno++;
465
466         /* Push GRE header. */
467         gre_build_header(skb, tunnel->tun_hlen,
468                          tunnel->parms.o_flags, proto, tunnel->parms.o_key,
469                          htonl(tunnel->o_seqno));
470
471         ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
472 }
473
474 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
475 {
476         return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
477 }
478
479 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
480                         __be16 proto)
481 {
482         struct ip_tunnel *tunnel = netdev_priv(dev);
483         struct ip_tunnel_info *tun_info;
484         const struct ip_tunnel_key *key;
485         int tunnel_hlen;
486         __be16 flags;
487
488         tun_info = skb_tunnel_info(skb);
489         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
490                      ip_tunnel_info_af(tun_info) != AF_INET))
491                 goto err_free_skb;
492
493         key = &tun_info->key;
494         tunnel_hlen = gre_calc_hlen(key->tun_flags);
495
496         if (skb_cow_head(skb, dev->needed_headroom))
497                 goto err_free_skb;
498
499         /* Push Tunnel header. */
500         if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
501                 goto err_free_skb;
502
503         flags = tun_info->key.tun_flags &
504                 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
505         gre_build_header(skb, tunnel_hlen, flags, proto,
506                          tunnel_id_to_key32(tun_info->key.tun_id),
507                          (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
508
509         ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
510
511         return;
512
513 err_free_skb:
514         kfree_skb(skb);
515         dev->stats.tx_dropped++;
516 }
517
518 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
519 {
520         struct ip_tunnel *tunnel = netdev_priv(dev);
521         struct ip_tunnel_info *tun_info;
522         const struct ip_tunnel_key *key;
523         struct erspan_metadata *md;
524         bool truncate = false;
525         __be16 proto;
526         int tunnel_hlen;
527         int version;
528         int nhoff;
529         int thoff;
530
531         tun_info = skb_tunnel_info(skb);
532         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
533                      ip_tunnel_info_af(tun_info) != AF_INET))
534                 goto err_free_skb;
535
536         key = &tun_info->key;
537         if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
538                 goto err_free_skb;
539         if (tun_info->options_len < sizeof(*md))
540                 goto err_free_skb;
541         md = ip_tunnel_info_opts(tun_info);
542
543         /* ERSPAN has fixed 8 byte GRE header */
544         version = md->version;
545         tunnel_hlen = 8 + erspan_hdr_len(version);
546
547         if (skb_cow_head(skb, dev->needed_headroom))
548                 goto err_free_skb;
549
550         if (gre_handle_offloads(skb, false))
551                 goto err_free_skb;
552
553         if (skb->len > dev->mtu + dev->hard_header_len) {
554                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
555                 truncate = true;
556         }
557
558         nhoff = skb_network_header(skb) - skb_mac_header(skb);
559         if (skb->protocol == htons(ETH_P_IP) &&
560             (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
561                 truncate = true;
562
563         thoff = skb_transport_header(skb) - skb_mac_header(skb);
564         if (skb->protocol == htons(ETH_P_IPV6) &&
565             (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
566                 truncate = true;
567
568         if (version == 1) {
569                 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
570                                     ntohl(md->u.index), truncate, true);
571                 proto = htons(ETH_P_ERSPAN);
572         } else if (version == 2) {
573                 erspan_build_header_v2(skb,
574                                        ntohl(tunnel_id_to_key32(key->tun_id)),
575                                        md->u.md2.dir,
576                                        get_hwid(&md->u.md2),
577                                        truncate, true);
578                 proto = htons(ETH_P_ERSPAN2);
579         } else {
580                 goto err_free_skb;
581         }
582
583         gre_build_header(skb, 8, TUNNEL_SEQ,
584                          proto, 0, htonl(tunnel->o_seqno++));
585
586         ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
587
588         return;
589
590 err_free_skb:
591         kfree_skb(skb);
592         dev->stats.tx_dropped++;
593 }
594
595 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
596 {
597         struct ip_tunnel_info *info = skb_tunnel_info(skb);
598         const struct ip_tunnel_key *key;
599         struct rtable *rt;
600         struct flowi4 fl4;
601
602         if (ip_tunnel_info_af(info) != AF_INET)
603                 return -EINVAL;
604
605         key = &info->key;
606         ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
607                             tunnel_id_to_key32(key->tun_id),
608                             key->tos & ~INET_ECN_MASK, 0, skb->mark,
609                             skb_get_hash(skb));
610         rt = ip_route_output_key(dev_net(dev), &fl4);
611         if (IS_ERR(rt))
612                 return PTR_ERR(rt);
613
614         ip_rt_put(rt);
615         info->key.u.ipv4.src = fl4.saddr;
616         return 0;
617 }
618
619 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
620                               struct net_device *dev)
621 {
622         struct ip_tunnel *tunnel = netdev_priv(dev);
623         const struct iphdr *tnl_params;
624
625         if (!pskb_inet_may_pull(skb))
626                 goto free_skb;
627
628         if (tunnel->collect_md) {
629                 gre_fb_xmit(skb, dev, skb->protocol);
630                 return NETDEV_TX_OK;
631         }
632
633         if (dev->header_ops) {
634                 const int pull_len = tunnel->hlen + sizeof(struct iphdr);
635
636                 if (skb_cow_head(skb, 0))
637                         goto free_skb;
638
639                 tnl_params = (const struct iphdr *)skb->data;
640
641                 if (pull_len > skb_transport_offset(skb))
642                         goto free_skb;
643
644                 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
645                  * to gre header.
646                  */
647                 skb_pull(skb, pull_len);
648                 skb_reset_mac_header(skb);
649         } else {
650                 if (skb_cow_head(skb, dev->needed_headroom))
651                         goto free_skb;
652
653                 tnl_params = &tunnel->parms.iph;
654         }
655
656         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
657                 goto free_skb;
658
659         __gre_xmit(skb, dev, tnl_params, skb->protocol);
660         return NETDEV_TX_OK;
661
662 free_skb:
663         kfree_skb(skb);
664         dev->stats.tx_dropped++;
665         return NETDEV_TX_OK;
666 }
667
668 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
669                                struct net_device *dev)
670 {
671         struct ip_tunnel *tunnel = netdev_priv(dev);
672         bool truncate = false;
673         __be16 proto;
674
675         if (!pskb_inet_may_pull(skb))
676                 goto free_skb;
677
678         if (tunnel->collect_md) {
679                 erspan_fb_xmit(skb, dev);
680                 return NETDEV_TX_OK;
681         }
682
683         if (gre_handle_offloads(skb, false))
684                 goto free_skb;
685
686         if (skb_cow_head(skb, dev->needed_headroom))
687                 goto free_skb;
688
689         if (skb->len > dev->mtu + dev->hard_header_len) {
690                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
691                 truncate = true;
692         }
693
694         /* Push ERSPAN header */
695         if (tunnel->erspan_ver == 0) {
696                 proto = htons(ETH_P_ERSPAN);
697                 tunnel->parms.o_flags &= ~TUNNEL_SEQ;
698         } else if (tunnel->erspan_ver == 1) {
699                 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
700                                     tunnel->index,
701                                     truncate, true);
702                 proto = htons(ETH_P_ERSPAN);
703         } else if (tunnel->erspan_ver == 2) {
704                 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
705                                        tunnel->dir, tunnel->hwid,
706                                        truncate, true);
707                 proto = htons(ETH_P_ERSPAN2);
708         } else {
709                 goto free_skb;
710         }
711
712         tunnel->parms.o_flags &= ~TUNNEL_KEY;
713         __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
714         return NETDEV_TX_OK;
715
716 free_skb:
717         kfree_skb(skb);
718         dev->stats.tx_dropped++;
719         return NETDEV_TX_OK;
720 }
721
722 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
723                                 struct net_device *dev)
724 {
725         struct ip_tunnel *tunnel = netdev_priv(dev);
726
727         if (!pskb_inet_may_pull(skb))
728                 goto free_skb;
729
730         if (tunnel->collect_md) {
731                 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
732                 return NETDEV_TX_OK;
733         }
734
735         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
736                 goto free_skb;
737
738         if (skb_cow_head(skb, dev->needed_headroom))
739                 goto free_skb;
740
741         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
742         return NETDEV_TX_OK;
743
744 free_skb:
745         kfree_skb(skb);
746         dev->stats.tx_dropped++;
747         return NETDEV_TX_OK;
748 }
749
750 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
751 {
752         struct ip_tunnel *tunnel = netdev_priv(dev);
753         int len;
754
755         len = tunnel->tun_hlen;
756         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
757         len = tunnel->tun_hlen - len;
758         tunnel->hlen = tunnel->hlen + len;
759
760         if (dev->header_ops)
761                 dev->hard_header_len += len;
762         else
763                 dev->needed_headroom += len;
764
765         if (set_mtu)
766                 dev->mtu = max_t(int, dev->mtu - len, 68);
767
768         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
769                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
770                     tunnel->encap.type == TUNNEL_ENCAP_NONE) {
771                         dev->features |= NETIF_F_GSO_SOFTWARE;
772                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
773                 } else {
774                         dev->features &= ~NETIF_F_GSO_SOFTWARE;
775                         dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
776                 }
777                 dev->features |= NETIF_F_LLTX;
778         } else {
779                 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
780                 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
781         }
782 }
783
784 static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
785                             int cmd)
786 {
787         int err;
788
789         if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
790                 if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
791                     p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
792                     ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
793                         return -EINVAL;
794         }
795
796         p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
797         p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
798
799         err = ip_tunnel_ctl(dev, p, cmd);
800         if (err)
801                 return err;
802
803         if (cmd == SIOCCHGTUNNEL) {
804                 struct ip_tunnel *t = netdev_priv(dev);
805
806                 t->parms.i_flags = p->i_flags;
807                 t->parms.o_flags = p->o_flags;
808
809                 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
810                         ipgre_link_update(dev, true);
811         }
812
813         p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
814         p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
815         return 0;
816 }
817
818 /* Nice toy. Unfortunately, useless in real life :-)
819    It allows to construct virtual multiprotocol broadcast "LAN"
820    over the Internet, provided multicast routing is tuned.
821
822
823    I have no idea whether this bicycle was invented before me,
824    so that I had to set ARPHRD_IPGRE to a random value.
825    I have an impression, that Cisco could make something similar,
826    but this feature is apparently missing in IOS<=11.2(8).
827
828    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
829    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
830
831    ping -t 255 224.66.66.66
832
833    If nobody answers, mbone does not work.
834
835    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
836    ip addr add 10.66.66.<somewhat>/24 dev Universe
837    ifconfig Universe up
838    ifconfig Universe add fe80::<Your_real_addr>/10
839    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
840    ftp 10.66.66.66
841    ...
842    ftp fec0:6666:6666::193.233.7.65
843    ...
844  */
845 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
846                         unsigned short type,
847                         const void *daddr, const void *saddr, unsigned int len)
848 {
849         struct ip_tunnel *t = netdev_priv(dev);
850         struct iphdr *iph;
851         struct gre_base_hdr *greh;
852
853         iph = skb_push(skb, t->hlen + sizeof(*iph));
854         greh = (struct gre_base_hdr *)(iph+1);
855         greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
856         greh->protocol = htons(type);
857
858         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
859
860         /* Set the source hardware address. */
861         if (saddr)
862                 memcpy(&iph->saddr, saddr, 4);
863         if (daddr)
864                 memcpy(&iph->daddr, daddr, 4);
865         if (iph->daddr)
866                 return t->hlen + sizeof(*iph);
867
868         return -(t->hlen + sizeof(*iph));
869 }
870
871 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
872 {
873         const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
874         memcpy(haddr, &iph->saddr, 4);
875         return 4;
876 }
877
878 static const struct header_ops ipgre_header_ops = {
879         .create = ipgre_header,
880         .parse  = ipgre_header_parse,
881 };
882
883 #ifdef CONFIG_NET_IPGRE_BROADCAST
884 static int ipgre_open(struct net_device *dev)
885 {
886         struct ip_tunnel *t = netdev_priv(dev);
887
888         if (ipv4_is_multicast(t->parms.iph.daddr)) {
889                 struct flowi4 fl4;
890                 struct rtable *rt;
891
892                 rt = ip_route_output_gre(t->net, &fl4,
893                                          t->parms.iph.daddr,
894                                          t->parms.iph.saddr,
895                                          t->parms.o_key,
896                                          RT_TOS(t->parms.iph.tos),
897                                          t->parms.link);
898                 if (IS_ERR(rt))
899                         return -EADDRNOTAVAIL;
900                 dev = rt->dst.dev;
901                 ip_rt_put(rt);
902                 if (!__in_dev_get_rtnl(dev))
903                         return -EADDRNOTAVAIL;
904                 t->mlink = dev->ifindex;
905                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
906         }
907         return 0;
908 }
909
910 static int ipgre_close(struct net_device *dev)
911 {
912         struct ip_tunnel *t = netdev_priv(dev);
913
914         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
915                 struct in_device *in_dev;
916                 in_dev = inetdev_by_index(t->net, t->mlink);
917                 if (in_dev)
918                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
919         }
920         return 0;
921 }
922 #endif
923
924 static const struct net_device_ops ipgre_netdev_ops = {
925         .ndo_init               = ipgre_tunnel_init,
926         .ndo_uninit             = ip_tunnel_uninit,
927 #ifdef CONFIG_NET_IPGRE_BROADCAST
928         .ndo_open               = ipgre_open,
929         .ndo_stop               = ipgre_close,
930 #endif
931         .ndo_start_xmit         = ipgre_xmit,
932         .ndo_siocdevprivate     = ip_tunnel_siocdevprivate,
933         .ndo_change_mtu         = ip_tunnel_change_mtu,
934         .ndo_get_stats64        = dev_get_tstats64,
935         .ndo_get_iflink         = ip_tunnel_get_iflink,
936         .ndo_tunnel_ctl         = ipgre_tunnel_ctl,
937 };
938
/*
 * Feature bits OR-ed into dev->features and dev->hw_features by
 * __gre_tunnel_init() and erspan_tunnel_init().
 */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
943
944 static void ipgre_tunnel_setup(struct net_device *dev)
945 {
946         dev->netdev_ops         = &ipgre_netdev_ops;
947         dev->type               = ARPHRD_IPGRE;
948         ip_tunnel_setup(dev, ipgre_net_id);
949 }
950
951 static void __gre_tunnel_init(struct net_device *dev)
952 {
953         struct ip_tunnel *tunnel;
954
955         tunnel = netdev_priv(dev);
956         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
957         tunnel->parms.iph.protocol = IPPROTO_GRE;
958
959         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
960         dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
961
962         dev->features           |= GRE_FEATURES;
963         dev->hw_features        |= GRE_FEATURES;
964
965         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
966                 /* TCP offload with GRE SEQ is not supported, nor
967                  * can we support 2 levels of outer headers requiring
968                  * an update.
969                  */
970                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
971                     (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
972                         dev->features    |= NETIF_F_GSO_SOFTWARE;
973                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
974                 }
975
976                 /* Can use a lockless transmit, unless we generate
977                  * output sequences
978                  */
979                 dev->features |= NETIF_F_LLTX;
980         }
981 }
982
983 static int ipgre_tunnel_init(struct net_device *dev)
984 {
985         struct ip_tunnel *tunnel = netdev_priv(dev);
986         struct iphdr *iph = &tunnel->parms.iph;
987
988         __gre_tunnel_init(dev);
989
990         __dev_addr_set(dev, &iph->saddr, 4);
991         memcpy(dev->broadcast, &iph->daddr, 4);
992
993         dev->flags              = IFF_NOARP;
994         netif_keep_dst(dev);
995         dev->addr_len           = 4;
996
997         if (iph->daddr && !tunnel->collect_md) {
998 #ifdef CONFIG_NET_IPGRE_BROADCAST
999                 if (ipv4_is_multicast(iph->daddr)) {
1000                         if (!iph->saddr)
1001                                 return -EINVAL;
1002                         dev->flags = IFF_BROADCAST;
1003                         dev->header_ops = &ipgre_header_ops;
1004                         dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1005                         dev->needed_headroom = 0;
1006                 }
1007 #endif
1008         } else if (!tunnel->collect_md) {
1009                 dev->header_ops = &ipgre_header_ops;
1010                 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1011                 dev->needed_headroom = 0;
1012         }
1013
1014         return ip_tunnel_init(dev);
1015 }
1016
1017 static const struct gre_protocol ipgre_protocol = {
1018         .handler     = gre_rcv,
1019         .err_handler = gre_err,
1020 };
1021
1022 static int __net_init ipgre_init_net(struct net *net)
1023 {
1024         return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1025 }
1026
1027 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1028 {
1029         ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1030 }
1031
1032 static struct pernet_operations ipgre_net_ops = {
1033         .init = ipgre_init_net,
1034         .exit_batch = ipgre_exit_batch_net,
1035         .id   = &ipgre_net_id,
1036         .size = sizeof(struct ip_tunnel_net),
1037 };
1038
1039 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1040                                  struct netlink_ext_ack *extack)
1041 {
1042         __be16 flags;
1043
1044         if (!data)
1045                 return 0;
1046
1047         flags = 0;
1048         if (data[IFLA_GRE_IFLAGS])
1049                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1050         if (data[IFLA_GRE_OFLAGS])
1051                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1052         if (flags & (GRE_VERSION|GRE_ROUTING))
1053                 return -EINVAL;
1054
1055         if (data[IFLA_GRE_COLLECT_METADATA] &&
1056             data[IFLA_GRE_ENCAP_TYPE] &&
1057             nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1058                 return -EINVAL;
1059
1060         return 0;
1061 }
1062
1063 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1064                               struct netlink_ext_ack *extack)
1065 {
1066         __be32 daddr;
1067
1068         if (tb[IFLA_ADDRESS]) {
1069                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1070                         return -EINVAL;
1071                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1072                         return -EADDRNOTAVAIL;
1073         }
1074
1075         if (!data)
1076                 goto out;
1077
1078         if (data[IFLA_GRE_REMOTE]) {
1079                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1080                 if (!daddr)
1081                         return -EINVAL;
1082         }
1083
1084 out:
1085         return ipgre_tunnel_validate(tb, data, extack);
1086 }
1087
1088 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1089                            struct netlink_ext_ack *extack)
1090 {
1091         __be16 flags = 0;
1092         int ret;
1093
1094         if (!data)
1095                 return 0;
1096
1097         ret = ipgre_tap_validate(tb, data, extack);
1098         if (ret)
1099                 return ret;
1100
1101         if (data[IFLA_GRE_ERSPAN_VER] &&
1102             nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1103                 return 0;
1104
1105         /* ERSPAN type II/III should only have GRE sequence and key flag */
1106         if (data[IFLA_GRE_OFLAGS])
1107                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1108         if (data[IFLA_GRE_IFLAGS])
1109                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1110         if (!data[IFLA_GRE_COLLECT_METADATA] &&
1111             flags != (GRE_SEQ | GRE_KEY))
1112                 return -EINVAL;
1113
1114         /* ERSPAN Session ID only has 10-bit. Since we reuse
1115          * 32-bit key field as ID, check it's range.
1116          */
1117         if (data[IFLA_GRE_IKEY] &&
1118             (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1119                 return -EINVAL;
1120
1121         if (data[IFLA_GRE_OKEY] &&
1122             (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1123                 return -EINVAL;
1124
1125         return 0;
1126 }
1127
1128 static int ipgre_netlink_parms(struct net_device *dev,
1129                                 struct nlattr *data[],
1130                                 struct nlattr *tb[],
1131                                 struct ip_tunnel_parm *parms,
1132                                 __u32 *fwmark)
1133 {
1134         struct ip_tunnel *t = netdev_priv(dev);
1135
1136         memset(parms, 0, sizeof(*parms));
1137
1138         parms->iph.protocol = IPPROTO_GRE;
1139
1140         if (!data)
1141                 return 0;
1142
1143         if (data[IFLA_GRE_LINK])
1144                 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1145
1146         if (data[IFLA_GRE_IFLAGS])
1147                 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1148
1149         if (data[IFLA_GRE_OFLAGS])
1150                 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1151
1152         if (data[IFLA_GRE_IKEY])
1153                 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1154
1155         if (data[IFLA_GRE_OKEY])
1156                 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1157
1158         if (data[IFLA_GRE_LOCAL])
1159                 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1160
1161         if (data[IFLA_GRE_REMOTE])
1162                 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1163
1164         if (data[IFLA_GRE_TTL])
1165                 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1166
1167         if (data[IFLA_GRE_TOS])
1168                 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1169
1170         if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1171                 if (t->ignore_df)
1172                         return -EINVAL;
1173                 parms->iph.frag_off = htons(IP_DF);
1174         }
1175
1176         if (data[IFLA_GRE_COLLECT_METADATA]) {
1177                 t->collect_md = true;
1178                 if (dev->type == ARPHRD_IPGRE)
1179                         dev->type = ARPHRD_NONE;
1180         }
1181
1182         if (data[IFLA_GRE_IGNORE_DF]) {
1183                 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1184                   && (parms->iph.frag_off & htons(IP_DF)))
1185                         return -EINVAL;
1186                 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1187         }
1188
1189         if (data[IFLA_GRE_FWMARK])
1190                 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1191
1192         return 0;
1193 }
1194
1195 static int erspan_netlink_parms(struct net_device *dev,
1196                                 struct nlattr *data[],
1197                                 struct nlattr *tb[],
1198                                 struct ip_tunnel_parm *parms,
1199                                 __u32 *fwmark)
1200 {
1201         struct ip_tunnel *t = netdev_priv(dev);
1202         int err;
1203
1204         err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1205         if (err)
1206                 return err;
1207         if (!data)
1208                 return 0;
1209
1210         if (data[IFLA_GRE_ERSPAN_VER]) {
1211                 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1212
1213                 if (t->erspan_ver > 2)
1214                         return -EINVAL;
1215         }
1216
1217         if (t->erspan_ver == 1) {
1218                 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1219                         t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1220                         if (t->index & ~INDEX_MASK)
1221                                 return -EINVAL;
1222                 }
1223         } else if (t->erspan_ver == 2) {
1224                 if (data[IFLA_GRE_ERSPAN_DIR]) {
1225                         t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1226                         if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1227                                 return -EINVAL;
1228                 }
1229                 if (data[IFLA_GRE_ERSPAN_HWID]) {
1230                         t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1231                         if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1232                                 return -EINVAL;
1233                 }
1234         }
1235
1236         return 0;
1237 }
1238
1239 /* This function returns true when ENCAP attributes are present in the nl msg */
1240 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1241                                       struct ip_tunnel_encap *ipencap)
1242 {
1243         bool ret = false;
1244
1245         memset(ipencap, 0, sizeof(*ipencap));
1246
1247         if (!data)
1248                 return ret;
1249
1250         if (data[IFLA_GRE_ENCAP_TYPE]) {
1251                 ret = true;
1252                 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1253         }
1254
1255         if (data[IFLA_GRE_ENCAP_FLAGS]) {
1256                 ret = true;
1257                 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1258         }
1259
1260         if (data[IFLA_GRE_ENCAP_SPORT]) {
1261                 ret = true;
1262                 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1263         }
1264
1265         if (data[IFLA_GRE_ENCAP_DPORT]) {
1266                 ret = true;
1267                 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1268         }
1269
1270         return ret;
1271 }
1272
1273 static int gre_tap_init(struct net_device *dev)
1274 {
1275         __gre_tunnel_init(dev);
1276         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1277         netif_keep_dst(dev);
1278
1279         return ip_tunnel_init(dev);
1280 }
1281
1282 static const struct net_device_ops gre_tap_netdev_ops = {
1283         .ndo_init               = gre_tap_init,
1284         .ndo_uninit             = ip_tunnel_uninit,
1285         .ndo_start_xmit         = gre_tap_xmit,
1286         .ndo_set_mac_address    = eth_mac_addr,
1287         .ndo_validate_addr      = eth_validate_addr,
1288         .ndo_change_mtu         = ip_tunnel_change_mtu,
1289         .ndo_get_stats64        = dev_get_tstats64,
1290         .ndo_get_iflink         = ip_tunnel_get_iflink,
1291         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1292 };
1293
1294 static int erspan_tunnel_init(struct net_device *dev)
1295 {
1296         struct ip_tunnel *tunnel = netdev_priv(dev);
1297
1298         if (tunnel->erspan_ver == 0)
1299                 tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1300         else
1301                 tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1302
1303         tunnel->parms.iph.protocol = IPPROTO_GRE;
1304         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1305                        erspan_hdr_len(tunnel->erspan_ver);
1306
1307         dev->features           |= GRE_FEATURES;
1308         dev->hw_features        |= GRE_FEATURES;
1309         dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
1310         netif_keep_dst(dev);
1311
1312         return ip_tunnel_init(dev);
1313 }
1314
1315 static const struct net_device_ops erspan_netdev_ops = {
1316         .ndo_init               = erspan_tunnel_init,
1317         .ndo_uninit             = ip_tunnel_uninit,
1318         .ndo_start_xmit         = erspan_xmit,
1319         .ndo_set_mac_address    = eth_mac_addr,
1320         .ndo_validate_addr      = eth_validate_addr,
1321         .ndo_change_mtu         = ip_tunnel_change_mtu,
1322         .ndo_get_stats64        = dev_get_tstats64,
1323         .ndo_get_iflink         = ip_tunnel_get_iflink,
1324         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1325 };
1326
1327 static void ipgre_tap_setup(struct net_device *dev)
1328 {
1329         ether_setup(dev);
1330         dev->max_mtu = 0;
1331         dev->netdev_ops = &gre_tap_netdev_ops;
1332         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1333         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1334         ip_tunnel_setup(dev, gre_tap_net_id);
1335 }
1336
1337 static int
1338 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1339 {
1340         struct ip_tunnel_encap ipencap;
1341
1342         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1343                 struct ip_tunnel *t = netdev_priv(dev);
1344                 int err = ip_tunnel_encap_setup(t, &ipencap);
1345
1346                 if (err < 0)
1347                         return err;
1348         }
1349
1350         return 0;
1351 }
1352
1353 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1354                          struct nlattr *tb[], struct nlattr *data[],
1355                          struct netlink_ext_ack *extack)
1356 {
1357         struct ip_tunnel_parm p;
1358         __u32 fwmark = 0;
1359         int err;
1360
1361         err = ipgre_newlink_encap_setup(dev, data);
1362         if (err)
1363                 return err;
1364
1365         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1366         if (err < 0)
1367                 return err;
1368         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1369 }
1370
1371 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1372                           struct nlattr *tb[], struct nlattr *data[],
1373                           struct netlink_ext_ack *extack)
1374 {
1375         struct ip_tunnel_parm p;
1376         __u32 fwmark = 0;
1377         int err;
1378
1379         err = ipgre_newlink_encap_setup(dev, data);
1380         if (err)
1381                 return err;
1382
1383         err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1384         if (err)
1385                 return err;
1386         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1387 }
1388
1389 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1390                             struct nlattr *data[],
1391                             struct netlink_ext_ack *extack)
1392 {
1393         struct ip_tunnel *t = netdev_priv(dev);
1394         __u32 fwmark = t->fwmark;
1395         struct ip_tunnel_parm p;
1396         int err;
1397
1398         err = ipgre_newlink_encap_setup(dev, data);
1399         if (err)
1400                 return err;
1401
1402         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1403         if (err < 0)
1404                 return err;
1405
1406         err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1407         if (err < 0)
1408                 return err;
1409
1410         t->parms.i_flags = p.i_flags;
1411         t->parms.o_flags = p.o_flags;
1412
1413         ipgre_link_update(dev, !tb[IFLA_MTU]);
1414
1415         return 0;
1416 }
1417
1418 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1419                              struct nlattr *data[],
1420                              struct netlink_ext_ack *extack)
1421 {
1422         struct ip_tunnel *t = netdev_priv(dev);
1423         __u32 fwmark = t->fwmark;
1424         struct ip_tunnel_parm p;
1425         int err;
1426
1427         err = ipgre_newlink_encap_setup(dev, data);
1428         if (err)
1429                 return err;
1430
1431         err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1432         if (err < 0)
1433                 return err;
1434
1435         err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1436         if (err < 0)
1437                 return err;
1438
1439         t->parms.i_flags = p.i_flags;
1440         t->parms.o_flags = p.o_flags;
1441
1442         return 0;
1443 }
1444
1445 static size_t ipgre_get_size(const struct net_device *dev)
1446 {
1447         return
1448                 /* IFLA_GRE_LINK */
1449                 nla_total_size(4) +
1450                 /* IFLA_GRE_IFLAGS */
1451                 nla_total_size(2) +
1452                 /* IFLA_GRE_OFLAGS */
1453                 nla_total_size(2) +
1454                 /* IFLA_GRE_IKEY */
1455                 nla_total_size(4) +
1456                 /* IFLA_GRE_OKEY */
1457                 nla_total_size(4) +
1458                 /* IFLA_GRE_LOCAL */
1459                 nla_total_size(4) +
1460                 /* IFLA_GRE_REMOTE */
1461                 nla_total_size(4) +
1462                 /* IFLA_GRE_TTL */
1463                 nla_total_size(1) +
1464                 /* IFLA_GRE_TOS */
1465                 nla_total_size(1) +
1466                 /* IFLA_GRE_PMTUDISC */
1467                 nla_total_size(1) +
1468                 /* IFLA_GRE_ENCAP_TYPE */
1469                 nla_total_size(2) +
1470                 /* IFLA_GRE_ENCAP_FLAGS */
1471                 nla_total_size(2) +
1472                 /* IFLA_GRE_ENCAP_SPORT */
1473                 nla_total_size(2) +
1474                 /* IFLA_GRE_ENCAP_DPORT */
1475                 nla_total_size(2) +
1476                 /* IFLA_GRE_COLLECT_METADATA */
1477                 nla_total_size(0) +
1478                 /* IFLA_GRE_IGNORE_DF */
1479                 nla_total_size(1) +
1480                 /* IFLA_GRE_FWMARK */
1481                 nla_total_size(4) +
1482                 /* IFLA_GRE_ERSPAN_INDEX */
1483                 nla_total_size(4) +
1484                 /* IFLA_GRE_ERSPAN_VER */
1485                 nla_total_size(1) +
1486                 /* IFLA_GRE_ERSPAN_DIR */
1487                 nla_total_size(1) +
1488                 /* IFLA_GRE_ERSPAN_HWID */
1489                 nla_total_size(2) +
1490                 0;
1491 }
1492
1493 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1494 {
1495         struct ip_tunnel *t = netdev_priv(dev);
1496         struct ip_tunnel_parm *p = &t->parms;
1497         __be16 o_flags = p->o_flags;
1498
1499         if (t->erspan_ver <= 2) {
1500                 if (t->erspan_ver != 0 && !t->collect_md)
1501                         o_flags |= TUNNEL_KEY;
1502
1503                 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1504                         goto nla_put_failure;
1505
1506                 if (t->erspan_ver == 1) {
1507                         if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1508                                 goto nla_put_failure;
1509                 } else if (t->erspan_ver == 2) {
1510                         if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1511                                 goto nla_put_failure;
1512                         if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1513                                 goto nla_put_failure;
1514                 }
1515         }
1516
1517         if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1518             nla_put_be16(skb, IFLA_GRE_IFLAGS,
1519                          gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1520             nla_put_be16(skb, IFLA_GRE_OFLAGS,
1521                          gre_tnl_flags_to_gre_flags(o_flags)) ||
1522             nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1523             nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1524             nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1525             nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1526             nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1527             nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1528             nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1529                        !!(p->iph.frag_off & htons(IP_DF))) ||
1530             nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1531                 goto nla_put_failure;
1532
1533         if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1534                         t->encap.type) ||
1535             nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1536                          t->encap.sport) ||
1537             nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1538                          t->encap.dport) ||
1539             nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1540                         t->encap.flags))
1541                 goto nla_put_failure;
1542
1543         if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1544                 goto nla_put_failure;
1545
1546         if (t->collect_md) {
1547                 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1548                         goto nla_put_failure;
1549         }
1550
1551         return 0;
1552
1553 nla_put_failure:
1554         return -EMSGSIZE;
1555 }
1556
1557 static void erspan_setup(struct net_device *dev)
1558 {
1559         struct ip_tunnel *t = netdev_priv(dev);
1560
1561         ether_setup(dev);
1562         dev->max_mtu = 0;
1563         dev->netdev_ops = &erspan_netdev_ops;
1564         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1565         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1566         ip_tunnel_setup(dev, erspan_net_id);
1567         t->erspan_ver = 1;
1568 }
1569
1570 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1571         [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1572         [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1573         [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1574         [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1575         [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1576         [IFLA_GRE_LOCAL]        = { .len = sizeof_field(struct iphdr, saddr) },
1577         [IFLA_GRE_REMOTE]       = { .len = sizeof_field(struct iphdr, daddr) },
1578         [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1579         [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1580         [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1581         [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
1582         [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
1583         [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
1584         [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
1585         [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
1586         [IFLA_GRE_IGNORE_DF]    = { .type = NLA_U8 },
1587         [IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
1588         [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1589         [IFLA_GRE_ERSPAN_VER]   = { .type = NLA_U8 },
1590         [IFLA_GRE_ERSPAN_DIR]   = { .type = NLA_U8 },
1591         [IFLA_GRE_ERSPAN_HWID]  = { .type = NLA_U16 },
1592 };
1593
1594 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1595         .kind           = "gre",
1596         .maxtype        = IFLA_GRE_MAX,
1597         .policy         = ipgre_policy,
1598         .priv_size      = sizeof(struct ip_tunnel),
1599         .setup          = ipgre_tunnel_setup,
1600         .validate       = ipgre_tunnel_validate,
1601         .newlink        = ipgre_newlink,
1602         .changelink     = ipgre_changelink,
1603         .dellink        = ip_tunnel_dellink,
1604         .get_size       = ipgre_get_size,
1605         .fill_info      = ipgre_fill_info,
1606         .get_link_net   = ip_tunnel_get_link_net,
1607 };
1608
1609 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1610         .kind           = "gretap",
1611         .maxtype        = IFLA_GRE_MAX,
1612         .policy         = ipgre_policy,
1613         .priv_size      = sizeof(struct ip_tunnel),
1614         .setup          = ipgre_tap_setup,
1615         .validate       = ipgre_tap_validate,
1616         .newlink        = ipgre_newlink,
1617         .changelink     = ipgre_changelink,
1618         .dellink        = ip_tunnel_dellink,
1619         .get_size       = ipgre_get_size,
1620         .fill_info      = ipgre_fill_info,
1621         .get_link_net   = ip_tunnel_get_link_net,
1622 };
1623
1624 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1625         .kind           = "erspan",
1626         .maxtype        = IFLA_GRE_MAX,
1627         .policy         = ipgre_policy,
1628         .priv_size      = sizeof(struct ip_tunnel),
1629         .setup          = erspan_setup,
1630         .validate       = erspan_validate,
1631         .newlink        = erspan_newlink,
1632         .changelink     = erspan_changelink,
1633         .dellink        = ip_tunnel_dellink,
1634         .get_size       = ipgre_get_size,
1635         .fill_info      = ipgre_fill_info,
1636         .get_link_net   = ip_tunnel_get_link_net,
1637 };
1638
1639 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1640                                         u8 name_assign_type)
1641 {
1642         struct nlattr *tb[IFLA_MAX + 1];
1643         struct net_device *dev;
1644         LIST_HEAD(list_kill);
1645         struct ip_tunnel *t;
1646         int err;
1647
1648         memset(&tb, 0, sizeof(tb));
1649
1650         dev = rtnl_create_link(net, name, name_assign_type,
1651                                &ipgre_tap_ops, tb, NULL);
1652         if (IS_ERR(dev))
1653                 return dev;
1654
1655         /* Configure flow based GRE device. */
1656         t = netdev_priv(dev);
1657         t->collect_md = true;
1658
1659         err = ipgre_newlink(net, dev, tb, NULL, NULL);
1660         if (err < 0) {
1661                 free_netdev(dev);
1662                 return ERR_PTR(err);
1663         }
1664
1665         /* openvswitch users expect packet sizes to be unrestricted,
1666          * so set the largest MTU we can.
1667          */
1668         err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1669         if (err)
1670                 goto out;
1671
1672         err = rtnl_configure_link(dev, NULL);
1673         if (err < 0)
1674                 goto out;
1675
1676         return dev;
1677 out:
1678         ip_tunnel_dellink(dev, &list_kill);
1679         unregister_netdevice_many(&list_kill);
1680         return ERR_PTR(err);
1681 }
1682 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1683
1684 static int __net_init ipgre_tap_init_net(struct net *net)
1685 {
1686         return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1687 }
1688
1689 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1690 {
1691         ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1692 }
1693
1694 static struct pernet_operations ipgre_tap_net_ops = {
1695         .init = ipgre_tap_init_net,
1696         .exit_batch = ipgre_tap_exit_batch_net,
1697         .id   = &gre_tap_net_id,
1698         .size = sizeof(struct ip_tunnel_net),
1699 };
1700
1701 static int __net_init erspan_init_net(struct net *net)
1702 {
1703         return ip_tunnel_init_net(net, erspan_net_id,
1704                                   &erspan_link_ops, "erspan0");
1705 }
1706
1707 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1708 {
1709         ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1710 }
1711
1712 static struct pernet_operations erspan_net_ops = {
1713         .init = erspan_init_net,
1714         .exit_batch = erspan_exit_batch_net,
1715         .id   = &erspan_net_id,
1716         .size = sizeof(struct ip_tunnel_net),
1717 };
1718
1719 static int __init ipgre_init(void)
1720 {
1721         int err;
1722
1723         pr_info("GRE over IPv4 tunneling driver\n");
1724
1725         err = register_pernet_device(&ipgre_net_ops);
1726         if (err < 0)
1727                 return err;
1728
1729         err = register_pernet_device(&ipgre_tap_net_ops);
1730         if (err < 0)
1731                 goto pnet_tap_failed;
1732
1733         err = register_pernet_device(&erspan_net_ops);
1734         if (err < 0)
1735                 goto pnet_erspan_failed;
1736
1737         err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1738         if (err < 0) {
1739                 pr_info("%s: can't add protocol\n", __func__);
1740                 goto add_proto_failed;
1741         }
1742
1743         err = rtnl_link_register(&ipgre_link_ops);
1744         if (err < 0)
1745                 goto rtnl_link_failed;
1746
1747         err = rtnl_link_register(&ipgre_tap_ops);
1748         if (err < 0)
1749                 goto tap_ops_failed;
1750
1751         err = rtnl_link_register(&erspan_link_ops);
1752         if (err < 0)
1753                 goto erspan_link_failed;
1754
1755         return 0;
1756
1757 erspan_link_failed:
1758         rtnl_link_unregister(&ipgre_tap_ops);
1759 tap_ops_failed:
1760         rtnl_link_unregister(&ipgre_link_ops);
1761 rtnl_link_failed:
1762         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1763 add_proto_failed:
1764         unregister_pernet_device(&erspan_net_ops);
1765 pnet_erspan_failed:
1766         unregister_pernet_device(&ipgre_tap_net_ops);
1767 pnet_tap_failed:
1768         unregister_pernet_device(&ipgre_net_ops);
1769         return err;
1770 }
1771
/*
 * Module exit: undo everything ipgre_init() registered.
 *
 * The rtnl link ops are unregistered first, then the GREPROTO_CISCO
 * protocol handler is removed, and the pernet ops go last.
 */
1772 static void __exit ipgre_fini(void)
1773 {
        /* rtnl link ops */
1774         rtnl_link_unregister(&ipgre_tap_ops);
1775         rtnl_link_unregister(&ipgre_link_ops);
1776         rtnl_link_unregister(&erspan_link_ops);
        /* GRE protocol handler */
1777         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
        /* per-namespace ops */
1778         unregister_pernet_device(&ipgre_tap_net_ops);
1779         unregister_pernet_device(&ipgre_net_ops);
1780         unregister_pernet_device(&erspan_net_ops);
1781 }
1782
/* Module entry/exit points and license. */
1783 module_init(ipgre_init);
1784 module_exit(ipgre_fini);
1785 MODULE_LICENSE("GPL");
/*
 * Module aliases for rtnl link kinds.  "erspan" matches
 * erspan_link_ops.kind; "gre" and "gretap" presumably match the kinds of
 * ipgre_link_ops and ipgre_tap_ops, which are set outside this part of
 * the file - confirm there.
 */
1786 MODULE_ALIAS_RTNL_LINK("gre");
1787 MODULE_ALIAS_RTNL_LINK("gretap");
1788 MODULE_ALIAS_RTNL_LINK("erspan");
/*
 * Module aliases for net device names.  "gretap0" and "erspan0" are the
 * names passed to ip_tunnel_init_net() by the per-namespace init hooks;
 * "gre0" is presumably the equivalent for ipgre_net_ops - confirm.
 */
1789 MODULE_ALIAS_NETDEV("gre0");
1790 MODULE_ALIAS_NETDEV("gretap0");
1791 MODULE_ALIAS_NETDEV("erspan0");