Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux-2.6-microblaze.git] / net / ipv4 / ip_gre.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      Linux NET3:     GRE over IP protocol decoder.
4  *
5  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47
48 /*
49    Problems & solutions
50    --------------------
51
52    1. The most important issue is detecting local dead loops.
53    They would cause complete host lockup in transmit, which
54    would be "resolved" by stack overflow or, if queueing is enabled,
55    with infinite looping in net_bh.
56
57    We cannot track such dead loops during route installation,
58    it is infeasible task. The most general solutions would be
59    to keep skb->encapsulation counter (sort of local ttl),
60    and silently drop packet when it expires. It is a good
61    solution, but it supposes maintaining new variable in ALL
62    skb, even if no tunneling is used.
63
64    Current solution: xmit_recursion breaks dead loops. This is a percpu
65    counter, since when we enter the first ndo_xmit(), cpu migration is
66    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
67
68    2. Networking dead loops would not kill routers, but would really
69    kill network. IP hop limit plays role of "t->recursion" in this case,
70    if we copy it from packet being encapsulated to upper header.
71    It is very good solution, but it introduces two problems:
72
73    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74      do not work over tunnels.
75    - traceroute does not work. I planned to relay ICMP from tunnel,
76      so that this problem would be solved and traceroute output
77      would be even more informative. This idea appeared to be wrong:
78      only Linux complies to rfc1812 now (yes, guys, Linux is the only
79      true router now :-)), all routers (at least, in neighbourhood of mine)
80      return only 8 bytes of payload. It is the end.
81
82    Hence, if we want that OSPF worked or traceroute said something reasonable,
83    we should search for another solution.
84
85    One of them is to parse packet trying to detect inner encapsulation
86    made by our node. It is difficult or even impossible, especially,
87    taking into account fragmentation. To be short, ttl is not a solution at all.
88
89    Current solution: The solution was UNEXPECTEDLY SIMPLE.
90    We force DF flag on tunnels with preconfigured hop limit,
91    that is ALL. :-) Well, it does not remove the problem completely,
92    but exponential growth of network traffic is changed to linear
93    (branches, that exceed pmtu are pruned) and tunnel mtu
94    rapidly degrades to value <68, where looping stops.
95    Yes, it is not good if there exists a router in the loop,
96    which does not force DF, even when encapsulating packets have DF set.
97    But it is not our problem! Nobody could accuse us, we made
98    all that we could make. Even if it is your gated who injected
99    fatal route to network, even if it were you who configured
100    fatal static route: you are innocent. :-)
101
102    Alexey Kuznetsov.
103  */
104
/* Module parameter (mode 0644); the receive paths in this file hand it
 * to ip_tunnel_rcv() as its last argument.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Tentative definitions; ipgre_header_ops is filled in further down,
 * the initializer of ipgre_link_ops is not in the visible section.
 */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;

/* Forward declarations (bodies are outside the visible section). */
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
                                u32 id, u32 index,
                                bool truncate, bool is_ipv4);

/* Ids passed to net_generic() to fetch the per-namespace
 * struct ip_tunnel_net for plain GRE, GRE tap and ERSPAN devices.
 */
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
120
121 static int ipgre_err(struct sk_buff *skb, u32 info,
122                      const struct tnl_ptk_info *tpi)
123 {
124
125         /* All the routers (except for Linux) return only
126            8 bytes of packet payload. It means, that precise relaying of
127            ICMP in the real Internet is absolutely infeasible.
128
129            Moreover, Cisco "wise men" put GRE key to the third word
130            in GRE header. It makes impossible maintaining even soft
131            state for keyed GRE tunnels with enabled checksum. Tell
132            them "thank you".
133
134            Well, I wonder, rfc1812 was written by Cisco employee,
135            what the hell these idiots break standards established
136            by themselves???
137            */
138         struct net *net = dev_net(skb->dev);
139         struct ip_tunnel_net *itn;
140         const struct iphdr *iph;
141         const int type = icmp_hdr(skb)->type;
142         const int code = icmp_hdr(skb)->code;
143         unsigned int data_len = 0;
144         struct ip_tunnel *t;
145
146         if (tpi->proto == htons(ETH_P_TEB))
147                 itn = net_generic(net, gre_tap_net_id);
148         else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149                  tpi->proto == htons(ETH_P_ERSPAN2))
150                 itn = net_generic(net, erspan_net_id);
151         else
152                 itn = net_generic(net, ipgre_net_id);
153
154         iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155         t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156                              iph->daddr, iph->saddr, tpi->key);
157
158         if (!t)
159                 return -ENOENT;
160
161         switch (type) {
162         default:
163         case ICMP_PARAMETERPROB:
164                 return 0;
165
166         case ICMP_DEST_UNREACH:
167                 switch (code) {
168                 case ICMP_SR_FAILED:
169                 case ICMP_PORT_UNREACH:
170                         /* Impossible event. */
171                         return 0;
172                 default:
173                         /* All others are translated to HOST_UNREACH.
174                            rfc2003 contains "deep thoughts" about NET_UNREACH,
175                            I believe they are just ether pollution. --ANK
176                          */
177                         break;
178                 }
179                 break;
180
181         case ICMP_TIME_EXCEEDED:
182                 if (code != ICMP_EXC_TTL)
183                         return 0;
184                 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
185                 break;
186
187         case ICMP_REDIRECT:
188                 break;
189         }
190
191 #if IS_ENABLED(CONFIG_IPV6)
192        if (tpi->proto == htons(ETH_P_IPV6) &&
193            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
194                                        type, data_len))
195                return 0;
196 #endif
197
198         if (t->parms.iph.daddr == 0 ||
199             ipv4_is_multicast(t->parms.iph.daddr))
200                 return 0;
201
202         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
203                 return 0;
204
205         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
206                 t->err_count++;
207         else
208                 t->err_count = 1;
209         t->err_time = jiffies;
210
211         return 0;
212 }
213
214 static void gre_err(struct sk_buff *skb, u32 info)
215 {
216         /* All the routers (except for Linux) return only
217          * 8 bytes of packet payload. It means, that precise relaying of
218          * ICMP in the real Internet is absolutely infeasible.
219          *
220          * Moreover, Cisco "wise men" put GRE key to the third word
221          * in GRE header. It makes impossible maintaining even soft
222          * state for keyed
223          * GRE tunnels with enabled checksum. Tell them "thank you".
224          *
225          * Well, I wonder, rfc1812 was written by Cisco employee,
226          * what the hell these idiots break standards established
227          * by themselves???
228          */
229
230         const struct iphdr *iph = (struct iphdr *)skb->data;
231         const int type = icmp_hdr(skb)->type;
232         const int code = icmp_hdr(skb)->code;
233         struct tnl_ptk_info tpi;
234
235         if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
236                              iph->ihl * 4) < 0)
237                 return;
238
239         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
240                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
241                                  skb->dev->ifindex, IPPROTO_GRE);
242                 return;
243         }
244         if (type == ICMP_REDIRECT) {
245                 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
246                               IPPROTO_GRE);
247                 return;
248         }
249
250         ipgre_err(skb, info, &tpi);
251 }
252
static bool is_erspan_type1(int gre_hdr_len)
{
	/* ERSPAN type I (version 0) and type II (version 1) share protocol
	 * 0x88BE; they are told apart by the GRE header length, which is
	 * 4 bytes for type I and 8 bytes for type II.
	 */
	if (gre_hdr_len != 4)
		return false;

	return true;
}
261
262 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
263                       int gre_hdr_len)
264 {
265         struct net *net = dev_net(skb->dev);
266         struct metadata_dst *tun_dst = NULL;
267         struct erspan_base_hdr *ershdr;
268         struct ip_tunnel_net *itn;
269         struct ip_tunnel *tunnel;
270         const struct iphdr *iph;
271         struct erspan_md2 *md2;
272         int ver;
273         int len;
274
275         itn = net_generic(net, erspan_net_id);
276         iph = ip_hdr(skb);
277         if (is_erspan_type1(gre_hdr_len)) {
278                 ver = 0;
279                 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
280                                           tpi->flags | TUNNEL_NO_KEY,
281                                           iph->saddr, iph->daddr, 0);
282         } else {
283                 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
284                 ver = ershdr->ver;
285                 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
286                                           tpi->flags | TUNNEL_KEY,
287                                           iph->saddr, iph->daddr, tpi->key);
288         }
289
290         if (tunnel) {
291                 if (is_erspan_type1(gre_hdr_len))
292                         len = gre_hdr_len;
293                 else
294                         len = gre_hdr_len + erspan_hdr_len(ver);
295
296                 if (unlikely(!pskb_may_pull(skb, len)))
297                         return PACKET_REJECT;
298
299                 if (__iptunnel_pull_header(skb,
300                                            len,
301                                            htons(ETH_P_TEB),
302                                            false, false) < 0)
303                         goto drop;
304
305                 if (tunnel->collect_md) {
306                         struct erspan_metadata *pkt_md, *md;
307                         struct ip_tunnel_info *info;
308                         unsigned char *gh;
309                         __be64 tun_id;
310                         __be16 flags;
311
312                         tpi->flags |= TUNNEL_KEY;
313                         flags = tpi->flags;
314                         tun_id = key32_to_tunnel_id(tpi->key);
315
316                         tun_dst = ip_tun_rx_dst(skb, flags,
317                                                 tun_id, sizeof(*md));
318                         if (!tun_dst)
319                                 return PACKET_REJECT;
320
321                         /* skb can be uncloned in __iptunnel_pull_header, so
322                          * old pkt_md is no longer valid and we need to reset
323                          * it
324                          */
325                         gh = skb_network_header(skb) +
326                              skb_network_header_len(skb);
327                         pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
328                                                             sizeof(*ershdr));
329                         md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
330                         md->version = ver;
331                         md2 = &md->u.md2;
332                         memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
333                                                        ERSPAN_V2_MDSIZE);
334
335                         info = &tun_dst->u.tun_info;
336                         info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
337                         info->options_len = sizeof(*md);
338                 }
339
340                 skb_reset_mac_header(skb);
341                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
342                 return PACKET_RCVD;
343         }
344         return PACKET_REJECT;
345
346 drop:
347         kfree_skb(skb);
348         return PACKET_RCVD;
349 }
350
351 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
352                        struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
353 {
354         struct metadata_dst *tun_dst = NULL;
355         const struct iphdr *iph;
356         struct ip_tunnel *tunnel;
357
358         iph = ip_hdr(skb);
359         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
360                                   iph->saddr, iph->daddr, tpi->key);
361
362         if (tunnel) {
363                 const struct iphdr *tnl_params;
364
365                 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
366                                            raw_proto, false) < 0)
367                         goto drop;
368
369                 /* Special case for ipgre_header_parse(), which expects the
370                  * mac_header to point to the outer IP header.
371                  */
372                 if (tunnel->dev->header_ops == &ipgre_header_ops)
373                         skb_pop_mac_header(skb);
374                 else
375                         skb_reset_mac_header(skb);
376
377                 tnl_params = &tunnel->parms.iph;
378                 if (tunnel->collect_md || tnl_params->daddr == 0) {
379                         __be16 flags;
380                         __be64 tun_id;
381
382                         flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
383                         tun_id = key32_to_tunnel_id(tpi->key);
384                         tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
385                         if (!tun_dst)
386                                 return PACKET_REJECT;
387                 }
388
389                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
390                 return PACKET_RCVD;
391         }
392         return PACKET_NEXT;
393
394 drop:
395         kfree_skb(skb);
396         return PACKET_RCVD;
397 }
398
399 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
400                      int hdr_len)
401 {
402         struct net *net = dev_net(skb->dev);
403         struct ip_tunnel_net *itn;
404         int res;
405
406         if (tpi->proto == htons(ETH_P_TEB))
407                 itn = net_generic(net, gre_tap_net_id);
408         else
409                 itn = net_generic(net, ipgre_net_id);
410
411         res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
412         if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
413                 /* ipgre tunnels in collect metadata mode should receive
414                  * also ETH_P_TEB traffic.
415                  */
416                 itn = net_generic(net, ipgre_net_id);
417                 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
418         }
419         return res;
420 }
421
422 static int gre_rcv(struct sk_buff *skb)
423 {
424         struct tnl_ptk_info tpi;
425         bool csum_err = false;
426         int hdr_len;
427
428 #ifdef CONFIG_NET_IPGRE_BROADCAST
429         if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
430                 /* Looped back packet, drop it! */
431                 if (rt_is_output_route(skb_rtable(skb)))
432                         goto drop;
433         }
434 #endif
435
436         hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
437         if (hdr_len < 0)
438                 goto drop;
439
440         if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
441                      tpi.proto == htons(ETH_P_ERSPAN2))) {
442                 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
443                         return 0;
444                 goto out;
445         }
446
447         if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
448                 return 0;
449
450 out:
451         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
452 drop:
453         kfree_skb(skb);
454         return 0;
455 }
456
457 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
458                        const struct iphdr *tnl_params,
459                        __be16 proto)
460 {
461         struct ip_tunnel *tunnel = netdev_priv(dev);
462
463         if (tunnel->parms.o_flags & TUNNEL_SEQ)
464                 tunnel->o_seqno++;
465
466         /* Push GRE header. */
467         gre_build_header(skb, tunnel->tun_hlen,
468                          tunnel->parms.o_flags, proto, tunnel->parms.o_key,
469                          htonl(tunnel->o_seqno));
470
471         ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
472 }
473
474 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
475 {
476         return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
477 }
478
479 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
480                         __be16 proto)
481 {
482         struct ip_tunnel *tunnel = netdev_priv(dev);
483         struct ip_tunnel_info *tun_info;
484         const struct ip_tunnel_key *key;
485         int tunnel_hlen;
486         __be16 flags;
487
488         tun_info = skb_tunnel_info(skb);
489         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
490                      ip_tunnel_info_af(tun_info) != AF_INET))
491                 goto err_free_skb;
492
493         key = &tun_info->key;
494         tunnel_hlen = gre_calc_hlen(key->tun_flags);
495
496         if (skb_cow_head(skb, dev->needed_headroom))
497                 goto err_free_skb;
498
499         /* Push Tunnel header. */
500         if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
501                 goto err_free_skb;
502
503         flags = tun_info->key.tun_flags &
504                 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
505         gre_build_header(skb, tunnel_hlen, flags, proto,
506                          tunnel_id_to_key32(tun_info->key.tun_id),
507                          (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
508
509         ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
510
511         return;
512
513 err_free_skb:
514         kfree_skb(skb);
515         dev->stats.tx_dropped++;
516 }
517
518 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
519 {
520         struct ip_tunnel *tunnel = netdev_priv(dev);
521         struct ip_tunnel_info *tun_info;
522         const struct ip_tunnel_key *key;
523         struct erspan_metadata *md;
524         bool truncate = false;
525         __be16 proto;
526         int tunnel_hlen;
527         int version;
528         int nhoff;
529         int thoff;
530
531         tun_info = skb_tunnel_info(skb);
532         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
533                      ip_tunnel_info_af(tun_info) != AF_INET))
534                 goto err_free_skb;
535
536         key = &tun_info->key;
537         if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
538                 goto err_free_skb;
539         if (tun_info->options_len < sizeof(*md))
540                 goto err_free_skb;
541         md = ip_tunnel_info_opts(tun_info);
542
543         /* ERSPAN has fixed 8 byte GRE header */
544         version = md->version;
545         tunnel_hlen = 8 + erspan_hdr_len(version);
546
547         if (skb_cow_head(skb, dev->needed_headroom))
548                 goto err_free_skb;
549
550         if (gre_handle_offloads(skb, false))
551                 goto err_free_skb;
552
553         if (skb->len > dev->mtu + dev->hard_header_len) {
554                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
555                 truncate = true;
556         }
557
558         nhoff = skb_network_header(skb) - skb_mac_header(skb);
559         if (skb->protocol == htons(ETH_P_IP) &&
560             (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
561                 truncate = true;
562
563         thoff = skb_transport_header(skb) - skb_mac_header(skb);
564         if (skb->protocol == htons(ETH_P_IPV6) &&
565             (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
566                 truncate = true;
567
568         if (version == 1) {
569                 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
570                                     ntohl(md->u.index), truncate, true);
571                 proto = htons(ETH_P_ERSPAN);
572         } else if (version == 2) {
573                 erspan_build_header_v2(skb,
574                                        ntohl(tunnel_id_to_key32(key->tun_id)),
575                                        md->u.md2.dir,
576                                        get_hwid(&md->u.md2),
577                                        truncate, true);
578                 proto = htons(ETH_P_ERSPAN2);
579         } else {
580                 goto err_free_skb;
581         }
582
583         gre_build_header(skb, 8, TUNNEL_SEQ,
584                          proto, 0, htonl(tunnel->o_seqno++));
585
586         ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
587
588         return;
589
590 err_free_skb:
591         kfree_skb(skb);
592         dev->stats.tx_dropped++;
593 }
594
595 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
596 {
597         struct ip_tunnel_info *info = skb_tunnel_info(skb);
598         const struct ip_tunnel_key *key;
599         struct rtable *rt;
600         struct flowi4 fl4;
601
602         if (ip_tunnel_info_af(info) != AF_INET)
603                 return -EINVAL;
604
605         key = &info->key;
606         ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
607                             tunnel_id_to_key32(key->tun_id), key->tos, 0,
608                             skb->mark, skb_get_hash(skb));
609         rt = ip_route_output_key(dev_net(dev), &fl4);
610         if (IS_ERR(rt))
611                 return PTR_ERR(rt);
612
613         ip_rt_put(rt);
614         info->key.u.ipv4.src = fl4.saddr;
615         return 0;
616 }
617
618 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
619                               struct net_device *dev)
620 {
621         struct ip_tunnel *tunnel = netdev_priv(dev);
622         const struct iphdr *tnl_params;
623
624         if (!pskb_inet_may_pull(skb))
625                 goto free_skb;
626
627         if (tunnel->collect_md) {
628                 gre_fb_xmit(skb, dev, skb->protocol);
629                 return NETDEV_TX_OK;
630         }
631
632         if (dev->header_ops) {
633                 const int pull_len = tunnel->hlen + sizeof(struct iphdr);
634
635                 if (skb_cow_head(skb, 0))
636                         goto free_skb;
637
638                 tnl_params = (const struct iphdr *)skb->data;
639
640                 if (pull_len > skb_transport_offset(skb))
641                         goto free_skb;
642
643                 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
644                  * to gre header.
645                  */
646                 skb_pull(skb, pull_len);
647                 skb_reset_mac_header(skb);
648         } else {
649                 if (skb_cow_head(skb, dev->needed_headroom))
650                         goto free_skb;
651
652                 tnl_params = &tunnel->parms.iph;
653         }
654
655         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
656                 goto free_skb;
657
658         __gre_xmit(skb, dev, tnl_params, skb->protocol);
659         return NETDEV_TX_OK;
660
661 free_skb:
662         kfree_skb(skb);
663         dev->stats.tx_dropped++;
664         return NETDEV_TX_OK;
665 }
666
667 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
668                                struct net_device *dev)
669 {
670         struct ip_tunnel *tunnel = netdev_priv(dev);
671         bool truncate = false;
672         __be16 proto;
673
674         if (!pskb_inet_may_pull(skb))
675                 goto free_skb;
676
677         if (tunnel->collect_md) {
678                 erspan_fb_xmit(skb, dev);
679                 return NETDEV_TX_OK;
680         }
681
682         if (gre_handle_offloads(skb, false))
683                 goto free_skb;
684
685         if (skb_cow_head(skb, dev->needed_headroom))
686                 goto free_skb;
687
688         if (skb->len > dev->mtu + dev->hard_header_len) {
689                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
690                 truncate = true;
691         }
692
693         /* Push ERSPAN header */
694         if (tunnel->erspan_ver == 0) {
695                 proto = htons(ETH_P_ERSPAN);
696                 tunnel->parms.o_flags &= ~TUNNEL_SEQ;
697         } else if (tunnel->erspan_ver == 1) {
698                 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
699                                     tunnel->index,
700                                     truncate, true);
701                 proto = htons(ETH_P_ERSPAN);
702         } else if (tunnel->erspan_ver == 2) {
703                 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
704                                        tunnel->dir, tunnel->hwid,
705                                        truncate, true);
706                 proto = htons(ETH_P_ERSPAN2);
707         } else {
708                 goto free_skb;
709         }
710
711         tunnel->parms.o_flags &= ~TUNNEL_KEY;
712         __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
713         return NETDEV_TX_OK;
714
715 free_skb:
716         kfree_skb(skb);
717         dev->stats.tx_dropped++;
718         return NETDEV_TX_OK;
719 }
720
721 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
722                                 struct net_device *dev)
723 {
724         struct ip_tunnel *tunnel = netdev_priv(dev);
725
726         if (!pskb_inet_may_pull(skb))
727                 goto free_skb;
728
729         if (tunnel->collect_md) {
730                 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
731                 return NETDEV_TX_OK;
732         }
733
734         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
735                 goto free_skb;
736
737         if (skb_cow_head(skb, dev->needed_headroom))
738                 goto free_skb;
739
740         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
741         return NETDEV_TX_OK;
742
743 free_skb:
744         kfree_skb(skb);
745         dev->stats.tx_dropped++;
746         return NETDEV_TX_OK;
747 }
748
749 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
750 {
751         struct ip_tunnel *tunnel = netdev_priv(dev);
752         int len;
753
754         len = tunnel->tun_hlen;
755         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
756         len = tunnel->tun_hlen - len;
757         tunnel->hlen = tunnel->hlen + len;
758
759         if (dev->header_ops)
760                 dev->hard_header_len += len;
761         else
762                 dev->needed_headroom += len;
763
764         if (set_mtu)
765                 dev->mtu = max_t(int, dev->mtu - len, 68);
766
767         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
768                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
769                     tunnel->encap.type == TUNNEL_ENCAP_NONE) {
770                         dev->features |= NETIF_F_GSO_SOFTWARE;
771                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
772                 } else {
773                         dev->features &= ~NETIF_F_GSO_SOFTWARE;
774                         dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
775                 }
776                 dev->features |= NETIF_F_LLTX;
777         } else {
778                 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
779                 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
780         }
781 }
782
783 static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
784                             int cmd)
785 {
786         int err;
787
788         if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
789                 if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
790                     p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
791                     ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
792                         return -EINVAL;
793         }
794
795         p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
796         p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
797
798         err = ip_tunnel_ctl(dev, p, cmd);
799         if (err)
800                 return err;
801
802         if (cmd == SIOCCHGTUNNEL) {
803                 struct ip_tunnel *t = netdev_priv(dev);
804
805                 t->parms.i_flags = p->i_flags;
806                 t->parms.o_flags = p->o_flags;
807
808                 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
809                         ipgre_link_update(dev, true);
810         }
811
812         p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
813         p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
814         return 0;
815 }
816
817 /* Nice toy. Unfortunately, useless in real life :-)
818    It allows to construct virtual multiprotocol broadcast "LAN"
819    over the Internet, provided multicast routing is tuned.
820
821
822    I have no idea was this bicycle invented before me,
823    so that I had to set ARPHRD_IPGRE to a random value.
824    I have an impression, that Cisco could make something similar,
825    but this feature is apparently missing in IOS<=11.2(8).
826
827    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
828    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
829
830    ping -t 255 224.66.66.66
831
832    If nobody answers, mbone does not work.
833
834    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
835    ip addr add 10.66.66.<somewhat>/24 dev Universe
836    ifconfig Universe up
837    ifconfig Universe add fe80::<Your_real_addr>/10
838    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
839    ftp 10.66.66.66
840    ...
841    ftp fec0:6666:6666::193.233.7.65
842    ...
843  */
844 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
845                         unsigned short type,
846                         const void *daddr, const void *saddr, unsigned int len)
847 {
848         struct ip_tunnel *t = netdev_priv(dev);
849         struct iphdr *iph;
850         struct gre_base_hdr *greh;
851
852         iph = skb_push(skb, t->hlen + sizeof(*iph));
853         greh = (struct gre_base_hdr *)(iph+1);
854         greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
855         greh->protocol = htons(type);
856
857         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
858
859         /* Set the source hardware address. */
860         if (saddr)
861                 memcpy(&iph->saddr, saddr, 4);
862         if (daddr)
863                 memcpy(&iph->daddr, daddr, 4);
864         if (iph->daddr)
865                 return t->hlen + sizeof(*iph);
866
867         return -(t->hlen + sizeof(*iph));
868 }
869
870 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
871 {
872         const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
873         memcpy(haddr, &iph->saddr, 4);
874         return 4;
875 }
876
877 static const struct header_ops ipgre_header_ops = {
878         .create = ipgre_header,
879         .parse  = ipgre_header_parse,
880 };
881
882 #ifdef CONFIG_NET_IPGRE_BROADCAST
883 static int ipgre_open(struct net_device *dev)
884 {
885         struct ip_tunnel *t = netdev_priv(dev);
886
887         if (ipv4_is_multicast(t->parms.iph.daddr)) {
888                 struct flowi4 fl4;
889                 struct rtable *rt;
890
891                 rt = ip_route_output_gre(t->net, &fl4,
892                                          t->parms.iph.daddr,
893                                          t->parms.iph.saddr,
894                                          t->parms.o_key,
895                                          RT_TOS(t->parms.iph.tos),
896                                          t->parms.link);
897                 if (IS_ERR(rt))
898                         return -EADDRNOTAVAIL;
899                 dev = rt->dst.dev;
900                 ip_rt_put(rt);
901                 if (!__in_dev_get_rtnl(dev))
902                         return -EADDRNOTAVAIL;
903                 t->mlink = dev->ifindex;
904                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
905         }
906         return 0;
907 }
908
909 static int ipgre_close(struct net_device *dev)
910 {
911         struct ip_tunnel *t = netdev_priv(dev);
912
913         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
914                 struct in_device *in_dev;
915                 in_dev = inetdev_by_index(t->net, t->mlink);
916                 if (in_dev)
917                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
918         }
919         return 0;
920 }
921 #endif
922
923 static const struct net_device_ops ipgre_netdev_ops = {
924         .ndo_init               = ipgre_tunnel_init,
925         .ndo_uninit             = ip_tunnel_uninit,
926 #ifdef CONFIG_NET_IPGRE_BROADCAST
927         .ndo_open               = ipgre_open,
928         .ndo_stop               = ipgre_close,
929 #endif
930         .ndo_start_xmit         = ipgre_xmit,
931         .ndo_siocdevprivate     = ip_tunnel_siocdevprivate,
932         .ndo_change_mtu         = ip_tunnel_change_mtu,
933         .ndo_get_stats64        = dev_get_tstats64,
934         .ndo_get_iflink         = ip_tunnel_get_iflink,
935         .ndo_tunnel_ctl         = ipgre_tunnel_ctl,
936 };
937
/* Feature bits set on both dev->features and dev->hw_features at init. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
942
943 static void ipgre_tunnel_setup(struct net_device *dev)
944 {
945         dev->netdev_ops         = &ipgre_netdev_ops;
946         dev->type               = ARPHRD_IPGRE;
947         ip_tunnel_setup(dev, ipgre_net_id);
948 }
949
950 static void __gre_tunnel_init(struct net_device *dev)
951 {
952         struct ip_tunnel *tunnel;
953
954         tunnel = netdev_priv(dev);
955         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
956         tunnel->parms.iph.protocol = IPPROTO_GRE;
957
958         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
959         dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
960
961         dev->features           |= GRE_FEATURES;
962         dev->hw_features        |= GRE_FEATURES;
963
964         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
965                 /* TCP offload with GRE SEQ is not supported, nor
966                  * can we support 2 levels of outer headers requiring
967                  * an update.
968                  */
969                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
970                     (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
971                         dev->features    |= NETIF_F_GSO_SOFTWARE;
972                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
973                 }
974
975                 /* Can use a lockless transmit, unless we generate
976                  * output sequences
977                  */
978                 dev->features |= NETIF_F_LLTX;
979         }
980 }
981
982 static int ipgre_tunnel_init(struct net_device *dev)
983 {
984         struct ip_tunnel *tunnel = netdev_priv(dev);
985         struct iphdr *iph = &tunnel->parms.iph;
986
987         __gre_tunnel_init(dev);
988
989         memcpy(dev->dev_addr, &iph->saddr, 4);
990         memcpy(dev->broadcast, &iph->daddr, 4);
991
992         dev->flags              = IFF_NOARP;
993         netif_keep_dst(dev);
994         dev->addr_len           = 4;
995
996         if (iph->daddr && !tunnel->collect_md) {
997 #ifdef CONFIG_NET_IPGRE_BROADCAST
998                 if (ipv4_is_multicast(iph->daddr)) {
999                         if (!iph->saddr)
1000                                 return -EINVAL;
1001                         dev->flags = IFF_BROADCAST;
1002                         dev->header_ops = &ipgre_header_ops;
1003                         dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1004                         dev->needed_headroom = 0;
1005                 }
1006 #endif
1007         } else if (!tunnel->collect_md) {
1008                 dev->header_ops = &ipgre_header_ops;
1009                 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1010                 dev->needed_headroom = 0;
1011         }
1012
1013         return ip_tunnel_init(dev);
1014 }
1015
1016 static const struct gre_protocol ipgre_protocol = {
1017         .handler     = gre_rcv,
1018         .err_handler = gre_err,
1019 };
1020
1021 static int __net_init ipgre_init_net(struct net *net)
1022 {
1023         return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1024 }
1025
1026 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1027 {
1028         ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1029 }
1030
1031 static struct pernet_operations ipgre_net_ops = {
1032         .init = ipgre_init_net,
1033         .exit_batch = ipgre_exit_batch_net,
1034         .id   = &ipgre_net_id,
1035         .size = sizeof(struct ip_tunnel_net),
1036 };
1037
1038 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1039                                  struct netlink_ext_ack *extack)
1040 {
1041         __be16 flags;
1042
1043         if (!data)
1044                 return 0;
1045
1046         flags = 0;
1047         if (data[IFLA_GRE_IFLAGS])
1048                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1049         if (data[IFLA_GRE_OFLAGS])
1050                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1051         if (flags & (GRE_VERSION|GRE_ROUTING))
1052                 return -EINVAL;
1053
1054         if (data[IFLA_GRE_COLLECT_METADATA] &&
1055             data[IFLA_GRE_ENCAP_TYPE] &&
1056             nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1057                 return -EINVAL;
1058
1059         return 0;
1060 }
1061
1062 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1063                               struct netlink_ext_ack *extack)
1064 {
1065         __be32 daddr;
1066
1067         if (tb[IFLA_ADDRESS]) {
1068                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1069                         return -EINVAL;
1070                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1071                         return -EADDRNOTAVAIL;
1072         }
1073
1074         if (!data)
1075                 goto out;
1076
1077         if (data[IFLA_GRE_REMOTE]) {
1078                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1079                 if (!daddr)
1080                         return -EINVAL;
1081         }
1082
1083 out:
1084         return ipgre_tunnel_validate(tb, data, extack);
1085 }
1086
1087 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1088                            struct netlink_ext_ack *extack)
1089 {
1090         __be16 flags = 0;
1091         int ret;
1092
1093         if (!data)
1094                 return 0;
1095
1096         ret = ipgre_tap_validate(tb, data, extack);
1097         if (ret)
1098                 return ret;
1099
1100         if (data[IFLA_GRE_ERSPAN_VER] &&
1101             nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1102                 return 0;
1103
1104         /* ERSPAN type II/III should only have GRE sequence and key flag */
1105         if (data[IFLA_GRE_OFLAGS])
1106                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1107         if (data[IFLA_GRE_IFLAGS])
1108                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1109         if (!data[IFLA_GRE_COLLECT_METADATA] &&
1110             flags != (GRE_SEQ | GRE_KEY))
1111                 return -EINVAL;
1112
1113         /* ERSPAN Session ID only has 10-bit. Since we reuse
1114          * 32-bit key field as ID, check it's range.
1115          */
1116         if (data[IFLA_GRE_IKEY] &&
1117             (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1118                 return -EINVAL;
1119
1120         if (data[IFLA_GRE_OKEY] &&
1121             (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1122                 return -EINVAL;
1123
1124         return 0;
1125 }
1126
1127 static int ipgre_netlink_parms(struct net_device *dev,
1128                                 struct nlattr *data[],
1129                                 struct nlattr *tb[],
1130                                 struct ip_tunnel_parm *parms,
1131                                 __u32 *fwmark)
1132 {
1133         struct ip_tunnel *t = netdev_priv(dev);
1134
1135         memset(parms, 0, sizeof(*parms));
1136
1137         parms->iph.protocol = IPPROTO_GRE;
1138
1139         if (!data)
1140                 return 0;
1141
1142         if (data[IFLA_GRE_LINK])
1143                 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1144
1145         if (data[IFLA_GRE_IFLAGS])
1146                 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1147
1148         if (data[IFLA_GRE_OFLAGS])
1149                 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1150
1151         if (data[IFLA_GRE_IKEY])
1152                 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1153
1154         if (data[IFLA_GRE_OKEY])
1155                 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1156
1157         if (data[IFLA_GRE_LOCAL])
1158                 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1159
1160         if (data[IFLA_GRE_REMOTE])
1161                 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1162
1163         if (data[IFLA_GRE_TTL])
1164                 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1165
1166         if (data[IFLA_GRE_TOS])
1167                 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1168
1169         if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1170                 if (t->ignore_df)
1171                         return -EINVAL;
1172                 parms->iph.frag_off = htons(IP_DF);
1173         }
1174
1175         if (data[IFLA_GRE_COLLECT_METADATA]) {
1176                 t->collect_md = true;
1177                 if (dev->type == ARPHRD_IPGRE)
1178                         dev->type = ARPHRD_NONE;
1179         }
1180
1181         if (data[IFLA_GRE_IGNORE_DF]) {
1182                 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1183                   && (parms->iph.frag_off & htons(IP_DF)))
1184                         return -EINVAL;
1185                 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1186         }
1187
1188         if (data[IFLA_GRE_FWMARK])
1189                 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1190
1191         return 0;
1192 }
1193
1194 static int erspan_netlink_parms(struct net_device *dev,
1195                                 struct nlattr *data[],
1196                                 struct nlattr *tb[],
1197                                 struct ip_tunnel_parm *parms,
1198                                 __u32 *fwmark)
1199 {
1200         struct ip_tunnel *t = netdev_priv(dev);
1201         int err;
1202
1203         err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1204         if (err)
1205                 return err;
1206         if (!data)
1207                 return 0;
1208
1209         if (data[IFLA_GRE_ERSPAN_VER]) {
1210                 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1211
1212                 if (t->erspan_ver > 2)
1213                         return -EINVAL;
1214         }
1215
1216         if (t->erspan_ver == 1) {
1217                 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1218                         t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1219                         if (t->index & ~INDEX_MASK)
1220                                 return -EINVAL;
1221                 }
1222         } else if (t->erspan_ver == 2) {
1223                 if (data[IFLA_GRE_ERSPAN_DIR]) {
1224                         t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1225                         if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1226                                 return -EINVAL;
1227                 }
1228                 if (data[IFLA_GRE_ERSPAN_HWID]) {
1229                         t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1230                         if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1231                                 return -EINVAL;
1232                 }
1233         }
1234
1235         return 0;
1236 }
1237
1238 /* This function returns true when ENCAP attributes are present in the nl msg */
1239 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1240                                       struct ip_tunnel_encap *ipencap)
1241 {
1242         bool ret = false;
1243
1244         memset(ipencap, 0, sizeof(*ipencap));
1245
1246         if (!data)
1247                 return ret;
1248
1249         if (data[IFLA_GRE_ENCAP_TYPE]) {
1250                 ret = true;
1251                 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1252         }
1253
1254         if (data[IFLA_GRE_ENCAP_FLAGS]) {
1255                 ret = true;
1256                 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1257         }
1258
1259         if (data[IFLA_GRE_ENCAP_SPORT]) {
1260                 ret = true;
1261                 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1262         }
1263
1264         if (data[IFLA_GRE_ENCAP_DPORT]) {
1265                 ret = true;
1266                 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1267         }
1268
1269         return ret;
1270 }
1271
1272 static int gre_tap_init(struct net_device *dev)
1273 {
1274         __gre_tunnel_init(dev);
1275         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1276         netif_keep_dst(dev);
1277
1278         return ip_tunnel_init(dev);
1279 }
1280
1281 static const struct net_device_ops gre_tap_netdev_ops = {
1282         .ndo_init               = gre_tap_init,
1283         .ndo_uninit             = ip_tunnel_uninit,
1284         .ndo_start_xmit         = gre_tap_xmit,
1285         .ndo_set_mac_address    = eth_mac_addr,
1286         .ndo_validate_addr      = eth_validate_addr,
1287         .ndo_change_mtu         = ip_tunnel_change_mtu,
1288         .ndo_get_stats64        = dev_get_tstats64,
1289         .ndo_get_iflink         = ip_tunnel_get_iflink,
1290         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1291 };
1292
1293 static int erspan_tunnel_init(struct net_device *dev)
1294 {
1295         struct ip_tunnel *tunnel = netdev_priv(dev);
1296
1297         if (tunnel->erspan_ver == 0)
1298                 tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1299         else
1300                 tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1301
1302         tunnel->parms.iph.protocol = IPPROTO_GRE;
1303         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1304                        erspan_hdr_len(tunnel->erspan_ver);
1305
1306         dev->features           |= GRE_FEATURES;
1307         dev->hw_features        |= GRE_FEATURES;
1308         dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
1309         netif_keep_dst(dev);
1310
1311         return ip_tunnel_init(dev);
1312 }
1313
1314 static const struct net_device_ops erspan_netdev_ops = {
1315         .ndo_init               = erspan_tunnel_init,
1316         .ndo_uninit             = ip_tunnel_uninit,
1317         .ndo_start_xmit         = erspan_xmit,
1318         .ndo_set_mac_address    = eth_mac_addr,
1319         .ndo_validate_addr      = eth_validate_addr,
1320         .ndo_change_mtu         = ip_tunnel_change_mtu,
1321         .ndo_get_stats64        = dev_get_tstats64,
1322         .ndo_get_iflink         = ip_tunnel_get_iflink,
1323         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1324 };
1325
1326 static void ipgre_tap_setup(struct net_device *dev)
1327 {
1328         ether_setup(dev);
1329         dev->max_mtu = 0;
1330         dev->netdev_ops = &gre_tap_netdev_ops;
1331         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1332         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1333         ip_tunnel_setup(dev, gre_tap_net_id);
1334 }
1335
1336 static int
1337 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1338 {
1339         struct ip_tunnel_encap ipencap;
1340
1341         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1342                 struct ip_tunnel *t = netdev_priv(dev);
1343                 int err = ip_tunnel_encap_setup(t, &ipencap);
1344
1345                 if (err < 0)
1346                         return err;
1347         }
1348
1349         return 0;
1350 }
1351
1352 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1353                          struct nlattr *tb[], struct nlattr *data[],
1354                          struct netlink_ext_ack *extack)
1355 {
1356         struct ip_tunnel_parm p;
1357         __u32 fwmark = 0;
1358         int err;
1359
1360         err = ipgre_newlink_encap_setup(dev, data);
1361         if (err)
1362                 return err;
1363
1364         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1365         if (err < 0)
1366                 return err;
1367         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1368 }
1369
1370 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1371                           struct nlattr *tb[], struct nlattr *data[],
1372                           struct netlink_ext_ack *extack)
1373 {
1374         struct ip_tunnel_parm p;
1375         __u32 fwmark = 0;
1376         int err;
1377
1378         err = ipgre_newlink_encap_setup(dev, data);
1379         if (err)
1380                 return err;
1381
1382         err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1383         if (err)
1384                 return err;
1385         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1386 }
1387
1388 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1389                             struct nlattr *data[],
1390                             struct netlink_ext_ack *extack)
1391 {
1392         struct ip_tunnel *t = netdev_priv(dev);
1393         __u32 fwmark = t->fwmark;
1394         struct ip_tunnel_parm p;
1395         int err;
1396
1397         err = ipgre_newlink_encap_setup(dev, data);
1398         if (err)
1399                 return err;
1400
1401         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1402         if (err < 0)
1403                 return err;
1404
1405         err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1406         if (err < 0)
1407                 return err;
1408
1409         t->parms.i_flags = p.i_flags;
1410         t->parms.o_flags = p.o_flags;
1411
1412         ipgre_link_update(dev, !tb[IFLA_MTU]);
1413
1414         return 0;
1415 }
1416
1417 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1418                              struct nlattr *data[],
1419                              struct netlink_ext_ack *extack)
1420 {
1421         struct ip_tunnel *t = netdev_priv(dev);
1422         __u32 fwmark = t->fwmark;
1423         struct ip_tunnel_parm p;
1424         int err;
1425
1426         err = ipgre_newlink_encap_setup(dev, data);
1427         if (err)
1428                 return err;
1429
1430         err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1431         if (err < 0)
1432                 return err;
1433
1434         err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1435         if (err < 0)
1436                 return err;
1437
1438         t->parms.i_flags = p.i_flags;
1439         t->parms.o_flags = p.o_flags;
1440
1441         return 0;
1442 }
1443
1444 static size_t ipgre_get_size(const struct net_device *dev)
1445 {
1446         return
1447                 /* IFLA_GRE_LINK */
1448                 nla_total_size(4) +
1449                 /* IFLA_GRE_IFLAGS */
1450                 nla_total_size(2) +
1451                 /* IFLA_GRE_OFLAGS */
1452                 nla_total_size(2) +
1453                 /* IFLA_GRE_IKEY */
1454                 nla_total_size(4) +
1455                 /* IFLA_GRE_OKEY */
1456                 nla_total_size(4) +
1457                 /* IFLA_GRE_LOCAL */
1458                 nla_total_size(4) +
1459                 /* IFLA_GRE_REMOTE */
1460                 nla_total_size(4) +
1461                 /* IFLA_GRE_TTL */
1462                 nla_total_size(1) +
1463                 /* IFLA_GRE_TOS */
1464                 nla_total_size(1) +
1465                 /* IFLA_GRE_PMTUDISC */
1466                 nla_total_size(1) +
1467                 /* IFLA_GRE_ENCAP_TYPE */
1468                 nla_total_size(2) +
1469                 /* IFLA_GRE_ENCAP_FLAGS */
1470                 nla_total_size(2) +
1471                 /* IFLA_GRE_ENCAP_SPORT */
1472                 nla_total_size(2) +
1473                 /* IFLA_GRE_ENCAP_DPORT */
1474                 nla_total_size(2) +
1475                 /* IFLA_GRE_COLLECT_METADATA */
1476                 nla_total_size(0) +
1477                 /* IFLA_GRE_IGNORE_DF */
1478                 nla_total_size(1) +
1479                 /* IFLA_GRE_FWMARK */
1480                 nla_total_size(4) +
1481                 /* IFLA_GRE_ERSPAN_INDEX */
1482                 nla_total_size(4) +
1483                 /* IFLA_GRE_ERSPAN_VER */
1484                 nla_total_size(1) +
1485                 /* IFLA_GRE_ERSPAN_DIR */
1486                 nla_total_size(1) +
1487                 /* IFLA_GRE_ERSPAN_HWID */
1488                 nla_total_size(2) +
1489                 0;
1490 }
1491
1492 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1493 {
1494         struct ip_tunnel *t = netdev_priv(dev);
1495         struct ip_tunnel_parm *p = &t->parms;
1496         __be16 o_flags = p->o_flags;
1497
1498         if (t->erspan_ver <= 2) {
1499                 if (t->erspan_ver != 0 && !t->collect_md)
1500                         o_flags |= TUNNEL_KEY;
1501
1502                 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1503                         goto nla_put_failure;
1504
1505                 if (t->erspan_ver == 1) {
1506                         if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1507                                 goto nla_put_failure;
1508                 } else if (t->erspan_ver == 2) {
1509                         if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1510                                 goto nla_put_failure;
1511                         if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1512                                 goto nla_put_failure;
1513                 }
1514         }
1515
1516         if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1517             nla_put_be16(skb, IFLA_GRE_IFLAGS,
1518                          gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1519             nla_put_be16(skb, IFLA_GRE_OFLAGS,
1520                          gre_tnl_flags_to_gre_flags(o_flags)) ||
1521             nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1522             nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1523             nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1524             nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1525             nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1526             nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1527             nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1528                        !!(p->iph.frag_off & htons(IP_DF))) ||
1529             nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1530                 goto nla_put_failure;
1531
1532         if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1533                         t->encap.type) ||
1534             nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1535                          t->encap.sport) ||
1536             nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1537                          t->encap.dport) ||
1538             nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1539                         t->encap.flags))
1540                 goto nla_put_failure;
1541
1542         if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1543                 goto nla_put_failure;
1544
1545         if (t->collect_md) {
1546                 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1547                         goto nla_put_failure;
1548         }
1549
1550         return 0;
1551
1552 nla_put_failure:
1553         return -EMSGSIZE;
1554 }
1555
1556 static void erspan_setup(struct net_device *dev)
1557 {
1558         struct ip_tunnel *t = netdev_priv(dev);
1559
1560         ether_setup(dev);
1561         dev->max_mtu = 0;
1562         dev->netdev_ops = &erspan_netdev_ops;
1563         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1564         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1565         ip_tunnel_setup(dev, erspan_net_id);
1566         t->erspan_ver = 1;
1567 }
1568
1569 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1570         [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1571         [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1572         [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1573         [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1574         [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1575         [IFLA_GRE_LOCAL]        = { .len = sizeof_field(struct iphdr, saddr) },
1576         [IFLA_GRE_REMOTE]       = { .len = sizeof_field(struct iphdr, daddr) },
1577         [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1578         [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1579         [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1580         [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
1581         [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
1582         [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
1583         [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
1584         [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
1585         [IFLA_GRE_IGNORE_DF]    = { .type = NLA_U8 },
1586         [IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
1587         [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1588         [IFLA_GRE_ERSPAN_VER]   = { .type = NLA_U8 },
1589         [IFLA_GRE_ERSPAN_DIR]   = { .type = NLA_U8 },
1590         [IFLA_GRE_ERSPAN_HWID]  = { .type = NLA_U16 },
1591 };
1592
1593 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1594         .kind           = "gre",
1595         .maxtype        = IFLA_GRE_MAX,
1596         .policy         = ipgre_policy,
1597         .priv_size      = sizeof(struct ip_tunnel),
1598         .setup          = ipgre_tunnel_setup,
1599         .validate       = ipgre_tunnel_validate,
1600         .newlink        = ipgre_newlink,
1601         .changelink     = ipgre_changelink,
1602         .dellink        = ip_tunnel_dellink,
1603         .get_size       = ipgre_get_size,
1604         .fill_info      = ipgre_fill_info,
1605         .get_link_net   = ip_tunnel_get_link_net,
1606 };
1607
1608 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1609         .kind           = "gretap",
1610         .maxtype        = IFLA_GRE_MAX,
1611         .policy         = ipgre_policy,
1612         .priv_size      = sizeof(struct ip_tunnel),
1613         .setup          = ipgre_tap_setup,
1614         .validate       = ipgre_tap_validate,
1615         .newlink        = ipgre_newlink,
1616         .changelink     = ipgre_changelink,
1617         .dellink        = ip_tunnel_dellink,
1618         .get_size       = ipgre_get_size,
1619         .fill_info      = ipgre_fill_info,
1620         .get_link_net   = ip_tunnel_get_link_net,
1621 };
1622
1623 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1624         .kind           = "erspan",
1625         .maxtype        = IFLA_GRE_MAX,
1626         .policy         = ipgre_policy,
1627         .priv_size      = sizeof(struct ip_tunnel),
1628         .setup          = erspan_setup,
1629         .validate       = erspan_validate,
1630         .newlink        = erspan_newlink,
1631         .changelink     = erspan_changelink,
1632         .dellink        = ip_tunnel_dellink,
1633         .get_size       = ipgre_get_size,
1634         .fill_info      = ipgre_fill_info,
1635         .get_link_net   = ip_tunnel_get_link_net,
1636 };
1637
1638 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1639                                         u8 name_assign_type)
1640 {
1641         struct nlattr *tb[IFLA_MAX + 1];
1642         struct net_device *dev;
1643         LIST_HEAD(list_kill);
1644         struct ip_tunnel *t;
1645         int err;
1646
1647         memset(&tb, 0, sizeof(tb));
1648
1649         dev = rtnl_create_link(net, name, name_assign_type,
1650                                &ipgre_tap_ops, tb, NULL);
1651         if (IS_ERR(dev))
1652                 return dev;
1653
1654         /* Configure flow based GRE device. */
1655         t = netdev_priv(dev);
1656         t->collect_md = true;
1657
1658         err = ipgre_newlink(net, dev, tb, NULL, NULL);
1659         if (err < 0) {
1660                 free_netdev(dev);
1661                 return ERR_PTR(err);
1662         }
1663
1664         /* openvswitch users expect packet sizes to be unrestricted,
1665          * so set the largest MTU we can.
1666          */
1667         err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1668         if (err)
1669                 goto out;
1670
1671         err = rtnl_configure_link(dev, NULL);
1672         if (err < 0)
1673                 goto out;
1674
1675         return dev;
1676 out:
1677         ip_tunnel_dellink(dev, &list_kill);
1678         unregister_netdevice_many(&list_kill);
1679         return ERR_PTR(err);
1680 }
1681 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1682
1683 static int __net_init ipgre_tap_init_net(struct net *net)
1684 {
1685         return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1686 }
1687
1688 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1689 {
1690         ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1691 }
1692
1693 static struct pernet_operations ipgre_tap_net_ops = {
1694         .init = ipgre_tap_init_net,
1695         .exit_batch = ipgre_tap_exit_batch_net,
1696         .id   = &gre_tap_net_id,
1697         .size = sizeof(struct ip_tunnel_net),
1698 };
1699
1700 static int __net_init erspan_init_net(struct net *net)
1701 {
1702         return ip_tunnel_init_net(net, erspan_net_id,
1703                                   &erspan_link_ops, "erspan0");
1704 }
1705
1706 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1707 {
1708         ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1709 }
1710
1711 static struct pernet_operations erspan_net_ops = {
1712         .init = erspan_init_net,
1713         .exit_batch = erspan_exit_batch_net,
1714         .id   = &erspan_net_id,
1715         .size = sizeof(struct ip_tunnel_net),
1716 };
1717
1718 static int __init ipgre_init(void)
1719 {
1720         int err;
1721
1722         pr_info("GRE over IPv4 tunneling driver\n");
1723
1724         err = register_pernet_device(&ipgre_net_ops);
1725         if (err < 0)
1726                 return err;
1727
1728         err = register_pernet_device(&ipgre_tap_net_ops);
1729         if (err < 0)
1730                 goto pnet_tap_failed;
1731
1732         err = register_pernet_device(&erspan_net_ops);
1733         if (err < 0)
1734                 goto pnet_erspan_failed;
1735
1736         err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1737         if (err < 0) {
1738                 pr_info("%s: can't add protocol\n", __func__);
1739                 goto add_proto_failed;
1740         }
1741
1742         err = rtnl_link_register(&ipgre_link_ops);
1743         if (err < 0)
1744                 goto rtnl_link_failed;
1745
1746         err = rtnl_link_register(&ipgre_tap_ops);
1747         if (err < 0)
1748                 goto tap_ops_failed;
1749
1750         err = rtnl_link_register(&erspan_link_ops);
1751         if (err < 0)
1752                 goto erspan_link_failed;
1753
1754         return 0;
1755
1756 erspan_link_failed:
1757         rtnl_link_unregister(&ipgre_tap_ops);
1758 tap_ops_failed:
1759         rtnl_link_unregister(&ipgre_link_ops);
1760 rtnl_link_failed:
1761         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1762 add_proto_failed:
1763         unregister_pernet_device(&erspan_net_ops);
1764 pnet_erspan_failed:
1765         unregister_pernet_device(&ipgre_tap_net_ops);
1766 pnet_tap_failed:
1767         unregister_pernet_device(&ipgre_net_ops);
1768         return err;
1769 }
1770
1771 static void __exit ipgre_fini(void)
1772 {
1773         rtnl_link_unregister(&ipgre_tap_ops);
1774         rtnl_link_unregister(&ipgre_link_ops);
1775         rtnl_link_unregister(&erspan_link_ops);
1776         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1777         unregister_pernet_device(&ipgre_tap_net_ops);
1778         unregister_pernet_device(&ipgre_net_ops);
1779         unregister_pernet_device(&erspan_net_ops);
1780 }
1781
1782 module_init(ipgre_init);
1783 module_exit(ipgre_fini);
1784 MODULE_LICENSE("GPL");
1785 MODULE_ALIAS_RTNL_LINK("gre");
1786 MODULE_ALIAS_RTNL_LINK("gretap");
1787 MODULE_ALIAS_RTNL_LINK("erspan");
1788 MODULE_ALIAS_NETDEV("gre0");
1789 MODULE_ALIAS_NETDEV("gretap0");
1790 MODULE_ALIAS_NETDEV("erspan0");