c7a7bd58a23c585778cce9bb0dcf6525caef6e55
[linux-2.6-microblaze.git] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 #include <net/dst_metadata.h>
51 #include <net/erspan.h>
52
53 /*
54    Problems & solutions
55    --------------------
56
57    1. The most important issue is detecting local dead loops.
58    They would cause complete host lockup in transmit, which
59    would be "resolved" by stack overflow or, if queueing is enabled,
60    with infinite looping in net_bh.
61
62    We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
64    to keep skb->encapsulation counter (sort of local ttl),
65    and silently drop packet when it expires. It is a good
66    solution, but it supposes maintaining new variable in ALL
67    skb, even if no tunneling is used.
68
69    Current solution: xmit_recursion breaks dead loops. This is a percpu
70    counter, since when we enter the first ndo_xmit(), cpu migration is
71    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
72
73    2. Networking dead loops would not kill routers, but would really
74    kill network. IP hop limit plays role of "t->recursion" in this case,
75    if we copy it from packet being encapsulated to upper header.
76    It is very good solution, but it introduces two problems:
77
78    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79      do not work over tunnels.
80    - traceroute does not work. I planned to relay ICMP from tunnel,
81      so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
83      only Linux complies to rfc1812 now (yes, guys, Linux is the only
84      true router now :-)), all routers (at least, in neighbourhood of mine)
85      return only 8 bytes of payload. It is the end.
86
87    Hence, if we want that OSPF worked or traceroute said something reasonable,
88    we should search for another solution.
89
90    One of them is to parse packet trying to detect inner encapsulation
91    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
93
94    Current solution: The solution was UNEXPECTEDLY SIMPLE.
95    We force DF flag on tunnels with preconfigured hop limit,
96    that is ALL. :-) Well, it does not remove the problem completely,
97    but exponential growth of network traffic is changed to linear
98    (branches, that exceed pmtu are pruned) and tunnel mtu
99    rapidly degrades to value <68, where looping stops.
100    Yes, it is not good if there exists a router in the loop,
101    which does not force DF, even when encapsulating packets have DF set.
102    But it is not our problem! Nobody could accuse us, we made
103    all that we could make. Even if it is your gated who injected
104    fatal route to network, even if it were you who configured
105    fatal static route: you are innocent. :-)
106
107    Alexey Kuznetsov.
108  */
109
/* Module parameter (mode 0644). Its value is handed to ip_tunnel_rcv()
 * by the receive paths in this file.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Forward declarations; the definitions are presumably further down in
 * this file.
 */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
                                u32 id, u32 index,
                                bool truncate, bool is_ipv4);

/* Ids passed to net_generic() to obtain the per-namespace
 * struct ip_tunnel_net for plain GRE, GRE tap (ETH_P_TEB) and ERSPAN
 * tunnels respectively.
 */
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
123
124 static int ipgre_err(struct sk_buff *skb, u32 info,
125                      const struct tnl_ptk_info *tpi)
126 {
127
128         /* All the routers (except for Linux) return only
129            8 bytes of packet payload. It means, that precise relaying of
130            ICMP in the real Internet is absolutely infeasible.
131
132            Moreover, Cisco "wise men" put GRE key to the third word
133            in GRE header. It makes impossible maintaining even soft
134            state for keyed GRE tunnels with enabled checksum. Tell
135            them "thank you".
136
137            Well, I wonder, rfc1812 was written by Cisco employee,
138            what the hell these idiots break standards established
139            by themselves???
140            */
141         struct net *net = dev_net(skb->dev);
142         struct ip_tunnel_net *itn;
143         const struct iphdr *iph;
144         const int type = icmp_hdr(skb)->type;
145         const int code = icmp_hdr(skb)->code;
146         unsigned int data_len = 0;
147         struct ip_tunnel *t;
148
149         if (tpi->proto == htons(ETH_P_TEB))
150                 itn = net_generic(net, gre_tap_net_id);
151         else if (tpi->proto == htons(ETH_P_ERSPAN) ||
152                  tpi->proto == htons(ETH_P_ERSPAN2))
153                 itn = net_generic(net, erspan_net_id);
154         else
155                 itn = net_generic(net, ipgre_net_id);
156
157         iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
158         t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
159                              iph->daddr, iph->saddr, tpi->key);
160
161         if (!t)
162                 return -ENOENT;
163
164         switch (type) {
165         default:
166         case ICMP_PARAMETERPROB:
167                 return 0;
168
169         case ICMP_DEST_UNREACH:
170                 switch (code) {
171                 case ICMP_SR_FAILED:
172                 case ICMP_PORT_UNREACH:
173                         /* Impossible event. */
174                         return 0;
175                 default:
176                         /* All others are translated to HOST_UNREACH.
177                            rfc2003 contains "deep thoughts" about NET_UNREACH,
178                            I believe they are just ether pollution. --ANK
179                          */
180                         break;
181                 }
182                 break;
183
184         case ICMP_TIME_EXCEEDED:
185                 if (code != ICMP_EXC_TTL)
186                         return 0;
187                 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
188                 break;
189
190         case ICMP_REDIRECT:
191                 break;
192         }
193
194 #if IS_ENABLED(CONFIG_IPV6)
195        if (tpi->proto == htons(ETH_P_IPV6) &&
196            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
197                                        type, data_len))
198                return 0;
199 #endif
200
201         if (t->parms.iph.daddr == 0 ||
202             ipv4_is_multicast(t->parms.iph.daddr))
203                 return 0;
204
205         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
206                 return 0;
207
208         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
209                 t->err_count++;
210         else
211                 t->err_count = 1;
212         t->err_time = jiffies;
213
214         return 0;
215 }
216
217 static void gre_err(struct sk_buff *skb, u32 info)
218 {
219         /* All the routers (except for Linux) return only
220          * 8 bytes of packet payload. It means, that precise relaying of
221          * ICMP in the real Internet is absolutely infeasible.
222          *
223          * Moreover, Cisco "wise men" put GRE key to the third word
224          * in GRE header. It makes impossible maintaining even soft
225          * state for keyed
226          * GRE tunnels with enabled checksum. Tell them "thank you".
227          *
228          * Well, I wonder, rfc1812 was written by Cisco employee,
229          * what the hell these idiots break standards established
230          * by themselves???
231          */
232
233         const struct iphdr *iph = (struct iphdr *)skb->data;
234         const int type = icmp_hdr(skb)->type;
235         const int code = icmp_hdr(skb)->code;
236         struct tnl_ptk_info tpi;
237
238         if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
239                              iph->ihl * 4) < 0)
240                 return;
241
242         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
243                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
244                                  skb->dev->ifindex, IPPROTO_GRE);
245                 return;
246         }
247         if (type == ICMP_REDIRECT) {
248                 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
249                               IPPROTO_GRE);
250                 return;
251         }
252
253         ipgre_err(skb, info, &tpi);
254 }
255
256 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
257                       int gre_hdr_len)
258 {
259         struct net *net = dev_net(skb->dev);
260         struct metadata_dst *tun_dst = NULL;
261         struct erspan_base_hdr *ershdr;
262         struct erspan_metadata *pkt_md;
263         struct ip_tunnel_net *itn;
264         struct ip_tunnel *tunnel;
265         const struct iphdr *iph;
266         struct erspan_md2 *md2;
267         int ver;
268         int len;
269
270         itn = net_generic(net, erspan_net_id);
271         len = gre_hdr_len + sizeof(*ershdr);
272
273         /* Check based hdr len */
274         if (unlikely(!pskb_may_pull(skb, len)))
275                 return PACKET_REJECT;
276
277         iph = ip_hdr(skb);
278         ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
279         ver = ershdr->ver;
280
281         /* The original GRE header does not have key field,
282          * Use ERSPAN 10-bit session ID as key.
283          */
284         tpi->key = cpu_to_be32(get_session_id(ershdr));
285         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
286                                   tpi->flags | TUNNEL_KEY,
287                                   iph->saddr, iph->daddr, tpi->key);
288
289         if (tunnel) {
290                 len = gre_hdr_len + erspan_hdr_len(ver);
291                 if (unlikely(!pskb_may_pull(skb, len)))
292                         return PACKET_REJECT;
293
294                 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
295                 pkt_md = (struct erspan_metadata *)(ershdr + 1);
296
297                 if (__iptunnel_pull_header(skb,
298                                            len,
299                                            htons(ETH_P_TEB),
300                                            false, false) < 0)
301                         goto drop;
302
303                 if (tunnel->collect_md) {
304                         struct ip_tunnel_info *info;
305                         struct erspan_metadata *md;
306                         __be64 tun_id;
307                         __be16 flags;
308
309                         tpi->flags |= TUNNEL_KEY;
310                         flags = tpi->flags;
311                         tun_id = key32_to_tunnel_id(tpi->key);
312
313                         tun_dst = ip_tun_rx_dst(skb, flags,
314                                                 tun_id, sizeof(*md));
315                         if (!tun_dst)
316                                 return PACKET_REJECT;
317
318                         md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
319                         md->version = ver;
320                         md2 = &md->u.md2;
321                         memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
322                                                        ERSPAN_V2_MDSIZE);
323
324                         info = &tun_dst->u.tun_info;
325                         info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
326                         info->options_len = sizeof(*md);
327                 }
328
329                 skb_reset_mac_header(skb);
330                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
331                 return PACKET_RCVD;
332         }
333         return PACKET_REJECT;
334
335 drop:
336         kfree_skb(skb);
337         return PACKET_RCVD;
338 }
339
340 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
341                        struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
342 {
343         struct metadata_dst *tun_dst = NULL;
344         const struct iphdr *iph;
345         struct ip_tunnel *tunnel;
346
347         iph = ip_hdr(skb);
348         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
349                                   iph->saddr, iph->daddr, tpi->key);
350
351         if (tunnel) {
352                 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
353                                            raw_proto, false) < 0)
354                         goto drop;
355
356                 if (tunnel->dev->type != ARPHRD_NONE)
357                         skb_pop_mac_header(skb);
358                 else
359                         skb_reset_mac_header(skb);
360                 if (tunnel->collect_md) {
361                         __be16 flags;
362                         __be64 tun_id;
363
364                         flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
365                         tun_id = key32_to_tunnel_id(tpi->key);
366                         tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
367                         if (!tun_dst)
368                                 return PACKET_REJECT;
369                 }
370
371                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
372                 return PACKET_RCVD;
373         }
374         return PACKET_NEXT;
375
376 drop:
377         kfree_skb(skb);
378         return PACKET_RCVD;
379 }
380
381 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
382                      int hdr_len)
383 {
384         struct net *net = dev_net(skb->dev);
385         struct ip_tunnel_net *itn;
386         int res;
387
388         if (tpi->proto == htons(ETH_P_TEB))
389                 itn = net_generic(net, gre_tap_net_id);
390         else
391                 itn = net_generic(net, ipgre_net_id);
392
393         res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
394         if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
395                 /* ipgre tunnels in collect metadata mode should receive
396                  * also ETH_P_TEB traffic.
397                  */
398                 itn = net_generic(net, ipgre_net_id);
399                 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
400         }
401         return res;
402 }
403
404 static int gre_rcv(struct sk_buff *skb)
405 {
406         struct tnl_ptk_info tpi;
407         bool csum_err = false;
408         int hdr_len;
409
410 #ifdef CONFIG_NET_IPGRE_BROADCAST
411         if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
412                 /* Looped back packet, drop it! */
413                 if (rt_is_output_route(skb_rtable(skb)))
414                         goto drop;
415         }
416 #endif
417
418         hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
419         if (hdr_len < 0)
420                 goto drop;
421
422         if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
423                      tpi.proto == htons(ETH_P_ERSPAN2))) {
424                 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
425                         return 0;
426                 goto out;
427         }
428
429         if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
430                 return 0;
431
432 out:
433         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
434 drop:
435         kfree_skb(skb);
436         return 0;
437 }
438
439 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
440                        const struct iphdr *tnl_params,
441                        __be16 proto)
442 {
443         struct ip_tunnel *tunnel = netdev_priv(dev);
444
445         if (tunnel->parms.o_flags & TUNNEL_SEQ)
446                 tunnel->o_seqno++;
447
448         /* Push GRE header. */
449         gre_build_header(skb, tunnel->tun_hlen,
450                          tunnel->parms.o_flags, proto, tunnel->parms.o_key,
451                          htonl(tunnel->o_seqno));
452
453         ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
454 }
455
456 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
457 {
458         return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
459 }
460
461 static struct rtable *gre_get_rt(struct sk_buff *skb,
462                                  struct net_device *dev,
463                                  struct flowi4 *fl,
464                                  const struct ip_tunnel_key *key)
465 {
466         struct net *net = dev_net(dev);
467
468         memset(fl, 0, sizeof(*fl));
469         fl->daddr = key->u.ipv4.dst;
470         fl->saddr = key->u.ipv4.src;
471         fl->flowi4_tos = RT_TOS(key->tos);
472         fl->flowi4_mark = skb->mark;
473         fl->flowi4_proto = IPPROTO_GRE;
474
475         return ip_route_output_key(net, fl);
476 }
477
478 static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
479                                       struct net_device *dev,
480                                       struct flowi4 *fl,
481                                       int tunnel_hlen)
482 {
483         struct ip_tunnel_info *tun_info;
484         const struct ip_tunnel_key *key;
485         struct rtable *rt = NULL;
486         int min_headroom;
487         bool use_cache;
488         int err;
489
490         tun_info = skb_tunnel_info(skb);
491         key = &tun_info->key;
492         use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
493
494         if (use_cache)
495                 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
496         if (!rt) {
497                 rt = gre_get_rt(skb, dev, fl, key);
498                 if (IS_ERR(rt))
499                         goto err_free_skb;
500                 if (use_cache)
501                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
502                                           fl->saddr);
503         }
504
505         min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
506                         + tunnel_hlen + sizeof(struct iphdr);
507         if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
508                 int head_delta = SKB_DATA_ALIGN(min_headroom -
509                                                 skb_headroom(skb) +
510                                                 16);
511                 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
512                                        0, GFP_ATOMIC);
513                 if (unlikely(err))
514                         goto err_free_rt;
515         }
516         return rt;
517
518 err_free_rt:
519         ip_rt_put(rt);
520 err_free_skb:
521         kfree_skb(skb);
522         dev->stats.tx_dropped++;
523         return NULL;
524 }
525
526 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
527                         __be16 proto)
528 {
529         struct ip_tunnel *tunnel = netdev_priv(dev);
530         struct ip_tunnel_info *tun_info;
531         const struct ip_tunnel_key *key;
532         struct rtable *rt = NULL;
533         struct flowi4 fl;
534         int tunnel_hlen;
535         __be16 df, flags;
536
537         tun_info = skb_tunnel_info(skb);
538         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
539                      ip_tunnel_info_af(tun_info) != AF_INET))
540                 goto err_free_skb;
541
542         key = &tun_info->key;
543         tunnel_hlen = gre_calc_hlen(key->tun_flags);
544
545         rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
546         if (!rt)
547                 return;
548
549         /* Push Tunnel header. */
550         if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
551                 goto err_free_rt;
552
553         flags = tun_info->key.tun_flags &
554                 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
555         gre_build_header(skb, tunnel_hlen, flags, proto,
556                          tunnel_id_to_key32(tun_info->key.tun_id),
557                          (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
558
559         df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
560
561         iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
562                       key->tos, key->ttl, df, false);
563         return;
564
565 err_free_rt:
566         ip_rt_put(rt);
567 err_free_skb:
568         kfree_skb(skb);
569         dev->stats.tx_dropped++;
570 }
571
572 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
573                            __be16 proto)
574 {
575         struct ip_tunnel *tunnel = netdev_priv(dev);
576         struct ip_tunnel_info *tun_info;
577         const struct ip_tunnel_key *key;
578         struct erspan_metadata *md;
579         struct rtable *rt = NULL;
580         bool truncate = false;
581         struct flowi4 fl;
582         int tunnel_hlen;
583         int version;
584         __be16 df;
585         int nhoff;
586         int thoff;
587
588         tun_info = skb_tunnel_info(skb);
589         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
590                      ip_tunnel_info_af(tun_info) != AF_INET))
591                 goto err_free_skb;
592
593         key = &tun_info->key;
594         if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
595                 goto err_free_rt;
596         md = ip_tunnel_info_opts(tun_info);
597         if (!md)
598                 goto err_free_rt;
599
600         /* ERSPAN has fixed 8 byte GRE header */
601         version = md->version;
602         tunnel_hlen = 8 + erspan_hdr_len(version);
603
604         rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
605         if (!rt)
606                 return;
607
608         if (gre_handle_offloads(skb, false))
609                 goto err_free_rt;
610
611         if (skb->len > dev->mtu + dev->hard_header_len) {
612                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
613                 truncate = true;
614         }
615
616         nhoff = skb_network_header(skb) - skb_mac_header(skb);
617         if (skb->protocol == htons(ETH_P_IP) &&
618             (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
619                 truncate = true;
620
621         thoff = skb_transport_header(skb) - skb_mac_header(skb);
622         if (skb->protocol == htons(ETH_P_IPV6) &&
623             (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
624                 truncate = true;
625
626         if (version == 1) {
627                 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
628                                     ntohl(md->u.index), truncate, true);
629         } else if (version == 2) {
630                 erspan_build_header_v2(skb,
631                                        ntohl(tunnel_id_to_key32(key->tun_id)),
632                                        md->u.md2.dir,
633                                        get_hwid(&md->u.md2),
634                                        truncate, true);
635         } else {
636                 goto err_free_rt;
637         }
638
639         gre_build_header(skb, 8, TUNNEL_SEQ,
640                          htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
641
642         df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
643
644         iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
645                       key->tos, key->ttl, df, false);
646         return;
647
648 err_free_rt:
649         ip_rt_put(rt);
650 err_free_skb:
651         kfree_skb(skb);
652         dev->stats.tx_dropped++;
653 }
654
655 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
656 {
657         struct ip_tunnel_info *info = skb_tunnel_info(skb);
658         struct rtable *rt;
659         struct flowi4 fl4;
660
661         if (ip_tunnel_info_af(info) != AF_INET)
662                 return -EINVAL;
663
664         rt = gre_get_rt(skb, dev, &fl4, &info->key);
665         if (IS_ERR(rt))
666                 return PTR_ERR(rt);
667
668         ip_rt_put(rt);
669         info->key.u.ipv4.src = fl4.saddr;
670         return 0;
671 }
672
673 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
674                               struct net_device *dev)
675 {
676         struct ip_tunnel *tunnel = netdev_priv(dev);
677         const struct iphdr *tnl_params;
678
679         if (tunnel->collect_md) {
680                 gre_fb_xmit(skb, dev, skb->protocol);
681                 return NETDEV_TX_OK;
682         }
683
684         if (dev->header_ops) {
685                 /* Need space for new headers */
686                 if (skb_cow_head(skb, dev->needed_headroom -
687                                       (tunnel->hlen + sizeof(struct iphdr))))
688                         goto free_skb;
689
690                 tnl_params = (const struct iphdr *)skb->data;
691
692                 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
693                  * to gre header.
694                  */
695                 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
696                 skb_reset_mac_header(skb);
697         } else {
698                 if (skb_cow_head(skb, dev->needed_headroom))
699                         goto free_skb;
700
701                 tnl_params = &tunnel->parms.iph;
702         }
703
704         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
705                 goto free_skb;
706
707         __gre_xmit(skb, dev, tnl_params, skb->protocol);
708         return NETDEV_TX_OK;
709
710 free_skb:
711         kfree_skb(skb);
712         dev->stats.tx_dropped++;
713         return NETDEV_TX_OK;
714 }
715
716 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
717                                struct net_device *dev)
718 {
719         struct ip_tunnel *tunnel = netdev_priv(dev);
720         bool truncate = false;
721
722         if (tunnel->collect_md) {
723                 erspan_fb_xmit(skb, dev, skb->protocol);
724                 return NETDEV_TX_OK;
725         }
726
727         if (gre_handle_offloads(skb, false))
728                 goto free_skb;
729
730         if (skb_cow_head(skb, dev->needed_headroom))
731                 goto free_skb;
732
733         if (skb->len > dev->mtu + dev->hard_header_len) {
734                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
735                 truncate = true;
736         }
737
738         /* Push ERSPAN header */
739         if (tunnel->erspan_ver == 1)
740                 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
741                                     tunnel->index,
742                                     truncate, true);
743         else if (tunnel->erspan_ver == 2)
744                 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
745                                        tunnel->dir, tunnel->hwid,
746                                        truncate, true);
747         else
748                 goto free_skb;
749
750         tunnel->parms.o_flags &= ~TUNNEL_KEY;
751         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
752         return NETDEV_TX_OK;
753
754 free_skb:
755         kfree_skb(skb);
756         dev->stats.tx_dropped++;
757         return NETDEV_TX_OK;
758 }
759
760 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
761                                 struct net_device *dev)
762 {
763         struct ip_tunnel *tunnel = netdev_priv(dev);
764
765         if (tunnel->collect_md) {
766                 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
767                 return NETDEV_TX_OK;
768         }
769
770         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
771                 goto free_skb;
772
773         if (skb_cow_head(skb, dev->needed_headroom))
774                 goto free_skb;
775
776         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
777         return NETDEV_TX_OK;
778
779 free_skb:
780         kfree_skb(skb);
781         dev->stats.tx_dropped++;
782         return NETDEV_TX_OK;
783 }
784
785 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
786 {
787         struct ip_tunnel *tunnel = netdev_priv(dev);
788         int len;
789
790         len = tunnel->tun_hlen;
791         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
792         len = tunnel->tun_hlen - len;
793         tunnel->hlen = tunnel->hlen + len;
794
795         dev->needed_headroom = dev->needed_headroom + len;
796         if (set_mtu)
797                 dev->mtu = max_t(int, dev->mtu - len, 68);
798
799         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
800                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
801                     tunnel->encap.type == TUNNEL_ENCAP_NONE) {
802                         dev->features |= NETIF_F_GSO_SOFTWARE;
803                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
804                 } else {
805                         dev->features &= ~NETIF_F_GSO_SOFTWARE;
806                         dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
807                 }
808                 dev->features |= NETIF_F_LLTX;
809         } else {
810                 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
811                 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
812         }
813 }
814
815 static int ipgre_tunnel_ioctl(struct net_device *dev,
816                               struct ifreq *ifr, int cmd)
817 {
818         struct ip_tunnel_parm p;
819         int err;
820
821         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
822                 return -EFAULT;
823
824         if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
825                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
826                     p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
827                     ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
828                         return -EINVAL;
829         }
830
831         p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
832         p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
833
834         err = ip_tunnel_ioctl(dev, &p, cmd);
835         if (err)
836                 return err;
837
838         if (cmd == SIOCCHGTUNNEL) {
839                 struct ip_tunnel *t = netdev_priv(dev);
840
841                 t->parms.i_flags = p.i_flags;
842                 t->parms.o_flags = p.o_flags;
843
844                 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
845                         ipgre_link_update(dev, true);
846         }
847
848         p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
849         p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
850
851         if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
852                 return -EFAULT;
853
854         return 0;
855 }
856
857 /* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.
860
861
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could do something similar,
   but this feature is apparently missing in IOS<=11.2(8).
866
867    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
868    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
869
870    ping -t 255 224.66.66.66
871
872    If nobody answers, mbone does not work.
873
874    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
875    ip addr add 10.66.66.<somewhat>/24 dev Universe
876    ifconfig Universe up
877    ifconfig Universe add fe80::<Your_real_addr>/10
878    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
879    ftp 10.66.66.66
880    ...
881    ftp fec0:6666:6666::193.233.7.65
882    ...
883  */
884 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
885                         unsigned short type,
886                         const void *daddr, const void *saddr, unsigned int len)
887 {
888         struct ip_tunnel *t = netdev_priv(dev);
889         struct iphdr *iph;
890         struct gre_base_hdr *greh;
891
892         iph = skb_push(skb, t->hlen + sizeof(*iph));
893         greh = (struct gre_base_hdr *)(iph+1);
894         greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
895         greh->protocol = htons(type);
896
897         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
898
899         /* Set the source hardware address. */
900         if (saddr)
901                 memcpy(&iph->saddr, saddr, 4);
902         if (daddr)
903                 memcpy(&iph->daddr, daddr, 4);
904         if (iph->daddr)
905                 return t->hlen + sizeof(*iph);
906
907         return -(t->hlen + sizeof(*iph));
908 }
909
910 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
911 {
912         const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
913         memcpy(haddr, &iph->saddr, 4);
914         return 4;
915 }
916
917 static const struct header_ops ipgre_header_ops = {
918         .create = ipgre_header,
919         .parse  = ipgre_header_parse,
920 };
921
922 #ifdef CONFIG_NET_IPGRE_BROADCAST
923 static int ipgre_open(struct net_device *dev)
924 {
925         struct ip_tunnel *t = netdev_priv(dev);
926
927         if (ipv4_is_multicast(t->parms.iph.daddr)) {
928                 struct flowi4 fl4;
929                 struct rtable *rt;
930
931                 rt = ip_route_output_gre(t->net, &fl4,
932                                          t->parms.iph.daddr,
933                                          t->parms.iph.saddr,
934                                          t->parms.o_key,
935                                          RT_TOS(t->parms.iph.tos),
936                                          t->parms.link);
937                 if (IS_ERR(rt))
938                         return -EADDRNOTAVAIL;
939                 dev = rt->dst.dev;
940                 ip_rt_put(rt);
941                 if (!__in_dev_get_rtnl(dev))
942                         return -EADDRNOTAVAIL;
943                 t->mlink = dev->ifindex;
944                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
945         }
946         return 0;
947 }
948
949 static int ipgre_close(struct net_device *dev)
950 {
951         struct ip_tunnel *t = netdev_priv(dev);
952
953         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
954                 struct in_device *in_dev;
955                 in_dev = inetdev_by_index(t->net, t->mlink);
956                 if (in_dev)
957                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
958         }
959         return 0;
960 }
961 #endif
962
963 static const struct net_device_ops ipgre_netdev_ops = {
964         .ndo_init               = ipgre_tunnel_init,
965         .ndo_uninit             = ip_tunnel_uninit,
966 #ifdef CONFIG_NET_IPGRE_BROADCAST
967         .ndo_open               = ipgre_open,
968         .ndo_stop               = ipgre_close,
969 #endif
970         .ndo_start_xmit         = ipgre_xmit,
971         .ndo_do_ioctl           = ipgre_tunnel_ioctl,
972         .ndo_change_mtu         = ip_tunnel_change_mtu,
973         .ndo_get_stats64        = ip_tunnel_get_stats64,
974         .ndo_get_iflink         = ip_tunnel_get_iflink,
975 };
976
/* Feature bits that every GRE flavour in this file turns on at init time. */
#define GRE_FEATURES \
        (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
981
982 static void ipgre_tunnel_setup(struct net_device *dev)
983 {
984         dev->netdev_ops         = &ipgre_netdev_ops;
985         dev->type               = ARPHRD_IPGRE;
986         ip_tunnel_setup(dev, ipgre_net_id);
987 }
988
989 static void __gre_tunnel_init(struct net_device *dev)
990 {
991         struct ip_tunnel *tunnel;
992
993         tunnel = netdev_priv(dev);
994         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
995         tunnel->parms.iph.protocol = IPPROTO_GRE;
996
997         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
998
999         dev->features           |= GRE_FEATURES;
1000         dev->hw_features        |= GRE_FEATURES;
1001
1002         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
1003                 /* TCP offload with GRE SEQ is not supported, nor
1004                  * can we support 2 levels of outer headers requiring
1005                  * an update.
1006                  */
1007                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
1008                     (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
1009                         dev->features    |= NETIF_F_GSO_SOFTWARE;
1010                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1011                 }
1012
1013                 /* Can use a lockless transmit, unless we generate
1014                  * output sequences
1015                  */
1016                 dev->features |= NETIF_F_LLTX;
1017         }
1018 }
1019
1020 static int ipgre_tunnel_init(struct net_device *dev)
1021 {
1022         struct ip_tunnel *tunnel = netdev_priv(dev);
1023         struct iphdr *iph = &tunnel->parms.iph;
1024
1025         __gre_tunnel_init(dev);
1026
1027         memcpy(dev->dev_addr, &iph->saddr, 4);
1028         memcpy(dev->broadcast, &iph->daddr, 4);
1029
1030         dev->flags              = IFF_NOARP;
1031         netif_keep_dst(dev);
1032         dev->addr_len           = 4;
1033
1034         if (iph->daddr && !tunnel->collect_md) {
1035 #ifdef CONFIG_NET_IPGRE_BROADCAST
1036                 if (ipv4_is_multicast(iph->daddr)) {
1037                         if (!iph->saddr)
1038                                 return -EINVAL;
1039                         dev->flags = IFF_BROADCAST;
1040                         dev->header_ops = &ipgre_header_ops;
1041                 }
1042 #endif
1043         } else if (!tunnel->collect_md) {
1044                 dev->header_ops = &ipgre_header_ops;
1045         }
1046
1047         return ip_tunnel_init(dev);
1048 }
1049
1050 static const struct gre_protocol ipgre_protocol = {
1051         .handler     = gre_rcv,
1052         .err_handler = gre_err,
1053 };
1054
1055 static int __net_init ipgre_init_net(struct net *net)
1056 {
1057         return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1058 }
1059
1060 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1061 {
1062         ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1063 }
1064
1065 static struct pernet_operations ipgre_net_ops = {
1066         .init = ipgre_init_net,
1067         .exit_batch = ipgre_exit_batch_net,
1068         .id   = &ipgre_net_id,
1069         .size = sizeof(struct ip_tunnel_net),
1070 };
1071
1072 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1073                                  struct netlink_ext_ack *extack)
1074 {
1075         __be16 flags;
1076
1077         if (!data)
1078                 return 0;
1079
1080         flags = 0;
1081         if (data[IFLA_GRE_IFLAGS])
1082                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1083         if (data[IFLA_GRE_OFLAGS])
1084                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1085         if (flags & (GRE_VERSION|GRE_ROUTING))
1086                 return -EINVAL;
1087
1088         if (data[IFLA_GRE_COLLECT_METADATA] &&
1089             data[IFLA_GRE_ENCAP_TYPE] &&
1090             nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1091                 return -EINVAL;
1092
1093         return 0;
1094 }
1095
1096 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1097                               struct netlink_ext_ack *extack)
1098 {
1099         __be32 daddr;
1100
1101         if (tb[IFLA_ADDRESS]) {
1102                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1103                         return -EINVAL;
1104                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1105                         return -EADDRNOTAVAIL;
1106         }
1107
1108         if (!data)
1109                 goto out;
1110
1111         if (data[IFLA_GRE_REMOTE]) {
1112                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1113                 if (!daddr)
1114                         return -EINVAL;
1115         }
1116
1117 out:
1118         return ipgre_tunnel_validate(tb, data, extack);
1119 }
1120
1121 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1122                            struct netlink_ext_ack *extack)
1123 {
1124         __be16 flags = 0;
1125         int ret;
1126
1127         if (!data)
1128                 return 0;
1129
1130         ret = ipgre_tap_validate(tb, data, extack);
1131         if (ret)
1132                 return ret;
1133
1134         /* ERSPAN should only have GRE sequence and key flag */
1135         if (data[IFLA_GRE_OFLAGS])
1136                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1137         if (data[IFLA_GRE_IFLAGS])
1138                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1139         if (!data[IFLA_GRE_COLLECT_METADATA] &&
1140             flags != (GRE_SEQ | GRE_KEY))
1141                 return -EINVAL;
1142
1143         /* ERSPAN Session ID only has 10-bit. Since we reuse
1144          * 32-bit key field as ID, check it's range.
1145          */
1146         if (data[IFLA_GRE_IKEY] &&
1147             (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1148                 return -EINVAL;
1149
1150         if (data[IFLA_GRE_OKEY] &&
1151             (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1152                 return -EINVAL;
1153
1154         return 0;
1155 }
1156
1157 static int ipgre_netlink_parms(struct net_device *dev,
1158                                 struct nlattr *data[],
1159                                 struct nlattr *tb[],
1160                                 struct ip_tunnel_parm *parms,
1161                                 __u32 *fwmark)
1162 {
1163         struct ip_tunnel *t = netdev_priv(dev);
1164
1165         memset(parms, 0, sizeof(*parms));
1166
1167         parms->iph.protocol = IPPROTO_GRE;
1168
1169         if (!data)
1170                 return 0;
1171
1172         if (data[IFLA_GRE_LINK])
1173                 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1174
1175         if (data[IFLA_GRE_IFLAGS])
1176                 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1177
1178         if (data[IFLA_GRE_OFLAGS])
1179                 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1180
1181         if (data[IFLA_GRE_IKEY])
1182                 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1183
1184         if (data[IFLA_GRE_OKEY])
1185                 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1186
1187         if (data[IFLA_GRE_LOCAL])
1188                 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1189
1190         if (data[IFLA_GRE_REMOTE])
1191                 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1192
1193         if (data[IFLA_GRE_TTL])
1194                 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1195
1196         if (data[IFLA_GRE_TOS])
1197                 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1198
1199         if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1200                 if (t->ignore_df)
1201                         return -EINVAL;
1202                 parms->iph.frag_off = htons(IP_DF);
1203         }
1204
1205         if (data[IFLA_GRE_COLLECT_METADATA]) {
1206                 t->collect_md = true;
1207                 if (dev->type == ARPHRD_IPGRE)
1208                         dev->type = ARPHRD_NONE;
1209         }
1210
1211         if (data[IFLA_GRE_IGNORE_DF]) {
1212                 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1213                   && (parms->iph.frag_off & htons(IP_DF)))
1214                         return -EINVAL;
1215                 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1216         }
1217
1218         if (data[IFLA_GRE_FWMARK])
1219                 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1220
1221         if (data[IFLA_GRE_ERSPAN_VER]) {
1222                 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1223
1224                 if (t->erspan_ver != 1 && t->erspan_ver != 2)
1225                         return -EINVAL;
1226         }
1227
1228         if (t->erspan_ver == 1) {
1229                 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1230                         t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1231                         if (t->index & ~INDEX_MASK)
1232                                 return -EINVAL;
1233                 }
1234         } else if (t->erspan_ver == 2) {
1235                 if (data[IFLA_GRE_ERSPAN_DIR]) {
1236                         t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1237                         if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1238                                 return -EINVAL;
1239                 }
1240                 if (data[IFLA_GRE_ERSPAN_HWID]) {
1241                         t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1242                         if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1243                                 return -EINVAL;
1244                 }
1245         }
1246
1247         return 0;
1248 }
1249
1250 /* This function returns true when ENCAP attributes are present in the nl msg */
1251 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1252                                       struct ip_tunnel_encap *ipencap)
1253 {
1254         bool ret = false;
1255
1256         memset(ipencap, 0, sizeof(*ipencap));
1257
1258         if (!data)
1259                 return ret;
1260
1261         if (data[IFLA_GRE_ENCAP_TYPE]) {
1262                 ret = true;
1263                 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1264         }
1265
1266         if (data[IFLA_GRE_ENCAP_FLAGS]) {
1267                 ret = true;
1268                 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1269         }
1270
1271         if (data[IFLA_GRE_ENCAP_SPORT]) {
1272                 ret = true;
1273                 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1274         }
1275
1276         if (data[IFLA_GRE_ENCAP_DPORT]) {
1277                 ret = true;
1278                 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1279         }
1280
1281         return ret;
1282 }
1283
1284 static int gre_tap_init(struct net_device *dev)
1285 {
1286         __gre_tunnel_init(dev);
1287         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1288         netif_keep_dst(dev);
1289
1290         return ip_tunnel_init(dev);
1291 }
1292
1293 static const struct net_device_ops gre_tap_netdev_ops = {
1294         .ndo_init               = gre_tap_init,
1295         .ndo_uninit             = ip_tunnel_uninit,
1296         .ndo_start_xmit         = gre_tap_xmit,
1297         .ndo_set_mac_address    = eth_mac_addr,
1298         .ndo_validate_addr      = eth_validate_addr,
1299         .ndo_change_mtu         = ip_tunnel_change_mtu,
1300         .ndo_get_stats64        = ip_tunnel_get_stats64,
1301         .ndo_get_iflink         = ip_tunnel_get_iflink,
1302         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1303 };
1304
1305 static int erspan_tunnel_init(struct net_device *dev)
1306 {
1307         struct ip_tunnel *tunnel = netdev_priv(dev);
1308
1309         tunnel->tun_hlen = 8;
1310         tunnel->parms.iph.protocol = IPPROTO_GRE;
1311         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1312                        erspan_hdr_len(tunnel->erspan_ver);
1313
1314         dev->features           |= GRE_FEATURES;
1315         dev->hw_features        |= GRE_FEATURES;
1316         dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
1317         netif_keep_dst(dev);
1318
1319         return ip_tunnel_init(dev);
1320 }
1321
1322 static const struct net_device_ops erspan_netdev_ops = {
1323         .ndo_init               = erspan_tunnel_init,
1324         .ndo_uninit             = ip_tunnel_uninit,
1325         .ndo_start_xmit         = erspan_xmit,
1326         .ndo_set_mac_address    = eth_mac_addr,
1327         .ndo_validate_addr      = eth_validate_addr,
1328         .ndo_change_mtu         = ip_tunnel_change_mtu,
1329         .ndo_get_stats64        = ip_tunnel_get_stats64,
1330         .ndo_get_iflink         = ip_tunnel_get_iflink,
1331         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1332 };
1333
1334 static void ipgre_tap_setup(struct net_device *dev)
1335 {
1336         ether_setup(dev);
1337         dev->max_mtu = 0;
1338         dev->netdev_ops = &gre_tap_netdev_ops;
1339         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1340         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1341         ip_tunnel_setup(dev, gre_tap_net_id);
1342 }
1343
1344 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1345                          struct nlattr *tb[], struct nlattr *data[],
1346                          struct netlink_ext_ack *extack)
1347 {
1348         struct ip_tunnel_parm p;
1349         struct ip_tunnel_encap ipencap;
1350         __u32 fwmark = 0;
1351         int err;
1352
1353         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1354                 struct ip_tunnel *t = netdev_priv(dev);
1355                 err = ip_tunnel_encap_setup(t, &ipencap);
1356
1357                 if (err < 0)
1358                         return err;
1359         }
1360
1361         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1362         if (err < 0)
1363                 return err;
1364         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1365 }
1366
1367 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1368                             struct nlattr *data[],
1369                             struct netlink_ext_ack *extack)
1370 {
1371         struct ip_tunnel *t = netdev_priv(dev);
1372         struct ip_tunnel_encap ipencap;
1373         __u32 fwmark = t->fwmark;
1374         struct ip_tunnel_parm p;
1375         int err;
1376
1377         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1378                 err = ip_tunnel_encap_setup(t, &ipencap);
1379
1380                 if (err < 0)
1381                         return err;
1382         }
1383
1384         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1385         if (err < 0)
1386                 return err;
1387
1388         err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1389         if (err < 0)
1390                 return err;
1391
1392         t->parms.i_flags = p.i_flags;
1393         t->parms.o_flags = p.o_flags;
1394
1395         if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1396                 ipgre_link_update(dev, !tb[IFLA_MTU]);
1397
1398         return 0;
1399 }
1400
1401 static size_t ipgre_get_size(const struct net_device *dev)
1402 {
1403         return
1404                 /* IFLA_GRE_LINK */
1405                 nla_total_size(4) +
1406                 /* IFLA_GRE_IFLAGS */
1407                 nla_total_size(2) +
1408                 /* IFLA_GRE_OFLAGS */
1409                 nla_total_size(2) +
1410                 /* IFLA_GRE_IKEY */
1411                 nla_total_size(4) +
1412                 /* IFLA_GRE_OKEY */
1413                 nla_total_size(4) +
1414                 /* IFLA_GRE_LOCAL */
1415                 nla_total_size(4) +
1416                 /* IFLA_GRE_REMOTE */
1417                 nla_total_size(4) +
1418                 /* IFLA_GRE_TTL */
1419                 nla_total_size(1) +
1420                 /* IFLA_GRE_TOS */
1421                 nla_total_size(1) +
1422                 /* IFLA_GRE_PMTUDISC */
1423                 nla_total_size(1) +
1424                 /* IFLA_GRE_ENCAP_TYPE */
1425                 nla_total_size(2) +
1426                 /* IFLA_GRE_ENCAP_FLAGS */
1427                 nla_total_size(2) +
1428                 /* IFLA_GRE_ENCAP_SPORT */
1429                 nla_total_size(2) +
1430                 /* IFLA_GRE_ENCAP_DPORT */
1431                 nla_total_size(2) +
1432                 /* IFLA_GRE_COLLECT_METADATA */
1433                 nla_total_size(0) +
1434                 /* IFLA_GRE_IGNORE_DF */
1435                 nla_total_size(1) +
1436                 /* IFLA_GRE_FWMARK */
1437                 nla_total_size(4) +
1438                 /* IFLA_GRE_ERSPAN_INDEX */
1439                 nla_total_size(4) +
1440                 /* IFLA_GRE_ERSPAN_VER */
1441                 nla_total_size(1) +
1442                 /* IFLA_GRE_ERSPAN_DIR */
1443                 nla_total_size(1) +
1444                 /* IFLA_GRE_ERSPAN_HWID */
1445                 nla_total_size(2) +
1446                 0;
1447 }
1448
1449 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1450 {
1451         struct ip_tunnel *t = netdev_priv(dev);
1452         struct ip_tunnel_parm *p = &t->parms;
1453
1454         if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1455             nla_put_be16(skb, IFLA_GRE_IFLAGS,
1456                          gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1457             nla_put_be16(skb, IFLA_GRE_OFLAGS,
1458                          gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1459             nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1460             nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1461             nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1462             nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1463             nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1464             nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1465             nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1466                        !!(p->iph.frag_off & htons(IP_DF))) ||
1467             nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1468                 goto nla_put_failure;
1469
1470         if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1471                         t->encap.type) ||
1472             nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1473                          t->encap.sport) ||
1474             nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1475                          t->encap.dport) ||
1476             nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1477                         t->encap.flags))
1478                 goto nla_put_failure;
1479
1480         if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1481                 goto nla_put_failure;
1482
1483         if (t->collect_md) {
1484                 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1485                         goto nla_put_failure;
1486         }
1487
1488         if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1489                 goto nla_put_failure;
1490
1491         if (t->erspan_ver == 1) {
1492                 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1493                         goto nla_put_failure;
1494         } else if (t->erspan_ver == 2) {
1495                 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1496                         goto nla_put_failure;
1497                 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1498                         goto nla_put_failure;
1499         }
1500
1501         return 0;
1502
1503 nla_put_failure:
1504         return -EMSGSIZE;
1505 }
1506
1507 static void erspan_setup(struct net_device *dev)
1508 {
1509         struct ip_tunnel *t = netdev_priv(dev);
1510
1511         ether_setup(dev);
1512         dev->netdev_ops = &erspan_netdev_ops;
1513         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1514         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1515         ip_tunnel_setup(dev, erspan_net_id);
1516         t->erspan_ver = 1;
1517 }
1518
1519 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1520         [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1521         [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1522         [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1523         [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1524         [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1525         [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1526         [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1527         [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1528         [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1529         [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1530         [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
1531         [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
1532         [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
1533         [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
1534         [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
1535         [IFLA_GRE_IGNORE_DF]    = { .type = NLA_U8 },
1536         [IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
1537         [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1538         [IFLA_GRE_ERSPAN_VER]   = { .type = NLA_U8 },
1539         [IFLA_GRE_ERSPAN_DIR]   = { .type = NLA_U8 },
1540         [IFLA_GRE_ERSPAN_HWID]  = { .type = NLA_U16 },
1541 };
1542
1543 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1544         .kind           = "gre",
1545         .maxtype        = IFLA_GRE_MAX,
1546         .policy         = ipgre_policy,
1547         .priv_size      = sizeof(struct ip_tunnel),
1548         .setup          = ipgre_tunnel_setup,
1549         .validate       = ipgre_tunnel_validate,
1550         .newlink        = ipgre_newlink,
1551         .changelink     = ipgre_changelink,
1552         .dellink        = ip_tunnel_dellink,
1553         .get_size       = ipgre_get_size,
1554         .fill_info      = ipgre_fill_info,
1555         .get_link_net   = ip_tunnel_get_link_net,
1556 };
1557
1558 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1559         .kind           = "gretap",
1560         .maxtype        = IFLA_GRE_MAX,
1561         .policy         = ipgre_policy,
1562         .priv_size      = sizeof(struct ip_tunnel),
1563         .setup          = ipgre_tap_setup,
1564         .validate       = ipgre_tap_validate,
1565         .newlink        = ipgre_newlink,
1566         .changelink     = ipgre_changelink,
1567         .dellink        = ip_tunnel_dellink,
1568         .get_size       = ipgre_get_size,
1569         .fill_info      = ipgre_fill_info,
1570         .get_link_net   = ip_tunnel_get_link_net,
1571 };
1572
1573 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1574         .kind           = "erspan",
1575         .maxtype        = IFLA_GRE_MAX,
1576         .policy         = ipgre_policy,
1577         .priv_size      = sizeof(struct ip_tunnel),
1578         .setup          = erspan_setup,
1579         .validate       = erspan_validate,
1580         .newlink        = ipgre_newlink,
1581         .changelink     = ipgre_changelink,
1582         .dellink        = ip_tunnel_dellink,
1583         .get_size       = ipgre_get_size,
1584         .fill_info      = ipgre_fill_info,
1585         .get_link_net   = ip_tunnel_get_link_net,
1586 };
1587
1588 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1589                                         u8 name_assign_type)
1590 {
1591         struct nlattr *tb[IFLA_MAX + 1];
1592         struct net_device *dev;
1593         LIST_HEAD(list_kill);
1594         struct ip_tunnel *t;
1595         int err;
1596
1597         memset(&tb, 0, sizeof(tb));
1598
1599         dev = rtnl_create_link(net, name, name_assign_type,
1600                                &ipgre_tap_ops, tb, NULL);
1601         if (IS_ERR(dev))
1602                 return dev;
1603
1604         /* Configure flow based GRE device. */
1605         t = netdev_priv(dev);
1606         t->collect_md = true;
1607
1608         err = ipgre_newlink(net, dev, tb, NULL, NULL);
1609         if (err < 0) {
1610                 free_netdev(dev);
1611                 return ERR_PTR(err);
1612         }
1613
1614         /* openvswitch users expect packet sizes to be unrestricted,
1615          * so set the largest MTU we can.
1616          */
1617         err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1618         if (err)
1619                 goto out;
1620
1621         err = rtnl_configure_link(dev, NULL);
1622         if (err < 0)
1623                 goto out;
1624
1625         return dev;
1626 out:
1627         ip_tunnel_dellink(dev, &list_kill);
1628         unregister_netdevice_many(&list_kill);
1629         return ERR_PTR(err);
1630 }
1631 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1632
1633 static int __net_init ipgre_tap_init_net(struct net *net)
1634 {
1635         return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1636 }
1637
1638 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1639 {
1640         ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1641 }
1642
1643 static struct pernet_operations ipgre_tap_net_ops = {
1644         .init = ipgre_tap_init_net,
1645         .exit_batch = ipgre_tap_exit_batch_net,
1646         .id   = &gre_tap_net_id,
1647         .size = sizeof(struct ip_tunnel_net),
1648 };
1649
1650 static int __net_init erspan_init_net(struct net *net)
1651 {
1652         return ip_tunnel_init_net(net, erspan_net_id,
1653                                   &erspan_link_ops, "erspan0");
1654 }
1655
1656 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1657 {
1658         ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1659 }
1660
1661 static struct pernet_operations erspan_net_ops = {
1662         .init = erspan_init_net,
1663         .exit_batch = erspan_exit_batch_net,
1664         .id   = &erspan_net_id,
1665         .size = sizeof(struct ip_tunnel_net),
1666 };
1667
1668 static int __init ipgre_init(void)
1669 {
1670         int err;
1671
1672         pr_info("GRE over IPv4 tunneling driver\n");
1673
1674         err = register_pernet_device(&ipgre_net_ops);
1675         if (err < 0)
1676                 return err;
1677
1678         err = register_pernet_device(&ipgre_tap_net_ops);
1679         if (err < 0)
1680                 goto pnet_tap_failed;
1681
1682         err = register_pernet_device(&erspan_net_ops);
1683         if (err < 0)
1684                 goto pnet_erspan_failed;
1685
1686         err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687         if (err < 0) {
1688                 pr_info("%s: can't add protocol\n", __func__);
1689                 goto add_proto_failed;
1690         }
1691
1692         err = rtnl_link_register(&ipgre_link_ops);
1693         if (err < 0)
1694                 goto rtnl_link_failed;
1695
1696         err = rtnl_link_register(&ipgre_tap_ops);
1697         if (err < 0)
1698                 goto tap_ops_failed;
1699
1700         err = rtnl_link_register(&erspan_link_ops);
1701         if (err < 0)
1702                 goto erspan_link_failed;
1703
1704         return 0;
1705
1706 erspan_link_failed:
1707         rtnl_link_unregister(&ipgre_tap_ops);
1708 tap_ops_failed:
1709         rtnl_link_unregister(&ipgre_link_ops);
1710 rtnl_link_failed:
1711         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1712 add_proto_failed:
1713         unregister_pernet_device(&erspan_net_ops);
1714 pnet_erspan_failed:
1715         unregister_pernet_device(&ipgre_tap_net_ops);
1716 pnet_tap_failed:
1717         unregister_pernet_device(&ipgre_net_ops);
1718         return err;
1719 }
1720
1721 static void __exit ipgre_fini(void)
1722 {
1723         rtnl_link_unregister(&ipgre_tap_ops);
1724         rtnl_link_unregister(&ipgre_link_ops);
1725         rtnl_link_unregister(&erspan_link_ops);
1726         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1727         unregister_pernet_device(&ipgre_tap_net_ops);
1728         unregister_pernet_device(&ipgre_net_ops);
1729         unregister_pernet_device(&erspan_net_ops);
1730 }
1731
1732 module_init(ipgre_init);
1733 module_exit(ipgre_fini);
1734 MODULE_LICENSE("GPL");
1735 MODULE_ALIAS_RTNL_LINK("gre");
1736 MODULE_ALIAS_RTNL_LINK("gretap");
1737 MODULE_ALIAS_RTNL_LINK("erspan");
1738 MODULE_ALIAS_NETDEV("gre0");
1739 MODULE_ALIAS_NETDEV("gretap0");
1740 MODULE_ALIAS_NETDEV("erspan0");