Merge branches 'acpi-pm', 'acpi-pci', 'acpi-sysfs' and 'acpi-tables'
[linux-2.6-microblaze.git] / net / ipv4 / ip_tunnel.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52
53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
54 {
55         return hash_32((__force u32)key ^ (__force u32)remote,
56                          IP_TNL_HASH_BITS);
57 }
58
59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60                                 __be16 flags, __be32 key)
61 {
62         if (p->i_flags & TUNNEL_KEY) {
63                 if (flags & TUNNEL_KEY)
64                         return key == p->i_key;
65                 else
66                         /* key expected, none present */
67                         return false;
68         } else
69                 return !(flags & TUNNEL_KEY);
70 }
71
72 /* Fallback tunnel: no source, no destination, no key, no options
73
74    Tunnel hash table:
75    We require exact key match i.e. if a key is present in packet
76    it will match only tunnel with the same key; if it is not present,
77    it will match only keyless tunnel.
78
79    All keysless packets, if not matched configured keyless tunnels
80    will match fallback tunnel.
81    Given src, dst and key, find appropriate for input tunnel.
82 */
83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84                                    int link, __be16 flags,
85                                    __be32 remote, __be32 local,
86                                    __be32 key)
87 {
88         struct ip_tunnel *t, *cand = NULL;
89         struct hlist_head *head;
90         struct net_device *ndev;
91         unsigned int hash;
92
93         hash = ip_tunnel_hash(key, remote);
94         head = &itn->tunnels[hash];
95
96         hlist_for_each_entry_rcu(t, head, hash_node) {
97                 if (local != t->parms.iph.saddr ||
98                     remote != t->parms.iph.daddr ||
99                     !(t->dev->flags & IFF_UP))
100                         continue;
101
102                 if (!ip_tunnel_key_match(&t->parms, flags, key))
103                         continue;
104
105                 if (t->parms.link == link)
106                         return t;
107                 else
108                         cand = t;
109         }
110
111         hlist_for_each_entry_rcu(t, head, hash_node) {
112                 if (remote != t->parms.iph.daddr ||
113                     t->parms.iph.saddr != 0 ||
114                     !(t->dev->flags & IFF_UP))
115                         continue;
116
117                 if (!ip_tunnel_key_match(&t->parms, flags, key))
118                         continue;
119
120                 if (t->parms.link == link)
121                         return t;
122                 else if (!cand)
123                         cand = t;
124         }
125
126         hash = ip_tunnel_hash(key, 0);
127         head = &itn->tunnels[hash];
128
129         hlist_for_each_entry_rcu(t, head, hash_node) {
130                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
131                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
132                         continue;
133
134                 if (!(t->dev->flags & IFF_UP))
135                         continue;
136
137                 if (!ip_tunnel_key_match(&t->parms, flags, key))
138                         continue;
139
140                 if (t->parms.link == link)
141                         return t;
142                 else if (!cand)
143                         cand = t;
144         }
145
146         hlist_for_each_entry_rcu(t, head, hash_node) {
147                 if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
148                     t->parms.iph.saddr != 0 ||
149                     t->parms.iph.daddr != 0 ||
150                     !(t->dev->flags & IFF_UP))
151                         continue;
152
153                 if (t->parms.link == link)
154                         return t;
155                 else if (!cand)
156                         cand = t;
157         }
158
159         if (cand)
160                 return cand;
161
162         t = rcu_dereference(itn->collect_md_tun);
163         if (t && t->dev->flags & IFF_UP)
164                 return t;
165
166         ndev = READ_ONCE(itn->fb_tunnel_dev);
167         if (ndev && ndev->flags & IFF_UP)
168                 return netdev_priv(ndev);
169
170         return NULL;
171 }
172 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173
174 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175                                     struct ip_tunnel_parm *parms)
176 {
177         unsigned int h;
178         __be32 remote;
179         __be32 i_key = parms->i_key;
180
181         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182                 remote = parms->iph.daddr;
183         else
184                 remote = 0;
185
186         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187                 i_key = 0;
188
189         h = ip_tunnel_hash(i_key, remote);
190         return &itn->tunnels[h];
191 }
192
193 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194 {
195         struct hlist_head *head = ip_bucket(itn, &t->parms);
196
197         if (t->collect_md)
198                 rcu_assign_pointer(itn->collect_md_tun, t);
199         hlist_add_head_rcu(&t->hash_node, head);
200 }
201
202 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203 {
204         if (t->collect_md)
205                 rcu_assign_pointer(itn->collect_md_tun, NULL);
206         hlist_del_init_rcu(&t->hash_node);
207 }
208
209 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210                                         struct ip_tunnel_parm *parms,
211                                         int type)
212 {
213         __be32 remote = parms->iph.daddr;
214         __be32 local = parms->iph.saddr;
215         __be32 key = parms->i_key;
216         __be16 flags = parms->i_flags;
217         int link = parms->link;
218         struct ip_tunnel *t = NULL;
219         struct hlist_head *head = ip_bucket(itn, parms);
220
221         hlist_for_each_entry_rcu(t, head, hash_node) {
222                 if (local == t->parms.iph.saddr &&
223                     remote == t->parms.iph.daddr &&
224                     link == t->parms.link &&
225                     type == t->dev->type &&
226                     ip_tunnel_key_match(&t->parms, flags, key))
227                         break;
228         }
229         return t;
230 }
231
232 static struct net_device *__ip_tunnel_create(struct net *net,
233                                              const struct rtnl_link_ops *ops,
234                                              struct ip_tunnel_parm *parms)
235 {
236         int err;
237         struct ip_tunnel *tunnel;
238         struct net_device *dev;
239         char name[IFNAMSIZ];
240
241         err = -E2BIG;
242         if (parms->name[0]) {
243                 if (!dev_valid_name(parms->name))
244                         goto failed;
245                 strlcpy(name, parms->name, IFNAMSIZ);
246         } else {
247                 if (strlen(ops->kind) > (IFNAMSIZ - 3))
248                         goto failed;
249                 strcpy(name, ops->kind);
250                 strcat(name, "%d");
251         }
252
253         ASSERT_RTNL();
254         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255         if (!dev) {
256                 err = -ENOMEM;
257                 goto failed;
258         }
259         dev_net_set(dev, net);
260
261         dev->rtnl_link_ops = ops;
262
263         tunnel = netdev_priv(dev);
264         tunnel->parms = *parms;
265         tunnel->net = net;
266
267         err = register_netdevice(dev);
268         if (err)
269                 goto failed_free;
270
271         return dev;
272
273 failed_free:
274         free_netdev(dev);
275 failed:
276         return ERR_PTR(err);
277 }
278
279 static int ip_tunnel_bind_dev(struct net_device *dev)
280 {
281         struct net_device *tdev = NULL;
282         struct ip_tunnel *tunnel = netdev_priv(dev);
283         const struct iphdr *iph;
284         int hlen = LL_MAX_HEADER;
285         int mtu = ETH_DATA_LEN;
286         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287
288         iph = &tunnel->parms.iph;
289
290         /* Guess output device to choose reasonable mtu and needed_headroom */
291         if (iph->daddr) {
292                 struct flowi4 fl4;
293                 struct rtable *rt;
294
295                 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296                                     iph->saddr, tunnel->parms.o_key,
297                                     RT_TOS(iph->tos), dev_net(dev),
298                                     tunnel->parms.link, tunnel->fwmark, 0);
299                 rt = ip_route_output_key(tunnel->net, &fl4);
300
301                 if (!IS_ERR(rt)) {
302                         tdev = rt->dst.dev;
303                         ip_rt_put(rt);
304                 }
305                 if (dev->type != ARPHRD_ETHER)
306                         dev->flags |= IFF_POINTOPOINT;
307
308                 dst_cache_reset(&tunnel->dst_cache);
309         }
310
311         if (!tdev && tunnel->parms.link)
312                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313
314         if (tdev) {
315                 hlen = tdev->hard_header_len + tdev->needed_headroom;
316                 mtu = min(tdev->mtu, IP_MAX_MTU);
317         }
318
319         dev->needed_headroom = t_hlen + hlen;
320         mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
321
322         if (mtu < IPV4_MIN_MTU)
323                 mtu = IPV4_MIN_MTU;
324
325         return mtu;
326 }
327
328 static struct ip_tunnel *ip_tunnel_create(struct net *net,
329                                           struct ip_tunnel_net *itn,
330                                           struct ip_tunnel_parm *parms)
331 {
332         struct ip_tunnel *nt;
333         struct net_device *dev;
334         int t_hlen;
335         int mtu;
336         int err;
337
338         dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339         if (IS_ERR(dev))
340                 return ERR_CAST(dev);
341
342         mtu = ip_tunnel_bind_dev(dev);
343         err = dev_set_mtu(dev, mtu);
344         if (err)
345                 goto err_dev_set_mtu;
346
347         nt = netdev_priv(dev);
348         t_hlen = nt->hlen + sizeof(struct iphdr);
349         dev->min_mtu = ETH_MIN_MTU;
350         dev->max_mtu = IP_MAX_MTU - t_hlen;
351         if (dev->type == ARPHRD_ETHER)
352                 dev->max_mtu -= dev->hard_header_len;
353
354         ip_tunnel_add(itn, nt);
355         return nt;
356
357 err_dev_set_mtu:
358         unregister_netdevice(dev);
359         return ERR_PTR(err);
360 }
361
362 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
363                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
364                   bool log_ecn_error)
365 {
366         const struct iphdr *iph = ip_hdr(skb);
367         int err;
368
369 #ifdef CONFIG_NET_IPGRE_BROADCAST
370         if (ipv4_is_multicast(iph->daddr)) {
371                 tunnel->dev->stats.multicast++;
372                 skb->pkt_type = PACKET_BROADCAST;
373         }
374 #endif
375
376         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
377              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
378                 tunnel->dev->stats.rx_crc_errors++;
379                 tunnel->dev->stats.rx_errors++;
380                 goto drop;
381         }
382
383         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
384                 if (!(tpi->flags&TUNNEL_SEQ) ||
385                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
386                         tunnel->dev->stats.rx_fifo_errors++;
387                         tunnel->dev->stats.rx_errors++;
388                         goto drop;
389                 }
390                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
391         }
392
393         skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
394
395         err = IP_ECN_decapsulate(iph, skb);
396         if (unlikely(err)) {
397                 if (log_ecn_error)
398                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
399                                         &iph->saddr, iph->tos);
400                 if (err > 1) {
401                         ++tunnel->dev->stats.rx_frame_errors;
402                         ++tunnel->dev->stats.rx_errors;
403                         goto drop;
404                 }
405         }
406
407         dev_sw_netstats_rx_add(tunnel->dev, skb->len);
408         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
409
410         if (tunnel->dev->type == ARPHRD_ETHER) {
411                 skb->protocol = eth_type_trans(skb, tunnel->dev);
412                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
413         } else {
414                 skb->dev = tunnel->dev;
415         }
416
417         if (tun_dst)
418                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
419
420         gro_cells_receive(&tunnel->gro_cells, skb);
421         return 0;
422
423 drop:
424         if (tun_dst)
425                 dst_release((struct dst_entry *)tun_dst);
426         kfree_skb(skb);
427         return 0;
428 }
429 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
430
431 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
432                             unsigned int num)
433 {
434         if (num >= MAX_IPTUN_ENCAP_OPS)
435                 return -ERANGE;
436
437         return !cmpxchg((const struct ip_tunnel_encap_ops **)
438                         &iptun_encaps[num],
439                         NULL, ops) ? 0 : -1;
440 }
441 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
442
443 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
444                             unsigned int num)
445 {
446         int ret;
447
448         if (num >= MAX_IPTUN_ENCAP_OPS)
449                 return -ERANGE;
450
451         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
452                        &iptun_encaps[num],
453                        ops, NULL) == ops) ? 0 : -1;
454
455         synchronize_net();
456
457         return ret;
458 }
459 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
460
461 int ip_tunnel_encap_setup(struct ip_tunnel *t,
462                           struct ip_tunnel_encap *ipencap)
463 {
464         int hlen;
465
466         memset(&t->encap, 0, sizeof(t->encap));
467
468         hlen = ip_encap_hlen(ipencap);
469         if (hlen < 0)
470                 return hlen;
471
472         t->encap.type = ipencap->type;
473         t->encap.sport = ipencap->sport;
474         t->encap.dport = ipencap->dport;
475         t->encap.flags = ipencap->flags;
476
477         t->encap_hlen = hlen;
478         t->hlen = t->encap_hlen + t->tun_hlen;
479
480         return 0;
481 }
482 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
483
484 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
485                             struct rtable *rt, __be16 df,
486                             const struct iphdr *inner_iph,
487                             int tunnel_hlen, __be32 dst, bool md)
488 {
489         struct ip_tunnel *tunnel = netdev_priv(dev);
490         int pkt_size;
491         int mtu;
492
493         tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
494         pkt_size = skb->len - tunnel_hlen;
495         pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
496
497         if (df) {
498                 mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
499                 mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
500         } else {
501                 mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
502         }
503
504         if (skb_valid_dst(skb))
505                 skb_dst_update_pmtu_no_confirm(skb, mtu);
506
507         if (skb->protocol == htons(ETH_P_IP)) {
508                 if (!skb_is_gso(skb) &&
509                     (inner_iph->frag_off & htons(IP_DF)) &&
510                     mtu < pkt_size) {
511                         icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
512                         return -E2BIG;
513                 }
514         }
515 #if IS_ENABLED(CONFIG_IPV6)
516         else if (skb->protocol == htons(ETH_P_IPV6)) {
517                 struct rt6_info *rt6;
518                 __be32 daddr;
519
520                 rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
521                                            NULL;
522                 daddr = md ? dst : tunnel->parms.iph.daddr;
523
524                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
525                            mtu >= IPV6_MIN_MTU) {
526                         if ((daddr && !ipv4_is_multicast(daddr)) ||
527                             rt6->rt6i_dst.plen == 128) {
528                                 rt6->rt6i_flags |= RTF_MODIFIED;
529                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
530                         }
531                 }
532
533                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
534                                         mtu < pkt_size) {
535                         icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
536                         return -E2BIG;
537                 }
538         }
539 #endif
540         return 0;
541 }
542
543 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
544                        u8 proto, int tunnel_hlen)
545 {
546         struct ip_tunnel *tunnel = netdev_priv(dev);
547         u32 headroom = sizeof(struct iphdr);
548         struct ip_tunnel_info *tun_info;
549         const struct ip_tunnel_key *key;
550         const struct iphdr *inner_iph;
551         struct rtable *rt = NULL;
552         struct flowi4 fl4;
553         __be16 df = 0;
554         u8 tos, ttl;
555         bool use_cache;
556
557         tun_info = skb_tunnel_info(skb);
558         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
559                      ip_tunnel_info_af(tun_info) != AF_INET))
560                 goto tx_error;
561         key = &tun_info->key;
562         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
563         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
564         tos = key->tos;
565         if (tos == 1) {
566                 if (skb->protocol == htons(ETH_P_IP))
567                         tos = inner_iph->tos;
568                 else if (skb->protocol == htons(ETH_P_IPV6))
569                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
570         }
571         ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
572                             tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
573                             dev_net(dev), 0, skb->mark, skb_get_hash(skb));
574         if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
575                 goto tx_error;
576
577         use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
578         if (use_cache)
579                 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
580         if (!rt) {
581                 rt = ip_route_output_key(tunnel->net, &fl4);
582                 if (IS_ERR(rt)) {
583                         dev->stats.tx_carrier_errors++;
584                         goto tx_error;
585                 }
586                 if (use_cache)
587                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
588                                           fl4.saddr);
589         }
590         if (rt->dst.dev == dev) {
591                 ip_rt_put(rt);
592                 dev->stats.collisions++;
593                 goto tx_error;
594         }
595
596         if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
597                 df = htons(IP_DF);
598         if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
599                             key->u.ipv4.dst, true)) {
600                 ip_rt_put(rt);
601                 goto tx_error;
602         }
603
604         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
605         ttl = key->ttl;
606         if (ttl == 0) {
607                 if (skb->protocol == htons(ETH_P_IP))
608                         ttl = inner_iph->ttl;
609                 else if (skb->protocol == htons(ETH_P_IPV6))
610                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
611                 else
612                         ttl = ip4_dst_hoplimit(&rt->dst);
613         }
614
615         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
616         if (headroom > dev->needed_headroom)
617                 dev->needed_headroom = headroom;
618
619         if (skb_cow_head(skb, dev->needed_headroom)) {
620                 ip_rt_put(rt);
621                 goto tx_dropped;
622         }
623         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
624                       df, !net_eq(tunnel->net, dev_net(dev)));
625         return;
626 tx_error:
627         dev->stats.tx_errors++;
628         goto kfree;
629 tx_dropped:
630         dev->stats.tx_dropped++;
631 kfree:
632         kfree_skb(skb);
633 }
634 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
635
636 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
637                     const struct iphdr *tnl_params, u8 protocol)
638 {
639         struct ip_tunnel *tunnel = netdev_priv(dev);
640         struct ip_tunnel_info *tun_info = NULL;
641         const struct iphdr *inner_iph;
642         unsigned int max_headroom;      /* The extra header space needed */
643         struct rtable *rt = NULL;               /* Route to the other host */
644         bool use_cache = false;
645         struct flowi4 fl4;
646         bool md = false;
647         bool connected;
648         u8 tos, ttl;
649         __be32 dst;
650         __be16 df;
651
652         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
653         connected = (tunnel->parms.iph.daddr != 0);
654
655         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
656
657         dst = tnl_params->daddr;
658         if (dst == 0) {
659                 /* NBMA tunnel */
660
661                 if (!skb_dst(skb)) {
662                         dev->stats.tx_fifo_errors++;
663                         goto tx_error;
664                 }
665
666                 tun_info = skb_tunnel_info(skb);
667                 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
668                     ip_tunnel_info_af(tun_info) == AF_INET &&
669                     tun_info->key.u.ipv4.dst) {
670                         dst = tun_info->key.u.ipv4.dst;
671                         md = true;
672                         connected = true;
673                 }
674                 else if (skb->protocol == htons(ETH_P_IP)) {
675                         rt = skb_rtable(skb);
676                         dst = rt_nexthop(rt, inner_iph->daddr);
677                 }
678 #if IS_ENABLED(CONFIG_IPV6)
679                 else if (skb->protocol == htons(ETH_P_IPV6)) {
680                         const struct in6_addr *addr6;
681                         struct neighbour *neigh;
682                         bool do_tx_error_icmp;
683                         int addr_type;
684
685                         neigh = dst_neigh_lookup(skb_dst(skb),
686                                                  &ipv6_hdr(skb)->daddr);
687                         if (!neigh)
688                                 goto tx_error;
689
690                         addr6 = (const struct in6_addr *)&neigh->primary_key;
691                         addr_type = ipv6_addr_type(addr6);
692
693                         if (addr_type == IPV6_ADDR_ANY) {
694                                 addr6 = &ipv6_hdr(skb)->daddr;
695                                 addr_type = ipv6_addr_type(addr6);
696                         }
697
698                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
699                                 do_tx_error_icmp = true;
700                         else {
701                                 do_tx_error_icmp = false;
702                                 dst = addr6->s6_addr32[3];
703                         }
704                         neigh_release(neigh);
705                         if (do_tx_error_icmp)
706                                 goto tx_error_icmp;
707                 }
708 #endif
709                 else
710                         goto tx_error;
711
712                 if (!md)
713                         connected = false;
714         }
715
716         tos = tnl_params->tos;
717         if (tos & 0x1) {
718                 tos &= ~0x1;
719                 if (skb->protocol == htons(ETH_P_IP)) {
720                         tos = inner_iph->tos;
721                         connected = false;
722                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
723                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
724                         connected = false;
725                 }
726         }
727
728         ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
729                             tunnel->parms.o_key, RT_TOS(tos),
730                             dev_net(dev), tunnel->parms.link,
731                             tunnel->fwmark, skb_get_hash(skb));
732
733         if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
734                 goto tx_error;
735
736         if (connected && md) {
737                 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
738                 if (use_cache)
739                         rt = dst_cache_get_ip4(&tun_info->dst_cache,
740                                                &fl4.saddr);
741         } else {
742                 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
743                                                 &fl4.saddr) : NULL;
744         }
745
746         if (!rt) {
747                 rt = ip_route_output_key(tunnel->net, &fl4);
748
749                 if (IS_ERR(rt)) {
750                         dev->stats.tx_carrier_errors++;
751                         goto tx_error;
752                 }
753                 if (use_cache)
754                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
755                                           fl4.saddr);
756                 else if (!md && connected)
757                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
758                                           fl4.saddr);
759         }
760
761         if (rt->dst.dev == dev) {
762                 ip_rt_put(rt);
763                 dev->stats.collisions++;
764                 goto tx_error;
765         }
766
767         df = tnl_params->frag_off;
768         if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
769                 df |= (inner_iph->frag_off & htons(IP_DF));
770
771         if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
772                 ip_rt_put(rt);
773                 goto tx_error;
774         }
775
776         if (tunnel->err_count > 0) {
777                 if (time_before(jiffies,
778                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
779                         tunnel->err_count--;
780
781                         dst_link_failure(skb);
782                 } else
783                         tunnel->err_count = 0;
784         }
785
786         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
787         ttl = tnl_params->ttl;
788         if (ttl == 0) {
789                 if (skb->protocol == htons(ETH_P_IP))
790                         ttl = inner_iph->ttl;
791 #if IS_ENABLED(CONFIG_IPV6)
792                 else if (skb->protocol == htons(ETH_P_IPV6))
793                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
794 #endif
795                 else
796                         ttl = ip4_dst_hoplimit(&rt->dst);
797         }
798
799         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
800                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
801         if (max_headroom > dev->needed_headroom)
802                 dev->needed_headroom = max_headroom;
803
804         if (skb_cow_head(skb, dev->needed_headroom)) {
805                 ip_rt_put(rt);
806                 dev->stats.tx_dropped++;
807                 kfree_skb(skb);
808                 return;
809         }
810
811         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
812                       df, !net_eq(tunnel->net, dev_net(dev)));
813         return;
814
815 #if IS_ENABLED(CONFIG_IPV6)
816 tx_error_icmp:
817         dst_link_failure(skb);
818 #endif
819 tx_error:
820         dev->stats.tx_errors++;
821         kfree_skb(skb);
822 }
823 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
824
825 static void ip_tunnel_update(struct ip_tunnel_net *itn,
826                              struct ip_tunnel *t,
827                              struct net_device *dev,
828                              struct ip_tunnel_parm *p,
829                              bool set_mtu,
830                              __u32 fwmark)
831 {
832         ip_tunnel_del(itn, t);
833         t->parms.iph.saddr = p->iph.saddr;
834         t->parms.iph.daddr = p->iph.daddr;
835         t->parms.i_key = p->i_key;
836         t->parms.o_key = p->o_key;
837         if (dev->type != ARPHRD_ETHER) {
838                 __dev_addr_set(dev, &p->iph.saddr, 4);
839                 memcpy(dev->broadcast, &p->iph.daddr, 4);
840         }
841         ip_tunnel_add(itn, t);
842
843         t->parms.iph.ttl = p->iph.ttl;
844         t->parms.iph.tos = p->iph.tos;
845         t->parms.iph.frag_off = p->iph.frag_off;
846
847         if (t->parms.link != p->link || t->fwmark != fwmark) {
848                 int mtu;
849
850                 t->parms.link = p->link;
851                 t->fwmark = fwmark;
852                 mtu = ip_tunnel_bind_dev(dev);
853                 if (set_mtu)
854                         dev->mtu = mtu;
855         }
856         dst_cache_reset(&t->dst_cache);
857         netdev_state_change(dev);
858 }
859
860 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
861 {
862         int err = 0;
863         struct ip_tunnel *t = netdev_priv(dev);
864         struct net *net = t->net;
865         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
866
867         switch (cmd) {
868         case SIOCGETTUNNEL:
869                 if (dev == itn->fb_tunnel_dev) {
870                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
871                         if (!t)
872                                 t = netdev_priv(dev);
873                 }
874                 memcpy(p, &t->parms, sizeof(*p));
875                 break;
876
877         case SIOCADDTUNNEL:
878         case SIOCCHGTUNNEL:
879                 err = -EPERM;
880                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
881                         goto done;
882                 if (p->iph.ttl)
883                         p->iph.frag_off |= htons(IP_DF);
884                 if (!(p->i_flags & VTI_ISVTI)) {
885                         if (!(p->i_flags & TUNNEL_KEY))
886                                 p->i_key = 0;
887                         if (!(p->o_flags & TUNNEL_KEY))
888                                 p->o_key = 0;
889                 }
890
891                 t = ip_tunnel_find(itn, p, itn->type);
892
893                 if (cmd == SIOCADDTUNNEL) {
894                         if (!t) {
895                                 t = ip_tunnel_create(net, itn, p);
896                                 err = PTR_ERR_OR_ZERO(t);
897                                 break;
898                         }
899
900                         err = -EEXIST;
901                         break;
902                 }
903                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
904                         if (t) {
905                                 if (t->dev != dev) {
906                                         err = -EEXIST;
907                                         break;
908                                 }
909                         } else {
910                                 unsigned int nflags = 0;
911
912                                 if (ipv4_is_multicast(p->iph.daddr))
913                                         nflags = IFF_BROADCAST;
914                                 else if (p->iph.daddr)
915                                         nflags = IFF_POINTOPOINT;
916
917                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
918                                         err = -EINVAL;
919                                         break;
920                                 }
921
922                                 t = netdev_priv(dev);
923                         }
924                 }
925
926                 if (t) {
927                         err = 0;
928                         ip_tunnel_update(itn, t, dev, p, true, 0);
929                 } else {
930                         err = -ENOENT;
931                 }
932                 break;
933
934         case SIOCDELTUNNEL:
935                 err = -EPERM;
936                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
937                         goto done;
938
939                 if (dev == itn->fb_tunnel_dev) {
940                         err = -ENOENT;
941                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
942                         if (!t)
943                                 goto done;
944                         err = -EPERM;
945                         if (t == netdev_priv(itn->fb_tunnel_dev))
946                                 goto done;
947                         dev = t->dev;
948                 }
949                 unregister_netdevice(dev);
950                 err = 0;
951                 break;
952
953         default:
954                 err = -EINVAL;
955         }
956
957 done:
958         return err;
959 }
960 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
961
962 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
963                              void __user *data, int cmd)
964 {
965         struct ip_tunnel_parm p;
966         int err;
967
968         if (copy_from_user(&p, data, sizeof(p)))
969                 return -EFAULT;
970         err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
971         if (!err && copy_to_user(data, &p, sizeof(p)))
972                 return -EFAULT;
973         return err;
974 }
975 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
976
977 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
978 {
979         struct ip_tunnel *tunnel = netdev_priv(dev);
980         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
981         int max_mtu = IP_MAX_MTU - t_hlen;
982
983         if (dev->type == ARPHRD_ETHER)
984                 max_mtu -= dev->hard_header_len;
985
986         if (new_mtu < ETH_MIN_MTU)
987                 return -EINVAL;
988
989         if (new_mtu > max_mtu) {
990                 if (strict)
991                         return -EINVAL;
992
993                 new_mtu = max_mtu;
994         }
995
996         dev->mtu = new_mtu;
997         return 0;
998 }
999 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1000
1001 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1002 {
1003         return __ip_tunnel_change_mtu(dev, new_mtu, true);
1004 }
1005 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1006
1007 static void ip_tunnel_dev_free(struct net_device *dev)
1008 {
1009         struct ip_tunnel *tunnel = netdev_priv(dev);
1010
1011         gro_cells_destroy(&tunnel->gro_cells);
1012         dst_cache_destroy(&tunnel->dst_cache);
1013         free_percpu(dev->tstats);
1014 }
1015
1016 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1017 {
1018         struct ip_tunnel *tunnel = netdev_priv(dev);
1019         struct ip_tunnel_net *itn;
1020
1021         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1022
1023         if (itn->fb_tunnel_dev != dev) {
1024                 ip_tunnel_del(itn, netdev_priv(dev));
1025                 unregister_netdevice_queue(dev, head);
1026         }
1027 }
1028 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1029
1030 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1031 {
1032         struct ip_tunnel *tunnel = netdev_priv(dev);
1033
1034         return tunnel->net;
1035 }
1036 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1037
1038 int ip_tunnel_get_iflink(const struct net_device *dev)
1039 {
1040         struct ip_tunnel *tunnel = netdev_priv(dev);
1041
1042         return tunnel->parms.link;
1043 }
1044 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1045
1046 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1047                                   struct rtnl_link_ops *ops, char *devname)
1048 {
1049         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1050         struct ip_tunnel_parm parms;
1051         unsigned int i;
1052
1053         itn->rtnl_link_ops = ops;
1054         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1055                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1056
1057         if (!ops || !net_has_fallback_tunnels(net)) {
1058                 struct ip_tunnel_net *it_init_net;
1059
1060                 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1061                 itn->type = it_init_net->type;
1062                 itn->fb_tunnel_dev = NULL;
1063                 return 0;
1064         }
1065
1066         memset(&parms, 0, sizeof(parms));
1067         if (devname)
1068                 strlcpy(parms.name, devname, IFNAMSIZ);
1069
1070         rtnl_lock();
1071         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1072         /* FB netdevice is special: we have one, and only one per netns.
1073          * Allowing to move it to another netns is clearly unsafe.
1074          */
1075         if (!IS_ERR(itn->fb_tunnel_dev)) {
1076                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1077                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1078                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1079                 itn->type = itn->fb_tunnel_dev->type;
1080         }
1081         rtnl_unlock();
1082
1083         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1084 }
1085 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1086
1087 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1088                               struct list_head *head,
1089                               struct rtnl_link_ops *ops)
1090 {
1091         struct net_device *dev, *aux;
1092         int h;
1093
1094         for_each_netdev_safe(net, dev, aux)
1095                 if (dev->rtnl_link_ops == ops)
1096                         unregister_netdevice_queue(dev, head);
1097
1098         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1099                 struct ip_tunnel *t;
1100                 struct hlist_node *n;
1101                 struct hlist_head *thead = &itn->tunnels[h];
1102
1103                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1104                         /* If dev is in the same netns, it has already
1105                          * been added to the list by the previous loop.
1106                          */
1107                         if (!net_eq(dev_net(t->dev), net))
1108                                 unregister_netdevice_queue(t->dev, head);
1109         }
1110 }
1111
1112 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1113                            struct rtnl_link_ops *ops)
1114 {
1115         struct ip_tunnel_net *itn;
1116         struct net *net;
1117         LIST_HEAD(list);
1118
1119         rtnl_lock();
1120         list_for_each_entry(net, net_list, exit_list) {
1121                 itn = net_generic(net, id);
1122                 ip_tunnel_destroy(net, itn, &list, ops);
1123         }
1124         unregister_netdevice_many(&list);
1125         rtnl_unlock();
1126 }
1127 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1128
1129 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1130                       struct ip_tunnel_parm *p, __u32 fwmark)
1131 {
1132         struct ip_tunnel *nt;
1133         struct net *net = dev_net(dev);
1134         struct ip_tunnel_net *itn;
1135         int mtu;
1136         int err;
1137
1138         nt = netdev_priv(dev);
1139         itn = net_generic(net, nt->ip_tnl_net_id);
1140
1141         if (nt->collect_md) {
1142                 if (rtnl_dereference(itn->collect_md_tun))
1143                         return -EEXIST;
1144         } else {
1145                 if (ip_tunnel_find(itn, p, dev->type))
1146                         return -EEXIST;
1147         }
1148
1149         nt->net = net;
1150         nt->parms = *p;
1151         nt->fwmark = fwmark;
1152         err = register_netdevice(dev);
1153         if (err)
1154                 goto err_register_netdevice;
1155
1156         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1157                 eth_hw_addr_random(dev);
1158
1159         mtu = ip_tunnel_bind_dev(dev);
1160         if (tb[IFLA_MTU]) {
1161                 unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1162
1163                 if (dev->type == ARPHRD_ETHER)
1164                         max -= dev->hard_header_len;
1165
1166                 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1167         }
1168
1169         err = dev_set_mtu(dev, mtu);
1170         if (err)
1171                 goto err_dev_set_mtu;
1172
1173         ip_tunnel_add(itn, nt);
1174         return 0;
1175
1176 err_dev_set_mtu:
1177         unregister_netdevice(dev);
1178 err_register_netdevice:
1179         return err;
1180 }
1181 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1182
1183 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1184                          struct ip_tunnel_parm *p, __u32 fwmark)
1185 {
1186         struct ip_tunnel *t;
1187         struct ip_tunnel *tunnel = netdev_priv(dev);
1188         struct net *net = tunnel->net;
1189         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1190
1191         if (dev == itn->fb_tunnel_dev)
1192                 return -EINVAL;
1193
1194         t = ip_tunnel_find(itn, p, dev->type);
1195
1196         if (t) {
1197                 if (t->dev != dev)
1198                         return -EEXIST;
1199         } else {
1200                 t = tunnel;
1201
1202                 if (dev->type != ARPHRD_ETHER) {
1203                         unsigned int nflags = 0;
1204
1205                         if (ipv4_is_multicast(p->iph.daddr))
1206                                 nflags = IFF_BROADCAST;
1207                         else if (p->iph.daddr)
1208                                 nflags = IFF_POINTOPOINT;
1209
1210                         if ((dev->flags ^ nflags) &
1211                             (IFF_POINTOPOINT | IFF_BROADCAST))
1212                                 return -EINVAL;
1213                 }
1214         }
1215
1216         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1217         return 0;
1218 }
1219 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1220
1221 int ip_tunnel_init(struct net_device *dev)
1222 {
1223         struct ip_tunnel *tunnel = netdev_priv(dev);
1224         struct iphdr *iph = &tunnel->parms.iph;
1225         int err;
1226
1227         dev->needs_free_netdev = true;
1228         dev->priv_destructor = ip_tunnel_dev_free;
1229         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1230         if (!dev->tstats)
1231                 return -ENOMEM;
1232
1233         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1234         if (err) {
1235                 free_percpu(dev->tstats);
1236                 return err;
1237         }
1238
1239         err = gro_cells_init(&tunnel->gro_cells, dev);
1240         if (err) {
1241                 dst_cache_destroy(&tunnel->dst_cache);
1242                 free_percpu(dev->tstats);
1243                 return err;
1244         }
1245
1246         tunnel->dev = dev;
1247         tunnel->net = dev_net(dev);
1248         strcpy(tunnel->parms.name, dev->name);
1249         iph->version            = 4;
1250         iph->ihl                = 5;
1251
1252         if (tunnel->collect_md)
1253                 netif_keep_dst(dev);
1254         return 0;
1255 }
1256 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1257
1258 void ip_tunnel_uninit(struct net_device *dev)
1259 {
1260         struct ip_tunnel *tunnel = netdev_priv(dev);
1261         struct net *net = tunnel->net;
1262         struct ip_tunnel_net *itn;
1263
1264         itn = net_generic(net, tunnel->ip_tnl_net_id);
1265         ip_tunnel_del(itn, netdev_priv(dev));
1266         if (itn->fb_tunnel_dev == dev)
1267                 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1268
1269         dst_cache_reset(&tunnel->dst_cache);
1270 }
1271 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1272
1273 /* Do least required initialization, rest of init is done in tunnel_init call */
1274 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1275 {
1276         struct ip_tunnel *tunnel = netdev_priv(dev);
1277         tunnel->ip_tnl_net_id = net_id;
1278 }
1279 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1280
1281 MODULE_LICENSE("GPL");