Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[linux-2.6-microblaze.git] / net / ipv6 / af_inet6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      PF_INET6 socket protocol family
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Adapted from linux/net/ipv4/af_inet.c
10  *
11  *      Fixes:
12  *      piggy, Karl Knutson     :       Socket protocol table
13  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
14  *      Arnaldo Melo            :       check proc_net_create return, cleanups
15  */
16
17 #define pr_fmt(fmt) "IPv6: " fmt
18
19 #include <linux/module.h>
20 #include <linux/capability.h>
21 #include <linux/errno.h>
22 #include <linux/types.h>
23 #include <linux/socket.h>
24 #include <linux/in.h>
25 #include <linux/kernel.h>
26 #include <linux/timer.h>
27 #include <linux/string.h>
28 #include <linux/sockios.h>
29 #include <linux/net.h>
30 #include <linux/fcntl.h>
31 #include <linux/mm.h>
32 #include <linux/interrupt.h>
33 #include <linux/proc_fs.h>
34 #include <linux/stat.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37
38 #include <linux/inet.h>
39 #include <linux/netdevice.h>
40 #include <linux/icmpv6.h>
41 #include <linux/netfilter_ipv6.h>
42
43 #include <net/ip.h>
44 #include <net/ipv6.h>
45 #include <net/udp.h>
46 #include <net/udplite.h>
47 #include <net/tcp.h>
48 #include <net/ping.h>
49 #include <net/protocol.h>
50 #include <net/inet_common.h>
51 #include <net/route.h>
52 #include <net/transp_v6.h>
53 #include <net/ip6_route.h>
54 #include <net/addrconf.h>
55 #include <net/ipv6_stubs.h>
56 #include <net/ndisc.h>
57 #ifdef CONFIG_IPV6_TUNNEL
58 #include <net/ip6_tunnel.h>
59 #endif
60 #include <net/calipso.h>
61 #include <net/seg6.h>
62 #include <net/rpl.h>
63 #include <net/compat.h>
64 #include <net/xfrm.h>
65 #include <net/ioam6.h>
66
67 #include <linux/uaccess.h>
68 #include <linux/mroute6.h>
69
70 #include "ip6_offload.h"
71
72 MODULE_AUTHOR("Cast of dozens");
73 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
74 MODULE_LICENSE("GPL");
75
76 /* The inetsw6 table contains everything that inet6_create needs to
77  * build a new socket.
78  */
79 static struct list_head inetsw6[SOCK_MAX];
80 static DEFINE_SPINLOCK(inetsw6_lock);
81
82 struct ipv6_params ipv6_defaults = {
83         .disable_ipv6 = 0,
84         .autoconf = 1,
85 };
86
87 static int disable_ipv6_mod;
88
89 module_param_named(disable, disable_ipv6_mod, int, 0444);
90 MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
91
92 module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
93 MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
94
95 module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
96 MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
97
98 bool ipv6_mod_enabled(void)
99 {
100         return disable_ipv6_mod == 0;
101 }
102 EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
103
104 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
105 {
106         const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
107
108         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
109 }
110
111 static int inet6_create(struct net *net, struct socket *sock, int protocol,
112                         int kern)
113 {
114         struct inet_sock *inet;
115         struct ipv6_pinfo *np;
116         struct sock *sk;
117         struct inet_protosw *answer;
118         struct proto *answer_prot;
119         unsigned char answer_flags;
120         int try_loading_module = 0;
121         int err;
122
123         if (protocol < 0 || protocol >= IPPROTO_MAX)
124                 return -EINVAL;
125
126         /* Look for the requested type/protocol pair. */
127 lookup_protocol:
128         err = -ESOCKTNOSUPPORT;
129         rcu_read_lock();
130         list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
131
132                 err = 0;
133                 /* Check the non-wild match. */
134                 if (protocol == answer->protocol) {
135                         if (protocol != IPPROTO_IP)
136                                 break;
137                 } else {
138                         /* Check for the two wild cases. */
139                         if (IPPROTO_IP == protocol) {
140                                 protocol = answer->protocol;
141                                 break;
142                         }
143                         if (IPPROTO_IP == answer->protocol)
144                                 break;
145                 }
146                 err = -EPROTONOSUPPORT;
147         }
148
149         if (err) {
150                 if (try_loading_module < 2) {
151                         rcu_read_unlock();
152                         /*
153                          * Be more specific, e.g. net-pf-10-proto-132-type-1
154                          * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
155                          */
156                         if (++try_loading_module == 1)
157                                 request_module("net-pf-%d-proto-%d-type-%d",
158                                                 PF_INET6, protocol, sock->type);
159                         /*
160                          * Fall back to generic, e.g. net-pf-10-proto-132
161                          * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
162                          */
163                         else
164                                 request_module("net-pf-%d-proto-%d",
165                                                 PF_INET6, protocol);
166                         goto lookup_protocol;
167                 } else
168                         goto out_rcu_unlock;
169         }
170
171         err = -EPERM;
172         if (sock->type == SOCK_RAW && !kern &&
173             !ns_capable(net->user_ns, CAP_NET_RAW))
174                 goto out_rcu_unlock;
175
176         sock->ops = answer->ops;
177         answer_prot = answer->prot;
178         answer_flags = answer->flags;
179         rcu_read_unlock();
180
181         WARN_ON(!answer_prot->slab);
182
183         err = -ENOBUFS;
184         sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
185         if (!sk)
186                 goto out;
187
188         sock_init_data(sock, sk);
189
190         err = 0;
191         if (INET_PROTOSW_REUSE & answer_flags)
192                 sk->sk_reuse = SK_CAN_REUSE;
193
194         inet = inet_sk(sk);
195         inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
196
197         if (SOCK_RAW == sock->type) {
198                 inet->inet_num = protocol;
199                 if (IPPROTO_RAW == protocol)
200                         inet->hdrincl = 1;
201         }
202
203         sk->sk_destruct         = inet_sock_destruct;
204         sk->sk_family           = PF_INET6;
205         sk->sk_protocol         = protocol;
206
207         sk->sk_backlog_rcv      = answer->prot->backlog_rcv;
208
209         inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
210         np->hop_limit   = -1;
211         np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
212         np->mc_loop     = 1;
213         np->mc_all      = 1;
214         np->pmtudisc    = IPV6_PMTUDISC_WANT;
215         np->repflow     = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
216         sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
217
218         /* Init the ipv4 part of the socket since we can have sockets
219          * using v6 API for ipv4.
220          */
221         inet->uc_ttl    = -1;
222
223         inet->mc_loop   = 1;
224         inet->mc_ttl    = 1;
225         inet->mc_index  = 0;
226         RCU_INIT_POINTER(inet->mc_list, NULL);
227         inet->rcv_tos   = 0;
228
229         if (net->ipv4.sysctl_ip_no_pmtu_disc)
230                 inet->pmtudisc = IP_PMTUDISC_DONT;
231         else
232                 inet->pmtudisc = IP_PMTUDISC_WANT;
233         /*
234          * Increment only the relevant sk_prot->socks debug field, this changes
235          * the previous behaviour of incrementing both the equivalent to
236          * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
237          *
238          * This allows better debug granularity as we'll know exactly how many
239          * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
240          * transport protocol socks. -acme
241          */
242         sk_refcnt_debug_inc(sk);
243
244         if (inet->inet_num) {
245                 /* It assumes that any protocol which allows
246                  * the user to assign a number at socket
247                  * creation time automatically shares.
248                  */
249                 inet->inet_sport = htons(inet->inet_num);
250                 err = sk->sk_prot->hash(sk);
251                 if (err) {
252                         sk_common_release(sk);
253                         goto out;
254                 }
255         }
256         if (sk->sk_prot->init) {
257                 err = sk->sk_prot->init(sk);
258                 if (err) {
259                         sk_common_release(sk);
260                         goto out;
261                 }
262         }
263
264         if (!kern) {
265                 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
266                 if (err) {
267                         sk_common_release(sk);
268                         goto out;
269                 }
270         }
271 out:
272         return err;
273 out_rcu_unlock:
274         rcu_read_unlock();
275         goto out;
276 }
277
278 static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
279                         u32 flags)
280 {
281         struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
282         struct inet_sock *inet = inet_sk(sk);
283         struct ipv6_pinfo *np = inet6_sk(sk);
284         struct net *net = sock_net(sk);
285         __be32 v4addr = 0;
286         unsigned short snum;
287         bool saved_ipv6only;
288         int addr_type = 0;
289         int err = 0;
290
291         if (addr->sin6_family != AF_INET6)
292                 return -EAFNOSUPPORT;
293
294         addr_type = ipv6_addr_type(&addr->sin6_addr);
295         if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
296                 return -EINVAL;
297
298         snum = ntohs(addr->sin6_port);
299         if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
300             snum && inet_port_requires_bind_service(net, snum) &&
301             !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
302                 return -EACCES;
303
304         if (flags & BIND_WITH_LOCK)
305                 lock_sock(sk);
306
307         /* Check these errors (active socket, double bind). */
308         if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
309                 err = -EINVAL;
310                 goto out;
311         }
312
313         /* Check if the address belongs to the host. */
314         if (addr_type == IPV6_ADDR_MAPPED) {
315                 struct net_device *dev = NULL;
316                 int chk_addr_ret;
317
318                 /* Binding to v4-mapped address on a v6-only socket
319                  * makes no sense
320                  */
321                 if (sk->sk_ipv6only) {
322                         err = -EINVAL;
323                         goto out;
324                 }
325
326                 rcu_read_lock();
327                 if (sk->sk_bound_dev_if) {
328                         dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
329                         if (!dev) {
330                                 err = -ENODEV;
331                                 goto out_unlock;
332                         }
333                 }
334
335                 /* Reproduce AF_INET checks to make the bindings consistent */
336                 v4addr = addr->sin6_addr.s6_addr32[3];
337                 chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
338                 rcu_read_unlock();
339
340                 if (!inet_can_nonlocal_bind(net, inet) &&
341                     v4addr != htonl(INADDR_ANY) &&
342                     chk_addr_ret != RTN_LOCAL &&
343                     chk_addr_ret != RTN_MULTICAST &&
344                     chk_addr_ret != RTN_BROADCAST) {
345                         err = -EADDRNOTAVAIL;
346                         goto out;
347                 }
348         } else {
349                 if (addr_type != IPV6_ADDR_ANY) {
350                         struct net_device *dev = NULL;
351
352                         rcu_read_lock();
353                         if (__ipv6_addr_needs_scope_id(addr_type)) {
354                                 if (addr_len >= sizeof(struct sockaddr_in6) &&
355                                     addr->sin6_scope_id) {
356                                         /* Override any existing binding, if another one
357                                          * is supplied by user.
358                                          */
359                                         sk->sk_bound_dev_if = addr->sin6_scope_id;
360                                 }
361
362                                 /* Binding to link-local address requires an interface */
363                                 if (!sk->sk_bound_dev_if) {
364                                         err = -EINVAL;
365                                         goto out_unlock;
366                                 }
367                         }
368
369                         if (sk->sk_bound_dev_if) {
370                                 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
371                                 if (!dev) {
372                                         err = -ENODEV;
373                                         goto out_unlock;
374                                 }
375                         }
376
377                         /* ipv4 addr of the socket is invalid.  Only the
378                          * unspecified and mapped address have a v4 equivalent.
379                          */
380                         v4addr = LOOPBACK4_IPV6;
381                         if (!(addr_type & IPV6_ADDR_MULTICAST)) {
382                                 if (!ipv6_can_nonlocal_bind(net, inet) &&
383                                     !ipv6_chk_addr(net, &addr->sin6_addr,
384                                                    dev, 0)) {
385                                         err = -EADDRNOTAVAIL;
386                                         goto out_unlock;
387                                 }
388                         }
389                         rcu_read_unlock();
390                 }
391         }
392
393         inet->inet_rcv_saddr = v4addr;
394         inet->inet_saddr = v4addr;
395
396         sk->sk_v6_rcv_saddr = addr->sin6_addr;
397
398         if (!(addr_type & IPV6_ADDR_MULTICAST))
399                 np->saddr = addr->sin6_addr;
400
401         saved_ipv6only = sk->sk_ipv6only;
402         if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
403                 sk->sk_ipv6only = 1;
404
405         /* Make sure we are allowed to bind here. */
406         if (snum || !(inet->bind_address_no_port ||
407                       (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
408                 if (sk->sk_prot->get_port(sk, snum)) {
409                         sk->sk_ipv6only = saved_ipv6only;
410                         inet_reset_saddr(sk);
411                         err = -EADDRINUSE;
412                         goto out;
413                 }
414                 if (!(flags & BIND_FROM_BPF)) {
415                         err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
416                         if (err) {
417                                 sk->sk_ipv6only = saved_ipv6only;
418                                 inet_reset_saddr(sk);
419                                 goto out;
420                         }
421                 }
422         }
423
424         if (addr_type != IPV6_ADDR_ANY)
425                 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
426         if (snum)
427                 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
428         inet->inet_sport = htons(inet->inet_num);
429         inet->inet_dport = 0;
430         inet->inet_daddr = 0;
431 out:
432         if (flags & BIND_WITH_LOCK)
433                 release_sock(sk);
434         return err;
435 out_unlock:
436         rcu_read_unlock();
437         goto out;
438 }
439
440 /* bind for INET6 API */
441 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
442 {
443         struct sock *sk = sock->sk;
444         u32 flags = BIND_WITH_LOCK;
445         int err = 0;
446
447         /* If the socket has its own bind function then use it. */
448         if (sk->sk_prot->bind)
449                 return sk->sk_prot->bind(sk, uaddr, addr_len);
450
451         if (addr_len < SIN6_LEN_RFC2133)
452                 return -EINVAL;
453
454         /* BPF prog is run before any checks are done so that if the prog
455          * changes context in a wrong way it will be caught.
456          */
457         err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
458                                                  CGROUP_INET6_BIND, &flags);
459         if (err)
460                 return err;
461
462         return __inet6_bind(sk, uaddr, addr_len, flags);
463 }
464 EXPORT_SYMBOL(inet6_bind);
465
466 int inet6_release(struct socket *sock)
467 {
468         struct sock *sk = sock->sk;
469
470         if (!sk)
471                 return -EINVAL;
472
473         /* Free mc lists */
474         ipv6_sock_mc_close(sk);
475
476         /* Free ac lists */
477         ipv6_sock_ac_close(sk);
478
479         return inet_release(sock);
480 }
481 EXPORT_SYMBOL(inet6_release);
482
483 void inet6_destroy_sock(struct sock *sk)
484 {
485         struct ipv6_pinfo *np = inet6_sk(sk);
486         struct sk_buff *skb;
487         struct ipv6_txoptions *opt;
488
489         /* Release rx options */
490
491         skb = xchg(&np->pktoptions, NULL);
492         kfree_skb(skb);
493
494         skb = xchg(&np->rxpmtu, NULL);
495         kfree_skb(skb);
496
497         /* Free flowlabels */
498         fl6_free_socklist(sk);
499
500         /* Free tx options */
501
502         opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
503         if (opt) {
504                 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
505                 txopt_put(opt);
506         }
507 }
508 EXPORT_SYMBOL_GPL(inet6_destroy_sock);
509
510 /*
511  *      This does both peername and sockname.
512  */
513 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
514                   int peer)
515 {
516         struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
517         struct sock *sk = sock->sk;
518         struct inet_sock *inet = inet_sk(sk);
519         struct ipv6_pinfo *np = inet6_sk(sk);
520
521         sin->sin6_family = AF_INET6;
522         sin->sin6_flowinfo = 0;
523         sin->sin6_scope_id = 0;
524         if (peer) {
525                 if (!inet->inet_dport)
526                         return -ENOTCONN;
527                 if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
528                     peer == 1)
529                         return -ENOTCONN;
530                 sin->sin6_port = inet->inet_dport;
531                 sin->sin6_addr = sk->sk_v6_daddr;
532                 if (np->sndflow)
533                         sin->sin6_flowinfo = np->flow_label;
534                 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
535                                             CGROUP_INET6_GETPEERNAME,
536                                             NULL);
537         } else {
538                 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
539                         sin->sin6_addr = np->saddr;
540                 else
541                         sin->sin6_addr = sk->sk_v6_rcv_saddr;
542                 sin->sin6_port = inet->inet_sport;
543                 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
544                                             CGROUP_INET6_GETSOCKNAME,
545                                             NULL);
546         }
547         sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
548                                                  sk->sk_bound_dev_if);
549         return sizeof(*sin);
550 }
551 EXPORT_SYMBOL(inet6_getname);
552
553 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
554 {
555         void __user *argp = (void __user *)arg;
556         struct sock *sk = sock->sk;
557         struct net *net = sock_net(sk);
558
559         switch (cmd) {
560         case SIOCADDRT:
561         case SIOCDELRT: {
562                 struct in6_rtmsg rtmsg;
563
564                 if (copy_from_user(&rtmsg, argp, sizeof(rtmsg)))
565                         return -EFAULT;
566                 return ipv6_route_ioctl(net, cmd, &rtmsg);
567         }
568         case SIOCSIFADDR:
569                 return addrconf_add_ifaddr(net, argp);
570         case SIOCDIFADDR:
571                 return addrconf_del_ifaddr(net, argp);
572         case SIOCSIFDSTADDR:
573                 return addrconf_set_dstaddr(net, argp);
574         default:
575                 if (!sk->sk_prot->ioctl)
576                         return -ENOIOCTLCMD;
577                 return sk->sk_prot->ioctl(sk, cmd, arg);
578         }
579         /*NOTREACHED*/
580         return 0;
581 }
582 EXPORT_SYMBOL(inet6_ioctl);
583
584 #ifdef CONFIG_COMPAT
585 struct compat_in6_rtmsg {
586         struct in6_addr         rtmsg_dst;
587         struct in6_addr         rtmsg_src;
588         struct in6_addr         rtmsg_gateway;
589         u32                     rtmsg_type;
590         u16                     rtmsg_dst_len;
591         u16                     rtmsg_src_len;
592         u32                     rtmsg_metric;
593         u32                     rtmsg_info;
594         u32                     rtmsg_flags;
595         s32                     rtmsg_ifindex;
596 };
597
598 static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
599                 struct compat_in6_rtmsg __user *ur)
600 {
601         struct in6_rtmsg rt;
602
603         if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst,
604                         3 * sizeof(struct in6_addr)) ||
605             get_user(rt.rtmsg_type, &ur->rtmsg_type) ||
606             get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) ||
607             get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) ||
608             get_user(rt.rtmsg_metric, &ur->rtmsg_metric) ||
609             get_user(rt.rtmsg_info, &ur->rtmsg_info) ||
610             get_user(rt.rtmsg_flags, &ur->rtmsg_flags) ||
611             get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex))
612                 return -EFAULT;
613
614
615         return ipv6_route_ioctl(sock_net(sk), cmd, &rt);
616 }
617
618 int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
619 {
620         void __user *argp = compat_ptr(arg);
621         struct sock *sk = sock->sk;
622
623         switch (cmd) {
624         case SIOCADDRT:
625         case SIOCDELRT:
626                 return inet6_compat_routing_ioctl(sk, cmd, argp);
627         default:
628                 return -ENOIOCTLCMD;
629         }
630 }
631 EXPORT_SYMBOL_GPL(inet6_compat_ioctl);
632 #endif /* CONFIG_COMPAT */
633
634 INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
635                                             size_t));
636 int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
637 {
638         struct sock *sk = sock->sk;
639
640         if (unlikely(inet_send_prepare(sk)))
641                 return -EAGAIN;
642
643         return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
644                                sk, msg, size);
645 }
646
647 INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
648                                             size_t, int, int, int *));
649 int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
650                   int flags)
651 {
652         struct sock *sk = sock->sk;
653         int addr_len = 0;
654         int err;
655
656         if (likely(!(flags & MSG_ERRQUEUE)))
657                 sock_rps_record_flow(sk);
658
659         err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
660                               sk, msg, size, flags & MSG_DONTWAIT,
661                               flags & ~MSG_DONTWAIT, &addr_len);
662         if (err >= 0)
663                 msg->msg_namelen = addr_len;
664         return err;
665 }
666
667 const struct proto_ops inet6_stream_ops = {
668         .family            = PF_INET6,
669         .owner             = THIS_MODULE,
670         .release           = inet6_release,
671         .bind              = inet6_bind,
672         .connect           = inet_stream_connect,       /* ok           */
673         .socketpair        = sock_no_socketpair,        /* a do nothing */
674         .accept            = inet_accept,               /* ok           */
675         .getname           = inet6_getname,
676         .poll              = tcp_poll,                  /* ok           */
677         .ioctl             = inet6_ioctl,               /* must change  */
678         .gettstamp         = sock_gettstamp,
679         .listen            = inet_listen,               /* ok           */
680         .shutdown          = inet_shutdown,             /* ok           */
681         .setsockopt        = sock_common_setsockopt,    /* ok           */
682         .getsockopt        = sock_common_getsockopt,    /* ok           */
683         .sendmsg           = inet6_sendmsg,             /* retpoline's sake */
684         .recvmsg           = inet6_recvmsg,             /* retpoline's sake */
685 #ifdef CONFIG_MMU
686         .mmap              = tcp_mmap,
687 #endif
688         .sendpage          = inet_sendpage,
689         .sendmsg_locked    = tcp_sendmsg_locked,
690         .sendpage_locked   = tcp_sendpage_locked,
691         .splice_read       = tcp_splice_read,
692         .read_sock         = tcp_read_sock,
693         .peek_len          = tcp_peek_len,
694 #ifdef CONFIG_COMPAT
695         .compat_ioctl      = inet6_compat_ioctl,
696 #endif
697         .set_rcvlowat      = tcp_set_rcvlowat,
698 };
699
700 const struct proto_ops inet6_dgram_ops = {
701         .family            = PF_INET6,
702         .owner             = THIS_MODULE,
703         .release           = inet6_release,
704         .bind              = inet6_bind,
705         .connect           = inet_dgram_connect,        /* ok           */
706         .socketpair        = sock_no_socketpair,        /* a do nothing */
707         .accept            = sock_no_accept,            /* a do nothing */
708         .getname           = inet6_getname,
709         .poll              = udp_poll,                  /* ok           */
710         .ioctl             = inet6_ioctl,               /* must change  */
711         .gettstamp         = sock_gettstamp,
712         .listen            = sock_no_listen,            /* ok           */
713         .shutdown          = inet_shutdown,             /* ok           */
714         .setsockopt        = sock_common_setsockopt,    /* ok           */
715         .getsockopt        = sock_common_getsockopt,    /* ok           */
716         .sendmsg           = inet6_sendmsg,             /* retpoline's sake */
717         .recvmsg           = inet6_recvmsg,             /* retpoline's sake */
718         .read_sock         = udp_read_sock,
719         .mmap              = sock_no_mmap,
720         .sendpage          = sock_no_sendpage,
721         .set_peek_off      = sk_set_peek_off,
722 #ifdef CONFIG_COMPAT
723         .compat_ioctl      = inet6_compat_ioctl,
724 #endif
725 };
726
727 static const struct net_proto_family inet6_family_ops = {
728         .family = PF_INET6,
729         .create = inet6_create,
730         .owner  = THIS_MODULE,
731 };
732
733 int inet6_register_protosw(struct inet_protosw *p)
734 {
735         struct list_head *lh;
736         struct inet_protosw *answer;
737         struct list_head *last_perm;
738         int protocol = p->protocol;
739         int ret;
740
741         spin_lock_bh(&inetsw6_lock);
742
743         ret = -EINVAL;
744         if (p->type >= SOCK_MAX)
745                 goto out_illegal;
746
747         /* If we are trying to override a permanent protocol, bail. */
748         answer = NULL;
749         ret = -EPERM;
750         last_perm = &inetsw6[p->type];
751         list_for_each(lh, &inetsw6[p->type]) {
752                 answer = list_entry(lh, struct inet_protosw, list);
753
754                 /* Check only the non-wild match. */
755                 if (INET_PROTOSW_PERMANENT & answer->flags) {
756                         if (protocol == answer->protocol)
757                                 break;
758                         last_perm = lh;
759                 }
760
761                 answer = NULL;
762         }
763         if (answer)
764                 goto out_permanent;
765
766         /* Add the new entry after the last permanent entry if any, so that
767          * the new entry does not override a permanent entry when matched with
768          * a wild-card protocol. But it is allowed to override any existing
769          * non-permanent entry.  This means that when we remove this entry, the
770          * system automatically returns to the old behavior.
771          */
772         list_add_rcu(&p->list, last_perm);
773         ret = 0;
774 out:
775         spin_unlock_bh(&inetsw6_lock);
776         return ret;
777
778 out_permanent:
779         pr_err("Attempt to override permanent protocol %d\n", protocol);
780         goto out;
781
782 out_illegal:
783         pr_err("Ignoring attempt to register invalid socket type %d\n",
784                p->type);
785         goto out;
786 }
787 EXPORT_SYMBOL(inet6_register_protosw);
788
789 void
790 inet6_unregister_protosw(struct inet_protosw *p)
791 {
792         if (INET_PROTOSW_PERMANENT & p->flags) {
793                 pr_err("Attempt to unregister permanent protocol %d\n",
794                        p->protocol);
795         } else {
796                 spin_lock_bh(&inetsw6_lock);
797                 list_del_rcu(&p->list);
798                 spin_unlock_bh(&inetsw6_lock);
799
800                 synchronize_net();
801         }
802 }
803 EXPORT_SYMBOL(inet6_unregister_protosw);
804
805 int inet6_sk_rebuild_header(struct sock *sk)
806 {
807         struct ipv6_pinfo *np = inet6_sk(sk);
808         struct dst_entry *dst;
809
810         dst = __sk_dst_check(sk, np->dst_cookie);
811
812         if (!dst) {
813                 struct inet_sock *inet = inet_sk(sk);
814                 struct in6_addr *final_p, final;
815                 struct flowi6 fl6;
816
817                 memset(&fl6, 0, sizeof(fl6));
818                 fl6.flowi6_proto = sk->sk_protocol;
819                 fl6.daddr = sk->sk_v6_daddr;
820                 fl6.saddr = np->saddr;
821                 fl6.flowlabel = np->flow_label;
822                 fl6.flowi6_oif = sk->sk_bound_dev_if;
823                 fl6.flowi6_mark = sk->sk_mark;
824                 fl6.fl6_dport = inet->inet_dport;
825                 fl6.fl6_sport = inet->inet_sport;
826                 fl6.flowi6_uid = sk->sk_uid;
827                 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
828
829                 rcu_read_lock();
830                 final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
831                                          &final);
832                 rcu_read_unlock();
833
834                 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
835                 if (IS_ERR(dst)) {
836                         sk->sk_route_caps = 0;
837                         sk->sk_err_soft = -PTR_ERR(dst);
838                         return PTR_ERR(dst);
839                 }
840
841                 ip6_dst_store(sk, dst, NULL, NULL);
842         }
843
844         return 0;
845 }
846 EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
847
848 bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
849                        const struct inet6_skb_parm *opt)
850 {
851         const struct ipv6_pinfo *np = inet6_sk(sk);
852
853         if (np->rxopt.all) {
854                 if (((opt->flags & IP6SKB_HOPBYHOP) &&
855                      (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
856                     (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
857                      np->rxopt.bits.rxflow) ||
858                     (opt->srcrt && (np->rxopt.bits.srcrt ||
859                      np->rxopt.bits.osrcrt)) ||
860                     ((opt->dst1 || opt->dst0) &&
861                      (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
862                         return true;
863         }
864         return false;
865 }
866 EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
867
868 static struct packet_type ipv6_packet_type __read_mostly = {
869         .type = cpu_to_be16(ETH_P_IPV6),
870         .func = ipv6_rcv,
871         .list_func = ipv6_list_rcv,
872 };
873
874 static int __init ipv6_packet_init(void)
875 {
876         dev_add_pack(&ipv6_packet_type);
877         return 0;
878 }
879
880 static void ipv6_packet_cleanup(void)
881 {
882         dev_remove_pack(&ipv6_packet_type);
883 }
884
885 static int __net_init ipv6_init_mibs(struct net *net)
886 {
887         int i;
888
889         net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
890         if (!net->mib.udp_stats_in6)
891                 return -ENOMEM;
892         net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
893         if (!net->mib.udplite_stats_in6)
894                 goto err_udplite_mib;
895         net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
896         if (!net->mib.ipv6_statistics)
897                 goto err_ip_mib;
898
899         for_each_possible_cpu(i) {
900                 struct ipstats_mib *af_inet6_stats;
901                 af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
902                 u64_stats_init(&af_inet6_stats->syncp);
903         }
904
905
906         net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
907         if (!net->mib.icmpv6_statistics)
908                 goto err_icmp_mib;
909         net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
910                                                 GFP_KERNEL);
911         if (!net->mib.icmpv6msg_statistics)
912                 goto err_icmpmsg_mib;
913         return 0;
914
915 err_icmpmsg_mib:
916         free_percpu(net->mib.icmpv6_statistics);
917 err_icmp_mib:
918         free_percpu(net->mib.ipv6_statistics);
919 err_ip_mib:
920         free_percpu(net->mib.udplite_stats_in6);
921 err_udplite_mib:
922         free_percpu(net->mib.udp_stats_in6);
923         return -ENOMEM;
924 }
925
926 static void ipv6_cleanup_mibs(struct net *net)
927 {
928         free_percpu(net->mib.udp_stats_in6);
929         free_percpu(net->mib.udplite_stats_in6);
930         free_percpu(net->mib.ipv6_statistics);
931         free_percpu(net->mib.icmpv6_statistics);
932         kfree(net->mib.icmpv6msg_statistics);
933 }
934
935 static int __net_init inet6_net_init(struct net *net)
936 {
937         int err = 0;
938
939         net->ipv6.sysctl.bindv6only = 0;
940         net->ipv6.sysctl.icmpv6_time = 1*HZ;
941         net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
942         net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
943         net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
944
945         /* By default, rate limit error messages.
946          * Except for pmtu discovery, it would break it.
947          * proc_do_large_bitmap needs pointer to the bitmap.
948          */
949         bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
950         bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
951         net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
952
953         net->ipv6.sysctl.flowlabel_consistency = 1;
954         net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
955         net->ipv6.sysctl.idgen_retries = 3;
956         net->ipv6.sysctl.idgen_delay = 1 * HZ;
957         net->ipv6.sysctl.flowlabel_state_ranges = 0;
958         net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
959         net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
960         net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
961         net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
962         net->ipv6.sysctl.fib_notify_on_flag_change = 0;
963         atomic_set(&net->ipv6.fib6_sernum, 1);
964
965         net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
966         net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
967
968         err = ipv6_init_mibs(net);
969         if (err)
970                 return err;
971 #ifdef CONFIG_PROC_FS
972         err = udp6_proc_init(net);
973         if (err)
974                 goto out;
975         err = tcp6_proc_init(net);
976         if (err)
977                 goto proc_tcp6_fail;
978         err = ac6_proc_init(net);
979         if (err)
980                 goto proc_ac6_fail;
981 #endif
982         return err;
983
984 #ifdef CONFIG_PROC_FS
985 proc_ac6_fail:
986         tcp6_proc_exit(net);
987 proc_tcp6_fail:
988         udp6_proc_exit(net);
989 out:
990         ipv6_cleanup_mibs(net);
991         return err;
992 #endif
993 }
994
995 static void __net_exit inet6_net_exit(struct net *net)
996 {
997 #ifdef CONFIG_PROC_FS
998         udp6_proc_exit(net);
999         tcp6_proc_exit(net);
1000         ac6_proc_exit(net);
1001 #endif
1002         ipv6_cleanup_mibs(net);
1003 }
1004
1005 static struct pernet_operations inet6_net_ops = {
1006         .init = inet6_net_init,
1007         .exit = inet6_net_exit,
1008 };
1009
1010 static int ipv6_route_input(struct sk_buff *skb)
1011 {
1012         ip6_route_input(skb);
1013         return skb_dst(skb)->error;
1014 }
1015
1016 static const struct ipv6_stub ipv6_stub_impl = {
1017         .ipv6_sock_mc_join = ipv6_sock_mc_join,
1018         .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
1019         .ipv6_dst_lookup_flow = ip6_dst_lookup_flow,
1020         .ipv6_route_input  = ipv6_route_input,
1021         .fib6_get_table    = fib6_get_table,
1022         .fib6_table_lookup = fib6_table_lookup,
1023         .fib6_lookup       = fib6_lookup,
1024         .fib6_select_path  = fib6_select_path,
1025         .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
1026         .fib6_nh_init      = fib6_nh_init,
1027         .fib6_nh_release   = fib6_nh_release,
1028         .fib6_update_sernum = fib6_update_sernum_stub,
1029         .fib6_rt_update    = fib6_rt_update,
1030         .ip6_del_rt        = ip6_del_rt,
1031         .udpv6_encap_enable = udpv6_encap_enable,
1032         .ndisc_send_na = ndisc_send_na,
1033 #if IS_ENABLED(CONFIG_XFRM)
1034         .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
1035         .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
1036         .xfrm6_rcv_encap = xfrm6_rcv_encap,
1037 #endif
1038         .nd_tbl = &nd_tbl,
1039         .ipv6_fragment = ip6_fragment,
1040         .ipv6_dev_find = ipv6_dev_find,
1041 };
1042
1043 static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
1044         .inet6_bind = __inet6_bind,
1045         .udp6_lib_lookup = __udp6_lib_lookup,
1046 };
1047
/*
 * inet6_init - module init routine of the IPv6 stack (hooked up through
 * module_init() right after the function).
 *
 * The inetsw6[] list heads are always initialised.  If disable_ipv6_mod is
 * set, a notice is logged and 0 is returned without registering anything
 * else.  Otherwise the function registers the TCPv6, UDPv6, UDP-Litev6,
 * RAWv6 and PINGv6 protos, calls rawv6_init(), registers the socket family
 * and the per-netns operations, and then runs the remaining init routines
 * in the order written below.  As the last step ipv6_stub and
 * ipv6_bpf_stub are pointed at their implementations.
 *
 * On the first failing step control jumps into the label ladder at the end
 * of the function, which runs the matching cleanup routines in reverse
 * order of initialisation and then leaves through "out".
 *
 * Returns 0 on success (or when administratively disabled), otherwise the
 * error of the failing step; a failing /proc helper yields -ENOMEM.
 */
1048 static int __init inet6_init(void)
1049 {
1050         struct list_head *r;
1051         int err = 0;
1052
1053         sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
1054
1055         /* Register the socket-side information for inet6_create.  */
1056         for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
1057                 INIT_LIST_HEAD(r);
1058
             /* err is still 0 here, so a disabled module loads "successfully". */
1059         if (disable_ipv6_mod) {
1060                 pr_info("Loaded, but administratively disabled, reboot required to enable\n");
1061                 goto out;
1062         }
1063
1064         err = proto_register(&tcpv6_prot, 1);
1065         if (err)
1066                 goto out;
1067
1068         err = proto_register(&udpv6_prot, 1);
1069         if (err)
1070                 goto out_unregister_tcp_proto;
1071
1072         err = proto_register(&udplitev6_prot, 1);
1073         if (err)
1074                 goto out_unregister_udp_proto;
1075
1076         err = proto_register(&rawv6_prot, 1);
1077         if (err)
1078                 goto out_unregister_udplite_proto;
1079
1080         err = proto_register(&pingv6_prot, 1);
1081         if (err)
1082                 goto out_unregister_raw_proto;
1083
1084         /* We MUST register RAW sockets before we create the ICMP6,
1085          * IGMP6, or NDISC control sockets.
1086          */
1087         err = rawv6_init();
1088         if (err)
1089                 goto out_unregister_ping_proto;
1090
1091         /* Register the family here so that the init calls below will
1092          * be able to create sockets. (?? is this dangerous ??)
1093          */
1094         err = sock_register(&inet6_family_ops);
1095         if (err)
1096                 goto out_sock_register_fail;
1097
1098         /*
1099          *      ipngwg API draft makes clear that the correct semantics
1100          *      for TCP and UDP is to consider one TCP and UDP instance
1101          *      in a host available by both INET and INET6 APIs and
1102          *      able to communicate via both network protocols.
1103          */
1104
1105         err = register_pernet_subsys(&inet6_net_ops);
1106         if (err)
1107                 goto register_pernet_fail;
1108         err = ip6_mr_init();
1109         if (err)
1110                 goto ipmr_fail;
1111         err = icmpv6_init();
1112         if (err)
1113                 goto icmp_fail;
1114         err = ndisc_init();
1115         if (err)
1116                 goto ndisc_fail;
1117         err = igmp6_init();
1118         if (err)
1119                 goto igmp_fail;
1120
1121         err = ipv6_netfilter_init();
1122         if (err)
1123                 goto netfilter_fail;
1124         /* Create /proc/foo6 entries. */
1125 #ifdef CONFIG_PROC_FS
             /* The proc helpers are only tested for non-zero, so err is
              * preset to the value reported if any of them fails.
              */
1126         err = -ENOMEM;
1127         if (raw6_proc_init())
1128                 goto proc_raw6_fail;
1129         if (udplite6_proc_init())
1130                 goto proc_udplite6_fail;
1131         if (ipv6_misc_proc_init())
1132                 goto proc_misc6_fail;
1133         if (if6_proc_init())
1134                 goto proc_if6_fail;
1135 #endif
1136         err = ip6_route_init();
1137         if (err)
1138                 goto ip6_route_fail;
1139         err = ndisc_late_init();
1140         if (err)
1141                 goto ndisc_late_fail;
1142         err = ip6_flowlabel_init();
1143         if (err)
1144                 goto ip6_flowlabel_fail;
1145         err = ipv6_anycast_init();
1146         if (err)
1147                 goto ipv6_anycast_fail;
1148         err = addrconf_init();
1149         if (err)
1150                 goto addrconf_fail;
1151
1152         /* Init v6 extension headers. */
1153         err = ipv6_exthdrs_init();
1154         if (err)
1155                 goto ipv6_exthdrs_fail;
1156
1157         err = ipv6_frag_init();
1158         if (err)
1159                 goto ipv6_frag_fail;
1160
1161         /* Init v6 transport protocols. */
1162         err = udpv6_init();
1163         if (err)
1164                 goto udpv6_fail;
1165
1166         err = udplitev6_init();
1167         if (err)
1168                 goto udplitev6_fail;
1169
1170         err = udpv6_offload_init();
1171         if (err)
1172                 goto udpv6_offload_fail;
1173
1174         err = tcpv6_init();
1175         if (err)
1176                 goto tcpv6_fail;
1177
1178         err = ipv6_packet_init();
1179         if (err)
1180                 goto ipv6_packet_fail;
1181
1182         err = pingv6_init();
1183         if (err)
1184                 goto pingv6_fail;
1185
1186         err = calipso_init();
1187         if (err)
1188                 goto calipso_fail;
1189
1190         err = seg6_init();
1191         if (err)
1192                 goto seg6_fail;
1193
1194         err = rpl_init();
1195         if (err)
1196                 goto rpl_fail;
1197
1198         err = ioam6_init();
1199         if (err)
1200                 goto ioam6_fail;
1201
1202         err = igmp6_late_init();
1203         if (err)
1204                 goto igmp6_late_err;
1205
1206 #ifdef CONFIG_SYSCTL
1207         err = ipv6_sysctl_register();
1208         if (err)
1209                 goto sysctl_fail;
1210 #endif
1211
1212         /* ensure that ipv6 stubs are visible only after ipv6 is ready */
1213         wmb();
1214         ipv6_stub = &ipv6_stub_impl;
1215         ipv6_bpf_stub = &ipv6_bpf_stub_impl;
     /* Common exit: success, the "disabled" case and the end of the error
      * unwinding below all return err from here.
      */
1216 out:
1217         return err;
1218
     /* Error unwinding.  The labels below are reached only via goto; each
      * one undoes the steps that had succeeded before the step that jumps
      * to it, in reverse order of initialisation.
      */
1219 #ifdef CONFIG_SYSCTL
1220 sysctl_fail:
1221         igmp6_late_cleanup();
1222 #endif
1223 igmp6_late_err:
1224         ioam6_exit();
1225 ioam6_fail:
1226         rpl_exit();
1227 rpl_fail:
1228         seg6_exit();
1229 seg6_fail:
1230         calipso_exit();
1231 calipso_fail:
1232         pingv6_exit();
1233 pingv6_fail:
1234         ipv6_packet_cleanup();
1235 ipv6_packet_fail:
1236         tcpv6_exit();
1237 tcpv6_fail:
1238         udpv6_offload_exit();
1239 udpv6_offload_fail:
1240         udplitev6_exit();
1241 udplitev6_fail:
1242         udpv6_exit();
1243 udpv6_fail:
1244         ipv6_frag_exit();
1245 ipv6_frag_fail:
1246         ipv6_exthdrs_exit();
1247 ipv6_exthdrs_fail:
1248         addrconf_cleanup();
1249 addrconf_fail:
1250         ipv6_anycast_cleanup();
1251 ipv6_anycast_fail:
1252         ip6_flowlabel_cleanup();
1253 ip6_flowlabel_fail:
1254         ndisc_late_cleanup();
1255 ndisc_late_fail:
1256         ip6_route_cleanup();
1257 ip6_route_fail:
1258 #ifdef CONFIG_PROC_FS
1259         if6_proc_exit();
1260 proc_if6_fail:
1261         ipv6_misc_proc_exit();
1262 proc_misc6_fail:
1263         udplite6_proc_exit();
1264 proc_udplite6_fail:
1265         raw6_proc_exit();
1266 proc_raw6_fail:
1267 #endif
1268         ipv6_netfilter_fini();
1269 netfilter_fail:
1270         igmp6_cleanup();
1271 igmp_fail:
1272         ndisc_cleanup();
1273 ndisc_fail:
1274         icmpv6_cleanup();
1275 icmp_fail:
1276         ip6_mr_cleanup();
1277 ipmr_fail:
1278         unregister_pernet_subsys(&inet6_net_ops);
1279 register_pernet_fail:
1280         sock_unregister(PF_INET6);
1281         rtnl_unregister_all(PF_INET6);
1282 out_sock_register_fail:
1283         rawv6_exit();
1284 out_unregister_ping_proto:
1285         proto_unregister(&pingv6_prot);
1286 out_unregister_raw_proto:
1287         proto_unregister(&rawv6_prot);
1288 out_unregister_udplite_proto:
1289         proto_unregister(&udplitev6_prot);
1290 out_unregister_udp_proto:
1291         proto_unregister(&udpv6_prot);
1292 out_unregister_tcp_proto:
1293         proto_unregister(&tcpv6_prot);
             /* Unwinding done; return the saved error via the common exit. */
1294         goto out;
1295 }
1296 module_init(inet6_init);
1297
1298 MODULE_ALIAS_NETPROTO(PF_INET6);