Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
[linux-2.6-microblaze.git] / net / ipv6 / af_inet6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      PF_INET6 socket protocol family
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Adapted from linux/net/ipv4/af_inet.c
10  *
11  *      Fixes:
12  *      piggy, Karl Knutson     :       Socket protocol table
13  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
14  *      Arnaldo Melo            :       check proc_net_create return, cleanups
15  */
16
17 #define pr_fmt(fmt) "IPv6: " fmt
18
19 #include <linux/module.h>
20 #include <linux/capability.h>
21 #include <linux/errno.h>
22 #include <linux/types.h>
23 #include <linux/socket.h>
24 #include <linux/in.h>
25 #include <linux/kernel.h>
26 #include <linux/timer.h>
27 #include <linux/string.h>
28 #include <linux/sockios.h>
29 #include <linux/net.h>
30 #include <linux/fcntl.h>
31 #include <linux/mm.h>
32 #include <linux/interrupt.h>
33 #include <linux/proc_fs.h>
34 #include <linux/stat.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37
38 #include <linux/inet.h>
39 #include <linux/netdevice.h>
40 #include <linux/icmpv6.h>
41 #include <linux/netfilter_ipv6.h>
42
43 #include <net/ip.h>
44 #include <net/ipv6.h>
45 #include <net/udp.h>
46 #include <net/udplite.h>
47 #include <net/tcp.h>
48 #include <net/ping.h>
49 #include <net/protocol.h>
50 #include <net/inet_common.h>
51 #include <net/route.h>
52 #include <net/transp_v6.h>
53 #include <net/ip6_route.h>
54 #include <net/addrconf.h>
55 #include <net/ipv6_stubs.h>
56 #include <net/ndisc.h>
57 #ifdef CONFIG_IPV6_TUNNEL
58 #include <net/ip6_tunnel.h>
59 #endif
60 #include <net/calipso.h>
61 #include <net/seg6.h>
62 #include <net/rpl.h>
63 #include <net/compat.h>
64 #include <net/xfrm.h>
65 #include <net/ioam6.h>
66
67 #include <linux/uaccess.h>
68 #include <linux/mroute6.h>
69
70 #include "ip6_offload.h"
71
72 MODULE_AUTHOR("Cast of dozens");
73 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
74 MODULE_LICENSE("GPL");
75
76 /* The inetsw6 table contains everything that inet6_create needs to
77  * build a new socket.
78  */
79 static struct list_head inetsw6[SOCK_MAX];
80 static DEFINE_SPINLOCK(inetsw6_lock);
81
82 struct ipv6_params ipv6_defaults = {
83         .disable_ipv6 = 0,
84         .autoconf = 1,
85 };
86
87 static int disable_ipv6_mod;
88
89 module_param_named(disable, disable_ipv6_mod, int, 0444);
90 MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
91
92 module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
93 MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
94
95 module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
96 MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
97
98 bool ipv6_mod_enabled(void)
99 {
100         return disable_ipv6_mod == 0;
101 }
102 EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
103
104 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
105 {
106         const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
107
108         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
109 }
110
111 static int inet6_create(struct net *net, struct socket *sock, int protocol,
112                         int kern)
113 {
114         struct inet_sock *inet;
115         struct ipv6_pinfo *np;
116         struct sock *sk;
117         struct inet_protosw *answer;
118         struct proto *answer_prot;
119         unsigned char answer_flags;
120         int try_loading_module = 0;
121         int err;
122
123         if (protocol < 0 || protocol >= IPPROTO_MAX)
124                 return -EINVAL;
125
126         /* Look for the requested type/protocol pair. */
127 lookup_protocol:
128         err = -ESOCKTNOSUPPORT;
129         rcu_read_lock();
130         list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
131
132                 err = 0;
133                 /* Check the non-wild match. */
134                 if (protocol == answer->protocol) {
135                         if (protocol != IPPROTO_IP)
136                                 break;
137                 } else {
138                         /* Check for the two wild cases. */
139                         if (IPPROTO_IP == protocol) {
140                                 protocol = answer->protocol;
141                                 break;
142                         }
143                         if (IPPROTO_IP == answer->protocol)
144                                 break;
145                 }
146                 err = -EPROTONOSUPPORT;
147         }
148
149         if (err) {
150                 if (try_loading_module < 2) {
151                         rcu_read_unlock();
152                         /*
153                          * Be more specific, e.g. net-pf-10-proto-132-type-1
154                          * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
155                          */
156                         if (++try_loading_module == 1)
157                                 request_module("net-pf-%d-proto-%d-type-%d",
158                                                 PF_INET6, protocol, sock->type);
159                         /*
160                          * Fall back to generic, e.g. net-pf-10-proto-132
161                          * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
162                          */
163                         else
164                                 request_module("net-pf-%d-proto-%d",
165                                                 PF_INET6, protocol);
166                         goto lookup_protocol;
167                 } else
168                         goto out_rcu_unlock;
169         }
170
171         err = -EPERM;
172         if (sock->type == SOCK_RAW && !kern &&
173             !ns_capable(net->user_ns, CAP_NET_RAW))
174                 goto out_rcu_unlock;
175
176         sock->ops = answer->ops;
177         answer_prot = answer->prot;
178         answer_flags = answer->flags;
179         rcu_read_unlock();
180
181         WARN_ON(!answer_prot->slab);
182
183         err = -ENOBUFS;
184         sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
185         if (!sk)
186                 goto out;
187
188         sock_init_data(sock, sk);
189
190         err = 0;
191         if (INET_PROTOSW_REUSE & answer_flags)
192                 sk->sk_reuse = SK_CAN_REUSE;
193
194         inet = inet_sk(sk);
195         inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
196
197         if (SOCK_RAW == sock->type) {
198                 inet->inet_num = protocol;
199                 if (IPPROTO_RAW == protocol)
200                         inet->hdrincl = 1;
201         }
202
203         sk->sk_destruct         = inet_sock_destruct;
204         sk->sk_family           = PF_INET6;
205         sk->sk_protocol         = protocol;
206
207         sk->sk_backlog_rcv      = answer->prot->backlog_rcv;
208
209         inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
210         np->hop_limit   = -1;
211         np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
212         np->mc_loop     = 1;
213         np->mc_all      = 1;
214         np->pmtudisc    = IPV6_PMTUDISC_WANT;
215         np->repflow     = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
216         sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
217
218         /* Init the ipv4 part of the socket since we can have sockets
219          * using v6 API for ipv4.
220          */
221         inet->uc_ttl    = -1;
222
223         inet->mc_loop   = 1;
224         inet->mc_ttl    = 1;
225         inet->mc_index  = 0;
226         RCU_INIT_POINTER(inet->mc_list, NULL);
227         inet->rcv_tos   = 0;
228
229         if (net->ipv4.sysctl_ip_no_pmtu_disc)
230                 inet->pmtudisc = IP_PMTUDISC_DONT;
231         else
232                 inet->pmtudisc = IP_PMTUDISC_WANT;
233         /*
234          * Increment only the relevant sk_prot->socks debug field, this changes
235          * the previous behaviour of incrementing both the equivalent to
236          * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
237          *
238          * This allows better debug granularity as we'll know exactly how many
239          * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
240          * transport protocol socks. -acme
241          */
242         sk_refcnt_debug_inc(sk);
243
244         if (inet->inet_num) {
245                 /* It assumes that any protocol which allows
246                  * the user to assign a number at socket
247                  * creation time automatically shares.
248                  */
249                 inet->inet_sport = htons(inet->inet_num);
250                 err = sk->sk_prot->hash(sk);
251                 if (err) {
252                         sk_common_release(sk);
253                         goto out;
254                 }
255         }
256         if (sk->sk_prot->init) {
257                 err = sk->sk_prot->init(sk);
258                 if (err) {
259                         sk_common_release(sk);
260                         goto out;
261                 }
262         }
263
264         if (!kern) {
265                 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
266                 if (err) {
267                         sk_common_release(sk);
268                         goto out;
269                 }
270         }
271 out:
272         return err;
273 out_rcu_unlock:
274         rcu_read_unlock();
275         goto out;
276 }
277
278 static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
279                         u32 flags)
280 {
281         struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
282         struct inet_sock *inet = inet_sk(sk);
283         struct ipv6_pinfo *np = inet6_sk(sk);
284         struct net *net = sock_net(sk);
285         __be32 v4addr = 0;
286         unsigned short snum;
287         bool saved_ipv6only;
288         int addr_type = 0;
289         int err = 0;
290
291         if (addr->sin6_family != AF_INET6)
292                 return -EAFNOSUPPORT;
293
294         addr_type = ipv6_addr_type(&addr->sin6_addr);
295         if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
296                 return -EINVAL;
297
298         snum = ntohs(addr->sin6_port);
299         if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
300             snum && inet_port_requires_bind_service(net, snum) &&
301             !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
302                 return -EACCES;
303
304         if (flags & BIND_WITH_LOCK)
305                 lock_sock(sk);
306
307         /* Check these errors (active socket, double bind). */
308         if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
309                 err = -EINVAL;
310                 goto out;
311         }
312
313         /* Check if the address belongs to the host. */
314         if (addr_type == IPV6_ADDR_MAPPED) {
315                 struct net_device *dev = NULL;
316                 int chk_addr_ret;
317
318                 /* Binding to v4-mapped address on a v6-only socket
319                  * makes no sense
320                  */
321                 if (sk->sk_ipv6only) {
322                         err = -EINVAL;
323                         goto out;
324                 }
325
326                 rcu_read_lock();
327                 if (sk->sk_bound_dev_if) {
328                         dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
329                         if (!dev) {
330                                 err = -ENODEV;
331                                 goto out_unlock;
332                         }
333                 }
334
335                 /* Reproduce AF_INET checks to make the bindings consistent */
336                 v4addr = addr->sin6_addr.s6_addr32[3];
337                 chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
338                 rcu_read_unlock();
339
340                 if (!inet_addr_valid_or_nonlocal(net, inet, v4addr,
341                                                  chk_addr_ret)) {
342                         err = -EADDRNOTAVAIL;
343                         goto out;
344                 }
345         } else {
346                 if (addr_type != IPV6_ADDR_ANY) {
347                         struct net_device *dev = NULL;
348
349                         rcu_read_lock();
350                         if (__ipv6_addr_needs_scope_id(addr_type)) {
351                                 if (addr_len >= sizeof(struct sockaddr_in6) &&
352                                     addr->sin6_scope_id) {
353                                         /* Override any existing binding, if another one
354                                          * is supplied by user.
355                                          */
356                                         sk->sk_bound_dev_if = addr->sin6_scope_id;
357                                 }
358
359                                 /* Binding to link-local address requires an interface */
360                                 if (!sk->sk_bound_dev_if) {
361                                         err = -EINVAL;
362                                         goto out_unlock;
363                                 }
364                         }
365
366                         if (sk->sk_bound_dev_if) {
367                                 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
368                                 if (!dev) {
369                                         err = -ENODEV;
370                                         goto out_unlock;
371                                 }
372                         }
373
374                         /* ipv4 addr of the socket is invalid.  Only the
375                          * unspecified and mapped address have a v4 equivalent.
376                          */
377                         v4addr = LOOPBACK4_IPV6;
378                         if (!(addr_type & IPV6_ADDR_MULTICAST)) {
379                                 if (!ipv6_can_nonlocal_bind(net, inet) &&
380                                     !ipv6_chk_addr(net, &addr->sin6_addr,
381                                                    dev, 0)) {
382                                         err = -EADDRNOTAVAIL;
383                                         goto out_unlock;
384                                 }
385                         }
386                         rcu_read_unlock();
387                 }
388         }
389
390         inet->inet_rcv_saddr = v4addr;
391         inet->inet_saddr = v4addr;
392
393         sk->sk_v6_rcv_saddr = addr->sin6_addr;
394
395         if (!(addr_type & IPV6_ADDR_MULTICAST))
396                 np->saddr = addr->sin6_addr;
397
398         saved_ipv6only = sk->sk_ipv6only;
399         if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
400                 sk->sk_ipv6only = 1;
401
402         /* Make sure we are allowed to bind here. */
403         if (snum || !(inet->bind_address_no_port ||
404                       (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
405                 if (sk->sk_prot->get_port(sk, snum)) {
406                         sk->sk_ipv6only = saved_ipv6only;
407                         inet_reset_saddr(sk);
408                         err = -EADDRINUSE;
409                         goto out;
410                 }
411                 if (!(flags & BIND_FROM_BPF)) {
412                         err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
413                         if (err) {
414                                 sk->sk_ipv6only = saved_ipv6only;
415                                 inet_reset_saddr(sk);
416                                 if (sk->sk_prot->put_port)
417                                         sk->sk_prot->put_port(sk);
418                                 goto out;
419                         }
420                 }
421         }
422
423         if (addr_type != IPV6_ADDR_ANY)
424                 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
425         if (snum)
426                 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
427         inet->inet_sport = htons(inet->inet_num);
428         inet->inet_dport = 0;
429         inet->inet_daddr = 0;
430 out:
431         if (flags & BIND_WITH_LOCK)
432                 release_sock(sk);
433         return err;
434 out_unlock:
435         rcu_read_unlock();
436         goto out;
437 }
438
439 /* bind for INET6 API */
440 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
441 {
442         struct sock *sk = sock->sk;
443         u32 flags = BIND_WITH_LOCK;
444         int err = 0;
445
446         /* If the socket has its own bind function then use it. */
447         if (sk->sk_prot->bind)
448                 return sk->sk_prot->bind(sk, uaddr, addr_len);
449
450         if (addr_len < SIN6_LEN_RFC2133)
451                 return -EINVAL;
452
453         /* BPF prog is run before any checks are done so that if the prog
454          * changes context in a wrong way it will be caught.
455          */
456         err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
457                                                  CGROUP_INET6_BIND, &flags);
458         if (err)
459                 return err;
460
461         return __inet6_bind(sk, uaddr, addr_len, flags);
462 }
463 EXPORT_SYMBOL(inet6_bind);
464
465 int inet6_release(struct socket *sock)
466 {
467         struct sock *sk = sock->sk;
468
469         if (!sk)
470                 return -EINVAL;
471
472         /* Free mc lists */
473         ipv6_sock_mc_close(sk);
474
475         /* Free ac lists */
476         ipv6_sock_ac_close(sk);
477
478         return inet_release(sock);
479 }
480 EXPORT_SYMBOL(inet6_release);
481
482 void inet6_destroy_sock(struct sock *sk)
483 {
484         struct ipv6_pinfo *np = inet6_sk(sk);
485         struct sk_buff *skb;
486         struct ipv6_txoptions *opt;
487
488         /* Release rx options */
489
490         skb = xchg(&np->pktoptions, NULL);
491         kfree_skb(skb);
492
493         skb = xchg(&np->rxpmtu, NULL);
494         kfree_skb(skb);
495
496         /* Free flowlabels */
497         fl6_free_socklist(sk);
498
499         /* Free tx options */
500
501         opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
502         if (opt) {
503                 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
504                 txopt_put(opt);
505         }
506 }
507 EXPORT_SYMBOL_GPL(inet6_destroy_sock);
508
509 /*
510  *      This does both peername and sockname.
511  */
512 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
513                   int peer)
514 {
515         struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
516         struct sock *sk = sock->sk;
517         struct inet_sock *inet = inet_sk(sk);
518         struct ipv6_pinfo *np = inet6_sk(sk);
519
520         sin->sin6_family = AF_INET6;
521         sin->sin6_flowinfo = 0;
522         sin->sin6_scope_id = 0;
523         lock_sock(sk);
524         if (peer) {
525                 if (!inet->inet_dport ||
526                     (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
527                     peer == 1)) {
528                         release_sock(sk);
529                         return -ENOTCONN;
530                 }
531                 sin->sin6_port = inet->inet_dport;
532                 sin->sin6_addr = sk->sk_v6_daddr;
533                 if (np->sndflow)
534                         sin->sin6_flowinfo = np->flow_label;
535                 BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
536                                        CGROUP_INET6_GETPEERNAME);
537         } else {
538                 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
539                         sin->sin6_addr = np->saddr;
540                 else
541                         sin->sin6_addr = sk->sk_v6_rcv_saddr;
542                 sin->sin6_port = inet->inet_sport;
543                 BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
544                                        CGROUP_INET6_GETSOCKNAME);
545         }
546         sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
547                                                  sk->sk_bound_dev_if);
548         release_sock(sk);
549         return sizeof(*sin);
550 }
551 EXPORT_SYMBOL(inet6_getname);
552
553 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
554 {
555         void __user *argp = (void __user *)arg;
556         struct sock *sk = sock->sk;
557         struct net *net = sock_net(sk);
558
559         switch (cmd) {
560         case SIOCADDRT:
561         case SIOCDELRT: {
562                 struct in6_rtmsg rtmsg;
563
564                 if (copy_from_user(&rtmsg, argp, sizeof(rtmsg)))
565                         return -EFAULT;
566                 return ipv6_route_ioctl(net, cmd, &rtmsg);
567         }
568         case SIOCSIFADDR:
569                 return addrconf_add_ifaddr(net, argp);
570         case SIOCDIFADDR:
571                 return addrconf_del_ifaddr(net, argp);
572         case SIOCSIFDSTADDR:
573                 return addrconf_set_dstaddr(net, argp);
574         default:
575                 if (!sk->sk_prot->ioctl)
576                         return -ENOIOCTLCMD;
577                 return sk->sk_prot->ioctl(sk, cmd, arg);
578         }
579         /*NOTREACHED*/
580         return 0;
581 }
582 EXPORT_SYMBOL(inet6_ioctl);
583
584 #ifdef CONFIG_COMPAT
585 struct compat_in6_rtmsg {
586         struct in6_addr         rtmsg_dst;
587         struct in6_addr         rtmsg_src;
588         struct in6_addr         rtmsg_gateway;
589         u32                     rtmsg_type;
590         u16                     rtmsg_dst_len;
591         u16                     rtmsg_src_len;
592         u32                     rtmsg_metric;
593         u32                     rtmsg_info;
594         u32                     rtmsg_flags;
595         s32                     rtmsg_ifindex;
596 };
597
598 static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
599                 struct compat_in6_rtmsg __user *ur)
600 {
601         struct in6_rtmsg rt;
602
603         if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst,
604                         3 * sizeof(struct in6_addr)) ||
605             get_user(rt.rtmsg_type, &ur->rtmsg_type) ||
606             get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) ||
607             get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) ||
608             get_user(rt.rtmsg_metric, &ur->rtmsg_metric) ||
609             get_user(rt.rtmsg_info, &ur->rtmsg_info) ||
610             get_user(rt.rtmsg_flags, &ur->rtmsg_flags) ||
611             get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex))
612                 return -EFAULT;
613
614
615         return ipv6_route_ioctl(sock_net(sk), cmd, &rt);
616 }
617
618 int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
619 {
620         void __user *argp = compat_ptr(arg);
621         struct sock *sk = sock->sk;
622
623         switch (cmd) {
624         case SIOCADDRT:
625         case SIOCDELRT:
626                 return inet6_compat_routing_ioctl(sk, cmd, argp);
627         default:
628                 return -ENOIOCTLCMD;
629         }
630 }
631 EXPORT_SYMBOL_GPL(inet6_compat_ioctl);
632 #endif /* CONFIG_COMPAT */
633
634 INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
635                                             size_t));
636 int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
637 {
638         struct sock *sk = sock->sk;
639
640         if (unlikely(inet_send_prepare(sk)))
641                 return -EAGAIN;
642
643         return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
644                                sk, msg, size);
645 }
646
647 INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
648                                             size_t, int, int, int *));
649 int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
650                   int flags)
651 {
652         struct sock *sk = sock->sk;
653         int addr_len = 0;
654         int err;
655
656         if (likely(!(flags & MSG_ERRQUEUE)))
657                 sock_rps_record_flow(sk);
658
659         err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
660                               sk, msg, size, flags & MSG_DONTWAIT,
661                               flags & ~MSG_DONTWAIT, &addr_len);
662         if (err >= 0)
663                 msg->msg_namelen = addr_len;
664         return err;
665 }
666
667 const struct proto_ops inet6_stream_ops = {
668         .family            = PF_INET6,
669         .owner             = THIS_MODULE,
670         .release           = inet6_release,
671         .bind              = inet6_bind,
672         .connect           = inet_stream_connect,       /* ok           */
673         .socketpair        = sock_no_socketpair,        /* a do nothing */
674         .accept            = inet_accept,               /* ok           */
675         .getname           = inet6_getname,
676         .poll              = tcp_poll,                  /* ok           */
677         .ioctl             = inet6_ioctl,               /* must change  */
678         .gettstamp         = sock_gettstamp,
679         .listen            = inet_listen,               /* ok           */
680         .shutdown          = inet_shutdown,             /* ok           */
681         .setsockopt        = sock_common_setsockopt,    /* ok           */
682         .getsockopt        = sock_common_getsockopt,    /* ok           */
683         .sendmsg           = inet6_sendmsg,             /* retpoline's sake */
684         .recvmsg           = inet6_recvmsg,             /* retpoline's sake */
685 #ifdef CONFIG_MMU
686         .mmap              = tcp_mmap,
687 #endif
688         .sendpage          = inet_sendpage,
689         .sendmsg_locked    = tcp_sendmsg_locked,
690         .sendpage_locked   = tcp_sendpage_locked,
691         .splice_read       = tcp_splice_read,
692         .read_sock         = tcp_read_sock,
693         .peek_len          = tcp_peek_len,
694 #ifdef CONFIG_COMPAT
695         .compat_ioctl      = inet6_compat_ioctl,
696 #endif
697         .set_rcvlowat      = tcp_set_rcvlowat,
698 };
699
700 const struct proto_ops inet6_dgram_ops = {
701         .family            = PF_INET6,
702         .owner             = THIS_MODULE,
703         .release           = inet6_release,
704         .bind              = inet6_bind,
705         .connect           = inet_dgram_connect,        /* ok           */
706         .socketpair        = sock_no_socketpair,        /* a do nothing */
707         .accept            = sock_no_accept,            /* a do nothing */
708         .getname           = inet6_getname,
709         .poll              = udp_poll,                  /* ok           */
710         .ioctl             = inet6_ioctl,               /* must change  */
711         .gettstamp         = sock_gettstamp,
712         .listen            = sock_no_listen,            /* ok           */
713         .shutdown          = inet_shutdown,             /* ok           */
714         .setsockopt        = sock_common_setsockopt,    /* ok           */
715         .getsockopt        = sock_common_getsockopt,    /* ok           */
716         .sendmsg           = inet6_sendmsg,             /* retpoline's sake */
717         .recvmsg           = inet6_recvmsg,             /* retpoline's sake */
718         .read_sock         = udp_read_sock,
719         .mmap              = sock_no_mmap,
720         .sendpage          = sock_no_sendpage,
721         .set_peek_off      = sk_set_peek_off,
722 #ifdef CONFIG_COMPAT
723         .compat_ioctl      = inet6_compat_ioctl,
724 #endif
725 };
726
727 static const struct net_proto_family inet6_family_ops = {
728         .family = PF_INET6,
729         .create = inet6_create,
730         .owner  = THIS_MODULE,
731 };
732
733 int inet6_register_protosw(struct inet_protosw *p)
734 {
735         struct list_head *lh;
736         struct inet_protosw *answer;
737         struct list_head *last_perm;
738         int protocol = p->protocol;
739         int ret;
740
741         spin_lock_bh(&inetsw6_lock);
742
743         ret = -EINVAL;
744         if (p->type >= SOCK_MAX)
745                 goto out_illegal;
746
747         /* If we are trying to override a permanent protocol, bail. */
748         answer = NULL;
749         ret = -EPERM;
750         last_perm = &inetsw6[p->type];
751         list_for_each(lh, &inetsw6[p->type]) {
752                 answer = list_entry(lh, struct inet_protosw, list);
753
754                 /* Check only the non-wild match. */
755                 if (INET_PROTOSW_PERMANENT & answer->flags) {
756                         if (protocol == answer->protocol)
757                                 break;
758                         last_perm = lh;
759                 }
760
761                 answer = NULL;
762         }
763         if (answer)
764                 goto out_permanent;
765
766         /* Add the new entry after the last permanent entry if any, so that
767          * the new entry does not override a permanent entry when matched with
768          * a wild-card protocol. But it is allowed to override any existing
769          * non-permanent entry.  This means that when we remove this entry, the
770          * system automatically returns to the old behavior.
771          */
772         list_add_rcu(&p->list, last_perm);
773         ret = 0;
774 out:
775         spin_unlock_bh(&inetsw6_lock);
776         return ret;
777
778 out_permanent:
779         pr_err("Attempt to override permanent protocol %d\n", protocol);
780         goto out;
781
782 out_illegal:
783         pr_err("Ignoring attempt to register invalid socket type %d\n",
784                p->type);
785         goto out;
786 }
787 EXPORT_SYMBOL(inet6_register_protosw);
788
789 void
790 inet6_unregister_protosw(struct inet_protosw *p)
791 {
792         if (INET_PROTOSW_PERMANENT & p->flags) {
793                 pr_err("Attempt to unregister permanent protocol %d\n",
794                        p->protocol);
795         } else {
796                 spin_lock_bh(&inetsw6_lock);
797                 list_del_rcu(&p->list);
798                 spin_unlock_bh(&inetsw6_lock);
799
800                 synchronize_net();
801         }
802 }
803 EXPORT_SYMBOL(inet6_unregister_protosw);
804
805 int inet6_sk_rebuild_header(struct sock *sk)
806 {
807         struct ipv6_pinfo *np = inet6_sk(sk);
808         struct dst_entry *dst;
809
810         dst = __sk_dst_check(sk, np->dst_cookie);
811
812         if (!dst) {
813                 struct inet_sock *inet = inet_sk(sk);
814                 struct in6_addr *final_p, final;
815                 struct flowi6 fl6;
816
817                 memset(&fl6, 0, sizeof(fl6));
818                 fl6.flowi6_proto = sk->sk_protocol;
819                 fl6.daddr = sk->sk_v6_daddr;
820                 fl6.saddr = np->saddr;
821                 fl6.flowlabel = np->flow_label;
822                 fl6.flowi6_oif = sk->sk_bound_dev_if;
823                 fl6.flowi6_mark = sk->sk_mark;
824                 fl6.fl6_dport = inet->inet_dport;
825                 fl6.fl6_sport = inet->inet_sport;
826                 fl6.flowi6_uid = sk->sk_uid;
827                 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
828
829                 rcu_read_lock();
830                 final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
831                                          &final);
832                 rcu_read_unlock();
833
834                 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
835                 if (IS_ERR(dst)) {
836                         sk->sk_route_caps = 0;
837                         sk->sk_err_soft = -PTR_ERR(dst);
838                         return PTR_ERR(dst);
839                 }
840
841                 ip6_dst_store(sk, dst, NULL, NULL);
842         }
843
844         return 0;
845 }
846 EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
847
848 bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
849                        const struct inet6_skb_parm *opt)
850 {
851         const struct ipv6_pinfo *np = inet6_sk(sk);
852
853         if (np->rxopt.all) {
854                 if (((opt->flags & IP6SKB_HOPBYHOP) &&
855                      (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
856                     (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
857                      np->rxopt.bits.rxflow) ||
858                     (opt->srcrt && (np->rxopt.bits.srcrt ||
859                      np->rxopt.bits.osrcrt)) ||
860                     ((opt->dst1 || opt->dst0) &&
861                      (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
862                         return true;
863         }
864         return false;
865 }
866 EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
867
868 static struct packet_type ipv6_packet_type __read_mostly = {
869         .type = cpu_to_be16(ETH_P_IPV6),
870         .func = ipv6_rcv,
871         .list_func = ipv6_list_rcv,
872 };
873
874 static int __init ipv6_packet_init(void)
875 {
876         dev_add_pack(&ipv6_packet_type);
877         return 0;
878 }
879
880 static void ipv6_packet_cleanup(void)
881 {
882         dev_remove_pack(&ipv6_packet_type);
883 }
884
885 static int __net_init ipv6_init_mibs(struct net *net)
886 {
887         int i;
888
889         net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
890         if (!net->mib.udp_stats_in6)
891                 return -ENOMEM;
892         net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
893         if (!net->mib.udplite_stats_in6)
894                 goto err_udplite_mib;
895         net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
896         if (!net->mib.ipv6_statistics)
897                 goto err_ip_mib;
898
899         for_each_possible_cpu(i) {
900                 struct ipstats_mib *af_inet6_stats;
901                 af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
902                 u64_stats_init(&af_inet6_stats->syncp);
903         }
904
905
906         net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
907         if (!net->mib.icmpv6_statistics)
908                 goto err_icmp_mib;
909         net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
910                                                 GFP_KERNEL);
911         if (!net->mib.icmpv6msg_statistics)
912                 goto err_icmpmsg_mib;
913         return 0;
914
915 err_icmpmsg_mib:
916         free_percpu(net->mib.icmpv6_statistics);
917 err_icmp_mib:
918         free_percpu(net->mib.ipv6_statistics);
919 err_ip_mib:
920         free_percpu(net->mib.udplite_stats_in6);
921 err_udplite_mib:
922         free_percpu(net->mib.udp_stats_in6);
923         return -ENOMEM;
924 }
925
926 static void ipv6_cleanup_mibs(struct net *net)
927 {
928         free_percpu(net->mib.udp_stats_in6);
929         free_percpu(net->mib.udplite_stats_in6);
930         free_percpu(net->mib.ipv6_statistics);
931         free_percpu(net->mib.icmpv6_statistics);
932         kfree(net->mib.icmpv6msg_statistics);
933 }
934
935 static int __net_init inet6_net_init(struct net *net)
936 {
937         int err = 0;
938
939         net->ipv6.sysctl.bindv6only = 0;
940         net->ipv6.sysctl.icmpv6_time = 1*HZ;
941         net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
942         net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
943         net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
944
945         /* By default, rate limit error messages.
946          * Except for pmtu discovery, it would break it.
947          * proc_do_large_bitmap needs pointer to the bitmap.
948          */
949         bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
950         bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
951         net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
952
953         net->ipv6.sysctl.flowlabel_consistency = 1;
954         net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
955         net->ipv6.sysctl.idgen_retries = 3;
956         net->ipv6.sysctl.idgen_delay = 1 * HZ;
957         net->ipv6.sysctl.flowlabel_state_ranges = 0;
958         net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
959         net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
960         net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
961         net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
962         net->ipv6.sysctl.fib_notify_on_flag_change = 0;
963         atomic_set(&net->ipv6.fib6_sernum, 1);
964
965         net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
966         net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
967
968         err = ipv6_init_mibs(net);
969         if (err)
970                 return err;
971 #ifdef CONFIG_PROC_FS
972         err = udp6_proc_init(net);
973         if (err)
974                 goto out;
975         err = tcp6_proc_init(net);
976         if (err)
977                 goto proc_tcp6_fail;
978         err = ac6_proc_init(net);
979         if (err)
980                 goto proc_ac6_fail;
981 #endif
982         return err;
983
984 #ifdef CONFIG_PROC_FS
985 proc_ac6_fail:
986         tcp6_proc_exit(net);
987 proc_tcp6_fail:
988         udp6_proc_exit(net);
989 out:
990         ipv6_cleanup_mibs(net);
991         return err;
992 #endif
993 }
994
995 static void __net_exit inet6_net_exit(struct net *net)
996 {
997 #ifdef CONFIG_PROC_FS
998         udp6_proc_exit(net);
999         tcp6_proc_exit(net);
1000         ac6_proc_exit(net);
1001 #endif
1002         ipv6_cleanup_mibs(net);
1003 }
1004
1005 static struct pernet_operations inet6_net_ops = {
1006         .init = inet6_net_init,
1007         .exit = inet6_net_exit,
1008 };
1009
1010 static int ipv6_route_input(struct sk_buff *skb)
1011 {
1012         ip6_route_input(skb);
1013         return skb_dst(skb)->error;
1014 }
1015
1016 static const struct ipv6_stub ipv6_stub_impl = {
1017         .ipv6_sock_mc_join = ipv6_sock_mc_join,
1018         .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
1019         .ipv6_dst_lookup_flow = ip6_dst_lookup_flow,
1020         .ipv6_route_input  = ipv6_route_input,
1021         .fib6_get_table    = fib6_get_table,
1022         .fib6_table_lookup = fib6_table_lookup,
1023         .fib6_lookup       = fib6_lookup,
1024         .fib6_select_path  = fib6_select_path,
1025         .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
1026         .fib6_nh_init      = fib6_nh_init,
1027         .fib6_nh_release   = fib6_nh_release,
1028         .fib6_nh_release_dsts = fib6_nh_release_dsts,
1029         .fib6_update_sernum = fib6_update_sernum_stub,
1030         .fib6_rt_update    = fib6_rt_update,
1031         .ip6_del_rt        = ip6_del_rt,
1032         .udpv6_encap_enable = udpv6_encap_enable,
1033         .ndisc_send_na = ndisc_send_na,
1034 #if IS_ENABLED(CONFIG_XFRM)
1035         .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
1036         .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
1037         .xfrm6_rcv_encap = xfrm6_rcv_encap,
1038 #endif
1039         .nd_tbl = &nd_tbl,
1040         .ipv6_fragment = ip6_fragment,
1041         .ipv6_dev_find = ipv6_dev_find,
1042 };
1043
1044 static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
1045         .inet6_bind = __inet6_bind,
1046         .udp6_lib_lookup = __udp6_lib_lookup,
1047 };
1048
1049 static int __init inet6_init(void)
1050 {
1051         struct list_head *r;
1052         int err = 0;
1053
1054         sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
1055
1056         /* Register the socket-side information for inet6_create.  */
1057         for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
1058                 INIT_LIST_HEAD(r);
1059
1060         if (disable_ipv6_mod) {
1061                 pr_info("Loaded, but administratively disabled, reboot required to enable\n");
1062                 goto out;
1063         }
1064
1065         err = proto_register(&tcpv6_prot, 1);
1066         if (err)
1067                 goto out;
1068
1069         err = proto_register(&udpv6_prot, 1);
1070         if (err)
1071                 goto out_unregister_tcp_proto;
1072
1073         err = proto_register(&udplitev6_prot, 1);
1074         if (err)
1075                 goto out_unregister_udp_proto;
1076
1077         err = proto_register(&rawv6_prot, 1);
1078         if (err)
1079                 goto out_unregister_udplite_proto;
1080
1081         err = proto_register(&pingv6_prot, 1);
1082         if (err)
1083                 goto out_unregister_raw_proto;
1084
1085         /* We MUST register RAW sockets before we create the ICMP6,
1086          * IGMP6, or NDISC control sockets.
1087          */
1088         err = rawv6_init();
1089         if (err)
1090                 goto out_unregister_ping_proto;
1091
1092         /* Register the family here so that the init calls below will
1093          * be able to create sockets. (?? is this dangerous ??)
1094          */
1095         err = sock_register(&inet6_family_ops);
1096         if (err)
1097                 goto out_sock_register_fail;
1098
1099         /*
1100          *      ipngwg API draft makes clear that the correct semantics
1101          *      for TCP and UDP is to consider one TCP and UDP instance
1102          *      in a host available by both INET and INET6 APIs and
1103          *      able to communicate via both network protocols.
1104          */
1105
1106         err = register_pernet_subsys(&inet6_net_ops);
1107         if (err)
1108                 goto register_pernet_fail;
1109         err = ip6_mr_init();
1110         if (err)
1111                 goto ipmr_fail;
1112         err = icmpv6_init();
1113         if (err)
1114                 goto icmp_fail;
1115         err = ndisc_init();
1116         if (err)
1117                 goto ndisc_fail;
1118         err = igmp6_init();
1119         if (err)
1120                 goto igmp_fail;
1121
1122         err = ipv6_netfilter_init();
1123         if (err)
1124                 goto netfilter_fail;
1125         /* Create /proc/foo6 entries. */
1126 #ifdef CONFIG_PROC_FS
1127         err = -ENOMEM;
1128         if (raw6_proc_init())
1129                 goto proc_raw6_fail;
1130         if (udplite6_proc_init())
1131                 goto proc_udplite6_fail;
1132         if (ipv6_misc_proc_init())
1133                 goto proc_misc6_fail;
1134         if (if6_proc_init())
1135                 goto proc_if6_fail;
1136 #endif
1137         err = ip6_route_init();
1138         if (err)
1139                 goto ip6_route_fail;
1140         err = ndisc_late_init();
1141         if (err)
1142                 goto ndisc_late_fail;
1143         err = ip6_flowlabel_init();
1144         if (err)
1145                 goto ip6_flowlabel_fail;
1146         err = ipv6_anycast_init();
1147         if (err)
1148                 goto ipv6_anycast_fail;
1149         err = addrconf_init();
1150         if (err)
1151                 goto addrconf_fail;
1152
1153         /* Init v6 extension headers. */
1154         err = ipv6_exthdrs_init();
1155         if (err)
1156                 goto ipv6_exthdrs_fail;
1157
1158         err = ipv6_frag_init();
1159         if (err)
1160                 goto ipv6_frag_fail;
1161
1162         /* Init v6 transport protocols. */
1163         err = udpv6_init();
1164         if (err)
1165                 goto udpv6_fail;
1166
1167         err = udplitev6_init();
1168         if (err)
1169                 goto udplitev6_fail;
1170
1171         err = udpv6_offload_init();
1172         if (err)
1173                 goto udpv6_offload_fail;
1174
1175         err = tcpv6_init();
1176         if (err)
1177                 goto tcpv6_fail;
1178
1179         err = ipv6_packet_init();
1180         if (err)
1181                 goto ipv6_packet_fail;
1182
1183         err = pingv6_init();
1184         if (err)
1185                 goto pingv6_fail;
1186
1187         err = calipso_init();
1188         if (err)
1189                 goto calipso_fail;
1190
1191         err = seg6_init();
1192         if (err)
1193                 goto seg6_fail;
1194
1195         err = rpl_init();
1196         if (err)
1197                 goto rpl_fail;
1198
1199         err = ioam6_init();
1200         if (err)
1201                 goto ioam6_fail;
1202
1203         err = igmp6_late_init();
1204         if (err)
1205                 goto igmp6_late_err;
1206
1207 #ifdef CONFIG_SYSCTL
1208         err = ipv6_sysctl_register();
1209         if (err)
1210                 goto sysctl_fail;
1211 #endif
1212
1213         /* ensure that ipv6 stubs are visible only after ipv6 is ready */
1214         wmb();
1215         ipv6_stub = &ipv6_stub_impl;
1216         ipv6_bpf_stub = &ipv6_bpf_stub_impl;
1217 out:
1218         return err;
1219
1220 #ifdef CONFIG_SYSCTL
1221 sysctl_fail:
1222         igmp6_late_cleanup();
1223 #endif
1224 igmp6_late_err:
1225         ioam6_exit();
1226 ioam6_fail:
1227         rpl_exit();
1228 rpl_fail:
1229         seg6_exit();
1230 seg6_fail:
1231         calipso_exit();
1232 calipso_fail:
1233         pingv6_exit();
1234 pingv6_fail:
1235         ipv6_packet_cleanup();
1236 ipv6_packet_fail:
1237         tcpv6_exit();
1238 tcpv6_fail:
1239         udpv6_offload_exit();
1240 udpv6_offload_fail:
1241         udplitev6_exit();
1242 udplitev6_fail:
1243         udpv6_exit();
1244 udpv6_fail:
1245         ipv6_frag_exit();
1246 ipv6_frag_fail:
1247         ipv6_exthdrs_exit();
1248 ipv6_exthdrs_fail:
1249         addrconf_cleanup();
1250 addrconf_fail:
1251         ipv6_anycast_cleanup();
1252 ipv6_anycast_fail:
1253         ip6_flowlabel_cleanup();
1254 ip6_flowlabel_fail:
1255         ndisc_late_cleanup();
1256 ndisc_late_fail:
1257         ip6_route_cleanup();
1258 ip6_route_fail:
1259 #ifdef CONFIG_PROC_FS
1260         if6_proc_exit();
1261 proc_if6_fail:
1262         ipv6_misc_proc_exit();
1263 proc_misc6_fail:
1264         udplite6_proc_exit();
1265 proc_udplite6_fail:
1266         raw6_proc_exit();
1267 proc_raw6_fail:
1268 #endif
1269         ipv6_netfilter_fini();
1270 netfilter_fail:
1271         igmp6_cleanup();
1272 igmp_fail:
1273         ndisc_cleanup();
1274 ndisc_fail:
1275         icmpv6_cleanup();
1276 icmp_fail:
1277         ip6_mr_cleanup();
1278 ipmr_fail:
1279         unregister_pernet_subsys(&inet6_net_ops);
1280 register_pernet_fail:
1281         sock_unregister(PF_INET6);
1282         rtnl_unregister_all(PF_INET6);
1283 out_sock_register_fail:
1284         rawv6_exit();
1285 out_unregister_ping_proto:
1286         proto_unregister(&pingv6_prot);
1287 out_unregister_raw_proto:
1288         proto_unregister(&rawv6_prot);
1289 out_unregister_udplite_proto:
1290         proto_unregister(&udplitev6_prot);
1291 out_unregister_udp_proto:
1292         proto_unregister(&udpv6_prot);
1293 out_unregister_tcp_proto:
1294         proto_unregister(&tcpv6_prot);
1295         goto out;
1296 }
1297 module_init(inet6_init);
1298
1299 MODULE_ALIAS_NETPROTO(PF_INET6);