Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
[linux-2.6-microblaze.git] / net / ipv6 / tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 sk->sk_rx_dst = dst;
111                 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112                 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent BPF program called below from accessing bytes that are out
135          * of the bound specified by user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152         struct tcp_sock *tp = tcp_sk(sk);
153         struct in6_addr *saddr = NULL, *final_p, final;
154         struct ipv6_txoptions *opt;
155         struct flowi6 fl6;
156         struct dst_entry *dst;
157         int addr_type;
158         int err;
159         struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type&IPV6_ADDR_LINKLOCAL) {
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If interface is set while binding, indices
202                          * must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connect to link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
214
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (__ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
265
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
282
283         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         if (!saddr) {
290                 saddr = &fl6.saddr;
291                 sk->sk_v6_rcv_saddr = *saddr;
292         }
293
294         /* set the source address */
295         np->saddr = *saddr;
296         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297
298         sk->sk_gso_type = SKB_GSO_TCPV6;
299         ip6_dst_store(sk, dst, NULL, NULL);
300
301         icsk->icsk_ext_hdr_len = 0;
302         if (opt)
303                 icsk->icsk_ext_hdr_len = opt->opt_flen +
304                                          opt->opt_nflen;
305
306         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307
308         inet->inet_dport = usin->sin6_port;
309
310         tcp_set_state(sk, TCP_SYN_SENT);
311         err = inet6_hash_connect(tcp_death_row, sk);
312         if (err)
313                 goto late_failure;
314
315         sk_set_txhash(sk);
316
317         if (likely(!tp->repair)) {
318                 if (!tp->write_seq)
319                         WRITE_ONCE(tp->write_seq,
320                                    secure_tcpv6_seq(np->saddr.s6_addr32,
321                                                     sk->sk_v6_daddr.s6_addr32,
322                                                     inet->inet_sport,
323                                                     inet->inet_dport));
324                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325                                                    np->saddr.s6_addr32,
326                                                    sk->sk_v6_daddr.s6_addr32);
327         }
328
329         if (tcp_fastopen_defer_connect(sk, &err))
330                 return err;
331         if (err)
332                 goto late_failure;
333
334         err = tcp_connect(sk);
335         if (err)
336                 goto late_failure;
337
338         return 0;
339
340 late_failure:
341         tcp_set_state(sk, TCP_CLOSE);
342 failure:
343         inet->inet_dport = 0;
344         sk->sk_route_caps = 0;
345         return err;
346 }
347
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350         struct dst_entry *dst;
351
352         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353                 return;
354
355         dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
356         if (!dst)
357                 return;
358
359         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360                 tcp_sync_mss(sk, dst_mtu(dst));
361                 tcp_simple_retransmit(sk);
362         }
363 }
364
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366                 u8 type, u8 code, int offset, __be32 info)
367 {
368         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370         struct net *net = dev_net(skb->dev);
371         struct request_sock *fastopen;
372         struct ipv6_pinfo *np;
373         struct tcp_sock *tp;
374         __u32 seq, snd_una;
375         struct sock *sk;
376         bool fatal;
377         int err;
378
379         sk = __inet6_lookup_established(net, &tcp_hashinfo,
380                                         &hdr->daddr, th->dest,
381                                         &hdr->saddr, ntohs(th->source),
382                                         skb->dev->ifindex, inet6_sdif(skb));
383
384         if (!sk) {
385                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
386                                   ICMP6_MIB_INERRORS);
387                 return -ENOENT;
388         }
389
390         if (sk->sk_state == TCP_TIME_WAIT) {
391                 inet_twsk_put(inet_twsk(sk));
392                 return 0;
393         }
394         seq = ntohl(th->seq);
395         fatal = icmpv6_err_convert(type, code, &err);
396         if (sk->sk_state == TCP_NEW_SYN_RECV) {
397                 tcp_req_err(sk, seq, fatal);
398                 return 0;
399         }
400
401         bh_lock_sock(sk);
402         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
404
405         if (sk->sk_state == TCP_CLOSE)
406                 goto out;
407
408         if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
410                 goto out;
411         }
412
413         tp = tcp_sk(sk);
414         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
415         fastopen = rcu_dereference(tp->fastopen_rsk);
416         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
417         if (sk->sk_state != TCP_LISTEN &&
418             !between(seq, snd_una, tp->snd_nxt)) {
419                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420                 goto out;
421         }
422
423         np = tcp_inet6_sk(sk);
424
425         if (type == NDISC_REDIRECT) {
426                 if (!sock_owned_by_user(sk)) {
427                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428
429                         if (dst)
430                                 dst->ops->redirect(dst, sk, skb);
431                 }
432                 goto out;
433         }
434
435         if (type == ICMPV6_PKT_TOOBIG) {
436                 /* We are not interested in TCP_LISTEN and open_requests
437                  * (SYN-ACKs send out by Linux are always <576bytes so
438                  * they should go through unfragmented).
439                  */
440                 if (sk->sk_state == TCP_LISTEN)
441                         goto out;
442
443                 if (!ip6_sk_accept_pmtu(sk))
444                         goto out;
445
446                 tp->mtu_info = ntohl(info);
447                 if (!sock_owned_by_user(sk))
448                         tcp_v6_mtu_reduced(sk);
449                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450                                            &sk->sk_tsq_flags))
451                         sock_hold(sk);
452                 goto out;
453         }
454
455
456         /* Might be for an request_sock */
457         switch (sk->sk_state) {
458         case TCP_SYN_SENT:
459         case TCP_SYN_RECV:
460                 /* Only in fast or simultaneous open. If a fast open socket is
461                  * is already accepted it is treated as a connected one below.
462                  */
463                 if (fastopen && !fastopen->sk)
464                         break;
465
466                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
467
468                 if (!sock_owned_by_user(sk)) {
469                         sk->sk_err = err;
470                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
471
472                         tcp_done(sk);
473                 } else
474                         sk->sk_err_soft = err;
475                 goto out;
476         case TCP_LISTEN:
477                 break;
478         default:
479                 /* check if this ICMP message allows revert of backoff.
480                  * (see RFC 6069)
481                  */
482                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
483                     code == ICMPV6_NOROUTE)
484                         tcp_ld_RTO_revert(sk, seq);
485         }
486
487         if (!sock_owned_by_user(sk) && np->recverr) {
488                 sk->sk_err = err;
489                 sk->sk_error_report(sk);
490         } else
491                 sk->sk_err_soft = err;
492
493 out:
494         bh_unlock_sock(sk);
495         sock_put(sk);
496         return 0;
497 }
498
499
500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
501                               struct flowi *fl,
502                               struct request_sock *req,
503                               struct tcp_fastopen_cookie *foc,
504                               enum tcp_synack_type synack_type,
505                               struct sk_buff *syn_skb)
506 {
507         struct inet_request_sock *ireq = inet_rsk(req);
508         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
509         struct ipv6_txoptions *opt;
510         struct flowi6 *fl6 = &fl->u.ip6;
511         struct sk_buff *skb;
512         int err = -ENOMEM;
513
514         /* First, grab a route. */
515         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
516                                                IPPROTO_TCP)) == NULL)
517                 goto done;
518
519         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
520
521         if (skb) {
522                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
523                                     &ireq->ir_v6_rmt_addr);
524
525                 fl6->daddr = ireq->ir_v6_rmt_addr;
526                 if (np->repflow && ireq->pktopts)
527                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
528
529                 rcu_read_lock();
530                 opt = ireq->ipv6_opt;
531                 if (!opt)
532                         opt = rcu_dereference(np->opt);
533                 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
534                                sk->sk_priority);
535                 rcu_read_unlock();
536                 err = net_xmit_eval(err);
537         }
538
539 done:
540         return err;
541 }
542
543
544 static void tcp_v6_reqsk_destructor(struct request_sock *req)
545 {
546         kfree(inet_rsk(req)->ipv6_opt);
547         kfree_skb(inet_rsk(req)->pktopts);
548 }
549
550 #ifdef CONFIG_TCP_MD5SIG
551 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
552                                                    const struct in6_addr *addr,
553                                                    int l3index)
554 {
555         return tcp_md5_do_lookup(sk, l3index,
556                                  (union tcp_md5_addr *)addr, AF_INET6);
557 }
558
559 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
560                                                 const struct sock *addr_sk)
561 {
562         int l3index;
563
564         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
565                                                  addr_sk->sk_bound_dev_if);
566         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
567                                     l3index);
568 }
569
570 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
571                                  sockptr_t optval, int optlen)
572 {
573         struct tcp_md5sig cmd;
574         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
575         int l3index = 0;
576         u8 prefixlen;
577
578         if (optlen < sizeof(cmd))
579                 return -EINVAL;
580
581         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
582                 return -EFAULT;
583
584         if (sin6->sin6_family != AF_INET6)
585                 return -EINVAL;
586
587         if (optname == TCP_MD5SIG_EXT &&
588             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
589                 prefixlen = cmd.tcpm_prefixlen;
590                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
591                                         prefixlen > 32))
592                         return -EINVAL;
593         } else {
594                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
595         }
596
597         if (optname == TCP_MD5SIG_EXT &&
598             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
599                 struct net_device *dev;
600
601                 rcu_read_lock();
602                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
603                 if (dev && netif_is_l3_master(dev))
604                         l3index = dev->ifindex;
605                 rcu_read_unlock();
606
607                 /* ok to reference set/not set outside of rcu;
608                  * right now device MUST be an L3 master
609                  */
610                 if (!dev || !l3index)
611                         return -EINVAL;
612         }
613
614         if (!cmd.tcpm_keylen) {
615                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
616                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
617                                               AF_INET, prefixlen,
618                                               l3index);
619                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
620                                       AF_INET6, prefixlen, l3index);
621         }
622
623         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
624                 return -EINVAL;
625
626         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
627                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
628                                       AF_INET, prefixlen, l3index,
629                                       cmd.tcpm_key, cmd.tcpm_keylen,
630                                       GFP_KERNEL);
631
632         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
633                               AF_INET6, prefixlen, l3index,
634                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
635 }
636
637 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
638                                    const struct in6_addr *daddr,
639                                    const struct in6_addr *saddr,
640                                    const struct tcphdr *th, int nbytes)
641 {
642         struct tcp6_pseudohdr *bp;
643         struct scatterlist sg;
644         struct tcphdr *_th;
645
646         bp = hp->scratch;
647         /* 1. TCP pseudo-header (RFC2460) */
648         bp->saddr = *saddr;
649         bp->daddr = *daddr;
650         bp->protocol = cpu_to_be32(IPPROTO_TCP);
651         bp->len = cpu_to_be32(nbytes);
652
653         _th = (struct tcphdr *)(bp + 1);
654         memcpy(_th, th, sizeof(*th));
655         _th->check = 0;
656
657         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
658         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
659                                 sizeof(*bp) + sizeof(*th));
660         return crypto_ahash_update(hp->md5_req);
661 }
662
663 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
664                                const struct in6_addr *daddr, struct in6_addr *saddr,
665                                const struct tcphdr *th)
666 {
667         struct tcp_md5sig_pool *hp;
668         struct ahash_request *req;
669
670         hp = tcp_get_md5sig_pool();
671         if (!hp)
672                 goto clear_hash_noput;
673         req = hp->md5_req;
674
675         if (crypto_ahash_init(req))
676                 goto clear_hash;
677         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
678                 goto clear_hash;
679         if (tcp_md5_hash_key(hp, key))
680                 goto clear_hash;
681         ahash_request_set_crypt(req, NULL, md5_hash, 0);
682         if (crypto_ahash_final(req))
683                 goto clear_hash;
684
685         tcp_put_md5sig_pool();
686         return 0;
687
688 clear_hash:
689         tcp_put_md5sig_pool();
690 clear_hash_noput:
691         memset(md5_hash, 0, 16);
692         return 1;
693 }
694
695 static int tcp_v6_md5_hash_skb(char *md5_hash,
696                                const struct tcp_md5sig_key *key,
697                                const struct sock *sk,
698                                const struct sk_buff *skb)
699 {
700         const struct in6_addr *saddr, *daddr;
701         struct tcp_md5sig_pool *hp;
702         struct ahash_request *req;
703         const struct tcphdr *th = tcp_hdr(skb);
704
705         if (sk) { /* valid for establish/request sockets */
706                 saddr = &sk->sk_v6_rcv_saddr;
707                 daddr = &sk->sk_v6_daddr;
708         } else {
709                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
710                 saddr = &ip6h->saddr;
711                 daddr = &ip6h->daddr;
712         }
713
714         hp = tcp_get_md5sig_pool();
715         if (!hp)
716                 goto clear_hash_noput;
717         req = hp->md5_req;
718
719         if (crypto_ahash_init(req))
720                 goto clear_hash;
721
722         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
723                 goto clear_hash;
724         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
725                 goto clear_hash;
726         if (tcp_md5_hash_key(hp, key))
727                 goto clear_hash;
728         ahash_request_set_crypt(req, NULL, md5_hash, 0);
729         if (crypto_ahash_final(req))
730                 goto clear_hash;
731
732         tcp_put_md5sig_pool();
733         return 0;
734
735 clear_hash:
736         tcp_put_md5sig_pool();
737 clear_hash_noput:
738         memset(md5_hash, 0, 16);
739         return 1;
740 }
741
742 #endif
743
744 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
745                                     const struct sk_buff *skb,
746                                     int dif, int sdif)
747 {
748 #ifdef CONFIG_TCP_MD5SIG
749         const __u8 *hash_location = NULL;
750         struct tcp_md5sig_key *hash_expected;
751         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
752         const struct tcphdr *th = tcp_hdr(skb);
753         int genhash, l3index;
754         u8 newhash[16];
755
756         /* sdif set, means packet ingressed via a device
757          * in an L3 domain and dif is set to the l3mdev
758          */
759         l3index = sdif ? dif : 0;
760
761         hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
762         hash_location = tcp_parse_md5sig_option(th);
763
764         /* We've parsed the options - do we have a hash? */
765         if (!hash_expected && !hash_location)
766                 return false;
767
768         if (hash_expected && !hash_location) {
769                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
770                 return true;
771         }
772
773         if (!hash_expected && hash_location) {
774                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
775                 return true;
776         }
777
778         /* check the signature */
779         genhash = tcp_v6_md5_hash_skb(newhash,
780                                       hash_expected,
781                                       NULL, skb);
782
783         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
784                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
785                 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
786                                      genhash ? "failed" : "mismatch",
787                                      &ip6h->saddr, ntohs(th->source),
788                                      &ip6h->daddr, ntohs(th->dest), l3index);
789                 return true;
790         }
791 #endif
792         return false;
793 }
794
795 static void tcp_v6_init_req(struct request_sock *req,
796                             const struct sock *sk_listener,
797                             struct sk_buff *skb)
798 {
799         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
800         struct inet_request_sock *ireq = inet_rsk(req);
801         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
802
803         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
804         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
805
806         /* So that link locals have meaning */
807         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
808             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
809                 ireq->ir_iif = tcp_v6_iif(skb);
810
811         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
812             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
813              np->rxopt.bits.rxinfo ||
814              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
815              np->rxopt.bits.rxohlim || np->repflow)) {
816                 refcount_inc(&skb->users);
817                 ireq->pktopts = skb;
818         }
819 }
820
821 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
822                                           struct flowi *fl,
823                                           const struct request_sock *req)
824 {
825         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
826 }
827
828 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
829         .family         =       AF_INET6,
830         .obj_size       =       sizeof(struct tcp6_request_sock),
831         .rtx_syn_ack    =       tcp_rtx_synack,
832         .send_ack       =       tcp_v6_reqsk_send_ack,
833         .destructor     =       tcp_v6_reqsk_destructor,
834         .send_reset     =       tcp_v6_send_reset,
835         .syn_ack_timeout =      tcp_syn_ack_timeout,
836 };
837
838 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
839         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
840                                 sizeof(struct ipv6hdr),
841 #ifdef CONFIG_TCP_MD5SIG
842         .req_md5_lookup =       tcp_v6_md5_lookup,
843         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
844 #endif
845         .init_req       =       tcp_v6_init_req,
846 #ifdef CONFIG_SYN_COOKIES
847         .cookie_init_seq =      cookie_v6_init_sequence,
848 #endif
849         .route_req      =       tcp_v6_route_req,
850         .init_seq       =       tcp_v6_init_seq,
851         .init_ts_off    =       tcp_v6_init_ts_off,
852         .send_synack    =       tcp_v6_send_synack,
853 };
854
855 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
856                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
857                                  int oif, struct tcp_md5sig_key *key, int rst,
858                                  u8 tclass, __be32 label, u32 priority)
859 {
860         const struct tcphdr *th = tcp_hdr(skb);
861         struct tcphdr *t1;
862         struct sk_buff *buff;
863         struct flowi6 fl6;
864         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
865         struct sock *ctl_sk = net->ipv6.tcp_sk;
866         unsigned int tot_len = sizeof(struct tcphdr);
867         struct dst_entry *dst;
868         __be32 *topt;
869         __u32 mark = 0;
870
871         if (tsecr)
872                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
873 #ifdef CONFIG_TCP_MD5SIG
874         if (key)
875                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
876 #endif
877
878         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
879                          GFP_ATOMIC);
880         if (!buff)
881                 return;
882
883         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
884
885         t1 = skb_push(buff, tot_len);
886         skb_reset_transport_header(buff);
887
888         /* Swap the send and the receive. */
889         memset(t1, 0, sizeof(*t1));
890         t1->dest = th->source;
891         t1->source = th->dest;
892         t1->doff = tot_len / 4;
893         t1->seq = htonl(seq);
894         t1->ack_seq = htonl(ack);
895         t1->ack = !rst || !th->ack;
896         t1->rst = rst;
897         t1->window = htons(win);
898
899         topt = (__be32 *)(t1 + 1);
900
901         if (tsecr) {
902                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
903                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
904                 *topt++ = htonl(tsval);
905                 *topt++ = htonl(tsecr);
906         }
907
908 #ifdef CONFIG_TCP_MD5SIG
909         if (key) {
910                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
911                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
912                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
913                                     &ipv6_hdr(skb)->saddr,
914                                     &ipv6_hdr(skb)->daddr, t1);
915         }
916 #endif
917
918         memset(&fl6, 0, sizeof(fl6));
919         fl6.daddr = ipv6_hdr(skb)->saddr;
920         fl6.saddr = ipv6_hdr(skb)->daddr;
921         fl6.flowlabel = label;
922
923         buff->ip_summed = CHECKSUM_PARTIAL;
924         buff->csum = 0;
925
926         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
927
928         fl6.flowi6_proto = IPPROTO_TCP;
929         if (rt6_need_strict(&fl6.daddr) && !oif)
930                 fl6.flowi6_oif = tcp_v6_iif(skb);
931         else {
932                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
933                         oif = skb->skb_iif;
934
935                 fl6.flowi6_oif = oif;
936         }
937
938         if (sk) {
939                 if (sk->sk_state == TCP_TIME_WAIT) {
940                         mark = inet_twsk(sk)->tw_mark;
941                         /* autoflowlabel relies on buff->hash */
942                         skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
943                                      PKT_HASH_TYPE_L4);
944                 } else {
945                         mark = sk->sk_mark;
946                 }
947                 buff->tstamp = tcp_transmit_time(sk);
948         }
949         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
950         fl6.fl6_dport = t1->dest;
951         fl6.fl6_sport = t1->source;
952         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
953         security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
954
955         /* Pass a socket to ip6_dst_lookup either it is for RST
956          * Underlying function will use this to retrieve the network
957          * namespace
958          */
959         dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
960         if (!IS_ERR(dst)) {
961                 skb_dst_set(buff, dst);
962                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
963                          priority);
964                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
965                 if (rst)
966                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
967                 return;
968         }
969
970         kfree_skb(buff);
971 }
972
973 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
974 {
975         const struct tcphdr *th = tcp_hdr(skb);
976         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
977         u32 seq = 0, ack_seq = 0;
978         struct tcp_md5sig_key *key = NULL;
979 #ifdef CONFIG_TCP_MD5SIG
980         const __u8 *hash_location = NULL;
981         unsigned char newhash[16];
982         int genhash;
983         struct sock *sk1 = NULL;
984 #endif
985         __be32 label = 0;
986         u32 priority = 0;
987         struct net *net;
988         int oif = 0;
989
990         if (th->rst)
991                 return;
992
993         /* If sk not NULL, it means we did a successful lookup and incoming
994          * route had to be correct. prequeue might have dropped our dst.
995          */
996         if (!sk && !ipv6_unicast_destination(skb))
997                 return;
998
999         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1000 #ifdef CONFIG_TCP_MD5SIG
1001         rcu_read_lock();
1002         hash_location = tcp_parse_md5sig_option(th);
1003         if (sk && sk_fullsock(sk)) {
1004                 int l3index;
1005
1006                 /* sdif set, means packet ingressed via a device
1007                  * in an L3 domain and inet_iif is set to it.
1008                  */
1009                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1010                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1011         } else if (hash_location) {
1012                 int dif = tcp_v6_iif_l3_slave(skb);
1013                 int sdif = tcp_v6_sdif(skb);
1014                 int l3index;
1015
1016                 /*
1017                  * active side is lost. Try to find listening socket through
1018                  * source port, and then find md5 key through listening socket.
1019                  * we are not loose security here:
1020                  * Incoming packet is checked with md5 hash with finding key,
1021                  * no RST generated if md5 hash doesn't match.
1022                  */
1023                 sk1 = inet6_lookup_listener(net,
1024                                            &tcp_hashinfo, NULL, 0,
1025                                            &ipv6h->saddr,
1026                                            th->source, &ipv6h->daddr,
1027                                            ntohs(th->source), dif, sdif);
1028                 if (!sk1)
1029                         goto out;
1030
1031                 /* sdif set, means packet ingressed via a device
1032                  * in an L3 domain and dif is set to it.
1033                  */
1034                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1035
1036                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1037                 if (!key)
1038                         goto out;
1039
1040                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1041                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1042                         goto out;
1043         }
1044 #endif
1045
1046         if (th->ack)
1047                 seq = ntohl(th->ack_seq);
1048         else
1049                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1050                           (th->doff << 2);
1051
1052         if (sk) {
1053                 oif = sk->sk_bound_dev_if;
1054                 if (sk_fullsock(sk)) {
1055                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1056
1057                         trace_tcp_send_reset(sk, skb);
1058                         if (np->repflow)
1059                                 label = ip6_flowlabel(ipv6h);
1060                         priority = sk->sk_priority;
1061                 }
1062                 if (sk->sk_state == TCP_TIME_WAIT) {
1063                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1064                         priority = inet_twsk(sk)->tw_priority;
1065                 }
1066         } else {
1067                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1068                         label = ip6_flowlabel(ipv6h);
1069         }
1070
1071         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1072                              label, priority);
1073
1074 #ifdef CONFIG_TCP_MD5SIG
1075 out:
1076         rcu_read_unlock();
1077 #endif
1078 }
1079
1080 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1081                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1082                             struct tcp_md5sig_key *key, u8 tclass,
1083                             __be32 label, u32 priority)
1084 {
1085         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1086                              tclass, label, priority);
1087 }
1088
1089 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1090 {
1091         struct inet_timewait_sock *tw = inet_twsk(sk);
1092         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1093
1094         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1095                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1096                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1097                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1098                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1099
1100         inet_twsk_put(tw);
1101 }
1102
1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1104                                   struct request_sock *req)
1105 {
1106         int l3index;
1107
1108         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1109
1110         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1111          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1112          */
1113         /* RFC 7323 2.3
1114          * The window field (SEG.WND) of every outgoing segment, with the
1115          * exception of <SYN> segments, MUST be right-shifted by
1116          * Rcv.Wind.Shift bits:
1117          */
1118         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1119                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1120                         tcp_rsk(req)->rcv_nxt,
1121                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1122                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1123                         req->ts_recent, sk->sk_bound_dev_if,
1124                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1125                         0, 0, sk->sk_priority);
1126 }
1127
1128
1129 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1130 {
1131 #ifdef CONFIG_SYN_COOKIES
1132         const struct tcphdr *th = tcp_hdr(skb);
1133
1134         if (!th->syn)
1135                 sk = cookie_v6_check(sk, skb);
1136 #endif
1137         return sk;
1138 }
1139
1140 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1141                          struct tcphdr *th, u32 *cookie)
1142 {
1143         u16 mss = 0;
1144 #ifdef CONFIG_SYN_COOKIES
1145         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1146                                     &tcp_request_sock_ipv6_ops, sk, th);
1147         if (mss) {
1148                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1149                 tcp_synq_overflow(sk);
1150         }
1151 #endif
1152         return mss;
1153 }
1154
1155 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1156 {
1157         if (skb->protocol == htons(ETH_P_IP))
1158                 return tcp_v4_conn_request(sk, skb);
1159
1160         if (!ipv6_unicast_destination(skb))
1161                 goto drop;
1162
1163         return tcp_conn_request(&tcp6_request_sock_ops,
1164                                 &tcp_request_sock_ipv6_ops, sk, skb);
1165
1166 drop:
1167         tcp_listendrop(sk);
1168         return 0; /* don't send reset */
1169 }
1170
1171 static void tcp_v6_restore_cb(struct sk_buff *skb)
1172 {
1173         /* We need to move header back to the beginning if xfrm6_policy_check()
1174          * and tcp_v6_fill_cb() are going to be called again.
1175          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1176          */
1177         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1178                 sizeof(struct inet6_skb_parm));
1179 }
1180
1181 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1182                                          struct request_sock *req,
1183                                          struct dst_entry *dst,
1184                                          struct request_sock *req_unhash,
1185                                          bool *own_req)
1186 {
1187         struct inet_request_sock *ireq;
1188         struct ipv6_pinfo *newnp;
1189         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1190         struct ipv6_txoptions *opt;
1191         struct inet_sock *newinet;
1192         struct tcp_sock *newtp;
1193         struct sock *newsk;
1194 #ifdef CONFIG_TCP_MD5SIG
1195         struct tcp_md5sig_key *key;
1196         int l3index;
1197 #endif
1198         struct flowi6 fl6;
1199
1200         if (skb->protocol == htons(ETH_P_IP)) {
1201                 /*
1202                  *      v6 mapped
1203                  */
1204
1205                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1206                                              req_unhash, own_req);
1207
1208                 if (!newsk)
1209                         return NULL;
1210
1211                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1212
1213                 newinet = inet_sk(newsk);
1214                 newnp = tcp_inet6_sk(newsk);
1215                 newtp = tcp_sk(newsk);
1216
1217                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1218
1219                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1220
1221                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1222                 if (sk_is_mptcp(newsk))
1223                         mptcpv6_handle_mapped(newsk, true);
1224                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1225 #ifdef CONFIG_TCP_MD5SIG
1226                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1227 #endif
1228
1229                 newnp->ipv6_mc_list = NULL;
1230                 newnp->ipv6_ac_list = NULL;
1231                 newnp->ipv6_fl_list = NULL;
1232                 newnp->pktoptions  = NULL;
1233                 newnp->opt         = NULL;
1234                 newnp->mcast_oif   = inet_iif(skb);
1235                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1236                 newnp->rcv_flowinfo = 0;
1237                 if (np->repflow)
1238                         newnp->flow_label = 0;
1239
1240                 /*
1241                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1242                  * here, tcp_create_openreq_child now does this for us, see the comment in
1243                  * that function for the gory details. -acme
1244                  */
1245
1246                 /* It is tricky place. Until this moment IPv4 tcp
1247                    worked with IPv6 icsk.icsk_af_ops.
1248                    Sync it now.
1249                  */
1250                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1251
1252                 return newsk;
1253         }
1254
1255         ireq = inet_rsk(req);
1256
1257         if (sk_acceptq_is_full(sk))
1258                 goto out_overflow;
1259
1260         if (!dst) {
1261                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1262                 if (!dst)
1263                         goto out;
1264         }
1265
1266         newsk = tcp_create_openreq_child(sk, req, skb);
1267         if (!newsk)
1268                 goto out_nonewsk;
1269
1270         /*
1271          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1272          * count here, tcp_create_openreq_child now does this for us, see the
1273          * comment in that function for the gory details. -acme
1274          */
1275
1276         newsk->sk_gso_type = SKB_GSO_TCPV6;
1277         ip6_dst_store(newsk, dst, NULL, NULL);
1278         inet6_sk_rx_dst_set(newsk, skb);
1279
1280         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1281
1282         newtp = tcp_sk(newsk);
1283         newinet = inet_sk(newsk);
1284         newnp = tcp_inet6_sk(newsk);
1285
1286         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1287
1288         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1289         newnp->saddr = ireq->ir_v6_loc_addr;
1290         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1291         newsk->sk_bound_dev_if = ireq->ir_iif;
1292
1293         /* Now IPv6 options...
1294
1295            First: no IPv4 options.
1296          */
1297         newinet->inet_opt = NULL;
1298         newnp->ipv6_mc_list = NULL;
1299         newnp->ipv6_ac_list = NULL;
1300         newnp->ipv6_fl_list = NULL;
1301
1302         /* Clone RX bits */
1303         newnp->rxopt.all = np->rxopt.all;
1304
1305         newnp->pktoptions = NULL;
1306         newnp->opt        = NULL;
1307         newnp->mcast_oif  = tcp_v6_iif(skb);
1308         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1309         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1310         if (np->repflow)
1311                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1312
1313         /* Clone native IPv6 options from listening socket (if any)
1314
1315            Yes, keeping reference count would be much more clever,
1316            but we make one more one thing there: reattach optmem
1317            to newsk.
1318          */
1319         opt = ireq->ipv6_opt;
1320         if (!opt)
1321                 opt = rcu_dereference(np->opt);
1322         if (opt) {
1323                 opt = ipv6_dup_options(newsk, opt);
1324                 RCU_INIT_POINTER(newnp->opt, opt);
1325         }
1326         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1327         if (opt)
1328                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1329                                                     opt->opt_flen;
1330
1331         tcp_ca_openreq_child(newsk, dst);
1332
1333         tcp_sync_mss(newsk, dst_mtu(dst));
1334         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1335
1336         tcp_initialize_rcv_mss(newsk);
1337
1338         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1339         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1340
1341 #ifdef CONFIG_TCP_MD5SIG
1342         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1343
1344         /* Copy over the MD5 key from the original socket */
1345         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1346         if (key) {
1347                 /* We're using one, so create a matching key
1348                  * on the newsk structure. If we fail to get
1349                  * memory, then we end up not copying the key
1350                  * across. Shucks.
1351                  */
1352                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1353                                AF_INET6, 128, l3index, key->key, key->keylen,
1354                                sk_gfp_mask(sk, GFP_ATOMIC));
1355         }
1356 #endif
1357
1358         if (__inet_inherit_port(sk, newsk) < 0) {
1359                 inet_csk_prepare_forced_close(newsk);
1360                 tcp_done(newsk);
1361                 goto out;
1362         }
1363         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1364         if (*own_req) {
1365                 tcp_move_syn(newtp, req);
1366
1367                 /* Clone pktoptions received with SYN, if we own the req */
1368                 if (ireq->pktopts) {
1369                         newnp->pktoptions = skb_clone(ireq->pktopts,
1370                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1371                         consume_skb(ireq->pktopts);
1372                         ireq->pktopts = NULL;
1373                         if (newnp->pktoptions) {
1374                                 tcp_v6_restore_cb(newnp->pktoptions);
1375                                 skb_set_owner_r(newnp->pktoptions, newsk);
1376                         }
1377                 }
1378         }
1379
1380         return newsk;
1381
1382 out_overflow:
1383         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1384 out_nonewsk:
1385         dst_release(dst);
1386 out:
1387         tcp_listendrop(sk);
1388         return NULL;
1389 }
1390
1391 /* The socket must have it's spinlock held when we get
1392  * here, unless it is a TCP_LISTEN socket.
1393  *
1394  * We have a potential double-lock case here, so even when
1395  * doing backlog processing we use the BH locking scheme.
1396  * This is because we cannot sleep with the original spinlock
1397  * held.
1398  */
1399 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1400 {
1401         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1402         struct sk_buff *opt_skb = NULL;
1403         struct tcp_sock *tp;
1404
1405         /* Imagine: socket is IPv6. IPv4 packet arrives,
1406            goes to IPv4 receive handler and backlogged.
1407            From backlog it always goes here. Kerboom...
1408            Fortunately, tcp_rcv_established and rcv_established
1409            handle them correctly, but it is not case with
1410            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1411          */
1412
1413         if (skb->protocol == htons(ETH_P_IP))
1414                 return tcp_v4_do_rcv(sk, skb);
1415
1416         /*
1417          *      socket locking is here for SMP purposes as backlog rcv
1418          *      is currently called with bh processing disabled.
1419          */
1420
1421         /* Do Stevens' IPV6_PKTOPTIONS.
1422
1423            Yes, guys, it is the only place in our code, where we
1424            may make it not affecting IPv4.
1425            The rest of code is protocol independent,
1426            and I do not like idea to uglify IPv4.
1427
1428            Actually, all the idea behind IPV6_PKTOPTIONS
1429            looks not very well thought. For now we latch
1430            options, received in the last packet, enqueued
1431            by tcp. Feel free to propose better solution.
1432                                                --ANK (980728)
1433          */
1434         if (np->rxopt.all)
1435                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1436
1437         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1438                 struct dst_entry *dst = sk->sk_rx_dst;
1439
1440                 sock_rps_save_rxhash(sk, skb);
1441                 sk_mark_napi_id(sk, skb);
1442                 if (dst) {
1443                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1444                             dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1445                                 dst_release(dst);
1446                                 sk->sk_rx_dst = NULL;
1447                         }
1448                 }
1449
1450                 tcp_rcv_established(sk, skb);
1451                 if (opt_skb)
1452                         goto ipv6_pktoptions;
1453                 return 0;
1454         }
1455
1456         if (tcp_checksum_complete(skb))
1457                 goto csum_err;
1458
1459         if (sk->sk_state == TCP_LISTEN) {
1460                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1461
1462                 if (!nsk)
1463                         goto discard;
1464
1465                 if (nsk != sk) {
1466                         if (tcp_child_process(sk, nsk, skb))
1467                                 goto reset;
1468                         if (opt_skb)
1469                                 __kfree_skb(opt_skb);
1470                         return 0;
1471                 }
1472         } else
1473                 sock_rps_save_rxhash(sk, skb);
1474
1475         if (tcp_rcv_state_process(sk, skb))
1476                 goto reset;
1477         if (opt_skb)
1478                 goto ipv6_pktoptions;
1479         return 0;
1480
1481 reset:
1482         tcp_v6_send_reset(sk, skb);
1483 discard:
1484         if (opt_skb)
1485                 __kfree_skb(opt_skb);
1486         kfree_skb(skb);
1487         return 0;
1488 csum_err:
1489         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1490         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1491         goto discard;
1492
1493
1494 ipv6_pktoptions:
1495         /* Do you ask, what is it?
1496
1497            1. skb was enqueued by tcp.
1498            2. skb is added to tail of read queue, rather than out of order.
1499            3. socket is not in passive state.
1500            4. Finally, it really contains options, which user wants to receive.
1501          */
1502         tp = tcp_sk(sk);
1503         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1504             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1505                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1506                         np->mcast_oif = tcp_v6_iif(opt_skb);
1507                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1508                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1509                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1510                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1511                 if (np->repflow)
1512                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1513                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1514                         skb_set_owner_r(opt_skb, sk);
1515                         tcp_v6_restore_cb(opt_skb);
1516                         opt_skb = xchg(&np->pktoptions, opt_skb);
1517                 } else {
1518                         __kfree_skb(opt_skb);
1519                         opt_skb = xchg(&np->pktoptions, NULL);
1520                 }
1521         }
1522
1523         kfree_skb(opt_skb);
1524         return 0;
1525 }
1526
1527 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1528                            const struct tcphdr *th)
1529 {
1530         /* This is tricky: we move IP6CB at its correct location into
1531          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1532          * _decode_session6() uses IP6CB().
1533          * barrier() makes sure compiler won't play aliasing games.
1534          */
1535         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1536                 sizeof(struct inet6_skb_parm));
1537         barrier();
1538
1539         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1540         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1541                                     skb->len - th->doff*4);
1542         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1543         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1544         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1545         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1546         TCP_SKB_CB(skb)->sacked = 0;
1547         TCP_SKB_CB(skb)->has_rxtstamp =
1548                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1549 }
1550
1551 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1552 {
1553         struct sk_buff *skb_to_free;
1554         int sdif = inet6_sdif(skb);
1555         int dif = inet6_iif(skb);
1556         const struct tcphdr *th;
1557         const struct ipv6hdr *hdr;
1558         bool refcounted;
1559         struct sock *sk;
1560         int ret;
1561         struct net *net = dev_net(skb->dev);
1562
1563         if (skb->pkt_type != PACKET_HOST)
1564                 goto discard_it;
1565
1566         /*
1567          *      Count it even if it's bad.
1568          */
1569         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1570
1571         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1572                 goto discard_it;
1573
1574         th = (const struct tcphdr *)skb->data;
1575
1576         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1577                 goto bad_packet;
1578         if (!pskb_may_pull(skb, th->doff*4))
1579                 goto discard_it;
1580
1581         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1582                 goto csum_error;
1583
1584         th = (const struct tcphdr *)skb->data;
1585         hdr = ipv6_hdr(skb);
1586
1587 lookup:
1588         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1589                                 th->source, th->dest, inet6_iif(skb), sdif,
1590                                 &refcounted);
1591         if (!sk)
1592                 goto no_tcp_socket;
1593
1594 process:
1595         if (sk->sk_state == TCP_TIME_WAIT)
1596                 goto do_time_wait;
1597
1598         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1599                 struct request_sock *req = inet_reqsk(sk);
1600                 bool req_stolen = false;
1601                 struct sock *nsk;
1602
1603                 sk = req->rsk_listener;
1604                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1605                         sk_drops_add(sk, skb);
1606                         reqsk_put(req);
1607                         goto discard_it;
1608                 }
1609                 if (tcp_checksum_complete(skb)) {
1610                         reqsk_put(req);
1611                         goto csum_error;
1612                 }
1613                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1614                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1615                         goto lookup;
1616                 }
1617                 sock_hold(sk);
1618                 refcounted = true;
1619                 nsk = NULL;
1620                 if (!tcp_filter(sk, skb)) {
1621                         th = (const struct tcphdr *)skb->data;
1622                         hdr = ipv6_hdr(skb);
1623                         tcp_v6_fill_cb(skb, hdr, th);
1624                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1625                 }
1626                 if (!nsk) {
1627                         reqsk_put(req);
1628                         if (req_stolen) {
1629                                 /* Another cpu got exclusive access to req
1630                                  * and created a full blown socket.
1631                                  * Try to feed this packet to this socket
1632                                  * instead of discarding it.
1633                                  */
1634                                 tcp_v6_restore_cb(skb);
1635                                 sock_put(sk);
1636                                 goto lookup;
1637                         }
1638                         goto discard_and_relse;
1639                 }
1640                 if (nsk == sk) {
1641                         reqsk_put(req);
1642                         tcp_v6_restore_cb(skb);
1643                 } else if (tcp_child_process(sk, nsk, skb)) {
1644                         tcp_v6_send_reset(nsk, skb);
1645                         goto discard_and_relse;
1646                 } else {
1647                         sock_put(sk);
1648                         return 0;
1649                 }
1650         }
1651         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1652                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1653                 goto discard_and_relse;
1654         }
1655
1656         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1657                 goto discard_and_relse;
1658
1659         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1660                 goto discard_and_relse;
1661
1662         if (tcp_filter(sk, skb))
1663                 goto discard_and_relse;
1664         th = (const struct tcphdr *)skb->data;
1665         hdr = ipv6_hdr(skb);
1666         tcp_v6_fill_cb(skb, hdr, th);
1667
1668         skb->dev = NULL;
1669
1670         if (sk->sk_state == TCP_LISTEN) {
1671                 ret = tcp_v6_do_rcv(sk, skb);
1672                 goto put_and_return;
1673         }
1674
1675         sk_incoming_cpu_update(sk);
1676
1677         bh_lock_sock_nested(sk);
1678         tcp_segs_in(tcp_sk(sk), skb);
1679         ret = 0;
1680         if (!sock_owned_by_user(sk)) {
1681                 skb_to_free = sk->sk_rx_skb_cache;
1682                 sk->sk_rx_skb_cache = NULL;
1683                 ret = tcp_v6_do_rcv(sk, skb);
1684         } else {
1685                 if (tcp_add_backlog(sk, skb))
1686                         goto discard_and_relse;
1687                 skb_to_free = NULL;
1688         }
1689         bh_unlock_sock(sk);
1690         if (skb_to_free)
1691                 __kfree_skb(skb_to_free);
1692 put_and_return:
1693         if (refcounted)
1694                 sock_put(sk);
1695         return ret ? -1 : 0;
1696
1697 no_tcp_socket:
1698         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1699                 goto discard_it;
1700
1701         tcp_v6_fill_cb(skb, hdr, th);
1702
1703         if (tcp_checksum_complete(skb)) {
1704 csum_error:
1705                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1706 bad_packet:
1707                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1708         } else {
1709                 tcp_v6_send_reset(NULL, skb);
1710         }
1711
1712 discard_it:
1713         kfree_skb(skb);
1714         return 0;
1715
1716 discard_and_relse:
1717         sk_drops_add(sk, skb);
1718         if (refcounted)
1719                 sock_put(sk);
1720         goto discard_it;
1721
1722 do_time_wait:
1723         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1724                 inet_twsk_put(inet_twsk(sk));
1725                 goto discard_it;
1726         }
1727
1728         tcp_v6_fill_cb(skb, hdr, th);
1729
1730         if (tcp_checksum_complete(skb)) {
1731                 inet_twsk_put(inet_twsk(sk));
1732                 goto csum_error;
1733         }
1734
1735         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1736         case TCP_TW_SYN:
1737         {
1738                 struct sock *sk2;
1739
1740                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1741                                             skb, __tcp_hdrlen(th),
1742                                             &ipv6_hdr(skb)->saddr, th->source,
1743                                             &ipv6_hdr(skb)->daddr,
1744                                             ntohs(th->dest),
1745                                             tcp_v6_iif_l3_slave(skb),
1746                                             sdif);
1747                 if (sk2) {
1748                         struct inet_timewait_sock *tw = inet_twsk(sk);
1749                         inet_twsk_deschedule_put(tw);
1750                         sk = sk2;
1751                         tcp_v6_restore_cb(skb);
1752                         refcounted = false;
1753                         goto process;
1754                 }
1755         }
1756                 /* to ACK */
1757                 fallthrough;
1758         case TCP_TW_ACK:
1759                 tcp_v6_timewait_ack(sk, skb);
1760                 break;
1761         case TCP_TW_RST:
1762                 tcp_v6_send_reset(sk, skb);
1763                 inet_twsk_deschedule_put(inet_twsk(sk));
1764                 goto discard_it;
1765         case TCP_TW_SUCCESS:
1766                 ;
1767         }
1768         goto discard_it;
1769 }
1770
1771 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1772 {
1773         const struct ipv6hdr *hdr;
1774         const struct tcphdr *th;
1775         struct sock *sk;
1776
1777         if (skb->pkt_type != PACKET_HOST)
1778                 return;
1779
1780         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1781                 return;
1782
1783         hdr = ipv6_hdr(skb);
1784         th = tcp_hdr(skb);
1785
1786         if (th->doff < sizeof(struct tcphdr) / 4)
1787                 return;
1788
1789         /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1790         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1791                                         &hdr->saddr, th->source,
1792                                         &hdr->daddr, ntohs(th->dest),
1793                                         inet6_iif(skb), inet6_sdif(skb));
1794         if (sk) {
1795                 skb->sk = sk;
1796                 skb->destructor = sock_edemux;
1797                 if (sk_fullsock(sk)) {
1798                         struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1799
1800                         if (dst)
1801                                 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1802                         if (dst &&
1803                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1804                                 skb_dst_set_noref(skb, dst);
1805                 }
1806         }
1807 }
1808
1809 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1810         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1811         .twsk_unique    = tcp_twsk_unique,
1812         .twsk_destructor = tcp_twsk_destructor,
1813 };
1814
1815 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1816 {
1817         struct ipv6_pinfo *np = inet6_sk(sk);
1818
1819         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1820 }
1821
1822 const struct inet_connection_sock_af_ops ipv6_specific = {
1823         .queue_xmit        = inet6_csk_xmit,
1824         .send_check        = tcp_v6_send_check,
1825         .rebuild_header    = inet6_sk_rebuild_header,
1826         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1827         .conn_request      = tcp_v6_conn_request,
1828         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1829         .net_header_len    = sizeof(struct ipv6hdr),
1830         .net_frag_header_len = sizeof(struct frag_hdr),
1831         .setsockopt        = ipv6_setsockopt,
1832         .getsockopt        = ipv6_getsockopt,
1833         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1834         .sockaddr_len      = sizeof(struct sockaddr_in6),
1835         .mtu_reduced       = tcp_v6_mtu_reduced,
1836 };
1837
1838 #ifdef CONFIG_TCP_MD5SIG
1839 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1840         .md5_lookup     =       tcp_v6_md5_lookup,
1841         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1842         .md5_parse      =       tcp_v6_parse_md5_keys,
1843 };
1844 #endif
1845
1846 /*
1847  *      TCP over IPv4 via INET6 API
1848  */
1849 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1850         .queue_xmit        = ip_queue_xmit,
1851         .send_check        = tcp_v4_send_check,
1852         .rebuild_header    = inet_sk_rebuild_header,
1853         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1854         .conn_request      = tcp_v6_conn_request,
1855         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1856         .net_header_len    = sizeof(struct iphdr),
1857         .setsockopt        = ipv6_setsockopt,
1858         .getsockopt        = ipv6_getsockopt,
1859         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1860         .sockaddr_len      = sizeof(struct sockaddr_in6),
1861         .mtu_reduced       = tcp_v4_mtu_reduced,
1862 };
1863
1864 #ifdef CONFIG_TCP_MD5SIG
1865 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1866         .md5_lookup     =       tcp_v4_md5_lookup,
1867         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1868         .md5_parse      =       tcp_v6_parse_md5_keys,
1869 };
1870 #endif
1871
1872 /* NOTE: A lot of things set to zero explicitly by call to
1873  *       sk_alloc() so need not be done here.
1874  */
1875 static int tcp_v6_init_sock(struct sock *sk)
1876 {
1877         struct inet_connection_sock *icsk = inet_csk(sk);
1878
1879         tcp_init_sock(sk);
1880
1881         icsk->icsk_af_ops = &ipv6_specific;
1882
1883 #ifdef CONFIG_TCP_MD5SIG
1884         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1885 #endif
1886
1887         return 0;
1888 }
1889
1890 static void tcp_v6_destroy_sock(struct sock *sk)
1891 {
1892         tcp_v4_destroy_sock(sk);
1893         inet6_destroy_sock(sk);
1894 }
1895
1896 #ifdef CONFIG_PROC_FS
1897 /* Proc filesystem TCPv6 sock list dumping. */
1898 static void get_openreq6(struct seq_file *seq,
1899                          const struct request_sock *req, int i)
1900 {
1901         long ttd = req->rsk_timer.expires - jiffies;
1902         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1903         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1904
1905         if (ttd < 0)
1906                 ttd = 0;
1907
1908         seq_printf(seq,
1909                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1910                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1911                    i,
1912                    src->s6_addr32[0], src->s6_addr32[1],
1913                    src->s6_addr32[2], src->s6_addr32[3],
1914                    inet_rsk(req)->ir_num,
1915                    dest->s6_addr32[0], dest->s6_addr32[1],
1916                    dest->s6_addr32[2], dest->s6_addr32[3],
1917                    ntohs(inet_rsk(req)->ir_rmt_port),
1918                    TCP_SYN_RECV,
1919                    0, 0, /* could print option size, but that is af dependent. */
1920                    1,   /* timers active (only the expire timer) */
1921                    jiffies_to_clock_t(ttd),
1922                    req->num_timeout,
1923                    from_kuid_munged(seq_user_ns(seq),
1924                                     sock_i_uid(req->rsk_listener)),
1925                    0,  /* non standard timer */
1926                    0, /* open_requests have no inode */
1927                    0, req);
1928 }
1929
1930 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1931 {
1932         const struct in6_addr *dest, *src;
1933         __u16 destp, srcp;
1934         int timer_active;
1935         unsigned long timer_expires;
1936         const struct inet_sock *inet = inet_sk(sp);
1937         const struct tcp_sock *tp = tcp_sk(sp);
1938         const struct inet_connection_sock *icsk = inet_csk(sp);
1939         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1940         int rx_queue;
1941         int state;
1942
1943         dest  = &sp->sk_v6_daddr;
1944         src   = &sp->sk_v6_rcv_saddr;
1945         destp = ntohs(inet->inet_dport);
1946         srcp  = ntohs(inet->inet_sport);
1947
1948         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1949             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1950             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1951                 timer_active    = 1;
1952                 timer_expires   = icsk->icsk_timeout;
1953         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1954                 timer_active    = 4;
1955                 timer_expires   = icsk->icsk_timeout;
1956         } else if (timer_pending(&sp->sk_timer)) {
1957                 timer_active    = 2;
1958                 timer_expires   = sp->sk_timer.expires;
1959         } else {
1960                 timer_active    = 0;
1961                 timer_expires = jiffies;
1962         }
1963
1964         state = inet_sk_state_load(sp);
1965         if (state == TCP_LISTEN)
1966                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
1967         else
1968                 /* Because we don't lock the socket,
1969                  * we might find a transient negative value.
1970                  */
1971                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1972                                       READ_ONCE(tp->copied_seq), 0);
1973
1974         seq_printf(seq,
1975                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1976                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1977                    i,
1978                    src->s6_addr32[0], src->s6_addr32[1],
1979                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1980                    dest->s6_addr32[0], dest->s6_addr32[1],
1981                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1982                    state,
1983                    READ_ONCE(tp->write_seq) - tp->snd_una,
1984                    rx_queue,
1985                    timer_active,
1986                    jiffies_delta_to_clock_t(timer_expires - jiffies),
1987                    icsk->icsk_retransmits,
1988                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1989                    icsk->icsk_probes_out,
1990                    sock_i_ino(sp),
1991                    refcount_read(&sp->sk_refcnt), sp,
1992                    jiffies_to_clock_t(icsk->icsk_rto),
1993                    jiffies_to_clock_t(icsk->icsk_ack.ato),
1994                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1995                    tp->snd_cwnd,
1996                    state == TCP_LISTEN ?
1997                         fastopenq->max_qlen :
1998                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1999                    );
2000 }
2001
2002 static void get_timewait6_sock(struct seq_file *seq,
2003                                struct inet_timewait_sock *tw, int i)
2004 {
2005         long delta = tw->tw_timer.expires - jiffies;
2006         const struct in6_addr *dest, *src;
2007         __u16 destp, srcp;
2008
2009         dest = &tw->tw_v6_daddr;
2010         src  = &tw->tw_v6_rcv_saddr;
2011         destp = ntohs(tw->tw_dport);
2012         srcp  = ntohs(tw->tw_sport);
2013
2014         seq_printf(seq,
2015                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2016                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2017                    i,
2018                    src->s6_addr32[0], src->s6_addr32[1],
2019                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2020                    dest->s6_addr32[0], dest->s6_addr32[1],
2021                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2022                    tw->tw_substate, 0, 0,
2023                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2024                    refcount_read(&tw->tw_refcnt), tw);
2025 }
2026
2027 static int tcp6_seq_show(struct seq_file *seq, void *v)
2028 {
2029         struct tcp_iter_state *st;
2030         struct sock *sk = v;
2031
2032         if (v == SEQ_START_TOKEN) {
2033                 seq_puts(seq,
2034                          "  sl  "
2035                          "local_address                         "
2036                          "remote_address                        "
2037                          "st tx_queue rx_queue tr tm->when retrnsmt"
2038                          "   uid  timeout inode\n");
2039                 goto out;
2040         }
2041         st = seq->private;
2042
2043         if (sk->sk_state == TCP_TIME_WAIT)
2044                 get_timewait6_sock(seq, v, st->num);
2045         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2046                 get_openreq6(seq, v, st->num);
2047         else
2048                 get_tcp6_sock(seq, v, st->num);
2049 out:
2050         return 0;
2051 }
2052
2053 static const struct seq_operations tcp6_seq_ops = {
2054         .show           = tcp6_seq_show,
2055         .start          = tcp_seq_start,
2056         .next           = tcp_seq_next,
2057         .stop           = tcp_seq_stop,
2058 };
2059
2060 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2061         .family         = AF_INET6,
2062 };
2063
2064 int __net_init tcp6_proc_init(struct net *net)
2065 {
2066         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2067                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2068                 return -ENOMEM;
2069         return 0;
2070 }
2071
2072 void tcp6_proc_exit(struct net *net)
2073 {
2074         remove_proc_entry("tcp6", net->proc_net);
2075 }
2076 #endif
2077
2078 struct proto tcpv6_prot = {
2079         .name                   = "TCPv6",
2080         .owner                  = THIS_MODULE,
2081         .close                  = tcp_close,
2082         .pre_connect            = tcp_v6_pre_connect,
2083         .connect                = tcp_v6_connect,
2084         .disconnect             = tcp_disconnect,
2085         .accept                 = inet_csk_accept,
2086         .ioctl                  = tcp_ioctl,
2087         .init                   = tcp_v6_init_sock,
2088         .destroy                = tcp_v6_destroy_sock,
2089         .shutdown               = tcp_shutdown,
2090         .setsockopt             = tcp_setsockopt,
2091         .getsockopt             = tcp_getsockopt,
2092         .keepalive              = tcp_set_keepalive,
2093         .recvmsg                = tcp_recvmsg,
2094         .sendmsg                = tcp_sendmsg,
2095         .sendpage               = tcp_sendpage,
2096         .backlog_rcv            = tcp_v6_do_rcv,
2097         .release_cb             = tcp_release_cb,
2098         .hash                   = inet6_hash,
2099         .unhash                 = inet_unhash,
2100         .get_port               = inet_csk_get_port,
2101         .enter_memory_pressure  = tcp_enter_memory_pressure,
2102         .leave_memory_pressure  = tcp_leave_memory_pressure,
2103         .stream_memory_free     = tcp_stream_memory_free,
2104         .sockets_allocated      = &tcp_sockets_allocated,
2105         .memory_allocated       = &tcp_memory_allocated,
2106         .memory_pressure        = &tcp_memory_pressure,
2107         .orphan_count           = &tcp_orphan_count,
2108         .sysctl_mem             = sysctl_tcp_mem,
2109         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2110         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2111         .max_header             = MAX_TCP_HEADER,
2112         .obj_size               = sizeof(struct tcp6_sock),
2113         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2114         .twsk_prot              = &tcp6_timewait_sock_ops,
2115         .rsk_prot               = &tcp6_request_sock_ops,
2116         .h.hashinfo             = &tcp_hashinfo,
2117         .no_autobind            = true,
2118         .diag_destroy           = tcp_abort,
2119 };
2120 EXPORT_SYMBOL_GPL(tcpv6_prot);
2121
2122 /* thinking of making this const? Don't.
2123  * early_demux can change based on sysctl.
2124  */
2125 static struct inet6_protocol tcpv6_protocol = {
2126         .early_demux    =       tcp_v6_early_demux,
2127         .early_demux_handler =  tcp_v6_early_demux,
2128         .handler        =       tcp_v6_rcv,
2129         .err_handler    =       tcp_v6_err,
2130         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2131 };
2132
2133 static struct inet_protosw tcpv6_protosw = {
2134         .type           =       SOCK_STREAM,
2135         .protocol       =       IPPROTO_TCP,
2136         .prot           =       &tcpv6_prot,
2137         .ops            =       &inet6_stream_ops,
2138         .flags          =       INET_PROTOSW_PERMANENT |
2139                                 INET_PROTOSW_ICSK,
2140 };
2141
2142 static int __net_init tcpv6_net_init(struct net *net)
2143 {
2144         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2145                                     SOCK_RAW, IPPROTO_TCP, net);
2146 }
2147
2148 static void __net_exit tcpv6_net_exit(struct net *net)
2149 {
2150         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2151 }
2152
2153 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2154 {
2155         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2156 }
2157
2158 static struct pernet_operations tcpv6_net_ops = {
2159         .init       = tcpv6_net_init,
2160         .exit       = tcpv6_net_exit,
2161         .exit_batch = tcpv6_net_exit_batch,
2162 };
2163
2164 int __init tcpv6_init(void)
2165 {
2166         int ret;
2167
2168         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2169         if (ret)
2170                 goto out;
2171
2172         /* register inet6 protocol */
2173         ret = inet6_register_protosw(&tcpv6_protosw);
2174         if (ret)
2175                 goto out_tcpv6_protocol;
2176
2177         ret = register_pernet_subsys(&tcpv6_net_ops);
2178         if (ret)
2179                 goto out_tcpv6_protosw;
2180
2181         ret = mptcpv6_init();
2182         if (ret)
2183                 goto out_tcpv6_pernet_subsys;
2184
2185 out:
2186         return ret;
2187
2188 out_tcpv6_pernet_subsys:
2189         unregister_pernet_subsys(&tcpv6_net_ops);
2190 out_tcpv6_protosw:
2191         inet6_unregister_protosw(&tcpv6_protosw);
2192 out_tcpv6_protocol:
2193         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2194         goto out;
2195 }
2196
2197 void tcpv6_exit(void)
2198 {
2199         unregister_pernet_subsys(&tcpv6_net_ops);
2200         inet6_unregister_protosw(&tcpv6_protosw);
2201         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2202 }