net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

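/* Cache the incoming route as the socket's RX dst so the established fast
 * path can skip a route lookup. The ifindex and route cookie are stored
 * alongside it so tcp_v6_do_rcv() can later validate the cached entry.
 */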
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

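/* The initial sequence number and timestamp offset are derived from the
 * address/port 4-tuple via keyed hashes (secure_tcpv6_seq() and
 * secure_tcpv6_ts_off()), so they are not easily predicted off-path.
 */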
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * out of the bounds specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

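/* Active open: validate the destination, hand v4-mapped destinations over
 * to tcp_v4_connect(), otherwise route the flow, pick a source address,
 * hash the socket and send the SYN.
 */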
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the
                         * indices must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

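/* PMTU handler: called directly from tcp_v6_err() when the socket is not
 * owned by the user, or deferred via the TCP_MTU_REDUCED_DEFERRED flag.
 * Syncs the reduced route MTU into the socket and retransmits at the
 * smaller MSS.
 */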
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

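/* ICMPv6 error handler. Looks up the socket the error refers to and either
 * propagates the error, updates PMTU (ICMPV6_PKT_TOOBIG), follows a
 * redirect, or reverts a spurious retransmit backoff (RFC 6069).
 */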
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check whether this ICMP message allows reverting the
                 * retransmit backoff (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

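/* Transmit a SYN-ACK for a pending request: route it if no dst was
 * supplied, build the segment with tcp_make_synack(), compute the checksum
 * and send it with the request's IPv6 options and the (possibly reflected)
 * traffic class.
 */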
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

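/* setsockopt(TCP_MD5SIG/TCP_MD5SIG_EXT) handler: validate the user-supplied
 * tcp_md5sig, resolve an optional prefix length and L3 master ifindex, and
 * add or delete the key, treating v4-mapped addresses as AF_INET keys.
 */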
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

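/* Feed the TCP pseudo-header (saddr, daddr, protocol, length) plus the TCP
 * header with a zeroed checksum field into the MD5 hash state.
 */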
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for established and request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

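/* Verify the TCP-MD5 option of an incoming segment against the key expected
 * for its source address. Returns true when the packet must be dropped:
 * missing option with a configured key, unexpected option, or hash mismatch.
 */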
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* sdif set means the packet ingressed via a device in an L3 domain,
         * and dif is set to the l3mdev
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

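/* Build and send a stateless reply segment (RST or ACK) to @skb, swapping
 * addresses and ports, optionally carrying timestamps, an MD5 signature
 * and an MPTCP reset option, and transmitting via the per-netns control
 * socket.
 */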
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when the reply is a RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

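/* Send a RST in reply to @skb. If no socket is attached, the listener (and
 * thus the MD5 key) is recovered from the segment itself, so a signed
 * connection never answers an unsigned or badly signed packet with a RST.
 */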
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /* The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the md5 hash of the
                 * found key; no RST is generated if the md5 hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

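/* Thin wrapper around tcp_v6_send_response() for non-RST replies, used for
 * the TIME_WAIT and request-socket ACKs below.
 */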
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

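/* Listener entry point for an incoming SYN. v4-mapped packets are diverted
 * to tcp_v4_conn_request(); non-unicast destinations and spoofed v4-mapped
 * source addresses are dropped before tcp_conn_request() does the real work.
 */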
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

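/* Create the child socket once the 3WHS (or Fast Open) completes. The
 * v4-mapped path clones a v4 child and retargets its ops; the native path
 * routes the request, copies addresses, IPv6 options and the MD5 key, and
 * hashes the child into the established table.
 */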
1233 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1234                                          struct request_sock *req,
1235                                          struct dst_entry *dst,
1236                                          struct request_sock *req_unhash,
1237                                          bool *own_req)
1238 {
1239         struct inet_request_sock *ireq;
1240         struct ipv6_pinfo *newnp;
1241         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1242         struct ipv6_txoptions *opt;
1243         struct inet_sock *newinet;
1244         bool found_dup_sk = false;
1245         struct tcp_sock *newtp;
1246         struct sock *newsk;
1247 #ifdef CONFIG_TCP_MD5SIG
1248         struct tcp_md5sig_key *key;
1249         int l3index;
1250 #endif
1251         struct flowi6 fl6;
1252
1253         if (skb->protocol == htons(ETH_P_IP)) {
1254                 /*
1255                  *      v6 mapped
1256                  */
1257
1258                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1259                                              req_unhash, own_req);
1260
1261                 if (!newsk)
1262                         return NULL;
1263
1264                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1265
1266                 newnp = tcp_inet6_sk(newsk);
1267                 newtp = tcp_sk(newsk);
1268
1269                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1270
1271                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1272
1273                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1274                 if (sk_is_mptcp(newsk))
1275                         mptcpv6_handle_mapped(newsk, true);
1276                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1277 #ifdef CONFIG_TCP_MD5SIG
1278                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1279 #endif
1280
1281                 newnp->ipv6_mc_list = NULL;
1282                 newnp->ipv6_ac_list = NULL;
1283                 newnp->ipv6_fl_list = NULL;
1284                 newnp->pktoptions  = NULL;
1285                 newnp->opt         = NULL;
1286                 newnp->mcast_oif   = inet_iif(skb);
1287                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1288                 newnp->rcv_flowinfo = 0;
1289                 if (np->repflow)
1290                         newnp->flow_label = 0;
1291
1292                 /*
1293                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1294                  * here, tcp_create_openreq_child now does this for us, see the comment in
1295                  * that function for the gory details. -acme
1296                  */
1297
1298                 /* It is tricky place. Until this moment IPv4 tcp
1299                    worked with IPv6 icsk.icsk_af_ops.
1300                    Sync it now.
1301                  */
1302                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1303
1304                 return newsk;
1305         }
1306
1307         ireq = inet_rsk(req);
1308
1309         if (sk_acceptq_is_full(sk))
1310                 goto out_overflow;
1311
1312         if (!dst) {
1313                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1314                 if (!dst)
1315                         goto out;
1316         }
1317
1318         newsk = tcp_create_openreq_child(sk, req, skb);
1319         if (!newsk)
1320                 goto out_nonewsk;
1321
1322         /*
1323          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1324          * count here, tcp_create_openreq_child now does this for us, see the
1325          * comment in that function for the gory details. -acme
1326          */
1327
1328         newsk->sk_gso_type = SKB_GSO_TCPV6;
1329         ip6_dst_store(newsk, dst, NULL, NULL);
1330         inet6_sk_rx_dst_set(newsk, skb);
1331
1332         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1333
1334         newtp = tcp_sk(newsk);
1335         newinet = inet_sk(newsk);
1336         newnp = tcp_inet6_sk(newsk);
1337
1338         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1339
1340         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1341         newnp->saddr = ireq->ir_v6_loc_addr;
1342         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1343         newsk->sk_bound_dev_if = ireq->ir_iif;
1344
1345         /* Now IPv6 options...
1346
1347            First: no IPv4 options.
1348          */
1349         newinet->inet_opt = NULL;
1350         newnp->ipv6_mc_list = NULL;
1351         newnp->ipv6_ac_list = NULL;
1352         newnp->ipv6_fl_list = NULL;
1353
1354         /* Clone RX bits */
1355         newnp->rxopt.all = np->rxopt.all;
1356
1357         newnp->pktoptions = NULL;
1358         newnp->opt        = NULL;
1359         newnp->mcast_oif  = tcp_v6_iif(skb);
1360         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1361         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1362         if (np->repflow)
1363                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1364
1365         /* Set ToS of the new socket based upon the value of incoming SYN.
1366          * ECT bits are set later in tcp_init_transfer().
1367          */
1368         if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1369                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1370
1371         /* Clone native IPv6 options from listening socket (if any)
1372
1373            Yes, keeping reference count would be much more clever,
1374            but we make one more one thing there: reattach optmem
1375            to newsk.
1376          */
1377         opt = ireq->ipv6_opt;
1378         if (!opt)
1379                 opt = rcu_dereference(np->opt);
1380         if (opt) {
1381                 opt = ipv6_dup_options(newsk, opt);
1382                 RCU_INIT_POINTER(newnp->opt, opt);
1383         }
1384         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1385         if (opt)
1386                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1387                                                     opt->opt_flen;
1388
1389         tcp_ca_openreq_child(newsk, dst);
1390
1391         tcp_sync_mss(newsk, dst_mtu(dst));
1392         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1393
1394         tcp_initialize_rcv_mss(newsk);
1395
1396         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1397         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1398
1399 #ifdef CONFIG_TCP_MD5SIG
1400         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1401
1402         /* Copy over the MD5 key from the original socket */
1403         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1404         if (key) {
1405                 /* We're using one, so create a matching key
1406                  * on the newsk structure. If we fail to get
1407                  * memory, then we end up not copying the key
1408                  * across. Shucks.
1409                  */
1410                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1411                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1412                                sk_gfp_mask(sk, GFP_ATOMIC));
1413         }
1414 #endif
1415
1416         if (__inet_inherit_port(sk, newsk) < 0) {
1417                 inet_csk_prepare_forced_close(newsk);
1418                 tcp_done(newsk);
1419                 goto out;
1420         }
1421         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1422                                        &found_dup_sk);
1423         if (*own_req) {
1424                 tcp_move_syn(newtp, req);
1425
1426                 /* Clone pktoptions received with SYN, if we own the req */
1427                 if (ireq->pktopts) {
1428                         newnp->pktoptions = skb_clone(ireq->pktopts,
1429                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1430                         consume_skb(ireq->pktopts);
1431                         ireq->pktopts = NULL;
1432                         if (newnp->pktoptions) {
1433                                 tcp_v6_restore_cb(newnp->pktoptions);
1434                                 skb_set_owner_r(newnp->pktoptions, newsk);
1435                         }
1436                 }
1437         } else {
1438                 if (!req_unhash && found_dup_sk) {
1439                         /* This code path should only be executed in the
1440                          * syncookie case only
1441                          */
1442                         bh_unlock_sock(newsk);
1443                         sock_put(newsk);
1444                         newsk = NULL;
1445                 }
1446         }
1447
1448         return newsk;
1449
1450 out_overflow:
1451         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1452 out_nonewsk:
1453         dst_release(dst);
1454 out:
1455         tcp_listendrop(sk);
1456         return NULL;
1457 }
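/* The MD5 key inherited by the child socket above is one that userspace
 * installed on the listener with the TCP_MD5SIG socket option. A minimal,
 * hypothetical userspace sketch (peer address and key are made up;
 * error handling omitted):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 peer = { .sin6_family = AF_INET6 };
 *
 *	inet_pton(AF_INET6, "2001:db8::1", &peer.sin6_addr);
 *	memcpy(&md5.tcpm_addr, &peer, sizeof(peer));
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(listen_fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */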
1458
1459 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1460                                                            u32));
1461 /* The socket must have its spinlock held when we get
1462  * here, unless it is a TCP_LISTEN socket.
1463  *
1464  * We have a potential double-lock case here, so even when
1465  * doing backlog processing we use the BH locking scheme.
1466  * This is because we cannot sleep with the original spinlock
1467  * held.
1468  */
1469 INDIRECT_CALLABLE_SCOPE
1470 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1471 {
1472         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1473         struct sk_buff *opt_skb = NULL;
1474         struct tcp_sock *tp;
1475
1476         /* Imagine: an IPv6 socket receives an IPv4 packet, which
1477          * goes to the IPv4 receive handler and is backlogged.
1478          * From the backlog it always ends up here. Kerboom...
1479          * Fortunately, tcp_rcv_established and rcv_established
1480          * handle such packets correctly, but that is not the case
1481          * with tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1482          */
1483
1484         if (skb->protocol == htons(ETH_P_IP))
1485                 return tcp_v4_do_rcv(sk, skb);
1486
1487         /*
1488          *      Socket locking is done here for SMP purposes, as backlog
1489          *      rcv is currently called with BH processing disabled.
1490          */
1491
1492         /* Do Stevens' IPV6_PKTOPTIONS.
1493          *
1494          * Yes, guys, this is the only place in our code where we
1495          * can implement it without affecting IPv4.
1496          * The rest of the code is protocol independent,
1497          * and I do not like the idea of uglifying IPv4.
1498          *
1499          * Actually, the whole idea behind IPV6_PKTOPTIONS does not
1500          * look very well thought out. For now we latch the options
1501          * received in the last packet enqueued by TCP. Feel free
1502          * to propose a better solution.       --ANK (980728)
1503          */
1505         if (np->rxopt.all)
1506                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1507
1508         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1509                 struct dst_entry *dst;
1510
1511                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1512                                                 lockdep_sock_is_held(sk));
1513
1514                 sock_rps_save_rxhash(sk, skb);
1515                 sk_mark_napi_id(sk, skb);
1516                 if (dst) {
1517                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1518                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1519                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1520                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1521                                 dst_release(dst);
1522                         }
1523                 }
1524
1525                 tcp_rcv_established(sk, skb);
1526                 if (opt_skb)
1527                         goto ipv6_pktoptions;
1528                 return 0;
1529         }
1530
1531         if (tcp_checksum_complete(skb))
1532                 goto csum_err;
1533
1534         if (sk->sk_state == TCP_LISTEN) {
1535                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1536
1537                 if (!nsk)
1538                         goto discard;
1539
1540                 if (nsk != sk) {
1541                         if (tcp_child_process(sk, nsk, skb))
1542                                 goto reset;
1543                         if (opt_skb)
1544                                 __kfree_skb(opt_skb);
1545                         return 0;
1546                 }
1547         } else
1548                 sock_rps_save_rxhash(sk, skb);
1549
1550         if (tcp_rcv_state_process(sk, skb))
1551                 goto reset;
1552         if (opt_skb)
1553                 goto ipv6_pktoptions;
1554         return 0;
1555
1556 reset:
1557         tcp_v6_send_reset(sk, skb);
1558 discard:
1559         if (opt_skb)
1560                 __kfree_skb(opt_skb);
1561         kfree_skb(skb);
1562         return 0;
1563 csum_err:
1564         trace_tcp_bad_csum(skb);
1565         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1566         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1567         goto discard;
1568
1570 ipv6_pktoptions:
1571         /* What is going on here? We latch the pktoptions only when:
1572          *
1573          * 1. the skb was enqueued by TCP;
1574          * 2. the skb was added to the tail of the read queue, not out of order;
1575          * 3. the socket is not in a passive state;
1576          * 4. and, finally, it really contains options the user wants to receive.
1577          */
1578         tp = tcp_sk(sk);
1579         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1580             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1581                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1582                         np->mcast_oif = tcp_v6_iif(opt_skb);
1583                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1584                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1585                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1586                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1587                 if (np->repflow)
1588                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1589                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1590                         skb_set_owner_r(opt_skb, sk);
1591                         tcp_v6_restore_cb(opt_skb);
1592                         opt_skb = xchg(&np->pktoptions, opt_skb);
1593                 } else {
1594                         __kfree_skb(opt_skb);
1595                         opt_skb = xchg(&np->pktoptions, NULL);
1596                 }
1597         }
1598
1599         consume_skb(opt_skb);
1600         return 0;
1601 }
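/* The pktoptions latched above are consumed from userspace. A minimal
 * sketch, assuming a connected TCP/IPv6 socket fd: enable one of the
 * rxopt bits, then fetch the options saved from the most recent segment
 * through the legacy RFC 2292 getsockopt() interface:
 *
 *	int on = 1;
 *	char cbuf[256];
 *	socklen_t clen = sizeof(cbuf);
 *
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
 *	getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &clen);
 */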
1602
1603 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1604                            const struct tcphdr *th)
1605 {
1606         /* This is tricky: we move IP6CB to its correct location inside
1607          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1608          * _decode_session6() uses IP6CB().
1609          * barrier() makes sure the compiler won't play aliasing games.
1610          */
1611         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1612                 sizeof(struct inet6_skb_parm));
1613         barrier();
1614
1615         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1616         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1617                                     skb->len - th->doff*4);
1618         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1619         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1620         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1621         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1622         TCP_SKB_CB(skb)->sacked = 0;
1623         TCP_SKB_CB(skb)->has_rxtstamp =
1624                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1625 }
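/* Worked example for the end_seq computation above: a segment with
 * seq = 1000, th->doff * 4 = 32 header bytes, skb->len = 132 (so 100
 * payload bytes) and FIN set yields end_seq = 1000 + 0 + 1 + 100 = 1101,
 * because SYN and FIN each consume one unit of sequence space.
 */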
1626
1627 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1628 {
1629         int sdif = inet6_sdif(skb);
1630         int dif = inet6_iif(skb);
1631         const struct tcphdr *th;
1632         const struct ipv6hdr *hdr;
1633         bool refcounted;
1634         struct sock *sk;
1635         int ret;
1636         struct net *net = dev_net(skb->dev);
1637
1638         if (skb->pkt_type != PACKET_HOST)
1639                 goto discard_it;
1640
1641         /*
1642          *      Count it even if it's bad.
1643          */
1644         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1645
1646         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1647                 goto discard_it;
1648
1649         th = (const struct tcphdr *)skb->data;
1650
1651         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1652                 goto bad_packet;
1653         if (!pskb_may_pull(skb, th->doff*4))
1654                 goto discard_it;
1655
1656         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1657                 goto csum_error;
1658
1659         th = (const struct tcphdr *)skb->data;
1660         hdr = ipv6_hdr(skb);
1661
1662 lookup:
1663         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1664                                 th->source, th->dest, inet6_iif(skb), sdif,
1665                                 &refcounted);
1666         if (!sk)
1667                 goto no_tcp_socket;
1668
1669 process:
1670         if (sk->sk_state == TCP_TIME_WAIT)
1671                 goto do_time_wait;
1672
1673         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1674                 struct request_sock *req = inet_reqsk(sk);
1675                 bool req_stolen = false;
1676                 struct sock *nsk;
1677
1678                 sk = req->rsk_listener;
1679                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1680                         sk_drops_add(sk, skb);
1681                         reqsk_put(req);
1682                         goto discard_it;
1683                 }
1684                 if (tcp_checksum_complete(skb)) {
1685                         reqsk_put(req);
1686                         goto csum_error;
1687                 }
1688                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1689                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1690                         if (!nsk) {
1691                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1692                                 goto lookup;
1693                         }
1694                         sk = nsk;
1695                         /* reuseport_migrate_sock() has already taken one
1696                          * sk_refcnt reference before returning.
1697                          */
1698                 } else {
1699                         sock_hold(sk);
1700                 }
1701                 refcounted = true;
1702                 nsk = NULL;
1703                 if (!tcp_filter(sk, skb)) {
1704                         th = (const struct tcphdr *)skb->data;
1705                         hdr = ipv6_hdr(skb);
1706                         tcp_v6_fill_cb(skb, hdr, th);
1707                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1708                 }
1709                 if (!nsk) {
1710                         reqsk_put(req);
1711                         if (req_stolen) {
1712                                 /* Another CPU got exclusive access to req
1713                                  * and created a full-blown socket.
1714                                  * Try to feed this packet to that socket
1715                                  * instead of discarding it.
1716                                  */
1717                                 tcp_v6_restore_cb(skb);
1718                                 sock_put(sk);
1719                                 goto lookup;
1720                         }
1721                         goto discard_and_relse;
1722                 }
1723                 if (nsk == sk) {
1724                         reqsk_put(req);
1725                         tcp_v6_restore_cb(skb);
1726                 } else if (tcp_child_process(sk, nsk, skb)) {
1727                         tcp_v6_send_reset(nsk, skb);
1728                         goto discard_and_relse;
1729                 } else {
1730                         sock_put(sk);
1731                         return 0;
1732                 }
1733         }
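        /* The migration branch above runs only when the listener that
         * owned the request is no longer in TCP_LISTEN (e.g. it was
         * closed) and moving pending requests across a SO_REUSEPORT
         * group is allowed, governed by the net.ipv4.tcp_migrate_req
         * sysctl (off by default). A hypothetical sketch of the
         * listener-side setup:
         *
         *	int one = 1;
         *	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
         */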
1734
1735         if (static_branch_unlikely(&ip6_min_hopcount)) {
1736                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1737                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1738                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1739                         goto discard_and_relse;
1740                 }
1741         }
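        /* The floor tested above comes from the IPV6_MINHOPCOUNT socket
         * option, the IPv6 flavour of the generalized TTL security
         * mechanism. A minimal, hypothetical userspace sketch that only
         * accepts peers at most one hop away:
         *
         *	int minhop = 255;
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *		   &minhop, sizeof(minhop));
         */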
1742
1743         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1744                 goto discard_and_relse;
1745
1746         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1747                 goto discard_and_relse;
1748
1749         if (tcp_filter(sk, skb))
1750                 goto discard_and_relse;
1751         th = (const struct tcphdr *)skb->data;
1752         hdr = ipv6_hdr(skb);
1753         tcp_v6_fill_cb(skb, hdr, th);
1754
1755         skb->dev = NULL;
1756
1757         if (sk->sk_state == TCP_LISTEN) {
1758                 ret = tcp_v6_do_rcv(sk, skb);
1759                 goto put_and_return;
1760         }
1761
1762         sk_incoming_cpu_update(sk);
1763
1764         sk_defer_free_flush(sk);
1765         bh_lock_sock_nested(sk);
1766         tcp_segs_in(tcp_sk(sk), skb);
1767         ret = 0;
1768         if (!sock_owned_by_user(sk)) {
1769                 ret = tcp_v6_do_rcv(sk, skb);
1770         } else {
1771                 if (tcp_add_backlog(sk, skb))
1772                         goto discard_and_relse;
1773         }
1774         bh_unlock_sock(sk);
1775 put_and_return:
1776         if (refcounted)
1777                 sock_put(sk);
1778         return ret ? -1 : 0;
1779
1780 no_tcp_socket:
1781         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1782                 goto discard_it;
1783
1784         tcp_v6_fill_cb(skb, hdr, th);
1785
1786         if (tcp_checksum_complete(skb)) {
1787 csum_error:
1788                 trace_tcp_bad_csum(skb);
1789                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1790 bad_packet:
1791                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1792         } else {
1793                 tcp_v6_send_reset(NULL, skb);
1794         }
1795
1796 discard_it:
1797         kfree_skb(skb);
1798         return 0;
1799
1800 discard_and_relse:
1801         sk_drops_add(sk, skb);
1802         if (refcounted)
1803                 sock_put(sk);
1804         goto discard_it;
1805
1806 do_time_wait:
1807         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1808                 inet_twsk_put(inet_twsk(sk));
1809                 goto discard_it;
1810         }
1811
1812         tcp_v6_fill_cb(skb, hdr, th);
1813
1814         if (tcp_checksum_complete(skb)) {
1815                 inet_twsk_put(inet_twsk(sk));
1816                 goto csum_error;
1817         }
1818
1819         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1820         case TCP_TW_SYN:
1821         {
1822                 struct sock *sk2;
1823
1824                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1825                                             skb, __tcp_hdrlen(th),
1826                                             &ipv6_hdr(skb)->saddr, th->source,
1827                                             &ipv6_hdr(skb)->daddr,
1828                                             ntohs(th->dest),
1829                                             tcp_v6_iif_l3_slave(skb),
1830                                             sdif);
1831                 if (sk2) {
1832                         struct inet_timewait_sock *tw = inet_twsk(sk);
1833                         inet_twsk_deschedule_put(tw);
1834                         sk = sk2;
1835                         tcp_v6_restore_cb(skb);
1836                         refcounted = false;
1837                         goto process;
1838                 }
1839         }
1840                 /* to ACK */
1841                 fallthrough;
1842         case TCP_TW_ACK:
1843                 tcp_v6_timewait_ack(sk, skb);
1844                 break;
1845         case TCP_TW_RST:
1846                 tcp_v6_send_reset(sk, skb);
1847                 inet_twsk_deschedule_put(inet_twsk(sk));
1848                 goto discard_it;
1849         case TCP_TW_SUCCESS:
1850                 ;
1851         }
1852         goto discard_it;
1853 }
1854
1855 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1856 {
1857         const struct ipv6hdr *hdr;
1858         const struct tcphdr *th;
1859         struct sock *sk;
1860
1861         if (skb->pkt_type != PACKET_HOST)
1862                 return;
1863
1864         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1865                 return;
1866
1867         hdr = ipv6_hdr(skb);
1868         th = tcp_hdr(skb);
1869
1870         if (th->doff < sizeof(struct tcphdr) / 4)
1871                 return;
1872
1873         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1874         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1875                                         &hdr->saddr, th->source,
1876                                         &hdr->daddr, ntohs(th->dest),
1877                                         inet6_iif(skb), inet6_sdif(skb));
1878         if (sk) {
1879                 skb->sk = sk;
1880                 skb->destructor = sock_edemux;
1881                 if (sk_fullsock(sk)) {
1882                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1883
1884                         if (dst)
1885                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1886                         if (dst &&
1887                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1888                                 skb_dst_set_noref(skb, dst);
1889                 }
1890         }
1891 }
1892
1893 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1894         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1895         .twsk_unique    = tcp_twsk_unique,
1896         .twsk_destructor = tcp_twsk_destructor,
1897 };
1898
1899 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1900 {
1901         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1902 }
1903
1904 const struct inet_connection_sock_af_ops ipv6_specific = {
1905         .queue_xmit        = inet6_csk_xmit,
1906         .send_check        = tcp_v6_send_check,
1907         .rebuild_header    = inet6_sk_rebuild_header,
1908         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1909         .conn_request      = tcp_v6_conn_request,
1910         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1911         .net_header_len    = sizeof(struct ipv6hdr),
1912         .net_frag_header_len = sizeof(struct frag_hdr),
1913         .setsockopt        = ipv6_setsockopt,
1914         .getsockopt        = ipv6_getsockopt,
1915         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1916         .sockaddr_len      = sizeof(struct sockaddr_in6),
1917         .mtu_reduced       = tcp_v6_mtu_reduced,
1918 };
1919
1920 #ifdef CONFIG_TCP_MD5SIG
1921 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1922         .md5_lookup     =       tcp_v6_md5_lookup,
1923         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1924         .md5_parse      =       tcp_v6_parse_md5_keys,
1925 };
1926 #endif
1927
1928 /*
1929  *      TCP over IPv4 via INET6 API
1930  */
1931 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1932         .queue_xmit        = ip_queue_xmit,
1933         .send_check        = tcp_v4_send_check,
1934         .rebuild_header    = inet_sk_rebuild_header,
1935         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1936         .conn_request      = tcp_v6_conn_request,
1937         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1938         .net_header_len    = sizeof(struct iphdr),
1939         .setsockopt        = ipv6_setsockopt,
1940         .getsockopt        = ipv6_getsockopt,
1941         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1942         .sockaddr_len      = sizeof(struct sockaddr_in6),
1943         .mtu_reduced       = tcp_v4_mtu_reduced,
1944 };
1945
1946 #ifdef CONFIG_TCP_MD5SIG
1947 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1948         .md5_lookup     =       tcp_v4_md5_lookup,
1949         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1950         .md5_parse      =       tcp_v6_parse_md5_keys,
1951 };
1952 #endif
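/* The mapped ops above take over when an AF_INET6 socket talks to a
 * v4-mapped address: tcp_v6_connect() switches icsk_af_ops from
 * ipv6_specific to ipv6_mapped in that case. A hypothetical userspace
 * sketch that triggers the switch (error handling omitted):
 *
 *	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
 *				    .sin6_port = htons(80) };
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */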
1953
1954 /* NOTE: A lot of things are set to zero explicitly by the call to
1955  *       sk_alloc(), so they need not be done here.
1956  */
1957 static int tcp_v6_init_sock(struct sock *sk)
1958 {
1959         struct inet_connection_sock *icsk = inet_csk(sk);
1960
1961         tcp_init_sock(sk);
1962
1963         icsk->icsk_af_ops = &ipv6_specific;
1964
1965 #ifdef CONFIG_TCP_MD5SIG
1966         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1967 #endif
1968
1969         return 0;
1970 }
1971
1972 static void tcp_v6_destroy_sock(struct sock *sk)
1973 {
1974         tcp_v4_destroy_sock(sk);
1975         inet6_destroy_sock(sk);
1976 }
1977
1978 #ifdef CONFIG_PROC_FS
1979 /* Proc filesystem TCPv6 sock list dumping. */
1980 static void get_openreq6(struct seq_file *seq,
1981                          const struct request_sock *req, int i)
1982 {
1983         long ttd = req->rsk_timer.expires - jiffies;
1984         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1985         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1986
1987         if (ttd < 0)
1988                 ttd = 0;
1989
1990         seq_printf(seq,
1991                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1992                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1993                    i,
1994                    src->s6_addr32[0], src->s6_addr32[1],
1995                    src->s6_addr32[2], src->s6_addr32[3],
1996                    inet_rsk(req)->ir_num,
1997                    dest->s6_addr32[0], dest->s6_addr32[1],
1998                    dest->s6_addr32[2], dest->s6_addr32[3],
1999                    ntohs(inet_rsk(req)->ir_rmt_port),
2000                    TCP_SYN_RECV,
2001                    0, 0, /* could print option size, but that is af dependent. */
2002                    1,   /* timers active (only the expire timer) */
2003                    jiffies_to_clock_t(ttd),
2004                    req->num_timeout,
2005                    from_kuid_munged(seq_user_ns(seq),
2006                                     sock_i_uid(req->rsk_listener)),
2007                    0,  /* non standard timer */
2008                    0, /* open_requests have no inode */
2009                    0, req);
2010 }
2011
2012 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2013 {
2014         const struct in6_addr *dest, *src;
2015         __u16 destp, srcp;
2016         int timer_active;
2017         unsigned long timer_expires;
2018         const struct inet_sock *inet = inet_sk(sp);
2019         const struct tcp_sock *tp = tcp_sk(sp);
2020         const struct inet_connection_sock *icsk = inet_csk(sp);
2021         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2022         int rx_queue;
2023         int state;
2024
2025         dest  = &sp->sk_v6_daddr;
2026         src   = &sp->sk_v6_rcv_saddr;
2027         destp = ntohs(inet->inet_dport);
2028         srcp  = ntohs(inet->inet_sport);
2029
2030         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2031             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2032             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2033                 timer_active    = 1;
2034                 timer_expires   = icsk->icsk_timeout;
2035         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2036                 timer_active    = 4;
2037                 timer_expires   = icsk->icsk_timeout;
2038         } else if (timer_pending(&sp->sk_timer)) {
2039                 timer_active    = 2;
2040                 timer_expires   = sp->sk_timer.expires;
2041         } else {
2042                 timer_active    = 0;
2043                 timer_expires = jiffies;
2044         }
2045
2046         state = inet_sk_state_load(sp);
2047         if (state == TCP_LISTEN)
2048                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2049         else
2050                 /* Because we don't lock the socket,
2051                  * we might find a transient negative value.
2052                  */
2053                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2054                                       READ_ONCE(tp->copied_seq), 0);
2055
2056         seq_printf(seq,
2057                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2058                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2059                    i,
2060                    src->s6_addr32[0], src->s6_addr32[1],
2061                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2062                    dest->s6_addr32[0], dest->s6_addr32[1],
2063                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2064                    state,
2065                    READ_ONCE(tp->write_seq) - tp->snd_una,
2066                    rx_queue,
2067                    timer_active,
2068                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2069                    icsk->icsk_retransmits,
2070                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2071                    icsk->icsk_probes_out,
2072                    sock_i_ino(sp),
2073                    refcount_read(&sp->sk_refcnt), sp,
2074                    jiffies_to_clock_t(icsk->icsk_rto),
2075                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2076                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2077                    tp->snd_cwnd,
2078                    state == TCP_LISTEN ?
2079                         fastopenq->max_qlen :
2080                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2081                    );
2082 }
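/* A minimal sketch of consuming the format produced above from
 * userspace (hypothetical file handling; only the leading fields are
 * parsed, addresses are left as raw hex strings):
 *
 *	char line[512], laddr[33], raddr[33];
 *	unsigned int lport, rport, state;
 *	FILE *f = fopen("/proc/net/tcp6", "r");
 *
 *	fgets(line, sizeof(line), f);           (skip the header line)
 *	while (fgets(line, sizeof(line), f))
 *		sscanf(line, " %*d: %32[0-9A-Fa-f]:%x %32[0-9A-Fa-f]:%x %x",
 *		       laddr, &lport, raddr, &rport, &state);
 */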
2083
2084 static void get_timewait6_sock(struct seq_file *seq,
2085                                struct inet_timewait_sock *tw, int i)
2086 {
2087         long delta = tw->tw_timer.expires - jiffies;
2088         const struct in6_addr *dest, *src;
2089         __u16 destp, srcp;
2090
2091         dest = &tw->tw_v6_daddr;
2092         src  = &tw->tw_v6_rcv_saddr;
2093         destp = ntohs(tw->tw_dport);
2094         srcp  = ntohs(tw->tw_sport);
2095
2096         seq_printf(seq,
2097                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2098                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2099                    i,
2100                    src->s6_addr32[0], src->s6_addr32[1],
2101                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2102                    dest->s6_addr32[0], dest->s6_addr32[1],
2103                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2104                    tw->tw_substate, 0, 0,
2105                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2106                    refcount_read(&tw->tw_refcnt), tw);
2107 }
2108
2109 static int tcp6_seq_show(struct seq_file *seq, void *v)
2110 {
2111         struct tcp_iter_state *st;
2112         struct sock *sk = v;
2113
2114         if (v == SEQ_START_TOKEN) {
2115                 seq_puts(seq,
2116                          "  sl  "
2117                          "local_address                         "
2118                          "remote_address                        "
2119                          "st tx_queue rx_queue tr tm->when retrnsmt"
2120                          "   uid  timeout inode\n");
2121                 goto out;
2122         }
2123         st = seq->private;
2124
2125         if (sk->sk_state == TCP_TIME_WAIT)
2126                 get_timewait6_sock(seq, v, st->num);
2127         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2128                 get_openreq6(seq, v, st->num);
2129         else
2130                 get_tcp6_sock(seq, v, st->num);
2131 out:
2132         return 0;
2133 }
2134
2135 static const struct seq_operations tcp6_seq_ops = {
2136         .show           = tcp6_seq_show,
2137         .start          = tcp_seq_start,
2138         .next           = tcp_seq_next,
2139         .stop           = tcp_seq_stop,
2140 };
2141
2142 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2143         .family         = AF_INET6,
2144 };
2145
2146 int __net_init tcp6_proc_init(struct net *net)
2147 {
2148         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2149                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2150                 return -ENOMEM;
2151         return 0;
2152 }
2153
2154 void tcp6_proc_exit(struct net *net)
2155 {
2156         remove_proc_entry("tcp6", net->proc_net);
2157 }
2158 #endif
2159
2160 struct proto tcpv6_prot = {
2161         .name                   = "TCPv6",
2162         .owner                  = THIS_MODULE,
2163         .close                  = tcp_close,
2164         .pre_connect            = tcp_v6_pre_connect,
2165         .connect                = tcp_v6_connect,
2166         .disconnect             = tcp_disconnect,
2167         .accept                 = inet_csk_accept,
2168         .ioctl                  = tcp_ioctl,
2169         .init                   = tcp_v6_init_sock,
2170         .destroy                = tcp_v6_destroy_sock,
2171         .shutdown               = tcp_shutdown,
2172         .setsockopt             = tcp_setsockopt,
2173         .getsockopt             = tcp_getsockopt,
2174         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2175         .keepalive              = tcp_set_keepalive,
2176         .recvmsg                = tcp_recvmsg,
2177         .sendmsg                = tcp_sendmsg,
2178         .sendpage               = tcp_sendpage,
2179         .backlog_rcv            = tcp_v6_do_rcv,
2180         .release_cb             = tcp_release_cb,
2181         .hash                   = inet6_hash,
2182         .unhash                 = inet_unhash,
2183         .get_port               = inet_csk_get_port,
2184         .put_port               = inet_put_port,
2185 #ifdef CONFIG_BPF_SYSCALL
2186         .psock_update_sk_prot   = tcp_bpf_update_proto,
2187 #endif
2188         .enter_memory_pressure  = tcp_enter_memory_pressure,
2189         .leave_memory_pressure  = tcp_leave_memory_pressure,
2190         .stream_memory_free     = tcp_stream_memory_free,
2191         .sockets_allocated      = &tcp_sockets_allocated,
2192         .memory_allocated       = &tcp_memory_allocated,
2193         .memory_pressure        = &tcp_memory_pressure,
2194         .orphan_count           = &tcp_orphan_count,
2195         .sysctl_mem             = sysctl_tcp_mem,
2196         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2197         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2198         .max_header             = MAX_TCP_HEADER,
2199         .obj_size               = sizeof(struct tcp6_sock),
2200         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2201         .twsk_prot              = &tcp6_timewait_sock_ops,
2202         .rsk_prot               = &tcp6_request_sock_ops,
2203         .h.hashinfo             = &tcp_hashinfo,
2204         .no_autobind            = true,
2205         .diag_destroy           = tcp_abort,
2206 };
2207 EXPORT_SYMBOL_GPL(tcpv6_prot);
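/* A rough sketch of how the ops table above is driven from userspace,
 * assuming a hypothetical client (error handling omitted):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);	       .init
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));     .connect
 *	send(fd, buf, len, 0);				       .sendmsg
 *	recv(fd, rbuf, sizeof(rbuf), 0);		       .recvmsg
 *	close(fd);					       .close
 */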
2208
2209 /* Thinking of making this const? Don't:
2210  * early_demux can change based on a sysctl.
2211  */
2212 static struct inet6_protocol tcpv6_protocol = {
2213         .early_demux    =       tcp_v6_early_demux,
2214         .early_demux_handler =  tcp_v6_early_demux,
2215         .handler        =       tcp_v6_rcv,
2216         .err_handler    =       tcp_v6_err,
2217         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2218 };
2219
2220 static struct inet_protosw tcpv6_protosw = {
2221         .type           =       SOCK_STREAM,
2222         .protocol       =       IPPROTO_TCP,
2223         .prot           =       &tcpv6_prot,
2224         .ops            =       &inet6_stream_ops,
2225         .flags          =       INET_PROTOSW_PERMANENT |
2226                                 INET_PROTOSW_ICSK,
2227 };
2228
2229 static int __net_init tcpv6_net_init(struct net *net)
2230 {
2231         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2232                                     SOCK_RAW, IPPROTO_TCP, net);
2233 }
2234
2235 static void __net_exit tcpv6_net_exit(struct net *net)
2236 {
2237         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2238 }
2239
2240 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2241 {
2242         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2243 }
2244
2245 static struct pernet_operations tcpv6_net_ops = {
2246         .init       = tcpv6_net_init,
2247         .exit       = tcpv6_net_exit,
2248         .exit_batch = tcpv6_net_exit_batch,
2249 };
2250
2251 int __init tcpv6_init(void)
2252 {
2253         int ret;
2254
2255         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2256         if (ret)
2257                 goto out;
2258
2259         /* register the TCPv6 socket interface (protosw) */
2260         ret = inet6_register_protosw(&tcpv6_protosw);
2261         if (ret)
2262                 goto out_tcpv6_protocol;
2263
2264         ret = register_pernet_subsys(&tcpv6_net_ops);
2265         if (ret)
2266                 goto out_tcpv6_protosw;
2267
2268         ret = mptcpv6_init();
2269         if (ret)
2270                 goto out_tcpv6_pernet_subsys;
2271
2272 out:
2273         return ret;
2274
2275 out_tcpv6_pernet_subsys:
2276         unregister_pernet_subsys(&tcpv6_net_ops);
2277 out_tcpv6_protosw:
2278         inet6_unregister_protosw(&tcpv6_protosw);
2279 out_tcpv6_protocol:
2280         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2281         goto out;
2282 }
2283
2284 void tcpv6_exit(void)
2285 {
2286         unregister_pernet_subsys(&tcpv6_net_ops);
2287         inet6_unregister_protosw(&tcpv6_protosw);
2288         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2289 }