// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      to a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                sk->sk_rx_dst = dst;
                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
                tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
        }
}

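/* The initial sequence number and the timestamp offset of a new
 * connection are derived from the address/port four-tuple with a keyed
 * hash (RFC 6528 style, see secure_tcpv6_seq()), so they are
 * unpredictable to off-path attackers.
 */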
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * out of the bounds specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

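/* Active open: validate the destination, hand v4-mapped addresses off to
 * tcp_v4_connect(), route the flow, bind a source address and local port,
 * and finally send the SYN via tcp_connect().
 */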
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

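/* Apply a PMTU update stashed in tp->mtu_info: shrink our MSS and
 * retransmit outstanding data if the cached path MTU became smaller.
 */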
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * The check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

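/* ICMPv6 error handler: map the quoted TCP segment back to its socket and
 * react to the error (PMTU update, redirect, or connection abort).
 */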
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }


        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* check if this ICMP message allows revert of backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}


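/* Send a SYN-ACK for a pending connection request, routing it with the
 * listener's IPv6 options and reflecting the SYN's ToS when the
 * tcp_reflect_tos sysctl is enabled.
 */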
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        kfree_skb(inet_rsk(req)->pktopts);
}

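/* TCP MD5 signatures (RFC 2385): per-peer keys, configured with the
 * TCP_MD5SIG{,_EXT} socket options, are used to sign the pseudo-header,
 * the TCP header (options excluded) and the payload of every segment.
 */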
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference whether l3index got set outside of rcu;
                 * right now the device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

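/* Hash the IPv6 pseudo-header and the TCP header with its checksum field
 * zeroed; TCP options are excluded, and the payload and the key are mixed
 * in by the callers.
 */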
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* If sdif is set, the packet ingressed via a device in an L3 domain,
         * and dif is set to the l3mdev.
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

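/* Initialize an incoming connection request from the SYN: record the
 * address pair, pin link-local peers to the ingress interface, and keep
 * the SYN skb around when the listener asked for packet options.
 */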
static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

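/* Build and send a bare ACK or RST on a freshly allocated skb, routed by
 * the addresses of the packet we are replying to and transmitted through
 * the per-netns control socket.
 */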
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;
        buff->csum = 0;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when the reply is an RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

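        /* RFC 793: never answer a reset with another reset. */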
        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* If sdif is set, the packet ingressed via a device in an L3
                 * domain, and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the md5 hash of the
                 * found key, and no RST is generated if the hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* If sdif is set, the packet ingressed via a device in an L3
                 * domain, and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

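/* Send a pure ACK (no RST) on behalf of a timewait or request socket. */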
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


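/* In the LISTEN state a non-SYN segment may be the ACK that completes a
 * syncookie handshake; cookie_v6_check() validates it and, on success,
 * returns the newly created child socket.
 */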
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

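/* Create the child socket for an accepted connection: the v4-mapped case
 * is delegated to tcp_v4_syn_recv_sock(), while the native case copies
 * options and keys from the listener and hashes the child into the
 * established table.
 */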
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newinet = inet_sk(newsk);
                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 tcp
                   code worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much cleverer, but we do
           one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone(ireq->pktopts,
                                                      sk_gfp_mask(sk, GFP_ATOMIC));
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions) {
                                tcp_v6_restore_cb(newnp->pktoptions);
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should be executed only in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
                                                           u32));
1457 /* The socket must have its spinlock held when we get
1458  * here, unless it is a TCP_LISTEN socket.
1459  *
1460  * We have a potential double-lock case here, so even when
1461  * doing backlog processing we use the BH locking scheme.
1462  * This is because we cannot sleep with the original spinlock
1463  * held.
1464  */
1465 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1466 {
1467         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1468         struct sk_buff *opt_skb = NULL;
1469         struct tcp_sock *tp;
1470
1471         /* Imagine: the socket is IPv6, but an IPv4 packet arrives,
1472            goes to the IPv4 receive handler and is backlogged.
1473            From the backlog it always ends up here. Kerboom...
1474            Fortunately, tcp_rcv_established and rcv_established
1475            handle them correctly, but that is not the case with
1476            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1477          */
1478
1479         if (skb->protocol == htons(ETH_P_IP))
1480                 return tcp_v4_do_rcv(sk, skb);
1481
1482         /*
1483          *      socket locking is here for SMP purposes as backlog rcv
1484          *      is currently called with bh processing disabled.
1485          */
1486
1487         /* Do Stevens' IPV6_PKTOPTIONS.
1488
1489            Yes, guys, this is the only place in our code where we
1490            can do this without affecting IPv4.
1491            The rest of the code is protocol independent,
1492            and I do not like the idea of uglifying IPv4.
1493
1494            Actually, the whole idea behind IPV6_PKTOPTIONS
1495            does not look very well thought out. For now we latch
1496            the options received in the last packet enqueued
1497            by tcp. Feel free to propose a better solution.
1498                                                --ANK (980728)
1499          */
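        /* A rough userspace sketch (illustrative only, not kernel code) of
         * how a receiver opts in and later fetches the latched options;
         * IPV6_2292PKTOPTIONS is the uapi name for the historical
         * IPV6_PKTOPTIONS getsockopt:
         *
         *	int on = 1;
         *	char cbuf[256];
         *	socklen_t clen = sizeof(cbuf);
         *
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
         *	...
         *	getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &clen);
         *
         * The buffer comes back formatted as ancillary data (walk it with
         * CMSG_FIRSTHDR()/CMSG_NXTHDR() over a msghdr pointing at cbuf).
         * Each rxopt bit tested below corresponds to one such socket option.
         */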
1500         if (np->rxopt.all)
1501                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1502
1503         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1504                 struct dst_entry *dst = sk->sk_rx_dst;
1505
1506                 sock_rps_save_rxhash(sk, skb);
1507                 sk_mark_napi_id(sk, skb);
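                /* Validate the cached input route: it must have been set by
                 * a packet from the same incoming device and must still pass
                 * ip6_dst_check() against rx_dst_cookie; otherwise drop it
                 * and let a later packet install a fresh one.
                 */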
1508                 if (dst) {
1509                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1510                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1511                                             dst, np->rx_dst_cookie) == NULL) {
1512                                 dst_release(dst);
1513                                 sk->sk_rx_dst = NULL;
1514                         }
1515                 }
1516
1517                 tcp_rcv_established(sk, skb);
1518                 if (opt_skb)
1519                         goto ipv6_pktoptions;
1520                 return 0;
1521         }
1522
1523         if (tcp_checksum_complete(skb))
1524                 goto csum_err;
1525
1526         if (sk->sk_state == TCP_LISTEN) {
1527                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1528
1529                 if (!nsk)
1530                         goto discard;
1531
1532                 if (nsk != sk) {
1533                         if (tcp_child_process(sk, nsk, skb))
1534                                 goto reset;
1535                         if (opt_skb)
1536                                 __kfree_skb(opt_skb);
1537                         return 0;
1538                 }
1539         } else
1540                 sock_rps_save_rxhash(sk, skb);
1541
1542         if (tcp_rcv_state_process(sk, skb))
1543                 goto reset;
1544         if (opt_skb)
1545                 goto ipv6_pktoptions;
1546         return 0;
1547
1548 reset:
1549         tcp_v6_send_reset(sk, skb);
1550 discard:
1551         if (opt_skb)
1552                 __kfree_skb(opt_skb);
1553         kfree_skb(skb);
1554         return 0;
1555 csum_err:
1556         trace_tcp_bad_csum(skb);
1557         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1558         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1559         goto discard;
1560
1561
1562 ipv6_pktoptions:
1563         /* You may ask, what is all this for?
1564
1565            1. The skb was enqueued by tcp.
1566            2. The skb was added to the tail of the read queue, not out of order.
1567            3. The socket is not in a passive state.
1568            4. Finally, it really contains options that the user wants to receive.
1569          */
1570         tp = tcp_sk(sk);
1571         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1572             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1573                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1574                         np->mcast_oif = tcp_v6_iif(opt_skb);
1575                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1576                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1577                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1578                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1579                 if (np->repflow)
1580                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1581                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1582                         skb_set_owner_r(opt_skb, sk);
1583                         tcp_v6_restore_cb(opt_skb);
1584                         opt_skb = xchg(&np->pktoptions, opt_skb);
1585                 } else {
1586                         __kfree_skb(opt_skb);
1587                         opt_skb = xchg(&np->pktoptions, NULL);
1588                 }
1589         }
1590
1591         kfree_skb(opt_skb);
1592         return 0;
1593 }
1594
1595 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1596                            const struct tcphdr *th)
1597 {
1598         /* This is tricky: we move IP6CB to its correct location inside
1599          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1600          * _decode_session6() uses IP6CB().
1601          * barrier() makes sure the compiler won't play aliasing games.
1602          */
1603         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1604                 sizeof(struct inet6_skb_parm));
1605         barrier();
1606
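        /* SYN and FIN each consume one sequence number, so th->syn and
         * th->fin count toward end_seq along with the payload length.
         */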
1607         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1608         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1609                                     skb->len - th->doff*4);
1610         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1611         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1612         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1613         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1614         TCP_SKB_CB(skb)->sacked = 0;
1615         TCP_SKB_CB(skb)->has_rxtstamp =
1616                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1617 }
1618
1619 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1620 {
1621         struct sk_buff *skb_to_free;
1622         int sdif = inet6_sdif(skb);
1623         int dif = inet6_iif(skb);
1624         const struct tcphdr *th;
1625         const struct ipv6hdr *hdr;
1626         bool refcounted;
1627         struct sock *sk;
1628         int ret;
1629         struct net *net = dev_net(skb->dev);
1630
1631         if (skb->pkt_type != PACKET_HOST)
1632                 goto discard_it;
1633
1634         /*
1635          *      Count it even if it's bad.
1636          */
1637         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1638
1639         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1640                 goto discard_it;
1641
1642         th = (const struct tcphdr *)skb->data;
1643
1644         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1645                 goto bad_packet;
1646         if (!pskb_may_pull(skb, th->doff*4))
1647                 goto discard_it;
1648
1649         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1650                 goto csum_error;
1651
1652         th = (const struct tcphdr *)skb->data;
1653         hdr = ipv6_hdr(skb);
1654
1655 lookup:
1656         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1657                                 th->source, th->dest, inet6_iif(skb), sdif,
1658                                 &refcounted);
1659         if (!sk)
1660                 goto no_tcp_socket;
1661
1662 process:
1663         if (sk->sk_state == TCP_TIME_WAIT)
1664                 goto do_time_wait;
1665
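        /* The lookup may return a request socket (a SYN_RECV mini-socket);
         * if so, finish the handshake here on behalf of its listener.
         */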
1666         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1667                 struct request_sock *req = inet_reqsk(sk);
1668                 bool req_stolen = false;
1669                 struct sock *nsk;
1670
1671                 sk = req->rsk_listener;
1672                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1673                         sk_drops_add(sk, skb);
1674                         reqsk_put(req);
1675                         goto discard_it;
1676                 }
1677                 if (tcp_checksum_complete(skb)) {
1678                         reqsk_put(req);
1679                         goto csum_error;
1680                 }
1681                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1682                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1683                         if (!nsk) {
1684                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1685                                 goto lookup;
1686                         }
1687                         sk = nsk;
1688                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1689                          * reference before returning.
1690                          */
1691                 } else {
1692                         sock_hold(sk);
1693                 }
1694                 refcounted = true;
1695                 nsk = NULL;
1696                 if (!tcp_filter(sk, skb)) {
1697                         th = (const struct tcphdr *)skb->data;
1698                         hdr = ipv6_hdr(skb);
1699                         tcp_v6_fill_cb(skb, hdr, th);
1700                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1701                 }
1702                 if (!nsk) {
1703                         reqsk_put(req);
1704                         if (req_stolen) {
1705                                 /* Another cpu got exclusive access to req
1706                                  * and created a full blown socket.
1707                                  * Try to feed this packet to this socket
1708                                  * instead of discarding it.
1709                                  */
1710                                 tcp_v6_restore_cb(skb);
1711                                 sock_put(sk);
1712                                 goto lookup;
1713                         }
1714                         goto discard_and_relse;
1715                 }
1716                 if (nsk == sk) {
1717                         reqsk_put(req);
1718                         tcp_v6_restore_cb(skb);
1719                 } else if (tcp_child_process(sk, nsk, skb)) {
1720                         tcp_v6_send_reset(nsk, skb);
1721                         goto discard_and_relse;
1722                 } else {
1723                         sock_put(sk);
1724                         return 0;
1725                 }
1726         }
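        /* Enforce IPV6_MINHOPCOUNT, a GTSM-style (RFC 5082) check: drop
         * segments whose hop limit indicates they travelled through more
         * routers than the socket allows.
         */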
1727         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1728                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1729                 goto discard_and_relse;
1730         }
1731
1732         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1733                 goto discard_and_relse;
1734
1735         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1736                 goto discard_and_relse;
1737
1738         if (tcp_filter(sk, skb))
1739                 goto discard_and_relse;
1740         th = (const struct tcphdr *)skb->data;
1741         hdr = ipv6_hdr(skb);
1742         tcp_v6_fill_cb(skb, hdr, th);
1743
1744         skb->dev = NULL;
1745
1746         if (sk->sk_state == TCP_LISTEN) {
1747                 ret = tcp_v6_do_rcv(sk, skb);
1748                 goto put_and_return;
1749         }
1750
1751         sk_incoming_cpu_update(sk);
1752
1753         bh_lock_sock_nested(sk);
1754         tcp_segs_in(tcp_sk(sk), skb);
1755         ret = 0;
1756         if (!sock_owned_by_user(sk)) {
1757                 skb_to_free = sk->sk_rx_skb_cache;
1758                 sk->sk_rx_skb_cache = NULL;
1759                 ret = tcp_v6_do_rcv(sk, skb);
1760         } else {
1761                 if (tcp_add_backlog(sk, skb))
1762                         goto discard_and_relse;
1763                 skb_to_free = NULL;
1764         }
1765         bh_unlock_sock(sk);
1766         if (skb_to_free)
1767                 __kfree_skb(skb_to_free);
1768 put_and_return:
1769         if (refcounted)
1770                 sock_put(sk);
1771         return ret ? -1 : 0;
1772
1773 no_tcp_socket:
1774         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1775                 goto discard_it;
1776
1777         tcp_v6_fill_cb(skb, hdr, th);
1778
1779         if (tcp_checksum_complete(skb)) {
1780 csum_error:
1781                 trace_tcp_bad_csum(skb);
1782                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1783 bad_packet:
1784                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1785         } else {
1786                 tcp_v6_send_reset(NULL, skb);
1787         }
1788
1789 discard_it:
1790         kfree_skb(skb);
1791         return 0;
1792
1793 discard_and_relse:
1794         sk_drops_add(sk, skb);
1795         if (refcounted)
1796                 sock_put(sk);
1797         goto discard_it;
1798
1799 do_time_wait:
1800         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801                 inet_twsk_put(inet_twsk(sk));
1802                 goto discard_it;
1803         }
1804
1805         tcp_v6_fill_cb(skb, hdr, th);
1806
1807         if (tcp_checksum_complete(skb)) {
1808                 inet_twsk_put(inet_twsk(sk));
1809                 goto csum_error;
1810         }
1811
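        /* tcp_timewait_state_process() tells us whether this segment may
         * open a fresh connection reusing the tuple (TCP_TW_SYN), should be
         * ACKed, answered with a RST, or silently ignored.
         */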
1812         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1813         case TCP_TW_SYN:
1814         {
1815                 struct sock *sk2;
1816
1817                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1818                                             skb, __tcp_hdrlen(th),
1819                                             &ipv6_hdr(skb)->saddr, th->source,
1820                                             &ipv6_hdr(skb)->daddr,
1821                                             ntohs(th->dest),
1822                                             tcp_v6_iif_l3_slave(skb),
1823                                             sdif);
1824                 if (sk2) {
1825                         struct inet_timewait_sock *tw = inet_twsk(sk);
1826                         inet_twsk_deschedule_put(tw);
1827                         sk = sk2;
1828                         tcp_v6_restore_cb(skb);
1829                         refcounted = false;
1830                         goto process;
1831                 }
1832         }
1833                 /* to ACK */
1834                 fallthrough;
1835         case TCP_TW_ACK:
1836                 tcp_v6_timewait_ack(sk, skb);
1837                 break;
1838         case TCP_TW_RST:
1839                 tcp_v6_send_reset(sk, skb);
1840                 inet_twsk_deschedule_put(inet_twsk(sk));
1841                 goto discard_it;
1842         case TCP_TW_SUCCESS:
1843                 ;
1844         }
1845         goto discard_it;
1846 }
1847
1848 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1849 {
1850         const struct ipv6hdr *hdr;
1851         const struct tcphdr *th;
1852         struct sock *sk;
1853
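        /* Early demux: before the routing decision, try to match the packet
         * to an established socket so that its cached input route can be
         * reused (see the sk_rx_dst handling at the end).
         */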
1854         if (skb->pkt_type != PACKET_HOST)
1855                 return;
1856
1857         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1858                 return;
1859
1860         hdr = ipv6_hdr(skb);
1861         th = tcp_hdr(skb);
1862
1863         if (th->doff < sizeof(struct tcphdr) / 4)
1864                 return;
1865
1866         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1867         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1868                                         &hdr->saddr, th->source,
1869                                         &hdr->daddr, ntohs(th->dest),
1870                                         inet6_iif(skb), inet6_sdif(skb));
1871         if (sk) {
1872                 skb->sk = sk;
1873                 skb->destructor = sock_edemux;
1874                 if (sk_fullsock(sk)) {
1875                         struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1876
1877                         if (dst)
1878                                 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1879                         if (dst &&
1880                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1881                                 skb_dst_set_noref(skb, dst);
1882                 }
1883         }
1884 }
1885
1886 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1887         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1888         .twsk_unique    = tcp_twsk_unique,
1889         .twsk_destructor = tcp_twsk_destructor,
1890 };
1891
1892 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1893 {
1894         struct ipv6_pinfo *np = inet6_sk(sk);
1895
1896         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1897 }
1898
1899 const struct inet_connection_sock_af_ops ipv6_specific = {
1900         .queue_xmit        = inet6_csk_xmit,
1901         .send_check        = tcp_v6_send_check,
1902         .rebuild_header    = inet6_sk_rebuild_header,
1903         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1904         .conn_request      = tcp_v6_conn_request,
1905         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1906         .net_header_len    = sizeof(struct ipv6hdr),
1907         .net_frag_header_len = sizeof(struct frag_hdr),
1908         .setsockopt        = ipv6_setsockopt,
1909         .getsockopt        = ipv6_getsockopt,
1910         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1911         .sockaddr_len      = sizeof(struct sockaddr_in6),
1912         .mtu_reduced       = tcp_v6_mtu_reduced,
1913 };
1914
1915 #ifdef CONFIG_TCP_MD5SIG
1916 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1917         .md5_lookup     =       tcp_v6_md5_lookup,
1918         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1919         .md5_parse      =       tcp_v6_parse_md5_keys,
1920 };
1921 #endif
1922
1923 /*
1924  *      TCP over IPv4 via INET6 API
1925  */
1926 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1927         .queue_xmit        = ip_queue_xmit,
1928         .send_check        = tcp_v4_send_check,
1929         .rebuild_header    = inet_sk_rebuild_header,
1930         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1931         .conn_request      = tcp_v6_conn_request,
1932         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1933         .net_header_len    = sizeof(struct iphdr),
1934         .setsockopt        = ipv6_setsockopt,
1935         .getsockopt        = ipv6_getsockopt,
1936         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1937         .sockaddr_len      = sizeof(struct sockaddr_in6),
1938         .mtu_reduced       = tcp_v4_mtu_reduced,
1939 };
1940
1941 #ifdef CONFIG_TCP_MD5SIG
1942 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1943         .md5_lookup     =       tcp_v4_md5_lookup,
1944         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1945         .md5_parse      =       tcp_v6_parse_md5_keys,
1946 };
1947 #endif
1948
1949 /* NOTE: A lot of things are set to zero explicitly by the call to
1950  *       sk_alloc(), so they need not be done here.
1951  */
1952 static int tcp_v6_init_sock(struct sock *sk)
1953 {
1954         struct inet_connection_sock *icsk = inet_csk(sk);
1955
1956         tcp_init_sock(sk);
1957
1958         icsk->icsk_af_ops = &ipv6_specific;
1959
1960 #ifdef CONFIG_TCP_MD5SIG
1961         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1962 #endif
1963
1964         return 0;
1965 }
1966
1967 static void tcp_v6_destroy_sock(struct sock *sk)
1968 {
1969         tcp_v4_destroy_sock(sk);
1970         inet6_destroy_sock(sk);
1971 }
1972
1973 #ifdef CONFIG_PROC_FS
1974 /* Proc filesystem TCPv6 sock list dumping. */
1975 static void get_openreq6(struct seq_file *seq,
1976                          const struct request_sock *req, int i)
1977 {
1978         long ttd = req->rsk_timer.expires - jiffies;
1979         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1980         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1981
1982         if (ttd < 0)
1983                 ttd = 0;
1984
1985         seq_printf(seq,
1986                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1987                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1988                    i,
1989                    src->s6_addr32[0], src->s6_addr32[1],
1990                    src->s6_addr32[2], src->s6_addr32[3],
1991                    inet_rsk(req)->ir_num,
1992                    dest->s6_addr32[0], dest->s6_addr32[1],
1993                    dest->s6_addr32[2], dest->s6_addr32[3],
1994                    ntohs(inet_rsk(req)->ir_rmt_port),
1995                    TCP_SYN_RECV,
1996                    0, 0, /* could print option size, but that is af dependent. */
1997                    1,   /* timers active (only the expire timer) */
1998                    jiffies_to_clock_t(ttd),
1999                    req->num_timeout,
2000                    from_kuid_munged(seq_user_ns(seq),
2001                                     sock_i_uid(req->rsk_listener)),
2002                    0,  /* non standard timer */
2003                    0, /* open_requests have no inode */
2004                    0, req);
2005 }
2006
2007 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2008 {
2009         const struct in6_addr *dest, *src;
2010         __u16 destp, srcp;
2011         int timer_active;
2012         unsigned long timer_expires;
2013         const struct inet_sock *inet = inet_sk(sp);
2014         const struct tcp_sock *tp = tcp_sk(sp);
2015         const struct inet_connection_sock *icsk = inet_csk(sp);
2016         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2017         int rx_queue;
2018         int state;
2019
2020         dest  = &sp->sk_v6_daddr;
2021         src   = &sp->sk_v6_rcv_saddr;
2022         destp = ntohs(inet->inet_dport);
2023         srcp  = ntohs(inet->inet_sport);
2024
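        /* Timer codes reported in the "tr" column of /proc/net/tcp6:
         * 0 none, 1 retransmit/loss probe, 2 keepalive (sk_timer),
         * 3 TIME_WAIT (see get_timewait6_sock()), 4 zero-window probe.
         */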
2025         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2026             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2027             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2028                 timer_active    = 1;
2029                 timer_expires   = icsk->icsk_timeout;
2030         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2031                 timer_active    = 4;
2032                 timer_expires   = icsk->icsk_timeout;
2033         } else if (timer_pending(&sp->sk_timer)) {
2034                 timer_active    = 2;
2035                 timer_expires   = sp->sk_timer.expires;
2036         } else {
2037                 timer_active    = 0;
2038                 timer_expires = jiffies;
2039         }
2040
2041         state = inet_sk_state_load(sp);
2042         if (state == TCP_LISTEN)
2043                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2044         else
2045                 /* Because we don't lock the socket,
2046                  * we might find a transient negative value.
2047                  */
2048                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2049                                       READ_ONCE(tp->copied_seq), 0);
2050
2051         seq_printf(seq,
2052                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2053                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2054                    i,
2055                    src->s6_addr32[0], src->s6_addr32[1],
2056                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2057                    dest->s6_addr32[0], dest->s6_addr32[1],
2058                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2059                    state,
2060                    READ_ONCE(tp->write_seq) - tp->snd_una,
2061                    rx_queue,
2062                    timer_active,
2063                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2064                    icsk->icsk_retransmits,
2065                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2066                    icsk->icsk_probes_out,
2067                    sock_i_ino(sp),
2068                    refcount_read(&sp->sk_refcnt), sp,
2069                    jiffies_to_clock_t(icsk->icsk_rto),
2070                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2071                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2072                    tp->snd_cwnd,
2073                    state == TCP_LISTEN ?
2074                         fastopenq->max_qlen :
2075                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2076                    );
2077 }
2078
2079 static void get_timewait6_sock(struct seq_file *seq,
2080                                struct inet_timewait_sock *tw, int i)
2081 {
2082         long delta = tw->tw_timer.expires - jiffies;
2083         const struct in6_addr *dest, *src;
2084         __u16 destp, srcp;
2085
2086         dest = &tw->tw_v6_daddr;
2087         src  = &tw->tw_v6_rcv_saddr;
2088         destp = ntohs(tw->tw_dport);
2089         srcp  = ntohs(tw->tw_sport);
2090
2091         seq_printf(seq,
2092                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2093                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2094                    i,
2095                    src->s6_addr32[0], src->s6_addr32[1],
2096                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2097                    dest->s6_addr32[0], dest->s6_addr32[1],
2098                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2099                    tw->tw_substate, 0, 0,
2100                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2101                    refcount_read(&tw->tw_refcnt), tw);
2102 }
2103
2104 static int tcp6_seq_show(struct seq_file *seq, void *v)
2105 {
2106         struct tcp_iter_state *st;
2107         struct sock *sk = v;
2108
2109         if (v == SEQ_START_TOKEN) {
2110                 seq_puts(seq,
2111                          "  sl  "
2112                          "local_address                         "
2113                          "remote_address                        "
2114                          "st tx_queue rx_queue tr tm->when retrnsmt"
2115                          "   uid  timeout inode\n");
2116                 goto out;
2117         }
2118         st = seq->private;
2119
2120         if (sk->sk_state == TCP_TIME_WAIT)
2121                 get_timewait6_sock(seq, v, st->num);
2122         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2123                 get_openreq6(seq, v, st->num);
2124         else
2125                 get_tcp6_sock(seq, v, st->num);
2126 out:
2127         return 0;
2128 }
2129
2130 static const struct seq_operations tcp6_seq_ops = {
2131         .show           = tcp6_seq_show,
2132         .start          = tcp_seq_start,
2133         .next           = tcp_seq_next,
2134         .stop           = tcp_seq_stop,
2135 };
2136
2137 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2138         .family         = AF_INET6,
2139 };
2140
2141 int __net_init tcp6_proc_init(struct net *net)
2142 {
2143         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2144                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2145                 return -ENOMEM;
2146         return 0;
2147 }
2148
2149 void tcp6_proc_exit(struct net *net)
2150 {
2151         remove_proc_entry("tcp6", net->proc_net);
2152 }
2153 #endif
2154
2155 struct proto tcpv6_prot = {
2156         .name                   = "TCPv6",
2157         .owner                  = THIS_MODULE,
2158         .close                  = tcp_close,
2159         .pre_connect            = tcp_v6_pre_connect,
2160         .connect                = tcp_v6_connect,
2161         .disconnect             = tcp_disconnect,
2162         .accept                 = inet_csk_accept,
2163         .ioctl                  = tcp_ioctl,
2164         .init                   = tcp_v6_init_sock,
2165         .destroy                = tcp_v6_destroy_sock,
2166         .shutdown               = tcp_shutdown,
2167         .setsockopt             = tcp_setsockopt,
2168         .getsockopt             = tcp_getsockopt,
2169         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2170         .keepalive              = tcp_set_keepalive,
2171         .recvmsg                = tcp_recvmsg,
2172         .sendmsg                = tcp_sendmsg,
2173         .sendpage               = tcp_sendpage,
2174         .backlog_rcv            = tcp_v6_do_rcv,
2175         .release_cb             = tcp_release_cb,
2176         .hash                   = inet6_hash,
2177         .unhash                 = inet_unhash,
2178         .get_port               = inet_csk_get_port,
2179 #ifdef CONFIG_BPF_SYSCALL
2180         .psock_update_sk_prot   = tcp_bpf_update_proto,
2181 #endif
2182         .enter_memory_pressure  = tcp_enter_memory_pressure,
2183         .leave_memory_pressure  = tcp_leave_memory_pressure,
2184         .stream_memory_free     = tcp_stream_memory_free,
2185         .sockets_allocated      = &tcp_sockets_allocated,
2186         .memory_allocated       = &tcp_memory_allocated,
2187         .memory_pressure        = &tcp_memory_pressure,
2188         .orphan_count           = &tcp_orphan_count,
2189         .sysctl_mem             = sysctl_tcp_mem,
2190         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2191         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2192         .max_header             = MAX_TCP_HEADER,
2193         .obj_size               = sizeof(struct tcp6_sock),
2194         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2195         .twsk_prot              = &tcp6_timewait_sock_ops,
2196         .rsk_prot               = &tcp6_request_sock_ops,
2197         .h.hashinfo             = &tcp_hashinfo,
2198         .no_autobind            = true,
2199         .diag_destroy           = tcp_abort,
2200 };
2201 EXPORT_SYMBOL_GPL(tcpv6_prot);
2202
2203 /* Thinking of making this const? Don't.
2204  * early_demux can change based on sysctl.
2205  */
2206 static struct inet6_protocol tcpv6_protocol = {
2207         .early_demux    =       tcp_v6_early_demux,
2208         .early_demux_handler =  tcp_v6_early_demux,
2209         .handler        =       tcp_v6_rcv,
2210         .err_handler    =       tcp_v6_err,
2211         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2212 };
2213
2214 static struct inet_protosw tcpv6_protosw = {
2215         .type           =       SOCK_STREAM,
2216         .protocol       =       IPPROTO_TCP,
2217         .prot           =       &tcpv6_prot,
2218         .ops            =       &inet6_stream_ops,
2219         .flags          =       INET_PROTOSW_PERMANENT |
2220                                 INET_PROTOSW_ICSK,
2221 };
2222
2223 static int __net_init tcpv6_net_init(struct net *net)
2224 {
2225         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2226                                     SOCK_RAW, IPPROTO_TCP, net);
2227 }
2228
2229 static void __net_exit tcpv6_net_exit(struct net *net)
2230 {
2231         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2232 }
2233
2234 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2235 {
2236         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2237 }
2238
2239 static struct pernet_operations tcpv6_net_ops = {
2240         .init       = tcpv6_net_init,
2241         .exit       = tcpv6_net_exit,
2242         .exit_batch = tcpv6_net_exit_batch,
2243 };
2244
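/* Registration in tcpv6_init() happens in dependency order: inet6 protocol
 * handler, protosw entry, pernet ops, then MPTCP; failures unwind in reverse
 * through the labels below.
 */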
2245 int __init tcpv6_init(void)
2246 {
2247         int ret;
2248
2249         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2250         if (ret)
2251                 goto out;
2252
2253         /* register inet6 protocol */
2254         ret = inet6_register_protosw(&tcpv6_protosw);
2255         if (ret)
2256                 goto out_tcpv6_protocol;
2257
2258         ret = register_pernet_subsys(&tcpv6_net_ops);
2259         if (ret)
2260                 goto out_tcpv6_protosw;
2261
2262         ret = mptcpv6_init();
2263         if (ret)
2264                 goto out_tcpv6_pernet_subsys;
2265
2266 out:
2267         return ret;
2268
2269 out_tcpv6_pernet_subsys:
2270         unregister_pernet_subsys(&tcpv6_net_ops);
2271 out_tcpv6_protosw:
2272         inet6_unregister_protosw(&tcpv6_protosw);
2273 out_tcpv6_protocol:
2274         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2275         goto out;
2276 }
2277
2278 void tcpv6_exit(void)
2279 {
2280         unregister_pernet_subsys(&tcpv6_net_ops);
2281         inet6_unregister_protosw(&tcpv6_protosw);
2282         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2283 }