ipv6: weaken the v4mapped source check
[linux-2.6-microblaze.git] / net / ipv6 / tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
/* With CONFIG_TCP_MD5SIG the MD5-specific operation tables are only declared
 * here; without it, a stub lookup is provided instead so that callers of
 * tcp_v6_md5_do_lookup() always see "no key" (NULL).
 */
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
/* Stub for builds without TCP-MD5 support: ignores all arguments and
 * returns NULL.
 */
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 sk->sk_rx_dst = dst;
111                 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112                 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent BPF program called below from accessing bytes that are out
135          * of the bound specified by user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
/*
 * tcp_v6_connect - start an active open on an AF_INET6 TCP socket.
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as a struct sockaddr_in6
 * @addr_len: length of @uaddr as supplied by the caller
 *
 * Steps, in order:
 *  - validate the address length and family;
 *  - if the socket sends flow labels (np->sndflow), take the label from
 *    sin6_flowinfo and check that a non-zero label is known to the socket;
 *  - replace an unspecified destination by loopback;
 *  - reject multicast destinations, and require an interface for
 *    link-local destinations;
 *  - for a v4-mapped destination, switch the socket to the v4-mapped
 *    operations and hand over to tcp_v4_connect();
 *  - otherwise look up a route, fix the source address, hash the socket,
 *    pick the initial sequence number / timestamp offset and send the SYN
 *    via tcp_connect() (unless fastopen defers it).
 *
 * Returns 0 on success. Errors produced directly here are -EINVAL,
 * -EAFNOSUPPORT and -ENETUNREACH; other values are passed through from
 * the helpers that failed.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152         struct tcp_sock *tp = tcp_sk(sk);
153         struct in6_addr *saddr = NULL, *final_p, final;
154         struct ipv6_txoptions *opt;
155         struct flowi6 fl6;
156         struct dst_entry *dst;
157         int addr_type;
158         int err;
159         struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
            /* Flow label requested by the caller; a non-zero label must be
             * found by fl6_sock_lookup() for this socket, else -EINVAL.
             */
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
             *      A socket whose receive address is v4-mapped gets the
             *      v4-mapped IPv4 loopback, any other socket gets ::1.
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type&IPV6_ADDR_LINKLOCAL) {
                    /* sin6_scope_id is only read when the caller passed a
                     * full-size sockaddr_in6.
                     */
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If interface is set while binding, indices
202                          * must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connect to link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
214
            /* Timestamp state is present but the destination differs from
             * the one stored in sk_v6_daddr: forget it and reset write_seq.
             */
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
                    /* Saved so it can be restored if tcp_v4_connect() fails. */
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (__ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
                    /* Build the IPv4 destination from the low 32 bits of the
                     * mapped address.
                     */
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
                            /* Undo the switch to the v4-mapped operations. */
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
265
            /* Native IPv6 from here on. saddr stays NULL when the socket has
             * no receive address yet.
             */
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
            /* No receive address was set: adopt the source address left in
             * fl6 after the route lookup.
             */
289         if (!saddr) {
290                 saddr = &fl6.saddr;
291                 sk->sk_v6_rcv_saddr = *saddr;
292         }
293
294         /* set the source address */
295         np->saddr = *saddr;
296         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297
298         sk->sk_gso_type = SKB_GSO_TCPV6;
299         ip6_dst_store(sk, dst, NULL, NULL);
300
            /* Extension header length is the total length of the socket's
             * IPv6 tx options, or 0 when there are none.
             */
301         icsk->icsk_ext_hdr_len = 0;
302         if (opt)
303                 icsk->icsk_ext_hdr_len = opt->opt_flen +
304                                          opt->opt_nflen;
305
306         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307
308         inet->inet_dport = usin->sin6_port;
309
310         tcp_set_state(sk, TCP_SYN_SENT);
311         err = inet6_hash_connect(tcp_death_row, sk);
312         if (err)
313                 goto late_failure;
314
315         sk_set_txhash(sk);
316
            /* Outside repair mode, pick the initial sequence number (only if
             * write_seq is still 0) and the timestamp offset.
             */
317         if (likely(!tp->repair)) {
318                 if (!tp->write_seq)
319                         WRITE_ONCE(tp->write_seq,
320                                    secure_tcpv6_seq(np->saddr.s6_addr32,
321                                                     sk->sk_v6_daddr.s6_addr32,
322                                                     inet->inet_sport,
323                                                     inet->inet_dport));
324                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325                                                    np->saddr.s6_addr32,
326                                                    sk->sk_v6_daddr.s6_addr32);
327         }
328
            /* When fastopen defers the connect, return the err value it set. */
329         if (tcp_fastopen_defer_connect(sk, &err))
330                 return err;
331         if (err)
332                 goto late_failure;
333
334         err = tcp_connect(sk);
335         if (err)
336                 goto late_failure;
337
338         return 0;
339
            /* late_failure: the socket already left TCP_CLOSE, so put it back.
             * failure: clear the destination port and route capabilities.
             */
340 late_failure:
341         tcp_set_state(sk, TCP_CLOSE);
342 failure:
343         inet->inet_dport = 0;
344         sk->sk_route_caps = 0;
345         return err;
346 }
347
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350         struct dst_entry *dst;
351
352         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353                 return;
354
355         dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
356         if (!dst)
357                 return;
358
359         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360                 tcp_sync_mss(sk, dst_mtu(dst));
361                 tcp_simple_retransmit(sk);
362         }
363 }
364
/*
 * tcp_v6_err - handle an ICMPv6 error that refers to a TCP segment.
 * @skb:    buffer whose data starts with the IPv6 header quoted in the
 *          error (read through skb->data)
 * @opt:    not referenced in this function
 * @type:   ICMPv6 type
 * @code:   ICMPv6 code
 * @offset: offset of the quoted TCP header from skb->data
 * @info:   ICMPv6 info field; converted with ntohl() before use (it is
 *          stored as the new MTU for ICMPV6_PKT_TOOBIG)
 *
 * The socket is looked up in the established hash using the quoted
 * addresses and ports. Depending on socket state and error type the error
 * is dropped, used to update the route / path MTU, or reported on the
 * socket as a hard (sk_err) or soft (sk_err_soft) error.
 *
 * Returns -ENOENT when no socket matches, 0 in every other case.
 */
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366                 u8 type, u8 code, int offset, __be32 info)
367 {
368         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370         struct net *net = dev_net(skb->dev);
371         struct request_sock *fastopen;
372         struct ipv6_pinfo *np;
373         struct tcp_sock *tp;
374         __u32 seq, snd_una;
375         struct sock *sk;
376         bool fatal;
377         int err;
378
            /* The local port (th->source) is passed in host byte order. */
379         sk = __inet6_lookup_established(net, &tcp_hashinfo,
380                                         &hdr->daddr, th->dest,
381                                         &hdr->saddr, ntohs(th->source),
382                                         skb->dev->ifindex, inet6_sdif(skb));
383
384         if (!sk) {
385                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
386                                   ICMP6_MIB_INERRORS);
387                 return -ENOENT;
388         }
389
            /* Time-wait sockets ignore the error; only drop the reference. */
390         if (sk->sk_state == TCP_TIME_WAIT) {
391                 inet_twsk_put(inet_twsk(sk));
392                 return 0;
393         }
394         seq = ntohl(th->seq);
395         fatal = icmpv6_err_convert(type, code, &err);
            /* Request sockets are handed to tcp_req_err(). */
396         if (sk->sk_state == TCP_NEW_SYN_RECV) {
397                 tcp_req_err(sk, seq, fatal);
398                 return 0;
399         }
400
401         bh_lock_sock(sk);
            /* Only counted here; processing continues even when the socket is
             * owned by user context.
             */
402         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
404
405         if (sk->sk_state == TCP_CLOSE)
406                 goto out;
407
            /* Drop errors whose hop limit is below the socket's minimum. */
408         if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
410                 goto out;
411         }
412
413         tp = tcp_sk(sk);
414         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
415         fastopen = rcu_dereference(tp->fastopen_rsk);
416         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
            /* Except for listeners, the quoted sequence number must lie
             * between snd_una and snd_nxt.
             */
417         if (sk->sk_state != TCP_LISTEN &&
418             !between(seq, snd_una, tp->snd_nxt)) {
419                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420                 goto out;
421         }
422
423         np = tcp_inet6_sk(sk);
424
            /* Redirects are applied to the cached dst, and only when the
             * socket is not owned by user context.
             */
425         if (type == NDISC_REDIRECT) {
426                 if (!sock_owned_by_user(sk)) {
427                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428
429                         if (dst)
430                                 dst->ops->redirect(dst, sk, skb);
431                 }
432                 goto out;
433         }
434
435         if (type == ICMPV6_PKT_TOOBIG) {
436                 /* We are not interested in TCP_LISTEN and open_requests
437                  * (SYN-ACKs sent out by Linux are always <576bytes so
438                  * they should go through unfragmented).
439                  */
440                 if (sk->sk_state == TCP_LISTEN)
441                         goto out;
442
443                 if (!ip6_sk_accept_pmtu(sk))
444                         goto out;
445
                    /* Record the new MTU; apply it now, or defer it via the
                     * TCP_MTU_REDUCED_DEFERRED flag (taking a socket
                     * reference) when the socket is owned by user context.
                     */
446                 tp->mtu_info = ntohl(info);
447                 if (!sock_owned_by_user(sk))
448                         tcp_v6_mtu_reduced(sk);
449                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450                                            &sk->sk_tsq_flags))
451                         sock_hold(sk);
452                 goto out;
453         }
454
455
456         /* Might be for a request_sock */
457         switch (sk->sk_state) {
458         case TCP_SYN_SENT:
459         case TCP_SYN_RECV:
460                 /* Only in fast or simultaneous open. If a fast open socket is
461                  * already accepted it is treated as a connected one below.
462                  */
463                 if (fastopen && !fastopen->sk)
464                         break;
465
466                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
467
                    /* Hard error and tcp_done() only when the socket is not
                     * owned by user context; otherwise just a soft error.
                     */
468                 if (!sock_owned_by_user(sk)) {
469                         sk->sk_err = err;
470                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
471
472                         tcp_done(sk);
473                 } else
474                         sk->sk_err_soft = err;
475                 goto out;
476         case TCP_LISTEN:
477                 break;
478         default:
479                 /* check if this ICMP message allows revert of backoff.
480                  * (see RFC 6069)
481                  */
482                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
483                     code == ICMPV6_NOROUTE)
484                         tcp_ld_RTO_revert(sk, seq);
485         }
486
            /* Report as a hard error only if the socket enabled recverr and
             * is not owned by user context; otherwise record a soft error.
             */
487         if (!sock_owned_by_user(sk) && np->recverr) {
488                 sk->sk_err = err;
489                 sk->sk_error_report(sk);
490         } else
491                 sk->sk_err_soft = err;
492
            /* Common exit: unlock and drop the socket reference. */
493 out:
494         bh_unlock_sock(sk);
495         sock_put(sk);
496         return 0;
497 }
498
499
500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
501                               struct flowi *fl,
502                               struct request_sock *req,
503                               struct tcp_fastopen_cookie *foc,
504                               enum tcp_synack_type synack_type,
505                               struct sk_buff *syn_skb)
506 {
507         struct inet_request_sock *ireq = inet_rsk(req);
508         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
509         struct ipv6_txoptions *opt;
510         struct flowi6 *fl6 = &fl->u.ip6;
511         struct sk_buff *skb;
512         int err = -ENOMEM;
513         u8 tclass;
514
515         /* First, grab a route. */
516         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
517                                                IPPROTO_TCP)) == NULL)
518                 goto done;
519
520         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
521
522         if (skb) {
523                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
524                                     &ireq->ir_v6_rmt_addr);
525
526                 fl6->daddr = ireq->ir_v6_rmt_addr;
527                 if (np->repflow && ireq->pktopts)
528                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
529
530                 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
531                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
532                                 (np->tclass & INET_ECN_MASK) :
533                                 np->tclass;
534
535                 if (!INET_ECN_is_capable(tclass) &&
536                     tcp_bpf_ca_needs_ecn((struct sock *)req))
537                         tclass |= INET_ECN_ECT_0;
538
539                 rcu_read_lock();
540                 opt = ireq->ipv6_opt;
541                 if (!opt)
542                         opt = rcu_dereference(np->opt);
543                 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
544                                tclass, sk->sk_priority);
545                 rcu_read_unlock();
546                 err = net_xmit_eval(err);
547         }
548
549 done:
550         return err;
551 }
552
553
554 static void tcp_v6_reqsk_destructor(struct request_sock *req)
555 {
556         kfree(inet_rsk(req)->ipv6_opt);
557         kfree_skb(inet_rsk(req)->pktopts);
558 }
559
560 #ifdef CONFIG_TCP_MD5SIG
561 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
562                                                    const struct in6_addr *addr,
563                                                    int l3index)
564 {
565         return tcp_md5_do_lookup(sk, l3index,
566                                  (union tcp_md5_addr *)addr, AF_INET6);
567 }
568
569 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
570                                                 const struct sock *addr_sk)
571 {
572         int l3index;
573
574         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
575                                                  addr_sk->sk_bound_dev_if);
576         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
577                                     l3index);
578 }
579
580 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
581                                  sockptr_t optval, int optlen)
582 {
583         struct tcp_md5sig cmd;
584         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
585         int l3index = 0;
586         u8 prefixlen;
587
588         if (optlen < sizeof(cmd))
589                 return -EINVAL;
590
591         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
592                 return -EFAULT;
593
594         if (sin6->sin6_family != AF_INET6)
595                 return -EINVAL;
596
597         if (optname == TCP_MD5SIG_EXT &&
598             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
599                 prefixlen = cmd.tcpm_prefixlen;
600                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
601                                         prefixlen > 32))
602                         return -EINVAL;
603         } else {
604                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
605         }
606
607         if (optname == TCP_MD5SIG_EXT &&
608             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
609                 struct net_device *dev;
610
611                 rcu_read_lock();
612                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
613                 if (dev && netif_is_l3_master(dev))
614                         l3index = dev->ifindex;
615                 rcu_read_unlock();
616
617                 /* ok to reference set/not set outside of rcu;
618                  * right now device MUST be an L3 master
619                  */
620                 if (!dev || !l3index)
621                         return -EINVAL;
622         }
623
624         if (!cmd.tcpm_keylen) {
625                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
626                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
627                                               AF_INET, prefixlen,
628                                               l3index);
629                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
630                                       AF_INET6, prefixlen, l3index);
631         }
632
633         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
634                 return -EINVAL;
635
636         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
637                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
638                                       AF_INET, prefixlen, l3index,
639                                       cmd.tcpm_key, cmd.tcpm_keylen,
640                                       GFP_KERNEL);
641
642         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
643                               AF_INET6, prefixlen, l3index,
644                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
645 }
646
647 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
648                                    const struct in6_addr *daddr,
649                                    const struct in6_addr *saddr,
650                                    const struct tcphdr *th, int nbytes)
651 {
652         struct tcp6_pseudohdr *bp;
653         struct scatterlist sg;
654         struct tcphdr *_th;
655
656         bp = hp->scratch;
657         /* 1. TCP pseudo-header (RFC2460) */
658         bp->saddr = *saddr;
659         bp->daddr = *daddr;
660         bp->protocol = cpu_to_be32(IPPROTO_TCP);
661         bp->len = cpu_to_be32(nbytes);
662
663         _th = (struct tcphdr *)(bp + 1);
664         memcpy(_th, th, sizeof(*th));
665         _th->check = 0;
666
667         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
668         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
669                                 sizeof(*bp) + sizeof(*th));
670         return crypto_ahash_update(hp->md5_req);
671 }
672
673 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
674                                const struct in6_addr *daddr, struct in6_addr *saddr,
675                                const struct tcphdr *th)
676 {
677         struct tcp_md5sig_pool *hp;
678         struct ahash_request *req;
679
680         hp = tcp_get_md5sig_pool();
681         if (!hp)
682                 goto clear_hash_noput;
683         req = hp->md5_req;
684
685         if (crypto_ahash_init(req))
686                 goto clear_hash;
687         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
688                 goto clear_hash;
689         if (tcp_md5_hash_key(hp, key))
690                 goto clear_hash;
691         ahash_request_set_crypt(req, NULL, md5_hash, 0);
692         if (crypto_ahash_final(req))
693                 goto clear_hash;
694
695         tcp_put_md5sig_pool();
696         return 0;
697
698 clear_hash:
699         tcp_put_md5sig_pool();
700 clear_hash_noput:
701         memset(md5_hash, 0, 16);
702         return 1;
703 }
704
705 static int tcp_v6_md5_hash_skb(char *md5_hash,
706                                const struct tcp_md5sig_key *key,
707                                const struct sock *sk,
708                                const struct sk_buff *skb)
709 {
710         const struct in6_addr *saddr, *daddr;
711         struct tcp_md5sig_pool *hp;
712         struct ahash_request *req;
713         const struct tcphdr *th = tcp_hdr(skb);
714
715         if (sk) { /* valid for establish/request sockets */
716                 saddr = &sk->sk_v6_rcv_saddr;
717                 daddr = &sk->sk_v6_daddr;
718         } else {
719                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
720                 saddr = &ip6h->saddr;
721                 daddr = &ip6h->daddr;
722         }
723
724         hp = tcp_get_md5sig_pool();
725         if (!hp)
726                 goto clear_hash_noput;
727         req = hp->md5_req;
728
729         if (crypto_ahash_init(req))
730                 goto clear_hash;
731
732         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
733                 goto clear_hash;
734         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
735                 goto clear_hash;
736         if (tcp_md5_hash_key(hp, key))
737                 goto clear_hash;
738         ahash_request_set_crypt(req, NULL, md5_hash, 0);
739         if (crypto_ahash_final(req))
740                 goto clear_hash;
741
742         tcp_put_md5sig_pool();
743         return 0;
744
745 clear_hash:
746         tcp_put_md5sig_pool();
747 clear_hash_noput:
748         memset(md5_hash, 0, 16);
749         return 1;
750 }
751
752 #endif
753
754 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
755                                     const struct sk_buff *skb,
756                                     int dif, int sdif)
757 {
758 #ifdef CONFIG_TCP_MD5SIG
759         const __u8 *hash_location = NULL;
760         struct tcp_md5sig_key *hash_expected;
761         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
762         const struct tcphdr *th = tcp_hdr(skb);
763         int genhash, l3index;
764         u8 newhash[16];
765
766         /* sdif set, means packet ingressed via a device
767          * in an L3 domain and dif is set to the l3mdev
768          */
769         l3index = sdif ? dif : 0;
770
771         hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
772         hash_location = tcp_parse_md5sig_option(th);
773
774         /* We've parsed the options - do we have a hash? */
775         if (!hash_expected && !hash_location)
776                 return false;
777
778         if (hash_expected && !hash_location) {
779                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
780                 return true;
781         }
782
783         if (!hash_expected && hash_location) {
784                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
785                 return true;
786         }
787
788         /* check the signature */
789         genhash = tcp_v6_md5_hash_skb(newhash,
790                                       hash_expected,
791                                       NULL, skb);
792
793         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
794                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
795                 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
796                                      genhash ? "failed" : "mismatch",
797                                      &ip6h->saddr, ntohs(th->source),
798                                      &ip6h->daddr, ntohs(th->dest), l3index);
799                 return true;
800         }
801 #endif
802         return false;
803 }
804
805 static void tcp_v6_init_req(struct request_sock *req,
806                             const struct sock *sk_listener,
807                             struct sk_buff *skb)
808 {
809         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
810         struct inet_request_sock *ireq = inet_rsk(req);
811         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
812
813         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
814         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
815
816         /* So that link locals have meaning */
817         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
818             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
819                 ireq->ir_iif = tcp_v6_iif(skb);
820
821         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
822             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
823              np->rxopt.bits.rxinfo ||
824              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
825              np->rxopt.bits.rxohlim || np->repflow)) {
826                 refcount_inc(&skb->users);
827                 ireq->pktopts = skb;
828         }
829 }
830
831 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
832                                           struct sk_buff *skb,
833                                           struct flowi *fl,
834                                           struct request_sock *req)
835 {
836         tcp_v6_init_req(req, sk, skb);
837
838         if (security_inet_conn_request(sk, skb, req))
839                 return NULL;
840
841         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
842 }
843
844 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
845         .family         =       AF_INET6,
846         .obj_size       =       sizeof(struct tcp6_request_sock),
847         .rtx_syn_ack    =       tcp_rtx_synack,
848         .send_ack       =       tcp_v6_reqsk_send_ack,
849         .destructor     =       tcp_v6_reqsk_destructor,
850         .send_reset     =       tcp_v6_send_reset,
851         .syn_ack_timeout =      tcp_syn_ack_timeout,
852 };
853
854 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
855         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
856                                 sizeof(struct ipv6hdr),
857 #ifdef CONFIG_TCP_MD5SIG
858         .req_md5_lookup =       tcp_v6_md5_lookup,
859         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
860 #endif
861 #ifdef CONFIG_SYN_COOKIES
862         .cookie_init_seq =      cookie_v6_init_sequence,
863 #endif
864         .route_req      =       tcp_v6_route_req,
865         .init_seq       =       tcp_v6_init_seq,
866         .init_ts_off    =       tcp_v6_init_ts_off,
867         .send_synack    =       tcp_v6_send_synack,
868 };
869
870 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
871                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
872                                  int oif, struct tcp_md5sig_key *key, int rst,
873                                  u8 tclass, __be32 label, u32 priority)
874 {
875         const struct tcphdr *th = tcp_hdr(skb);
876         struct tcphdr *t1;
877         struct sk_buff *buff;
878         struct flowi6 fl6;
879         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
880         struct sock *ctl_sk = net->ipv6.tcp_sk;
881         unsigned int tot_len = sizeof(struct tcphdr);
882         struct dst_entry *dst;
883         __be32 *topt;
884         __u32 mark = 0;
885
886         if (tsecr)
887                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
888 #ifdef CONFIG_TCP_MD5SIG
889         if (key)
890                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
891 #endif
892
893         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
894                          GFP_ATOMIC);
895         if (!buff)
896                 return;
897
898         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
899
900         t1 = skb_push(buff, tot_len);
901         skb_reset_transport_header(buff);
902
903         /* Swap the send and the receive. */
904         memset(t1, 0, sizeof(*t1));
905         t1->dest = th->source;
906         t1->source = th->dest;
907         t1->doff = tot_len / 4;
908         t1->seq = htonl(seq);
909         t1->ack_seq = htonl(ack);
910         t1->ack = !rst || !th->ack;
911         t1->rst = rst;
912         t1->window = htons(win);
913
914         topt = (__be32 *)(t1 + 1);
915
916         if (tsecr) {
917                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
918                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
919                 *topt++ = htonl(tsval);
920                 *topt++ = htonl(tsecr);
921         }
922
923 #ifdef CONFIG_TCP_MD5SIG
924         if (key) {
925                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
926                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
927                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
928                                     &ipv6_hdr(skb)->saddr,
929                                     &ipv6_hdr(skb)->daddr, t1);
930         }
931 #endif
932
933         memset(&fl6, 0, sizeof(fl6));
934         fl6.daddr = ipv6_hdr(skb)->saddr;
935         fl6.saddr = ipv6_hdr(skb)->daddr;
936         fl6.flowlabel = label;
937
938         buff->ip_summed = CHECKSUM_PARTIAL;
939         buff->csum = 0;
940
941         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
942
943         fl6.flowi6_proto = IPPROTO_TCP;
944         if (rt6_need_strict(&fl6.daddr) && !oif)
945                 fl6.flowi6_oif = tcp_v6_iif(skb);
946         else {
947                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
948                         oif = skb->skb_iif;
949
950                 fl6.flowi6_oif = oif;
951         }
952
953         if (sk) {
954                 if (sk->sk_state == TCP_TIME_WAIT) {
955                         mark = inet_twsk(sk)->tw_mark;
956                         /* autoflowlabel relies on buff->hash */
957                         skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
958                                      PKT_HASH_TYPE_L4);
959                 } else {
960                         mark = sk->sk_mark;
961                 }
962                 buff->tstamp = tcp_transmit_time(sk);
963         }
964         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
965         fl6.fl6_dport = t1->dest;
966         fl6.fl6_sport = t1->source;
967         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
968         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
969
970         /* Pass a socket to ip6_dst_lookup either it is for RST
971          * Underlying function will use this to retrieve the network
972          * namespace
973          */
974         dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
975         if (!IS_ERR(dst)) {
976                 skb_dst_set(buff, dst);
977                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
978                          tclass & ~INET_ECN_MASK, priority);
979                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
980                 if (rst)
981                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
982                 return;
983         }
984
985         kfree_skb(buff);
986 }
987
988 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
989 {
990         const struct tcphdr *th = tcp_hdr(skb);
991         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
992         u32 seq = 0, ack_seq = 0;
993         struct tcp_md5sig_key *key = NULL;
994 #ifdef CONFIG_TCP_MD5SIG
995         const __u8 *hash_location = NULL;
996         unsigned char newhash[16];
997         int genhash;
998         struct sock *sk1 = NULL;
999 #endif
1000         __be32 label = 0;
1001         u32 priority = 0;
1002         struct net *net;
1003         int oif = 0;
1004
1005         if (th->rst)
1006                 return;
1007
1008         /* If sk not NULL, it means we did a successful lookup and incoming
1009          * route had to be correct. prequeue might have dropped our dst.
1010          */
1011         if (!sk && !ipv6_unicast_destination(skb))
1012                 return;
1013
1014         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1015 #ifdef CONFIG_TCP_MD5SIG
1016         rcu_read_lock();
1017         hash_location = tcp_parse_md5sig_option(th);
1018         if (sk && sk_fullsock(sk)) {
1019                 int l3index;
1020
1021                 /* sdif set, means packet ingressed via a device
1022                  * in an L3 domain and inet_iif is set to it.
1023                  */
1024                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1025                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1026         } else if (hash_location) {
1027                 int dif = tcp_v6_iif_l3_slave(skb);
1028                 int sdif = tcp_v6_sdif(skb);
1029                 int l3index;
1030
1031                 /*
1032                  * active side is lost. Try to find listening socket through
1033                  * source port, and then find md5 key through listening socket.
1034                  * we are not loose security here:
1035                  * Incoming packet is checked with md5 hash with finding key,
1036                  * no RST generated if md5 hash doesn't match.
1037                  */
1038                 sk1 = inet6_lookup_listener(net,
1039                                            &tcp_hashinfo, NULL, 0,
1040                                            &ipv6h->saddr,
1041                                            th->source, &ipv6h->daddr,
1042                                            ntohs(th->source), dif, sdif);
1043                 if (!sk1)
1044                         goto out;
1045
1046                 /* sdif set, means packet ingressed via a device
1047                  * in an L3 domain and dif is set to it.
1048                  */
1049                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1050
1051                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1052                 if (!key)
1053                         goto out;
1054
1055                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1056                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1057                         goto out;
1058         }
1059 #endif
1060
1061         if (th->ack)
1062                 seq = ntohl(th->ack_seq);
1063         else
1064                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1065                           (th->doff << 2);
1066
1067         if (sk) {
1068                 oif = sk->sk_bound_dev_if;
1069                 if (sk_fullsock(sk)) {
1070                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1071
1072                         trace_tcp_send_reset(sk, skb);
1073                         if (np->repflow)
1074                                 label = ip6_flowlabel(ipv6h);
1075                         priority = sk->sk_priority;
1076                 }
1077                 if (sk->sk_state == TCP_TIME_WAIT) {
1078                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1079                         priority = inet_twsk(sk)->tw_priority;
1080                 }
1081         } else {
1082                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1083                         label = ip6_flowlabel(ipv6h);
1084         }
1085
1086         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1087                              ipv6_get_dsfield(ipv6h), label, priority);
1088
1089 #ifdef CONFIG_TCP_MD5SIG
1090 out:
1091         rcu_read_unlock();
1092 #endif
1093 }
1094
1095 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1096                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1097                             struct tcp_md5sig_key *key, u8 tclass,
1098                             __be32 label, u32 priority)
1099 {
1100         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1101                              tclass, label, priority);
1102 }
1103
1104 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1105 {
1106         struct inet_timewait_sock *tw = inet_twsk(sk);
1107         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1108
1109         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1110                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1111                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1112                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1113                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1114
1115         inet_twsk_put(tw);
1116 }
1117
1118 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1119                                   struct request_sock *req)
1120 {
1121         int l3index;
1122
1123         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1124
1125         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1126          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1127          */
1128         /* RFC 7323 2.3
1129          * The window field (SEG.WND) of every outgoing segment, with the
1130          * exception of <SYN> segments, MUST be right-shifted by
1131          * Rcv.Wind.Shift bits:
1132          */
1133         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1134                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1135                         tcp_rsk(req)->rcv_nxt,
1136                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1137                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1138                         req->ts_recent, sk->sk_bound_dev_if,
1139                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1140                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1141 }
1142
1143
/*
 * With SYN cookies compiled in, run every non-SYN segment through
 * cookie_v6_check() and return its result; otherwise (or for a SYN)
 * return @sk unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *tcph = tcp_hdr(skb);

	if (tcph->syn)
		return sk;

	sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
1154
1155 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1156                          struct tcphdr *th, u32 *cookie)
1157 {
1158         u16 mss = 0;
1159 #ifdef CONFIG_SYN_COOKIES
1160         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1161                                     &tcp_request_sock_ipv6_ops, sk, th);
1162         if (mss) {
1163                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1164                 tcp_synq_overflow(sk);
1165         }
1166 #endif
1167         return mss;
1168 }
1169
1170 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1171 {
1172         if (skb->protocol == htons(ETH_P_IP))
1173                 return tcp_v4_conn_request(sk, skb);
1174
1175         if (!ipv6_unicast_destination(skb))
1176                 goto drop;
1177
1178         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1179                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1180                 return 0;
1181         }
1182
1183         return tcp_conn_request(&tcp6_request_sock_ops,
1184                                 &tcp_request_sock_ipv6_ops, sk, skb);
1185
1186 drop:
1187         tcp_listendrop(sk);
1188         return 0; /* don't send reset */
1189 }
1190
1191 static void tcp_v6_restore_cb(struct sk_buff *skb)
1192 {
1193         /* We need to move header back to the beginning if xfrm6_policy_check()
1194          * and tcp_v6_fill_cb() are going to be called again.
1195          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1196          */
1197         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1198                 sizeof(struct inet6_skb_parm));
1199 }
1200
1201 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1202                                          struct request_sock *req,
1203                                          struct dst_entry *dst,
1204                                          struct request_sock *req_unhash,
1205                                          bool *own_req)
1206 {
1207         struct inet_request_sock *ireq;
1208         struct ipv6_pinfo *newnp;
1209         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1210         struct ipv6_txoptions *opt;
1211         struct inet_sock *newinet;
1212         bool found_dup_sk = false;
1213         struct tcp_sock *newtp;
1214         struct sock *newsk;
1215 #ifdef CONFIG_TCP_MD5SIG
1216         struct tcp_md5sig_key *key;
1217         int l3index;
1218 #endif
1219         struct flowi6 fl6;
1220
1221         if (skb->protocol == htons(ETH_P_IP)) {
1222                 /*
1223                  *      v6 mapped
1224                  */
1225
1226                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1227                                              req_unhash, own_req);
1228
1229                 if (!newsk)
1230                         return NULL;
1231
1232                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1233
1234                 newinet = inet_sk(newsk);
1235                 newnp = tcp_inet6_sk(newsk);
1236                 newtp = tcp_sk(newsk);
1237
1238                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1239
1240                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1241
1242                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1243                 if (sk_is_mptcp(newsk))
1244                         mptcpv6_handle_mapped(newsk, true);
1245                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1246 #ifdef CONFIG_TCP_MD5SIG
1247                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1248 #endif
1249
1250                 newnp->ipv6_mc_list = NULL;
1251                 newnp->ipv6_ac_list = NULL;
1252                 newnp->ipv6_fl_list = NULL;
1253                 newnp->pktoptions  = NULL;
1254                 newnp->opt         = NULL;
1255                 newnp->mcast_oif   = inet_iif(skb);
1256                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1257                 newnp->rcv_flowinfo = 0;
1258                 if (np->repflow)
1259                         newnp->flow_label = 0;
1260
1261                 /*
1262                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1263                  * here, tcp_create_openreq_child now does this for us, see the comment in
1264                  * that function for the gory details. -acme
1265                  */
1266
1267                 /* It is tricky place. Until this moment IPv4 tcp
1268                    worked with IPv6 icsk.icsk_af_ops.
1269                    Sync it now.
1270                  */
1271                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1272
1273                 return newsk;
1274         }
1275
1276         ireq = inet_rsk(req);
1277
1278         if (sk_acceptq_is_full(sk))
1279                 goto out_overflow;
1280
1281         if (!dst) {
1282                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1283                 if (!dst)
1284                         goto out;
1285         }
1286
1287         newsk = tcp_create_openreq_child(sk, req, skb);
1288         if (!newsk)
1289                 goto out_nonewsk;
1290
1291         /*
1292          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1293          * count here, tcp_create_openreq_child now does this for us, see the
1294          * comment in that function for the gory details. -acme
1295          */
1296
1297         newsk->sk_gso_type = SKB_GSO_TCPV6;
1298         ip6_dst_store(newsk, dst, NULL, NULL);
1299         inet6_sk_rx_dst_set(newsk, skb);
1300
1301         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1302
1303         newtp = tcp_sk(newsk);
1304         newinet = inet_sk(newsk);
1305         newnp = tcp_inet6_sk(newsk);
1306
1307         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1308
1309         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1310         newnp->saddr = ireq->ir_v6_loc_addr;
1311         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1312         newsk->sk_bound_dev_if = ireq->ir_iif;
1313
1314         /* Now IPv6 options...
1315
1316            First: no IPv4 options.
1317          */
1318         newinet->inet_opt = NULL;
1319         newnp->ipv6_mc_list = NULL;
1320         newnp->ipv6_ac_list = NULL;
1321         newnp->ipv6_fl_list = NULL;
1322
1323         /* Clone RX bits */
1324         newnp->rxopt.all = np->rxopt.all;
1325
1326         newnp->pktoptions = NULL;
1327         newnp->opt        = NULL;
1328         newnp->mcast_oif  = tcp_v6_iif(skb);
1329         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1330         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1331         if (np->repflow)
1332                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1333
1334         /* Set ToS of the new socket based upon the value of incoming SYN.
1335          * ECT bits are set later in tcp_init_transfer().
1336          */
1337         if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1338                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1339
1340         /* Clone native IPv6 options from listening socket (if any)
1341
1342            Yes, keeping reference count would be much more clever,
1343            but we make one more one thing there: reattach optmem
1344            to newsk.
1345          */
1346         opt = ireq->ipv6_opt;
1347         if (!opt)
1348                 opt = rcu_dereference(np->opt);
1349         if (opt) {
1350                 opt = ipv6_dup_options(newsk, opt);
1351                 RCU_INIT_POINTER(newnp->opt, opt);
1352         }
1353         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1354         if (opt)
1355                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1356                                                     opt->opt_flen;
1357
1358         tcp_ca_openreq_child(newsk, dst);
1359
1360         tcp_sync_mss(newsk, dst_mtu(dst));
1361         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1362
1363         tcp_initialize_rcv_mss(newsk);
1364
1365         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1366         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1367
1368 #ifdef CONFIG_TCP_MD5SIG
1369         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1370
1371         /* Copy over the MD5 key from the original socket */
1372         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1373         if (key) {
1374                 /* We're using one, so create a matching key
1375                  * on the newsk structure. If we fail to get
1376                  * memory, then we end up not copying the key
1377                  * across. Shucks.
1378                  */
1379                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1380                                AF_INET6, 128, l3index, key->key, key->keylen,
1381                                sk_gfp_mask(sk, GFP_ATOMIC));
1382         }
1383 #endif
1384
1385         if (__inet_inherit_port(sk, newsk) < 0) {
1386                 inet_csk_prepare_forced_close(newsk);
1387                 tcp_done(newsk);
1388                 goto out;
1389         }
1390         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1391                                        &found_dup_sk);
1392         if (*own_req) {
1393                 tcp_move_syn(newtp, req);
1394
1395                 /* Clone pktoptions received with SYN, if we own the req */
1396                 if (ireq->pktopts) {
1397                         newnp->pktoptions = skb_clone(ireq->pktopts,
1398                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1399                         consume_skb(ireq->pktopts);
1400                         ireq->pktopts = NULL;
1401                         if (newnp->pktoptions) {
1402                                 tcp_v6_restore_cb(newnp->pktoptions);
1403                                 skb_set_owner_r(newnp->pktoptions, newsk);
1404                         }
1405                 }
1406         } else {
1407                 if (!req_unhash && found_dup_sk) {
1408                         /* This code path should only be executed in the
1409                          * syncookie case only
1410                          */
1411                         bh_unlock_sock(newsk);
1412                         sock_put(newsk);
1413                         newsk = NULL;
1414                 }
1415         }
1416
1417         return newsk;
1418
1419 out_overflow:
1420         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1421 out_nonewsk:
1422         dst_release(dst);
1423 out:
1424         tcp_listendrop(sk);
1425         return NULL;
1426 }
1427
1428 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1429                                                            u32));
1430 /* The socket must have it's spinlock held when we get
1431  * here, unless it is a TCP_LISTEN socket.
1432  *
1433  * We have a potential double-lock case here, so even when
1434  * doing backlog processing we use the BH locking scheme.
1435  * This is because we cannot sleep with the original spinlock
1436  * held.
1437  */
1438 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1439 {
1440         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1441         struct sk_buff *opt_skb = NULL;
1442         struct tcp_sock *tp;
1443
1444         /* Imagine: socket is IPv6. IPv4 packet arrives,
1445            goes to IPv4 receive handler and backlogged.
1446            From backlog it always goes here. Kerboom...
1447            Fortunately, tcp_rcv_established and rcv_established
1448            handle them correctly, but it is not case with
1449            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1450          */
1451
1452         if (skb->protocol == htons(ETH_P_IP))
1453                 return tcp_v4_do_rcv(sk, skb);
1454
1455         /*
1456          *      socket locking is here for SMP purposes as backlog rcv
1457          *      is currently called with bh processing disabled.
1458          */
1459
1460         /* Do Stevens' IPV6_PKTOPTIONS.
1461
1462            Yes, guys, it is the only place in our code, where we
1463            may make it not affecting IPv4.
1464            The rest of code is protocol independent,
1465            and I do not like idea to uglify IPv4.
1466
1467            Actually, all the idea behind IPV6_PKTOPTIONS
1468            looks not very well thought. For now we latch
1469            options, received in the last packet, enqueued
1470            by tcp. Feel free to propose better solution.
1471                                                --ANK (980728)
1472          */
1473         if (np->rxopt.all)
1474                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1475
1476         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1477                 struct dst_entry *dst = sk->sk_rx_dst;
1478
1479                 sock_rps_save_rxhash(sk, skb);
1480                 sk_mark_napi_id(sk, skb);
1481                 if (dst) {
1482                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1483                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1484                                             dst, np->rx_dst_cookie) == NULL) {
1485                                 dst_release(dst);
1486                                 sk->sk_rx_dst = NULL;
1487                         }
1488                 }
1489
1490                 tcp_rcv_established(sk, skb);
1491                 if (opt_skb)
1492                         goto ipv6_pktoptions;
1493                 return 0;
1494         }
1495
1496         if (tcp_checksum_complete(skb))
1497                 goto csum_err;
1498
1499         if (sk->sk_state == TCP_LISTEN) {
1500                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1501
1502                 if (!nsk)
1503                         goto discard;
1504
1505                 if (nsk != sk) {
1506                         if (tcp_child_process(sk, nsk, skb))
1507                                 goto reset;
1508                         if (opt_skb)
1509                                 __kfree_skb(opt_skb);
1510                         return 0;
1511                 }
1512         } else
1513                 sock_rps_save_rxhash(sk, skb);
1514
1515         if (tcp_rcv_state_process(sk, skb))
1516                 goto reset;
1517         if (opt_skb)
1518                 goto ipv6_pktoptions;
1519         return 0;
1520
1521 reset:
1522         tcp_v6_send_reset(sk, skb);
1523 discard:
1524         if (opt_skb)
1525                 __kfree_skb(opt_skb);
1526         kfree_skb(skb);
1527         return 0;
1528 csum_err:
1529         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1530         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1531         goto discard;
1532
1533
1534 ipv6_pktoptions:
1535         /* Do you ask, what is it?
1536
1537            1. skb was enqueued by tcp.
1538            2. skb is added to tail of read queue, rather than out of order.
1539            3. socket is not in passive state.
1540            4. Finally, it really contains options, which user wants to receive.
1541          */
1542         tp = tcp_sk(sk);
1543         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1544             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1545                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1546                         np->mcast_oif = tcp_v6_iif(opt_skb);
1547                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1548                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1549                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1550                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1551                 if (np->repflow)
1552                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1553                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1554                         skb_set_owner_r(opt_skb, sk);
1555                         tcp_v6_restore_cb(opt_skb);
1556                         opt_skb = xchg(&np->pktoptions, opt_skb);
1557                 } else {
1558                         __kfree_skb(opt_skb);
1559                         opt_skb = xchg(&np->pktoptions, NULL);
1560                 }
1561         }
1562
1563         kfree_skb(opt_skb);
1564         return 0;
1565 }
1566
1567 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1568                            const struct tcphdr *th)
1569 {
1570         /* This is tricky: we move IP6CB at its correct location into
1571          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1572          * _decode_session6() uses IP6CB().
1573          * barrier() makes sure compiler won't play aliasing games.
1574          */
1575         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1576                 sizeof(struct inet6_skb_parm));
1577         barrier();
1578
1579         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1580         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1581                                     skb->len - th->doff*4);
1582         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1583         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1584         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1585         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1586         TCP_SKB_CB(skb)->sacked = 0;
1587         TCP_SKB_CB(skb)->has_rxtstamp =
1588                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1589 }
1590
/*
 * tcp_v6_rcv - receive handler for TCP segments carried over IPv6.
 *
 * Installed as the .handler of tcpv6_protocol.  The function validates the
 * TCP header, looks up the owning socket and then dispatches on the socket
 * state: TIME_WAIT sockets go to do_time_wait, request socks
 * (TCP_NEW_SYN_RECV) are resolved through their listener, listeners are fed
 * directly to tcp_v6_do_rcv(), and all other sockets are processed under
 * bh_lock_sock_nested() or queued on the backlog when user context owns
 * the socket.
 *
 * Returns -1 when tcp_v6_do_rcv() reported a non-zero result on the
 * put_and_return path, and 0 on every other path (including all drops).
 */
1591 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1592 {
1593         struct sk_buff *skb_to_free;
1594         int sdif = inet6_sdif(skb);
1595         int dif = inet6_iif(skb);
1596         const struct tcphdr *th;
1597         const struct ipv6hdr *hdr;
1598         bool refcounted;
1599         struct sock *sk;
1600         int ret;
1601         struct net *net = dev_net(skb->dev);
1602
             /* Only packets addressed to this host are handled here. */
1603         if (skb->pkt_type != PACKET_HOST)
1604                 goto discard_it;
1605
1606         /*
1607          *      Count it even if it's bad.
1608          */
1609         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1610
1611         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1612                 goto discard_it;
1613
1614         th = (const struct tcphdr *)skb->data;
1615
             /* doff counts 32-bit words: reject headers shorter than
              * struct tcphdr, then make sure the full header is pullable.
              */
1616         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1617                 goto bad_packet;
1618         if (!pskb_may_pull(skb, th->doff*4))
1619                 goto discard_it;
1620
1621         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1622                 goto csum_error;
1623
             /* Header pointers are re-read after the pulls above
              * (presumably because they may relocate skb->data - confirm).
              */
1624         th = (const struct tcphdr *)skb->data;
1625         hdr = ipv6_hdr(skb);
1626
1627 lookup:
1628         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1629                                 th->source, th->dest, inet6_iif(skb), sdif,
1630                                 &refcounted);
1631         if (!sk)
1632                 goto no_tcp_socket;
1633
1634 process:
1635         if (sk->sk_state == TCP_TIME_WAIT)
1636                 goto do_time_wait;
1637
             /* The lookup returned a request sock: continue on behalf of
              * its listener (req->rsk_listener).
              */
1638         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1639                 struct request_sock *req = inet_reqsk(sk);
1640                 bool req_stolen = false;
1641                 struct sock *nsk;
1642
1643                 sk = req->rsk_listener;
1644                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1645                         sk_drops_add(sk, skb);
1646                         reqsk_put(req);
1647                         goto discard_it;
1648                 }
1649                 if (tcp_checksum_complete(skb)) {
1650                         reqsk_put(req);
1651                         goto csum_error;
1652                 }
                     /* Listener left TCP_LISTEN: drop the request and redo
                      * the socket lookup for this packet.
                      */
1653                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1654                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1655                         goto lookup;
1656                 }
                     /* Take a reference on the listener; with refcounted set,
                      * the exit paths below drop it again via sock_put().
                      */
1657                 sock_hold(sk);
1658                 refcounted = true;
1659                 nsk = NULL;
1660                 if (!tcp_filter(sk, skb)) {
1661                         th = (const struct tcphdr *)skb->data;
1662                         hdr = ipv6_hdr(skb);
1663                         tcp_v6_fill_cb(skb, hdr, th);
1664                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1665                 }
1666                 if (!nsk) {
1667                         reqsk_put(req);
1668                         if (req_stolen) {
1669                                 /* Another cpu got exclusive access to req
1670                                  * and created a full blown socket.
1671                                  * Try to feed this packet to this socket
1672                                  * instead of discarding it.
1673                                  */
1674                                 tcp_v6_restore_cb(skb);
1675                                 sock_put(sk);
1676                                 goto lookup;
1677                         }
1678                         goto discard_and_relse;
1679                 }
                     /* nsk == sk: tcp_check_req() handed back the listener, so
                      * restore the cb and fall through to normal processing.
                      * Otherwise nsk is a different socket handled by
                      * tcp_child_process(); a non-zero result triggers a reset.
                      */
1680                 if (nsk == sk) {
1681                         reqsk_put(req);
1682                         tcp_v6_restore_cb(skb);
1683                 } else if (tcp_child_process(sk, nsk, skb)) {
1684                         tcp_v6_send_reset(nsk, skb);
1685                         goto discard_and_relse;
1686                 } else {
1687                         sock_put(sk);
1688                         return 0;
1689                 }
1690         }
             /* Drop segments whose hop limit is below the socket's
              * configured min_hopcount.
              */
1691         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1692                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1693                 goto discard_and_relse;
1694         }
1695
1696         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1697                 goto discard_and_relse;
1698
1699         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1700                 goto discard_and_relse;
1701
1702         if (tcp_filter(sk, skb))
1703                 goto discard_and_relse;
             /* Re-read the headers after tcp_filter() and only then move
              * the control block into TCP layout.
              */
1704         th = (const struct tcphdr *)skb->data;
1705         hdr = ipv6_hdr(skb);
1706         tcp_v6_fill_cb(skb, hdr, th);
1707
1708         skb->dev = NULL;
1709
             /* Listening sockets are processed without taking the
              * bh socket lock in this function.
              */
1710         if (sk->sk_state == TCP_LISTEN) {
1711                 ret = tcp_v6_do_rcv(sk, skb);
1712                 goto put_and_return;
1713         }
1714
1715         sk_incoming_cpu_update(sk);
1716
1717         bh_lock_sock_nested(sk);
1718         tcp_segs_in(tcp_sk(sk), skb);
1719         ret = 0;
             /* Process immediately unless user context owns the socket, in
              * which case the segment is queued on the backlog.  The cached
              * rx skb is detached here and freed after the lock is dropped.
              */
1720         if (!sock_owned_by_user(sk)) {
1721                 skb_to_free = sk->sk_rx_skb_cache;
1722                 sk->sk_rx_skb_cache = NULL;
1723                 ret = tcp_v6_do_rcv(sk, skb);
1724         } else {
                     /* NOTE(review): this failure path leaves without calling
                      * bh_unlock_sock() here; presumably tcp_add_backlog()
                      * releases the lock itself on failure - confirm.
                      */
1725                 if (tcp_add_backlog(sk, skb))
1726                         goto discard_and_relse;
1727                 skb_to_free = NULL;
1728         }
1729         bh_unlock_sock(sk);
1730         if (skb_to_free)
1731                 __kfree_skb(skb_to_free);
1732 put_and_return:
1733         if (refcounted)
1734                 sock_put(sk);
1735         return ret ? -1 : 0;
1736
     /* No socket matched: after the policy check, a segment with a valid
      * checksum is answered with a reset; a bad checksum only bumps counters.
      * csum_error and bad_packet are also entered by goto from above.
      */
1737 no_tcp_socket:
1738         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1739                 goto discard_it;
1740
1741         tcp_v6_fill_cb(skb, hdr, th);
1742
1743         if (tcp_checksum_complete(skb)) {
1744 csum_error:
1745                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1746 bad_packet:
1747                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1748         } else {
1749                 tcp_v6_send_reset(NULL, skb);
1750         }
1751
1752 discard_it:
1753         kfree_skb(skb);
1754         return 0;
1755
     /* Drop path for packets already associated with a socket: account the
      * drop on sk and release the reference if one is held.
      */
1756 discard_and_relse:
1757         sk_drops_add(sk, skb);
1758         if (refcounted)
1759                 sock_put(sk);
1760         goto discard_it;
1761
     /* sk is a timewait sock on this path; every exit drops its reference
      * through inet_twsk_put() or inet_twsk_deschedule_put(), except the
      * TCP_TW_ACK / TCP_TW_SUCCESS cases which do not release it here.
      */
1762 do_time_wait:
1763         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1764                 inet_twsk_put(inet_twsk(sk));
1765                 goto discard_it;
1766         }
1767
1768         tcp_v6_fill_cb(skb, hdr, th);
1769
1770         if (tcp_checksum_complete(skb)) {
1771                 inet_twsk_put(inet_twsk(sk));
1772                 goto csum_error;
1773         }
1774
1775         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1776         case TCP_TW_SYN:
1777         {
1778                 struct sock *sk2;
1779
                     /* Look for a listener for this segment; if one exists,
                      * retire the timewait sock and restart processing on
                      * the listener (no reference held: refcounted = false).
                      */
1780                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1781                                             skb, __tcp_hdrlen(th),
1782                                             &ipv6_hdr(skb)->saddr, th->source,
1783                                             &ipv6_hdr(skb)->daddr,
1784                                             ntohs(th->dest),
1785                                             tcp_v6_iif_l3_slave(skb),
1786                                             sdif);
1787                 if (sk2) {
1788                         struct inet_timewait_sock *tw = inet_twsk(sk);
1789                         inet_twsk_deschedule_put(tw);
1790                         sk = sk2;
1791                         tcp_v6_restore_cb(skb);
1792                         refcounted = false;
1793                         goto process;
1794                 }
1795         }
1796                 /* to ACK */
1797                 fallthrough;
1798         case TCP_TW_ACK:
1799                 tcp_v6_timewait_ack(sk, skb);
1800                 break;
1801         case TCP_TW_RST:
1802                 tcp_v6_send_reset(sk, skb);
1803                 inet_twsk_deschedule_put(inet_twsk(sk));
1804                 goto discard_it;
1805         case TCP_TW_SUCCESS:
1806                 ;
1807         }
1808         goto discard_it;
1809 }
1810
1811 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1812 {
1813         const struct ipv6hdr *hdr;
1814         const struct tcphdr *th;
1815         struct sock *sk;
1816
1817         if (skb->pkt_type != PACKET_HOST)
1818                 return;
1819
1820         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1821                 return;
1822
1823         hdr = ipv6_hdr(skb);
1824         th = tcp_hdr(skb);
1825
1826         if (th->doff < sizeof(struct tcphdr) / 4)
1827                 return;
1828
1829         /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1830         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1831                                         &hdr->saddr, th->source,
1832                                         &hdr->daddr, ntohs(th->dest),
1833                                         inet6_iif(skb), inet6_sdif(skb));
1834         if (sk) {
1835                 skb->sk = sk;
1836                 skb->destructor = sock_edemux;
1837                 if (sk_fullsock(sk)) {
1838                         struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1839
1840                         if (dst)
1841                                 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1842                         if (dst &&
1843                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1844                                 skb_dst_set_noref(skb, dst);
1845                 }
1846         }
1847 }
1848
1849 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1850         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1851         .twsk_unique    = tcp_twsk_unique,
1852         .twsk_destructor = tcp_twsk_destructor,
1853 };
1854
1855 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1856 {
1857         struct ipv6_pinfo *np = inet6_sk(sk);
1858
1859         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1860 }
1861
1862 const struct inet_connection_sock_af_ops ipv6_specific = {
1863         .queue_xmit        = inet6_csk_xmit,
1864         .send_check        = tcp_v6_send_check,
1865         .rebuild_header    = inet6_sk_rebuild_header,
1866         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1867         .conn_request      = tcp_v6_conn_request,
1868         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1869         .net_header_len    = sizeof(struct ipv6hdr),
1870         .net_frag_header_len = sizeof(struct frag_hdr),
1871         .setsockopt        = ipv6_setsockopt,
1872         .getsockopt        = ipv6_getsockopt,
1873         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1874         .sockaddr_len      = sizeof(struct sockaddr_in6),
1875         .mtu_reduced       = tcp_v6_mtu_reduced,
1876 };
1877
#ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 signature hooks for native IPv6 sockets; installed by
 * tcp_v6_init_sock().  Only built when CONFIG_TCP_MD5SIG is enabled.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
        .md5_lookup = tcp_v6_md5_lookup,
        .calc_md5_hash = tcp_v6_md5_hash_skb,
        .md5_parse = tcp_v6_parse_md5_keys,
};
#endif
1885
1886 /*
1887  *      TCP over IPv4 via INET6 API
1888  */
1889 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1890         .queue_xmit        = ip_queue_xmit,
1891         .send_check        = tcp_v4_send_check,
1892         .rebuild_header    = inet_sk_rebuild_header,
1893         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1894         .conn_request      = tcp_v6_conn_request,
1895         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1896         .net_header_len    = sizeof(struct iphdr),
1897         .setsockopt        = ipv6_setsockopt,
1898         .getsockopt        = ipv6_getsockopt,
1899         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1900         .sockaddr_len      = sizeof(struct sockaddr_in6),
1901         .mtu_reduced       = tcp_v4_mtu_reduced,
1902 };
1903
#ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 signature hooks for the "TCP over IPv4 via INET6 API" case:
 * key lookup and hashing use the IPv4 helpers, key parsing stays on the
 * IPv6 helper.  Only built when CONFIG_TCP_MD5SIG is enabled.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
        .md5_lookup = tcp_v4_md5_lookup,
        .calc_md5_hash = tcp_v4_md5_hash_skb,
        .md5_parse = tcp_v6_parse_md5_keys,
};
#endif
1911
1912 /* NOTE: A lot of things set to zero explicitly by call to
1913  *       sk_alloc() so need not be done here.
1914  */
1915 static int tcp_v6_init_sock(struct sock *sk)
1916 {
1917         struct inet_connection_sock *icsk = inet_csk(sk);
1918
1919         tcp_init_sock(sk);
1920
1921         icsk->icsk_af_ops = &ipv6_specific;
1922
1923 #ifdef CONFIG_TCP_MD5SIG
1924         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1925 #endif
1926
1927         return 0;
1928 }
1929
/*
 * tcp_v6_destroy_sock - .destroy hook of tcpv6_prot.
 *
 * Runs the IPv4 TCP teardown (tcp_v4_destroy_sock) first and the INET6
 * teardown (inet6_destroy_sock) second.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
        tcp_v4_destroy_sock(sk);        /* TCP teardown */
        inet6_destroy_sock(sk);         /* INET6 teardown */
}
1935
1936 #ifdef CONFIG_PROC_FS
1937 /* Proc filesystem TCPv6 sock list dumping. */
1938 static void get_openreq6(struct seq_file *seq,
1939                          const struct request_sock *req, int i)
1940 {
1941         long ttd = req->rsk_timer.expires - jiffies;
1942         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1943         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1944
1945         if (ttd < 0)
1946                 ttd = 0;
1947
1948         seq_printf(seq,
1949                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1950                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1951                    i,
1952                    src->s6_addr32[0], src->s6_addr32[1],
1953                    src->s6_addr32[2], src->s6_addr32[3],
1954                    inet_rsk(req)->ir_num,
1955                    dest->s6_addr32[0], dest->s6_addr32[1],
1956                    dest->s6_addr32[2], dest->s6_addr32[3],
1957                    ntohs(inet_rsk(req)->ir_rmt_port),
1958                    TCP_SYN_RECV,
1959                    0, 0, /* could print option size, but that is af dependent. */
1960                    1,   /* timers active (only the expire timer) */
1961                    jiffies_to_clock_t(ttd),
1962                    req->num_timeout,
1963                    from_kuid_munged(seq_user_ns(seq),
1964                                     sock_i_uid(req->rsk_listener)),
1965                    0,  /* non standard timer */
1966                    0, /* open_requests have no inode */
1967                    0, req);
1968 }
1969
1970 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1971 {
1972         const struct in6_addr *dest, *src;
1973         __u16 destp, srcp;
1974         int timer_active;
1975         unsigned long timer_expires;
1976         const struct inet_sock *inet = inet_sk(sp);
1977         const struct tcp_sock *tp = tcp_sk(sp);
1978         const struct inet_connection_sock *icsk = inet_csk(sp);
1979         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1980         int rx_queue;
1981         int state;
1982
1983         dest  = &sp->sk_v6_daddr;
1984         src   = &sp->sk_v6_rcv_saddr;
1985         destp = ntohs(inet->inet_dport);
1986         srcp  = ntohs(inet->inet_sport);
1987
1988         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1989             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1990             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1991                 timer_active    = 1;
1992                 timer_expires   = icsk->icsk_timeout;
1993         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1994                 timer_active    = 4;
1995                 timer_expires   = icsk->icsk_timeout;
1996         } else if (timer_pending(&sp->sk_timer)) {
1997                 timer_active    = 2;
1998                 timer_expires   = sp->sk_timer.expires;
1999         } else {
2000                 timer_active    = 0;
2001                 timer_expires = jiffies;
2002         }
2003
2004         state = inet_sk_state_load(sp);
2005         if (state == TCP_LISTEN)
2006                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2007         else
2008                 /* Because we don't lock the socket,
2009                  * we might find a transient negative value.
2010                  */
2011                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2012                                       READ_ONCE(tp->copied_seq), 0);
2013
2014         seq_printf(seq,
2015                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2016                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2017                    i,
2018                    src->s6_addr32[0], src->s6_addr32[1],
2019                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2020                    dest->s6_addr32[0], dest->s6_addr32[1],
2021                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2022                    state,
2023                    READ_ONCE(tp->write_seq) - tp->snd_una,
2024                    rx_queue,
2025                    timer_active,
2026                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2027                    icsk->icsk_retransmits,
2028                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2029                    icsk->icsk_probes_out,
2030                    sock_i_ino(sp),
2031                    refcount_read(&sp->sk_refcnt), sp,
2032                    jiffies_to_clock_t(icsk->icsk_rto),
2033                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2034                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2035                    tp->snd_cwnd,
2036                    state == TCP_LISTEN ?
2037                         fastopenq->max_qlen :
2038                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2039                    );
2040 }
2041
2042 static void get_timewait6_sock(struct seq_file *seq,
2043                                struct inet_timewait_sock *tw, int i)
2044 {
2045         long delta = tw->tw_timer.expires - jiffies;
2046         const struct in6_addr *dest, *src;
2047         __u16 destp, srcp;
2048
2049         dest = &tw->tw_v6_daddr;
2050         src  = &tw->tw_v6_rcv_saddr;
2051         destp = ntohs(tw->tw_dport);
2052         srcp  = ntohs(tw->tw_sport);
2053
2054         seq_printf(seq,
2055                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2056                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2057                    i,
2058                    src->s6_addr32[0], src->s6_addr32[1],
2059                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2060                    dest->s6_addr32[0], dest->s6_addr32[1],
2061                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2062                    tw->tw_substate, 0, 0,
2063                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2064                    refcount_read(&tw->tw_refcnt), tw);
2065 }
2066
2067 static int tcp6_seq_show(struct seq_file *seq, void *v)
2068 {
2069         struct tcp_iter_state *st;
2070         struct sock *sk = v;
2071
2072         if (v == SEQ_START_TOKEN) {
2073                 seq_puts(seq,
2074                          "  sl  "
2075                          "local_address                         "
2076                          "remote_address                        "
2077                          "st tx_queue rx_queue tr tm->when retrnsmt"
2078                          "   uid  timeout inode\n");
2079                 goto out;
2080         }
2081         st = seq->private;
2082
2083         if (sk->sk_state == TCP_TIME_WAIT)
2084                 get_timewait6_sock(seq, v, st->num);
2085         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2086                 get_openreq6(seq, v, st->num);
2087         else
2088                 get_tcp6_sock(seq, v, st->num);
2089 out:
2090         return 0;
2091 }
2092
2093 static const struct seq_operations tcp6_seq_ops = {
2094         .show           = tcp6_seq_show,
2095         .start          = tcp_seq_start,
2096         .next           = tcp_seq_next,
2097         .stop           = tcp_seq_stop,
2098 };
2099
2100 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2101         .family         = AF_INET6,
2102 };
2103
2104 int __net_init tcp6_proc_init(struct net *net)
2105 {
2106         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2107                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2108                 return -ENOMEM;
2109         return 0;
2110 }
2111
2112 void tcp6_proc_exit(struct net *net)
2113 {
2114         remove_proc_entry("tcp6", net->proc_net);
2115 }
2116 #endif
2117
2118 struct proto tcpv6_prot = {
2119         .name                   = "TCPv6",
2120         .owner                  = THIS_MODULE,
2121         .close                  = tcp_close,
2122         .pre_connect            = tcp_v6_pre_connect,
2123         .connect                = tcp_v6_connect,
2124         .disconnect             = tcp_disconnect,
2125         .accept                 = inet_csk_accept,
2126         .ioctl                  = tcp_ioctl,
2127         .init                   = tcp_v6_init_sock,
2128         .destroy                = tcp_v6_destroy_sock,
2129         .shutdown               = tcp_shutdown,
2130         .setsockopt             = tcp_setsockopt,
2131         .getsockopt             = tcp_getsockopt,
2132         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2133         .keepalive              = tcp_set_keepalive,
2134         .recvmsg                = tcp_recvmsg,
2135         .sendmsg                = tcp_sendmsg,
2136         .sendpage               = tcp_sendpage,
2137         .backlog_rcv            = tcp_v6_do_rcv,
2138         .release_cb             = tcp_release_cb,
2139         .hash                   = inet6_hash,
2140         .unhash                 = inet_unhash,
2141         .get_port               = inet_csk_get_port,
2142         .enter_memory_pressure  = tcp_enter_memory_pressure,
2143         .leave_memory_pressure  = tcp_leave_memory_pressure,
2144         .stream_memory_free     = tcp_stream_memory_free,
2145         .sockets_allocated      = &tcp_sockets_allocated,
2146         .memory_allocated       = &tcp_memory_allocated,
2147         .memory_pressure        = &tcp_memory_pressure,
2148         .orphan_count           = &tcp_orphan_count,
2149         .sysctl_mem             = sysctl_tcp_mem,
2150         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2151         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2152         .max_header             = MAX_TCP_HEADER,
2153         .obj_size               = sizeof(struct tcp6_sock),
2154         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2155         .twsk_prot              = &tcp6_timewait_sock_ops,
2156         .rsk_prot               = &tcp6_request_sock_ops,
2157         .h.hashinfo             = &tcp_hashinfo,
2158         .no_autobind            = true,
2159         .diag_destroy           = tcp_abort,
2160 };
2161 EXPORT_SYMBOL_GPL(tcpv6_prot);
2162
2163 /* thinking of making this const? Don't.
2164  * early_demux can change based on sysctl.
2165  */
2166 static struct inet6_protocol tcpv6_protocol = {
2167         .early_demux    =       tcp_v6_early_demux,
2168         .early_demux_handler =  tcp_v6_early_demux,
2169         .handler        =       tcp_v6_rcv,
2170         .err_handler    =       tcp_v6_err,
2171         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2172 };
2173
2174 static struct inet_protosw tcpv6_protosw = {
2175         .type           =       SOCK_STREAM,
2176         .protocol       =       IPPROTO_TCP,
2177         .prot           =       &tcpv6_prot,
2178         .ops            =       &inet6_stream_ops,
2179         .flags          =       INET_PROTOSW_PERMANENT |
2180                                 INET_PROTOSW_ICSK,
2181 };
2182
2183 static int __net_init tcpv6_net_init(struct net *net)
2184 {
2185         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2186                                     SOCK_RAW, IPPROTO_TCP, net);
2187 }
2188
2189 static void __net_exit tcpv6_net_exit(struct net *net)
2190 {
2191         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2192 }
2193
2194 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2195 {
2196         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2197 }
2198
2199 static struct pernet_operations tcpv6_net_ops = {
2200         .init       = tcpv6_net_init,
2201         .exit       = tcpv6_net_exit,
2202         .exit_batch = tcpv6_net_exit_batch,
2203 };
2204
2205 int __init tcpv6_init(void)
2206 {
2207         int ret;
2208
2209         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2210         if (ret)
2211                 goto out;
2212
2213         /* register inet6 protocol */
2214         ret = inet6_register_protosw(&tcpv6_protosw);
2215         if (ret)
2216                 goto out_tcpv6_protocol;
2217
2218         ret = register_pernet_subsys(&tcpv6_net_ops);
2219         if (ret)
2220                 goto out_tcpv6_protosw;
2221
2222         ret = mptcpv6_init();
2223         if (ret)
2224                 goto out_tcpv6_pernet_subsys;
2225
2226 out:
2227         return ret;
2228
2229 out_tcpv6_pernet_subsys:
2230         unregister_pernet_subsys(&tcpv6_net_ops);
2231 out_tcpv6_protosw:
2232         inet6_unregister_protosw(&tcpv6_protosw);
2233 out_tcpv6_protocol:
2234         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2235         goto out;
2236 }
2237
2238 void tcpv6_exit(void)
2239 {
2240         unregister_pernet_subsys(&tcpv6_net_ops);
2241         inet6_unregister_protosw(&tcpv6_protosw);
2242         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2243 }