1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
/* Forward declarations: these handlers are referenced (e.g. from ops tables
 * and tcp_v6_connect()) before their definitions later in this file.
 */
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
/* Two AF-ops tables: native IPv6 sockets vs. IPv6 sockets talking to an
 * IPv4-mapped peer (which must fall back to the IPv4 code paths).
 */
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* NOTE(review): lines between the #ifdef branches are missing from this
 * extraction; this declaration presumably belongs to the !CONFIG_TCP_MD5SIG
 * stub variant — confirm against the full source.
 */
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr)
90 /* Helper returning the inet6 address from a given tcp socket.
91 * It can be used in TCP stack instead of inet6_sk(sk).
92 * This avoids a dereference and allow compiler optimizations.
93 * It is a specialized version of inet6_sk_generic().
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
/* The ipv6_pinfo is laid out at a fixed offset at the tail of tcp6_sock,
 * so it can be reached by pointer arithmetic instead of loading sk->pinet6.
 */
97 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
/* Cache the skb's dst (route) on the socket for the established fast path,
 * along with the input interface and a route-validation cookie.
 */
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 struct dst_entry *dst = skb_dst(skb);
/* dst_hold_safe() fails if the dst refcount already hit zero; only cache
 * the dst when we successfully took a reference.
 */
106 if (dst && dst_hold_safe(dst)) {
107 const struct rt6_info *rt = (const struct rt6_info *)dst;
110 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
/* The cookie lets later users detect a stale cached route. */
111 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number for an incoming IPv6 SYN from the
 * 4-tuple via the secure (keyed-hash) ISN generator.
 * NOTE(review): the line carrying the destination-port argument is missing
 * from this extraction — confirm against the full source.
 */
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->source);
/* Compute the per-connection timestamp offset from the address pair, so
 * TCP timestamps do not leak a global clock across connections.
 */
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126 ipv6_hdr(skb)->saddr.s6_addr32);
/* Hook run before connect(): validates the minimal sockaddr length and then
 * gives BPF cgroup programs a chance to inspect/rewrite the destination.
 */
129 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
132 /* This check is replicated from tcp_v6_connect() and intended to
133 * prevent BPF program called below from accessing bytes that are out
134 * of the bound specified by user in addr_len.
136 if (addr_len < SIN6_LEN_RFC2133)
/* Caller must hold the socket lock; assert that here. */
139 sock_owned_by_me(sk);
141 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/* Active open (connect()) for an IPv6 TCP socket: validates the address,
 * resolves flow labels and scope, handles IPv4-mapped destinations by
 * delegating to tcp_v4_connect(), routes the flow, binds a local port via
 * inet6_hash_connect(), picks the ISN, and sends the SYN.
 * NOTE(review): several error-handling lines/labels are missing from this
 * extraction; control flow between fragments is inferred.
 */
144 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
147 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
148 struct inet_sock *inet = inet_sk(sk);
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
152 struct in6_addr *saddr = NULL, *final_p, final;
153 struct ipv6_txoptions *opt;
155 struct dst_entry *dst;
158 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 if (addr_len < SIN6_LEN_RFC2133)
163 if (usin->sin6_family != AF_INET6)
164 return -EAFNOSUPPORT;
166 memset(&fl6, 0, sizeof(fl6));
/* Honor a user-supplied flow label/traffic-class from sin6_flowinfo. */
169 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
170 IP6_ECN_flow_init(fl6.flowlabel);
171 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
172 struct ip6_flowlabel *flowlabel;
173 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
174 if (IS_ERR(flowlabel))
176 fl6_sock_release(flowlabel);
181 * connect() to INADDR_ANY means loopback (BSD'ism).
184 if (ipv6_addr_any(&usin->sin6_addr)) {
/* If locally bound to a v4-mapped address, loop back over IPv4. */
185 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
186 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189 usin->sin6_addr = in6addr_loopback;
192 addr_type = ipv6_addr_type(&usin->sin6_addr);
/* TCP cannot connect to a multicast destination. */
194 if (addr_type & IPV6_ADDR_MULTICAST)
197 if (addr_type&IPV6_ADDR_LINKLOCAL) {
198 if (addr_len >= sizeof(struct sockaddr_in6) &&
199 usin->sin6_scope_id) {
200 /* If interface is set while binding, indices
203 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206 sk->sk_bound_dev_if = usin->sin6_scope_id;
209 /* Connect to link-local address requires an interface */
210 if (!sk->sk_bound_dev_if)
/* Reset cached TS-recent state when reconnecting to a new peer. */
214 if (tp->rx_opt.ts_recent_stamp &&
215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 tp->rx_opt.ts_recent = 0;
217 tp->rx_opt.ts_recent_stamp = 0;
221 sk->sk_v6_daddr = usin->sin6_addr;
222 np->flow_label = fl6.flowlabel;
/* IPv4-mapped destination: switch the socket onto the IPv4 code paths
 * and delegate to tcp_v4_connect(); on failure the ops are restored.
 */
228 if (addr_type & IPV6_ADDR_MAPPED) {
229 u32 exthdrlen = icsk->icsk_ext_hdr_len;
230 struct sockaddr_in sin;
/* An IPV6_V6ONLY socket must not talk to a v4-mapped peer. */
232 if (__ipv6_only_sock(sk))
235 sin.sin_family = AF_INET;
236 sin.sin_port = usin->sin6_port;
237 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 icsk->icsk_af_ops = &ipv6_mapped;
240 sk->sk_backlog_rcv = tcp_v4_do_rcv;
241 #ifdef CONFIG_TCP_MD5SIG
242 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
245 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* (error path) undo the switch to the mapped ops. */
248 icsk->icsk_ext_hdr_len = exthdrlen;
249 icsk->icsk_af_ops = &ipv6_specific;
250 sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #ifdef CONFIG_TCP_MD5SIG
252 tp->af_specific = &tcp_sock_ipv6_specific;
256 np->saddr = sk->sk_v6_rcv_saddr;
261 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow descriptor and route it. */
264 fl6.flowi6_proto = IPPROTO_TCP;
265 fl6.daddr = sk->sk_v6_daddr;
266 fl6.saddr = saddr ? *saddr : np->saddr;
267 fl6.flowi6_oif = sk->sk_bound_dev_if;
268 fl6.flowi6_mark = sk->sk_mark;
269 fl6.fl6_dport = usin->sin6_port;
270 fl6.fl6_sport = inet->inet_sport;
271 fl6.flowi6_uid = sk->sk_uid;
273 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 final_p = fl6_update_dst(&fl6, opt, &final);
276 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
278 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
/* If no source address was bound, adopt the one chosen by routing. */
286 sk->sk_v6_rcv_saddr = *saddr;
289 /* set the source address */
291 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
293 sk->sk_gso_type = SKB_GSO_TCPV6;
294 ip6_dst_store(sk, dst, NULL, NULL);
296 icsk->icsk_ext_hdr_len = 0;
/* Account IPv6 extension-header overhead in the MSS computation. */
298 icsk->icsk_ext_hdr_len = opt->opt_flen +
301 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
303 inet->inet_dport = usin->sin6_port;
305 tcp_set_state(sk, TCP_SYN_SENT);
/* Pick and hash a local port; may reuse a TIME-WAIT slot. */
306 err = inet6_hash_connect(tcp_death_row, sk);
312 if (likely(!tp->repair)) {
314 tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
315 sk->sk_v6_daddr.s6_addr32,
318 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
320 sk->sk_v6_daddr.s6_addr32);
/* TCP Fast Open may defer the actual SYN until the first sendmsg(). */
323 if (tcp_fastopen_defer_connect(sk, &err))
328 err = tcp_connect(sk);
/* Failure path: tear the socket back down to CLOSE. */
335 tcp_set_state(sk, TCP_CLOSE);
337 inet->inet_dport = 0;
338 sk->sk_route_caps = 0;
/* Handle a deferred PMTU reduction: update the cached route's MTU and,
 * if our cached path MTU is now too large, shrink the MSS and retransmit.
 */
342 static void tcp_v6_mtu_reduced(struct sock *sk)
344 struct dst_entry *dst;
/* Nothing to do for sockets with no established path. */
346 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
349 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
353 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
354 tcp_sync_mss(sk, dst_mtu(dst));
355 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket for the offending
 * segment, validate the error against its state, and either adjust the
 * path (redirect/PMTU) or report the error to the socket owner.
 * NOTE(review): several lines (locking, goto labels, exits) are missing
 * from this extraction; control flow between fragments is inferred.
 */
359 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
360 u8 type, u8 code, int offset, __be32 info)
362 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
363 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
364 struct net *net = dev_net(skb->dev);
365 struct request_sock *fastopen;
366 struct ipv6_pinfo *np;
373 sk = __inet6_lookup_established(net, &tcp_hashinfo,
374 &hdr->daddr, th->dest,
375 &hdr->saddr, ntohs(th->source),
376 skb->dev->ifindex, inet6_sdif(skb));
/* No matching socket: count the error against the device and bail. */
379 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
384 if (sk->sk_state == TCP_TIME_WAIT) {
385 inet_twsk_put(inet_twsk(sk));
388 seq = ntohl(th->seq);
389 fatal = icmpv6_err_convert(type, code, &err);
/* Errors for a request-socket (pending SYN) take a dedicated path. */
390 if (sk->sk_state == TCP_NEW_SYN_RECV) {
391 tcp_req_err(sk, seq, fatal);
396 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
397 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
399 if (sk->sk_state == TCP_CLOSE)
/* Drop ICMP spoofed with a hop limit below the socket's minimum. */
402 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
403 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
408 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
409 fastopen = tp->fastopen_rsk;
410 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore errors referencing sequence numbers outside the send window. */
411 if (sk->sk_state != TCP_LISTEN &&
412 !between(seq, snd_una, tp->snd_nxt)) {
413 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
417 np = tcp_inet6_sk(sk);
419 if (type == NDISC_REDIRECT) {
420 if (!sock_owned_by_user(sk)) {
421 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
424 dst->ops->redirect(dst, sk, skb);
429 if (type == ICMPV6_PKT_TOOBIG) {
430 /* We are not interested in TCP_LISTEN and open_requests
431 * (SYN-ACKs send out by Linux are always <576bytes so
432 * they should go through unfragmented).
434 if (sk->sk_state == TCP_LISTEN)
437 if (!ip6_sk_accept_pmtu(sk))
/* Record new MTU; process now if we own the lock, else defer. */
440 tp->mtu_info = ntohl(info);
441 if (!sock_owned_by_user(sk))
442 tcp_v6_mtu_reduced(sk);
443 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450 /* Might be for an request_sock */
451 switch (sk->sk_state) {
454 /* Only in fast or simultaneous open. If a fast open socket is
455 * is already accepted it is treated as a connected one below.
457 if (fastopen && !fastopen->sk)
460 if (!sock_owned_by_user(sk)) {
462 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
466 sk->sk_err_soft = err;
/* Established sockets: hard error only if the app asked (recverr). */
470 if (!sock_owned_by_user(sk) && np->recverr) {
472 sk->sk_error_report(sk);
474 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for @req: route the flow if no dst was
 * provided, construct the segment, checksum it, and emit via ip6_xmit()
 * with the listener's IPv6 tx options.
 */
483 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
485 struct request_sock *req,
486 struct tcp_fastopen_cookie *foc,
487 enum tcp_synack_type synack_type)
489 struct inet_request_sock *ireq = inet_rsk(req);
490 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
491 struct ipv6_txoptions *opt;
492 struct flowi6 *fl6 = &fl->u.ip6;
496 /* First, grab a route. */
497 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
498 IPPROTO_TCP)) == NULL)
501 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
504 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
505 &ireq->ir_v6_rmt_addr);
507 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the peer's flow label when IPV6_FLOWINFO reflection is on. */
508 if (np->repflow && ireq->pktopts)
509 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Prefer per-request IPv6 options; fall back to the listener's. */
512 opt = ireq->ipv6_opt;
514 opt = rcu_dereference(np->opt);
515 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
516 err = net_xmit_eval(err);
517 err = net_xmit_eval(err);
/* Free per-request IPv6 state: duplicated tx options and the saved SYN skb
 * (pktopts) that was retained for IPV6_PKTOPTIONS delivery.
 */
525 static void tcp_v6_reqsk_destructor(struct request_sock *req)
527 kfree(inet_rsk(req)->ipv6_opt);
528 kfree_skb(inet_rsk(req)->pktopts);
531 #ifdef CONFIG_TCP_MD5SIG
/* Look up the TCP-MD5 key configured for @addr on @sk (AF_INET6 keyspace). */
532 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
533 const struct in6_addr *addr)
535 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* tcp_sock_af_ops hook: key lookup keyed on the peer's address. */
538 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
539 const struct sock *addr_sk)
541 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: copy the user command,
 * validate it, and add or delete the MD5 key. V4-mapped peer addresses are
 * stored in the AF_INET keyspace using the embedded IPv4 address.
 */
544 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
545 char __user *optval, int optlen)
547 struct tcp_md5sig cmd;
548 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
551 if (optlen < sizeof(cmd))
554 if (copy_from_user(&cmd, optval, sizeof(cmd)))
557 if (sin6->sin6_family != AF_INET6)
/* TCP_MD5SIG_EXT may scope the key to an address prefix. */
560 if (optname == TCP_MD5SIG_EXT &&
561 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
562 prefixlen = cmd.tcpm_prefixlen;
/* v4-mapped addresses can carry at most a /32 (IPv4) prefix. */
563 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
/* Default: full-address match (/32 for v4-mapped, /128 otherwise). */
567 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
/* Zero key length means "delete this key". */
570 if (!cmd.tcpm_keylen) {
571 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
572 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
574 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
575 AF_INET6, prefixlen);
578 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
581 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
582 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
583 AF_INET, prefixlen, cmd.tcpm_key,
584 cmd.tcpm_keylen, GFP_KERNEL);
586 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
587 AF_INET6, prefixlen, cmd.tcpm_key,
588 cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the IPv6 pseudo-header plus a copy of the TCP header (with the
 * checksum field cleared elsewhere) into the per-CPU MD5 hash request.
 * Returns the crypto_ahash_update() result.
 */
591 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
592 const struct in6_addr *daddr,
593 const struct in6_addr *saddr,
594 const struct tcphdr *th, int nbytes)
596 struct tcp6_pseudohdr *bp;
597 struct scatterlist sg;
601 /* 1. TCP pseudo-header (RFC2460) */
604 bp->protocol = cpu_to_be32(IPPROTO_TCP);
605 bp->len = cpu_to_be32(nbytes);
/* The TCP header copy lives immediately after the pseudo-header in the
 * pool's scratch buffer, so both are hashed in one scatterlist entry.
 */
607 _th = (struct tcphdr *)(bp + 1);
608 memcpy(_th, th, sizeof(*th));
611 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
612 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
613 sizeof(*bp) + sizeof(*th));
614 return crypto_ahash_update(hp->md5_req);
/* Compute the TCP-MD5 signature over headers + key only (no payload), used
 * for generated control segments (RST/ACK). On any crypto failure the
 * output digest is zeroed so a bogus signature is never emitted.
 */
617 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
618 const struct in6_addr *daddr, struct in6_addr *saddr,
619 const struct tcphdr *th)
621 struct tcp_md5sig_pool *hp;
622 struct ahash_request *req;
/* Grab the per-CPU MD5 pool; disables preemption until put back. */
624 hp = tcp_get_md5sig_pool();
626 goto clear_hash_noput;
629 if (crypto_ahash_init(req))
631 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
633 if (tcp_md5_hash_key(hp, key))
635 ahash_request_set_crypt(req, NULL, md5_hash, 0);
636 if (crypto_ahash_final(req))
639 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
643 tcp_put_md5sig_pool();
645 memset(md5_hash, 0, 16);
/* Compute the TCP-MD5 signature over headers, skb payload and key for a
 * full segment. Addresses come from the socket when available (established
 * or request socket), otherwise from the skb's IPv6 header. Digest is
 * zeroed on any crypto failure.
 */
649 static int tcp_v6_md5_hash_skb(char *md5_hash,
650 const struct tcp_md5sig_key *key,
651 const struct sock *sk,
652 const struct sk_buff *skb)
654 const struct in6_addr *saddr, *daddr;
655 struct tcp_md5sig_pool *hp;
656 struct ahash_request *req;
657 const struct tcphdr *th = tcp_hdr(skb);
659 if (sk) { /* valid for establish/request sockets */
660 saddr = &sk->sk_v6_rcv_saddr;
661 daddr = &sk->sk_v6_daddr;
663 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
664 saddr = &ip6h->saddr;
665 daddr = &ip6h->daddr;
668 hp = tcp_get_md5sig_pool();
670 goto clear_hash_noput;
673 if (crypto_ahash_init(req))
676 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
/* Hash the payload, skipping the TCP header (doff is in 32-bit words). */
678 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
680 if (tcp_md5_hash_key(hp, key))
682 ahash_request_set_crypt(req, NULL, md5_hash, 0);
683 if (crypto_ahash_final(req))
686 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
690 tcp_put_md5sig_pool();
692 memset(md5_hash, 0, 16);
/* Validate the TCP-MD5 option of an inbound segment against the key
 * configured for the peer. Returns true when the segment must be dropped:
 * a key exists but no option was sent, an option was sent with no key, or
 * the recomputed digest mismatches. Compiles to a no-op without
 * CONFIG_TCP_MD5SIG.
 */
698 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
699 const struct sk_buff *skb)
701 #ifdef CONFIG_TCP_MD5SIG
702 const __u8 *hash_location = NULL;
703 struct tcp_md5sig_key *hash_expected;
704 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
705 const struct tcphdr *th = tcp_hdr(skb);
709 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
710 hash_location = tcp_parse_md5sig_option(th);
712 /* We've parsed the options - do we have a hash? */
713 if (!hash_expected && !hash_location)
716 if (hash_expected && !hash_location) {
717 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
721 if (!hash_expected && hash_location) {
722 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
726 /* check the signature */
727 genhash = tcp_v6_md5_hash_skb(newhash,
/* NOTE(review): memcmp() is not constant-time; newer kernels use
 * crypto_memneq() here to avoid a timing side channel — worth confirming
 * whether that applies to this tree.
 */
731 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
732 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
733 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
734 genhash ? "failed" : "mismatch",
735 &ip6h->saddr, ntohs(th->source),
736 &ip6h->daddr, ntohs(th->dest));
/* Initialize the IPv6-specific fields of a freshly minted request socket
 * from the incoming SYN: addresses, input interface for link-local peers,
 * and (when the listener asked for any rx options) a reference to the SYN
 * skb itself for later IPV6_PKTOPTIONS delivery.
 */
743 static void tcp_v6_init_req(struct request_sock *req,
744 const struct sock *sk_listener,
747 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
748 struct inet_request_sock *ireq = inet_rsk(req);
749 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
751 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
752 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
754 /* So that link locals have meaning */
755 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
756 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
757 ireq->ir_iif = tcp_v6_iif(skb);
/* Keep the SYN skb alive if any rx option the listener enabled will need
 * it; tcp_tw_isn != 0 means this is a TIME-WAIT recycle, which skips it.
 */
759 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
760 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
761 np->rxopt.bits.rxinfo ||
762 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
763 np->rxopt.bits.rxohlim || np->repflow)) {
764 refcount_inc(&skb->users);
/* request_sock_ops route hook: resolve a route for the SYN-ACK reply. */
769 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
771 const struct request_sock *req)
773 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* Generic request-socket operations for TCP/IPv6 (SYN retransmit, ACK,
 * reset, teardown).
 */
776 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
778 .obj_size = sizeof(struct tcp6_request_sock),
779 .rtx_syn_ack = tcp_rtx_synack,
780 .send_ack = tcp_v6_reqsk_send_ack,
781 .destructor = tcp_v6_reqsk_destructor,
782 .send_reset = tcp_v6_send_reset,
783 .syn_ack_timeout = tcp_syn_ack_timeout,
/* TCP-specific request-socket hooks for IPv6: MSS clamp, MD5, syncookies,
 * ISN/timestamp-offset generation, routing and SYN-ACK transmission.
 */
786 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
787 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
788 sizeof(struct ipv6hdr),
789 #ifdef CONFIG_TCP_MD5SIG
790 .req_md5_lookup = tcp_v6_md5_lookup,
791 .calc_md5_hash = tcp_v6_md5_hash_skb,
793 .init_req = tcp_v6_init_req,
794 #ifdef CONFIG_SYN_COOKIES
795 .cookie_init_seq = cookie_v6_init_sequence,
797 .route_req = tcp_v6_route_req,
798 .init_seq = tcp_v6_init_seq,
799 .init_ts_off = tcp_v6_init_ts_off,
800 .send_synack = tcp_v6_send_synack,
/* Build and transmit a bare control segment (RST when @rst, otherwise ACK)
 * in reply to @skb, using the per-netns control socket rather than @sk —
 * @sk may be NULL, a timewait socket, or a request socket. Optionally
 * carries timestamps and a TCP-MD5 signature.
 * NOTE(review): several lines (option-size checks, alloc-failure exit,
 * dst error path) are missing from this extraction.
 */
803 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
804 u32 ack, u32 win, u32 tsval, u32 tsecr,
805 int oif, struct tcp_md5sig_key *key, int rst,
806 u8 tclass, __be32 label)
808 const struct tcphdr *th = tcp_hdr(skb);
810 struct sk_buff *buff;
812 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
813 struct sock *ctl_sk = net->ipv6.tcp_sk;
814 unsigned int tot_len = sizeof(struct tcphdr);
815 struct dst_entry *dst;
/* Grow the header for timestamp and/or MD5 options as needed. */
820 tot_len += TCPOLEN_TSTAMP_ALIGNED;
821 #ifdef CONFIG_TCP_MD5SIG
823 tot_len += TCPOLEN_MD5SIG_ALIGNED;
826 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
831 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
833 t1 = skb_push(buff, tot_len);
834 skb_reset_transport_header(buff);
836 /* Swap the send and the receive. */
837 memset(t1, 0, sizeof(*t1));
838 t1->dest = th->source;
839 t1->source = th->dest;
840 t1->doff = tot_len / 4;
841 t1->seq = htonl(seq);
842 t1->ack_seq = htonl(ack);
/* A RST replying to a segment that carried an ACK must not set ACK. */
843 t1->ack = !rst || !th->ack;
845 t1->window = htons(win);
/* Options area starts right after the fixed TCP header. */
847 topt = (__be32 *)(t1 + 1);
850 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
851 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
852 *topt++ = htonl(tsval);
853 *topt++ = htonl(tsecr);
856 #ifdef CONFIG_TCP_MD5SIG
858 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
859 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* Sign with source/destination swapped: we are replying to @skb. */
860 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
861 &ipv6_hdr(skb)->saddr,
862 &ipv6_hdr(skb)->daddr, t1);
866 memset(&fl6, 0, sizeof(fl6));
867 fl6.daddr = ipv6_hdr(skb)->saddr;
868 fl6.saddr = ipv6_hdr(skb)->daddr;
869 fl6.flowlabel = label;
871 buff->ip_summed = CHECKSUM_PARTIAL;
874 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
876 fl6.flowi6_proto = IPPROTO_TCP;
/* Strict (link-local/multicast) destinations need an explicit oif. */
877 if (rt6_need_strict(&fl6.daddr) && !oif)
878 fl6.flowi6_oif = tcp_v6_iif(skb);
880 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
883 fl6.flowi6_oif = oif;
/* For a TIME-WAIT peer, reuse its fwmark/txhash/tclass for the reply. */
887 if (sk->sk_state == TCP_TIME_WAIT) {
888 mark = inet_twsk(sk)->tw_mark;
889 /* autoflowlabel relies on buff->hash */
890 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
895 buff->tstamp = tcp_transmit_time(sk);
897 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
898 fl6.fl6_dport = t1->dest;
899 fl6.fl6_sport = t1->source;
900 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
901 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
903 /* Pass a socket to ip6_dst_lookup either it is for RST
904 * Underlying function will use this to retrieve the network
907 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
909 skb_dst_set(buff, dst);
910 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
911 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
913 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb. When no socket matched but the segment was
 * MD5-signed, try to find the listener by source port so the RST can be
 * properly signed; unsigned RSTs to signed segments would be ignored by
 * the peer anyway. Also decides the seq/ack values per RFC 793 and picks a
 * flow label per sysctl/peer state.
 * NOTE(review): lines including RST-on-RST suppression, rcu locking and
 * the sk1 release path are missing from this extraction.
 */
920 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
922 const struct tcphdr *th = tcp_hdr(skb);
923 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
924 u32 seq = 0, ack_seq = 0;
925 struct tcp_md5sig_key *key = NULL;
926 #ifdef CONFIG_TCP_MD5SIG
927 const __u8 *hash_location = NULL;
928 unsigned char newhash[16];
930 struct sock *sk1 = NULL;
939 /* If sk not NULL, it means we did a successful lookup and incoming
940 * route had to be correct. prequeue might have dropped our dst.
942 if (!sk && !ipv6_unicast_destination(skb))
945 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
946 #ifdef CONFIG_TCP_MD5SIG
948 hash_location = tcp_parse_md5sig_option(th);
949 if (sk && sk_fullsock(sk)) {
950 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
951 } else if (hash_location) {
953 * active side is lost. Try to find listening socket through
954 * source port, and then find md5 key through listening socket.
955 * we are not loose security here:
956 * Incoming packet is checked with md5 hash with finding key,
957 * no RST generated if md5 hash doesn't match.
959 sk1 = inet6_lookup_listener(net,
960 &tcp_hashinfo, NULL, 0,
962 th->source, &ipv6h->daddr,
964 tcp_v6_iif_l3_slave(skb),
969 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Verify the incoming signature before we bother replying. */
973 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
974 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* Per RFC 793: echo the peer's ACK as our SEQ when present, otherwise
 * ACK everything the offending segment covered.
 */
980 seq = ntohl(th->ack_seq);
982 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
986 oif = sk->sk_bound_dev_if;
987 if (sk_fullsock(sk)) {
988 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
990 trace_tcp_send_reset(sk, skb);
992 label = ip6_flowlabel(ipv6h);
994 if (sk->sk_state == TCP_TIME_WAIT)
995 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
997 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
998 label = ip6_flowlabel(ipv6h);
1001 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1004 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() with rst=0: emit a bare ACK. */
1010 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1011 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1012 struct tcp_md5sig_key *key, u8 tclass,
1015 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/* ACK on behalf of a TIME-WAIT socket, using the state snapshotted into
 * the timewait sock (window scaled down, timestamp offset applied).
 */
1019 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1021 struct inet_timewait_sock *tw = inet_twsk(sk);
1022 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1024 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1025 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1026 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1027 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1028 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/* ACK on behalf of a request socket (SYN_RECV), e.g. to re-acknowledge a
 * retransmitted SYN or challenge an out-of-window segment.
 */
1033 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1034 struct request_sock *req)
1036 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1037 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1040 * The window field (SEG.WND) of every outgoing segment, with the
1041 * exception of <SYN> segments, MUST be right-shifted by
1042 * Rcv.Wind.Shift bits:
1044 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1045 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1046 tcp_rsk(req)->rcv_nxt,
1047 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1048 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1049 req->ts_recent, sk->sk_bound_dev_if,
1050 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
/* If the segment looks like a syncookie ACK, validate the cookie and
 * return the resulting child socket; with CONFIG_SYN_COOKIES off this
 * path compiles away and the listener is used as-is.
 */
1055 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1057 #ifdef CONFIG_SYN_COOKIES
1058 const struct tcphdr *th = tcp_hdr(skb);
1061 sk = cookie_v6_check(sk, skb);
/* Passive open: handle an incoming SYN. IPv4 packets on a dual-stack
 * socket are delegated to tcp_v4_conn_request(); non-unicast destinations
 * are dropped without a RST.
 */
1066 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1068 if (skb->protocol == htons(ETH_P_IP))
1069 return tcp_v4_conn_request(sk, skb);
1071 if (!ipv6_unicast_destination(skb))
1074 return tcp_conn_request(&tcp6_request_sock_ops,
1075 &tcp_request_sock_ipv6_ops, sk, skb);
1079 return 0; /* don't send reset */
/* Undo tcp_v6_fill_cb(): copy the IPv6 control-block data back from
 * TCP_SKB_CB to the skb's IP6CB location.
 */
1082 static void tcp_v6_restore_cb(struct sk_buff *skb)
1084 /* We need to move header back to the beginning if xfrm6_policy_check()
1085 * and tcp_v6_fill_cb() are going to be called again.
1086 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1088 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1089 sizeof(struct inet6_skb_parm));
/* Create the child (established) socket from a completed handshake.
 * IPv4 SYNs on a dual-stack listener are delegated to
 * tcp_v4_syn_recv_sock() and the child is then retrofitted with
 * v4-mapped IPv6 state; otherwise the child is built natively: route,
 * openreq child, address/option copy, MSS sync, MD5 key clone, port
 * inheritance, ehash insertion and pktoptions transfer.
 * NOTE(review): numerous lines (error labels, some assignments) are
 * missing from this extraction; flow between fragments is inferred.
 */
1092 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1093 struct request_sock *req,
1094 struct dst_entry *dst,
1095 struct request_sock *req_unhash,
1098 struct inet_request_sock *ireq;
1099 struct ipv6_pinfo *newnp;
1100 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1101 struct ipv6_txoptions *opt;
1102 struct inet_sock *newinet;
1103 struct tcp_sock *newtp;
1105 #ifdef CONFIG_TCP_MD5SIG
1106 struct tcp_md5sig_key *key;
/* ---- v4-mapped path: the peer spoke IPv4 to a dual-stack listener ---- */
1110 if (skb->protocol == htons(ETH_P_IP)) {
1115 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1116 req_unhash, own_req);
1121 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1123 newinet = inet_sk(newsk);
1124 newnp = tcp_inet6_sk(newsk);
1125 newtp = tcp_sk(newsk);
1127 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1129 newnp->saddr = newsk->sk_v6_rcv_saddr;
/* Child runs the IPv4 do_rcv path with mapped-AF ops. */
1131 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1132 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1133 #ifdef CONFIG_TCP_MD5SIG
1134 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
/* Clear inherited list heads/options that must not be shared. */
1137 newnp->ipv6_mc_list = NULL;
1138 newnp->ipv6_ac_list = NULL;
1139 newnp->ipv6_fl_list = NULL;
1140 newnp->pktoptions = NULL;
1142 newnp->mcast_oif = inet_iif(skb);
1143 newnp->mcast_hops = ip_hdr(skb)->ttl;
1144 newnp->rcv_flowinfo = 0;
1146 newnp->flow_label = 0;
1149 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1150 * here, tcp_create_openreq_child now does this for us, see the comment in
1151 * that function for the gory details. -acme
1154 /* It is tricky place. Until this moment IPv4 tcp
1155 worked with IPv6 icsk.icsk_af_ops.
1158 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* ---- native IPv6 path ---- */
1163 ireq = inet_rsk(req);
1165 if (sk_acceptq_is_full(sk))
1169 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1174 newsk = tcp_create_openreq_child(sk, req, skb);
1179 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1180 * count here, tcp_create_openreq_child now does this for us, see the
1181 * comment in that function for the gory details. -acme
1184 newsk->sk_gso_type = SKB_GSO_TCPV6;
1185 ip6_dst_store(newsk, dst, NULL, NULL);
1186 inet6_sk_rx_dst_set(newsk, skb);
1188 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1190 newtp = tcp_sk(newsk);
1191 newinet = inet_sk(newsk);
1192 newnp = tcp_inet6_sk(newsk);
1194 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1196 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1197 newnp->saddr = ireq->ir_v6_loc_addr;
1198 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1199 newsk->sk_bound_dev_if = ireq->ir_iif;
1201 /* Now IPv6 options...
1203 First: no IPv4 options.
1205 newinet->inet_opt = NULL;
1206 newnp->ipv6_mc_list = NULL;
1207 newnp->ipv6_ac_list = NULL;
1208 newnp->ipv6_fl_list = NULL;
1211 newnp->rxopt.all = np->rxopt.all;
1213 newnp->pktoptions = NULL;
1215 newnp->mcast_oif = tcp_v6_iif(skb);
1216 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1217 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1219 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1221 /* Clone native IPv6 options from listening socket (if any)
1223 Yes, keeping reference count would be much more clever,
1224 but we make one more one thing there: reattach optmem
1227 opt = ireq->ipv6_opt;
1229 opt = rcu_dereference(np->opt);
1231 opt = ipv6_dup_options(newsk, opt);
1232 RCU_INIT_POINTER(newnp->opt, opt);
1234 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1236 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1239 tcp_ca_openreq_child(newsk, dst);
1241 tcp_sync_mss(newsk, dst_mtu(dst));
1242 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1244 tcp_initialize_rcv_mss(newsk);
/* The v4 fields are meaningless on a native-v6 child; mark them. */
1246 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1247 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1249 #ifdef CONFIG_TCP_MD5SIG
1250 /* Copy over the MD5 key from the original socket */
1251 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1253 /* We're using one, so create a matching key
1254 * on the newsk structure. If we fail to get
1255 * memory, then we end up not copying the key
1258 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1259 AF_INET6, 128, key->key, key->keylen,
1260 sk_gfp_mask(sk, GFP_ATOMIC));
1264 if (__inet_inherit_port(sk, newsk) < 0) {
1265 inet_csk_prepare_forced_close(newsk);
/* Insert into the established hash; *own_req tells the caller whether
 * we won the (possibly racing) insertion.
 */
1269 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1271 tcp_move_syn(newtp, req);
1273 /* Clone pktoptions received with SYN, if we own the req */
1274 if (ireq->pktopts) {
1275 newnp->pktoptions = skb_clone(ireq->pktopts,
1276 sk_gfp_mask(sk, GFP_ATOMIC));
1277 consume_skb(ireq->pktopts);
1278 ireq->pktopts = NULL;
1279 if (newnp->pktoptions) {
1280 tcp_v6_restore_cb(newnp->pktoptions);
1281 skb_set_owner_r(newnp->pktoptions, newsk);
/* (error path) listener accept queue overflowed. */
1289 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1297 /* The socket must have it's spinlock held when we get
1298 * here, unless it is a TCP_LISTEN socket.
1300 * We have a potential double-lock case here, so even when
1301 * doing backlog processing we use the BH locking scheme.
1302 * This is because we cannot sleep with the original spinlock
1305 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1307 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1308 struct sk_buff *opt_skb = NULL;
1309 struct tcp_sock *tp;
1311 /* Imagine: socket is IPv6. IPv4 packet arrives,
1312 goes to IPv4 receive handler and backlogged.
1313 From backlog it always goes here. Kerboom...
1314 Fortunately, tcp_rcv_established and rcv_established
1315 handle them correctly, but it is not case with
1316 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1319 if (skb->protocol == htons(ETH_P_IP))
1320 return tcp_v4_do_rcv(sk, skb);
1323 * socket locking is here for SMP purposes as backlog rcv
1324 * is currently called with bh processing disabled.
1327 /* Do Stevens' IPV6_PKTOPTIONS.
1329 Yes, guys, it is the only place in our code, where we
1330 may make it not affecting IPv4.
1331 The rest of code is protocol independent,
1332 and I do not like idea to uglify IPv4.
1334 Actually, all the idea behind IPV6_PKTOPTIONS
1335 looks not very well thought. For now we latch
1336 options, received in the last packet, enqueued
1337 by tcp. Feel free to propose better solution.
/* Clone the skb up-front if any rx option may need it later. */
1341 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1343 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1344 struct dst_entry *dst = sk->sk_rx_dst;
1346 sock_rps_save_rxhash(sk, skb);
1347 sk_mark_napi_id(sk, skb);
/* Invalidate the cached rx dst if the iif changed or the route
 * cookie no longer validates.
 */
1349 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1350 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1352 sk->sk_rx_dst = NULL;
1356 tcp_rcv_established(sk, skb);
1358 goto ipv6_pktoptions;
1362 if (tcp_checksum_complete(skb))
/* Listener: maybe a syncookie ACK produced a child to process. */
1365 if (sk->sk_state == TCP_LISTEN) {
1366 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1372 if (tcp_child_process(sk, nsk, skb))
1375 __kfree_skb(opt_skb);
1379 sock_rps_save_rxhash(sk, skb);
1381 if (tcp_rcv_state_process(sk, skb))
1384 goto ipv6_pktoptions;
/* (reset/discard labels) */
1388 tcp_v6_send_reset(sk, skb);
1391 __kfree_skb(opt_skb);
1395 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1396 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1401 /* Do you ask, what is it?
1403 1. skb was enqueued by tcp.
1404 2. skb is added to tail of read queue, rather than out of order.
1405 3. socket is not in passive state.
1406 4. Finally, it really contains options, which user wants to receive.
1409 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1410 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1411 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1412 np->mcast_oif = tcp_v6_iif(opt_skb);
1413 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1414 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1415 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1416 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1418 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
/* Latch the clone as the socket's pktoptions; free the old one. */
1419 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1420 skb_set_owner_r(opt_skb, sk);
1421 tcp_v6_restore_cb(opt_skb);
1422 opt_skb = xchg(&np->pktoptions, opt_skb);
1424 __kfree_skb(opt_skb);
1425 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_fill_cb() - populate TCP_SKB_CB() from the IPv6 and TCP headers.
 * Must run only after xfrm6_policy_check(), which still reads IP6CB().
 */
1433 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1434 const struct tcphdr *th)
1436 /* This is tricky: we move IP6CB at its correct location into
1437 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1438 * _decode_session6() uses IP6CB().
1439 * barrier() makes sure compiler won't play aliasing games.
1441 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1442 sizeof(struct inet6_skb_parm));
1445 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN, each of which occupies sequence space. */
1446 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1447 skb->len - th->doff*4);
1448 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1449 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1450 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1451 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1452 TCP_SKB_CB(skb)->sacked = 0;
/* Either a software or a hardware rx timestamp marks the skb. */
1453 TCP_SKB_CB(skb)->has_rxtstamp =
1454 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/*
 * tcp_v6_rcv() - protocol handler entry point for incoming IPv6 TCP
 * segments: header validation, socket lookup, and dispatch to the
 * NEW_SYN_RECV / LISTEN / established / TIME_WAIT handling paths.
 * NOTE(review): this listing is a partial extract — labels, braces and
 * several statements of the original function are not visible here.
 */
1457 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1459 struct sk_buff *skb_to_free;
1460 int sdif = inet6_sdif(skb);
1461 const struct tcphdr *th;
1462 const struct ipv6hdr *hdr;
1466 struct net *net = dev_net(skb->dev);
1468 if (skb->pkt_type != PACKET_HOST)
1472 * Count it even if it's bad.
1474 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Basic TCP header sanity: pull, doff range, full options length. */
1476 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1479 th = (const struct tcphdr *)skb->data;
1481 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1483 if (!pskb_may_pull(skb, th->doff*4))
1486 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read header pointers; pskb_may_pull() may have moved skb->data. */
1489 th = (const struct tcphdr *)skb->data;
1490 hdr = ipv6_hdr(skb);
1493 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1494 th->source, th->dest, inet6_iif(skb), sdif,
1500 if (sk->sk_state == TCP_TIME_WAIT)
/* Request socket (3WHS in progress): act on behalf of the listener. */
1503 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1504 struct request_sock *req = inet_reqsk(sk);
1505 bool req_stolen = false;
1508 sk = req->rsk_listener;
1509 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1510 sk_drops_add(sk, skb);
1514 if (tcp_checksum_complete(skb)) {
1518 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1519 inet_csk_reqsk_queue_drop_and_put(sk, req);
1525 if (!tcp_filter(sk, skb)) {
1526 th = (const struct tcphdr *)skb->data;
1527 hdr = ipv6_hdr(skb);
1528 tcp_v6_fill_cb(skb, hdr, th);
1529 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1534 /* Another cpu got exclusive access to req
1535 * and created a full blown socket.
1536 * Try to feed this packet to this socket
1537 * instead of discarding it.
1539 tcp_v6_restore_cb(skb);
1543 goto discard_and_relse;
1547 tcp_v6_restore_cb(skb);
1548 } else if (tcp_child_process(sk, nsk, skb)) {
1549 tcp_v6_send_reset(nsk, skb);
1550 goto discard_and_relse;
/* Generalized TTL security mechanism style check (min_hopcount). */
1556 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1557 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1558 goto discard_and_relse;
1561 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1562 goto discard_and_relse;
1564 if (tcp_v6_inbound_md5_hash(sk, skb))
1565 goto discard_and_relse;
1567 if (tcp_filter(sk, skb))
1568 goto discard_and_relse;
1569 th = (const struct tcphdr *)skb->data;
1570 hdr = ipv6_hdr(skb);
1571 tcp_v6_fill_cb(skb, hdr, th);
1575 if (sk->sk_state == TCP_LISTEN) {
1576 ret = tcp_v6_do_rcv(sk, skb);
1577 goto put_and_return;
1580 sk_incoming_cpu_update(sk);
/* Process now if the socket is unowned, else queue to the backlog. */
1582 bh_lock_sock_nested(sk);
1583 tcp_segs_in(tcp_sk(sk), skb);
1585 if (!sock_owned_by_user(sk)) {
1586 skb_to_free = sk->sk_rx_skb_cache;
1587 sk->sk_rx_skb_cache = NULL;
1588 ret = tcp_v6_do_rcv(sk, skb);
1590 if (tcp_add_backlog(sk, skb))
1591 goto discard_and_relse;
1596 __kfree_skb(skb_to_free);
1600 return ret ? -1 : 0;
/* No-socket path: policy check, then send a reset for valid segments. */
1603 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1606 tcp_v6_fill_cb(skb, hdr, th);
1608 if (tcp_checksum_complete(skb)) {
1610 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1612 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1614 tcp_v6_send_reset(NULL, skb);
1622 sk_drops_add(sk, skb);
/* TIME_WAIT path: classify the segment and maybe revive via a listener. */
1628 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1629 inet_twsk_put(inet_twsk(sk));
1633 tcp_v6_fill_cb(skb, hdr, th);
1635 if (tcp_checksum_complete(skb)) {
1636 inet_twsk_put(inet_twsk(sk));
1640 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1645 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1646 skb, __tcp_hdrlen(th),
1647 &ipv6_hdr(skb)->saddr, th->source,
1648 &ipv6_hdr(skb)->daddr,
1650 tcp_v6_iif_l3_slave(skb),
1653 struct inet_timewait_sock *tw = inet_twsk(sk);
1654 inet_twsk_deschedule_put(tw);
1656 tcp_v6_restore_cb(skb);
1664 tcp_v6_timewait_ack(sk, skb);
1667 tcp_v6_send_reset(sk, skb);
1668 inet_twsk_deschedule_put(inet_twsk(sk));
1670 case TCP_TW_SUCCESS:
/*
 * tcp_v6_early_demux() - look up an established socket early in the rx
 * path so the cached socket dst can be reused, skipping a route lookup.
 */
1676 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1678 const struct ipv6hdr *hdr;
1679 const struct tcphdr *th;
1682 if (skb->pkt_type != PACKET_HOST)
1685 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1688 hdr = ipv6_hdr(skb);
1691 if (th->doff < sizeof(struct tcphdr) / 4)
1694 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1695 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1696 &hdr->saddr, th->source,
1697 &hdr->daddr, ntohs(th->dest),
1698 inet6_iif(skb), inet6_sdif(skb));
1701 skb->destructor = sock_edemux;
1702 if (sk_fullsock(sk)) {
1703 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* Attach the still-valid cached dst without taking a reference. */
1706 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1708 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1709 skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket ops for TCPv6 (size, uniqueness check, destructor). */
1714 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1715 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1716 .twsk_unique = tcp_twsk_unique,
1717 .twsk_destructor = tcp_twsk_destructor,
/* AF-specific connection ops for native IPv6 TCP sockets. */
1720 static const struct inet_connection_sock_af_ops ipv6_specific = {
1721 .queue_xmit = inet6_csk_xmit,
1722 .send_check = tcp_v6_send_check,
1723 .rebuild_header = inet6_sk_rebuild_header,
1724 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1725 .conn_request = tcp_v6_conn_request,
1726 .syn_recv_sock = tcp_v6_syn_recv_sock,
1727 .net_header_len = sizeof(struct ipv6hdr),
1728 .net_frag_header_len = sizeof(struct frag_hdr),
1729 .setsockopt = ipv6_setsockopt,
1730 .getsockopt = ipv6_getsockopt,
1731 .addr2sockaddr = inet6_csk_addr2sockaddr,
1732 .sockaddr_len = sizeof(struct sockaddr_in6),
1733 #ifdef CONFIG_COMPAT
1734 .compat_setsockopt = compat_ipv6_setsockopt,
1735 .compat_getsockopt = compat_ipv6_getsockopt,
1737 .mtu_reduced = tcp_v6_mtu_reduced,
/* TCP-MD5 (RFC 2385) helper ops for native IPv6 sockets. */
1740 #ifdef CONFIG_TCP_MD5SIG
1741 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1742 .md5_lookup = tcp_v6_md5_lookup,
1743 .calc_md5_hash = tcp_v6_md5_hash_skb,
1744 .md5_parse = tcp_v6_parse_md5_keys,
1749 * TCP over IPv4 via INET6 API
/* AF ops installed when an AF_INET6 socket carries a v4-mapped
 * connection: transmit/header handling is IPv4, sockopts stay IPv6.
 */
1751 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1752 .queue_xmit = ip_queue_xmit,
1753 .send_check = tcp_v4_send_check,
1754 .rebuild_header = inet_sk_rebuild_header,
1755 .sk_rx_dst_set = inet_sk_rx_dst_set,
1756 .conn_request = tcp_v6_conn_request,
1757 .syn_recv_sock = tcp_v6_syn_recv_sock,
1758 .net_header_len = sizeof(struct iphdr),
1759 .setsockopt = ipv6_setsockopt,
1760 .getsockopt = ipv6_getsockopt,
1761 .addr2sockaddr = inet6_csk_addr2sockaddr,
1762 .sockaddr_len = sizeof(struct sockaddr_in6),
1763 #ifdef CONFIG_COMPAT
1764 .compat_setsockopt = compat_ipv6_setsockopt,
1765 .compat_getsockopt = compat_ipv6_getsockopt,
1767 .mtu_reduced = tcp_v4_mtu_reduced,
/* MD5 ops for the v4-mapped case: hash over IPv4 pseudo-header. */
1770 #ifdef CONFIG_TCP_MD5SIG
1771 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1772 .md5_lookup = tcp_v4_md5_lookup,
1773 .calc_md5_hash = tcp_v4_md5_hash_skb,
1774 .md5_parse = tcp_v6_parse_md5_keys,
1778 /* NOTE: A lot of things set to zero explicitly by call to
1779 * sk_alloc() so need not be done here.
/* Socket init hook: install the IPv6 af_ops (and MD5 ops) on the new
 * socket before generic TCP initialisation.
 */
1781 static int tcp_v6_init_sock(struct sock *sk)
1783 struct inet_connection_sock *icsk = inet_csk(sk);
1787 icsk->icsk_af_ops = &ipv6_specific;
1789 #ifdef CONFIG_TCP_MD5SIG
1790 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Destroy hook: generic TCP teardown, then IPv6-level cleanup. */
1796 static void tcp_v6_destroy_sock(struct sock *sk)
1798 tcp_v4_destroy_sock(sk);
1799 inet6_destroy_sock(sk);
1802 #ifdef CONFIG_PROC_FS
1803 /* Proc filesystem TCPv6 sock list dumping. */
/* Print one /proc/net/tcp6 row for a pending request sock (SYN_RECV). */
1804 static void get_openreq6(struct seq_file *seq,
1805 const struct request_sock *req, int i)
/* ttd: remaining SYN-ACK retransmit timer time, in jiffies. */
1807 long ttd = req->rsk_timer.expires - jiffies;
1808 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1809 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1815 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1816 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1818 src->s6_addr32[0], src->s6_addr32[1],
1819 src->s6_addr32[2], src->s6_addr32[3],
1820 inet_rsk(req)->ir_num,
1821 dest->s6_addr32[0], dest->s6_addr32[1],
1822 dest->s6_addr32[2], dest->s6_addr32[3],
1823 ntohs(inet_rsk(req)->ir_rmt_port),
1825 0, 0, /* could print option size, but that is af dependent. */
1826 1, /* timers active (only the expire timer) */
1827 jiffies_to_clock_t(ttd),
1829 from_kuid_munged(seq_user_ns(seq),
1830 sock_i_uid(req->rsk_listener)),
1831 0, /* non standard timer */
1832 0, /* open_requests have no inode */
/* Print one /proc/net/tcp6 row for a full TCP socket (lockless read;
 * values may be transiently inconsistent).
 */
1836 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1838 const struct in6_addr *dest, *src;
1841 unsigned long timer_expires;
1842 const struct inet_sock *inet = inet_sk(sp);
1843 const struct tcp_sock *tp = tcp_sk(sp);
1844 const struct inet_connection_sock *icsk = inet_csk(sp);
1845 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1849 dest = &sp->sk_v6_daddr;
1850 src = &sp->sk_v6_rcv_saddr;
1851 destp = ntohs(inet->inet_dport);
1852 srcp = ntohs(inet->inet_sport);
/* Pick which pending timer to report: retransmit-class, zero-window
 * probe, or keepalive (sk_timer).
 */
1854 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1855 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1856 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1858 timer_expires = icsk->icsk_timeout;
1859 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1861 timer_expires = icsk->icsk_timeout;
1862 } else if (timer_pending(&sp->sk_timer)) {
1864 timer_expires = sp->sk_timer.expires;
1867 timer_expires = jiffies;
1870 state = inet_sk_state_load(sp);
/* For listeners "rx_queue" is the accept backlog, not unread bytes. */
1871 if (state == TCP_LISTEN)
1872 rx_queue = sp->sk_ack_backlog;
1874 /* Because we don't lock the socket,
1875 * we might find a transient negative value.
1877 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1880 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1881 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1883 src->s6_addr32[0], src->s6_addr32[1],
1884 src->s6_addr32[2], src->s6_addr32[3], srcp,
1885 dest->s6_addr32[0], dest->s6_addr32[1],
1886 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1888 tp->write_seq - tp->snd_una,
1891 jiffies_delta_to_clock_t(timer_expires - jiffies),
1892 icsk->icsk_retransmits,
1893 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1894 icsk->icsk_probes_out,
1896 refcount_read(&sp->sk_refcnt), sp,
1897 jiffies_to_clock_t(icsk->icsk_rto),
1898 jiffies_to_clock_t(icsk->icsk_ack.ato),
1899 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1901 state == TCP_LISTEN ?
1902 fastopenq->max_qlen :
1903 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Print one /proc/net/tcp6 row for a TIME_WAIT socket. */
1907 static void get_timewait6_sock(struct seq_file *seq,
1908 struct inet_timewait_sock *tw, int i)
/* delta: remaining TIME_WAIT timer time, in jiffies. */
1910 long delta = tw->tw_timer.expires - jiffies;
1911 const struct in6_addr *dest, *src;
1914 dest = &tw->tw_v6_daddr;
1915 src = &tw->tw_v6_rcv_saddr;
1916 destp = ntohs(tw->tw_dport);
1917 srcp = ntohs(tw->tw_sport);
1920 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1921 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1923 src->s6_addr32[0], src->s6_addr32[1],
1924 src->s6_addr32[2], src->s6_addr32[3], srcp,
1925 dest->s6_addr32[0], dest->s6_addr32[1],
1926 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1927 tw->tw_substate, 0, 0,
1928 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1929 refcount_read(&tw->tw_refcnt), tw);
/* seq_file .show: emit the header row or dispatch on socket state to
 * the matching row printer.
 */
1932 static int tcp6_seq_show(struct seq_file *seq, void *v)
1934 struct tcp_iter_state *st;
1935 struct sock *sk = v;
1937 if (v == SEQ_START_TOKEN) {
1942 "st tx_queue rx_queue tr tm->when retrnsmt"
1943 " uid timeout inode\n");
1948 if (sk->sk_state == TCP_TIME_WAIT)
1949 get_timewait6_sock(seq, v, st->num);
1950 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1951 get_openreq6(seq, v, st->num);
1953 get_tcp6_sock(seq, v, st->num);
/* seq_file iteration ops for /proc/net/tcp6. */
1958 static const struct seq_operations tcp6_seq_ops = {
1959 .show = tcp6_seq_show,
1960 .start = tcp_seq_start,
1961 .next = tcp_seq_next,
1962 .stop = tcp_seq_stop,
1965 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Per-netns: create the read-only /proc/net/tcp6 entry. */
1969 int __net_init tcp6_proc_init(struct net *net)
1971 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1972 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Per-netns: remove the /proc/net/tcp6 entry. */
1977 void tcp6_proc_exit(struct net *net)
1979 remove_proc_entry("tcp6", net->proc_net);
/* struct proto for AF_INET6 SOCK_STREAM: maps socket-layer operations
 * onto the shared TCP core plus the v6-specific entry points above.
 */
1983 struct proto tcpv6_prot = {
1985 .owner = THIS_MODULE,
1987 .pre_connect = tcp_v6_pre_connect,
1988 .connect = tcp_v6_connect,
1989 .disconnect = tcp_disconnect,
1990 .accept = inet_csk_accept,
1992 .init = tcp_v6_init_sock,
1993 .destroy = tcp_v6_destroy_sock,
1994 .shutdown = tcp_shutdown,
1995 .setsockopt = tcp_setsockopt,
1996 .getsockopt = tcp_getsockopt,
1997 .keepalive = tcp_set_keepalive,
1998 .recvmsg = tcp_recvmsg,
1999 .sendmsg = tcp_sendmsg,
2000 .sendpage = tcp_sendpage,
2001 .backlog_rcv = tcp_v6_do_rcv,
2002 .release_cb = tcp_release_cb,
2004 .unhash = inet_unhash,
2005 .get_port = inet_csk_get_port,
2006 .enter_memory_pressure = tcp_enter_memory_pressure,
2007 .leave_memory_pressure = tcp_leave_memory_pressure,
2008 .stream_memory_free = tcp_stream_memory_free,
2009 .sockets_allocated = &tcp_sockets_allocated,
2010 .memory_allocated = &tcp_memory_allocated,
2011 .memory_pressure = &tcp_memory_pressure,
2012 .orphan_count = &tcp_orphan_count,
2013 .sysctl_mem = sysctl_tcp_mem,
2014 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2015 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2016 .max_header = MAX_TCP_HEADER,
2017 .obj_size = sizeof(struct tcp6_sock),
2018 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2019 .twsk_prot = &tcp6_timewait_sock_ops,
2020 .rsk_prot = &tcp6_request_sock_ops,
2021 .h.hashinfo = &tcp_hashinfo,
2022 .no_autobind = true,
2023 #ifdef CONFIG_COMPAT
2024 .compat_setsockopt = compat_tcp_setsockopt,
2025 .compat_getsockopt = compat_tcp_getsockopt,
2027 .diag_destroy = tcp_abort,
2030 /* thinking of making this const? Don't.
2031 * early_demux can change based on sysctl.
/* IPv6 protocol handler registration for IPPROTO_TCP. */
2033 static struct inet6_protocol tcpv6_protocol = {
2034 .early_demux = tcp_v6_early_demux,
2035 .early_demux_handler = tcp_v6_early_demux,
2036 .handler = tcp_v6_rcv,
2037 .err_handler = tcp_v6_err,
2038 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6_prot. */
2041 static struct inet_protosw tcpv6_protosw = {
2042 .type = SOCK_STREAM,
2043 .protocol = IPPROTO_TCP,
2044 .prot = &tcpv6_prot,
2045 .ops = &inet6_stream_ops,
2046 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the control socket used e.g. for sending
 * resets/ACKs on behalf of this namespace.
 */
2050 static int __net_init tcpv6_net_init(struct net *net)
2052 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2053 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: tear down the control socket. */
2056 static void __net_exit tcpv6_net_exit(struct net *net)
2058 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batch exit: purge remaining AF_INET6 TIME_WAIT sockets. */
2061 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2063 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2066 static struct pernet_operations tcpv6_net_ops = {
2067 .init = tcpv6_net_init,
2068 .exit = tcpv6_net_exit,
2069 .exit_batch = tcpv6_net_exit_batch,
/* Module init: register the inet6 protocol handler, the protosw entry,
 * and the pernet ops; unwind in reverse order on failure.
 */
2072 int __init tcpv6_init(void)
2076 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2080 /* register inet6 protocol */
2081 ret = inet6_register_protosw(&tcpv6_protosw);
2083 goto out_tcpv6_protocol;
2085 ret = register_pernet_subsys(&tcpv6_net_ops);
2087 goto out_tcpv6_protosw;
/* Error unwind labels (cleanup in reverse registration order). */
2092 inet6_unregister_protosw(&tcpv6_protosw);
2094 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2098 void tcpv6_exit(void)
2100 unregister_pernet_subsys(&tcpv6_net_ops);
2101 inet6_unregister_protosw(&tcpv6_protosw);
2102 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);