1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
/* Forward declarations for functions defined later in this file, and the
 * AF-ops tables referenced before their definitions.
 * NOTE(review): this extraction elides lines throughout the file — e.g. the
 * tcp_v6_md5_do_lookup() prototype below is missing its trailing
 * parameter/closing line.
 */
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allow compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
/* The ipv6_pinfo is located at a fixed offset from the start of a
 * struct tcp6_sock, so it can be computed arithmetically instead of
 * loading the pinet6 pointer.
 */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
/* Cache the skb's receive dst on the socket, but only if a reference
 * can safely be taken (dst_hold_safe).  The incoming ifindex and an
 * rt6 cookie are stored alongside so the cached dst can be validated
 * later.
 */
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
105 struct dst_entry *dst = skb_dst(skb);
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number for an incoming segment from its
 * IPv6 address pair and ports via a secure hash.
 * NOTE(review): the argument line passing tcp_hdr(skb)->dest appears to
 * be elided in this extraction.
 */
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
121 tcp_hdr(skb)->source);
/* Per-connection TCP timestamp offset, derived from the IPv6 address
 * pair (keyed per netns).
 */
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
/* Run the cgroup-BPF INET6_CONNECT hook before the real connect.
 * The addr_len check duplicates the one in tcp_v6_connect() so the BPF
 * program cannot read past the user-supplied sockaddr.
 */
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent BPF program called below from accessing bytes that are out
135 * of the bound specified by user in addr_len.
137 if (addr_len < SIN6_LEN_RFC2133)
140 sock_owned_by_me(sk);
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/* Active open (connect()) for an IPv6 TCP socket: validate the remote
 * sockaddr, resolve flow label / scope id, handle v4-mapped
 * destinations by delegating to tcp_v4_connect(), route the flow, bind
 * a local port (inet6_hash_connect), pick an ISN and send the SYN.
 * NOTE(review): many lines (error paths, closing braces, some
 * assignments) are elided in this extraction; comments below only
 * describe what the visible lines establish.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 struct in6_addr *saddr = NULL, *final_p, final;
154 struct ipv6_txoptions *opt;
156 struct dst_entry *dst;
159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
/* Basic sanity on the user-supplied address. */
161 if (addr_len < SIN6_LEN_RFC2133)
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
167 memset(&fl6, 0, sizeof(fl6));
/* Honour a flow label carried in sin6_flowinfo; look it up if the
 * socket manages flow labels.
 */
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
177 fl6_sock_release(flowlabel);
182 * connect() to INADDR_ANY means loopback (BSD'ism).
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
190 usin->sin6_addr = in6addr_loopback;
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
195 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations require a bound interface; a scope id in
 * the sockaddr may provide (or must match) it.
 */
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
/* Reset TS-recent state if we are reconnecting to a new peer. */
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: switch the socket to the mapped af_ops and
 * let tcp_v4_connect() do the work; on failure restore the IPv6 ops.
 */
229 if (addr_type & IPV6_ADDR_MAPPED) {
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
233 if (__ipv6_only_sock(sk))
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240 icsk->icsk_af_ops = &ipv6_mapped;
242 mptcpv6_handle_mapped(sk, true);
243 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* Failure path: undo the switch to mapped operations. */
251 icsk->icsk_ext_hdr_len = exthdrlen;
252 icsk->icsk_af_ops = &ipv6_specific;
254 mptcpv6_handle_mapped(sk, false);
255 sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 tp->af_specific = &tcp_sock_ipv6_specific;
261 np->saddr = sk->sk_v6_rcv_saddr;
/* Build the flow description and route it. */
266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 saddr = &sk->sk_v6_rcv_saddr;
269 fl6.flowi6_proto = IPPROTO_TCP;
270 fl6.daddr = sk->sk_v6_daddr;
271 fl6.saddr = saddr ? *saddr : np->saddr;
272 fl6.flowi6_oif = sk->sk_bound_dev_if;
273 fl6.flowi6_mark = sk->sk_mark;
274 fl6.fl6_dport = usin->sin6_port;
275 fl6.fl6_sport = inet->inet_sport;
276 fl6.flowi6_uid = sk->sk_uid;
278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 final_p = fl6_update_dst(&fl6, opt, &final);
281 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
291 sk->sk_v6_rcv_saddr = *saddr;
294 /* set the source address */
296 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298 sk->sk_gso_type = SKB_GSO_TCPV6;
299 ip6_dst_store(sk, dst, NULL, NULL);
301 icsk->icsk_ext_hdr_len = 0;
303 icsk->icsk_ext_hdr_len = opt->opt_flen +
306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308 inet->inet_dport = usin->sin6_port;
/* Pick a source port / hash the socket, then choose the ISN and
 * timestamp offset (unless repairing) and transmit the SYN.
 */
310 tcp_set_state(sk, TCP_SYN_SENT);
311 err = inet6_hash_connect(tcp_death_row, sk);
317 if (likely(!tp->repair)) {
319 WRITE_ONCE(tp->write_seq,
320 secure_tcpv6_seq(np->saddr.s6_addr32,
321 sk->sk_v6_daddr.s6_addr32,
324 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 sk->sk_v6_daddr.s6_addr32);
329 if (tcp_fastopen_defer_connect(sk, &err))
334 err = tcp_connect(sk);
/* Common failure cleanup: back to CLOSE, clear the port binding. */
341 tcp_set_state(sk, TCP_CLOSE);
343 inet->inet_dport = 0;
344 sk->sk_route_caps = 0;
/* React to a PMTU decrease (from ICMPv6 Packet Too Big): update the
 * cached route's MTU and, if our MSS bookkeeping is now stale,
 * re-sync the MSS and retransmit.  Skipped for LISTEN/CLOSE sockets.
 */
348 static void tcp_v6_mtu_reduced(struct sock *sk)
350 struct dst_entry *dst;
352 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
355 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
359 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360 tcp_sync_mss(sk, dst_mtu(dst));
361 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket for the offending
 * segment, validate the sequence number against the send window, and
 * act on the error type (redirect, PMTU, hard errors).
 * NOTE(review): several lines (locking, gotos, closing braces) are
 * elided in this extraction.
 */
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366 u8 type, u8 code, int offset, __be32 info)
368 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370 struct net *net = dev_net(skb->dev);
371 struct request_sock *fastopen;
372 struct ipv6_pinfo *np;
/* Find the established (or minisock) socket this error refers to. */
379 sk = __inet6_lookup_established(net, &tcp_hashinfo,
380 &hdr->daddr, th->dest,
381 &hdr->saddr, ntohs(th->source),
382 skb->dev->ifindex, inet6_sdif(skb));
385 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
390 if (sk->sk_state == TCP_TIME_WAIT) {
391 inet_twsk_put(inet_twsk(sk));
394 seq = ntohl(th->seq);
395 fatal = icmpv6_err_convert(type, code, &err);
396 if (sk->sk_state == TCP_NEW_SYN_RECV) {
397 tcp_req_err(sk, seq, fatal);
/* If the socket is owned by user context we can only act on PKT_TOOBIG
 * immediately; other errors while locked are counted as drops.
 */
402 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
405 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount filter (a TTL-based spoofing defence). */
408 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
414 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
415 fastopen = rcu_dereference(tp->fastopen_rsk);
416 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
417 if (sk->sk_state != TCP_LISTEN &&
418 !between(seq, snd_una, tp->snd_nxt)) {
419 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
423 np = tcp_inet6_sk(sk);
425 if (type == NDISC_REDIRECT) {
426 if (!sock_owned_by_user(sk)) {
427 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
430 dst->ops->redirect(dst, sk, skb);
435 if (type == ICMPV6_PKT_TOOBIG) {
436 /* We are not interested in TCP_LISTEN and open_requests
437 * (SYN-ACKs send out by Linux are always <576bytes so
438 * they should go through unfragmented).
440 if (sk->sk_state == TCP_LISTEN)
443 if (!ip6_sk_accept_pmtu(sk))
/* Apply the new MTU now if unlocked, else defer to release_sock(). */
446 tp->mtu_info = ntohl(info);
447 if (!sock_owned_by_user(sk))
448 tcp_v6_mtu_reduced(sk);
449 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
456 /* Might be for an request_sock */
457 switch (sk->sk_state) {
460 /* Only in fast or simultaneous open. If a fast open socket is
461 * is already accepted it is treated as a connected one below.
463 if (fastopen && !fastopen->sk)
466 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
468 if (!sock_owned_by_user(sk)) {
470 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
474 sk->sk_err_soft = err;
479 /* check if this ICMP message allows revert of backoff.
482 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
483 code == ICMPV6_NOROUTE)
484 tcp_ld_RTO_revert(sk, seq);
/* Deliver the error to userspace if IPV6_RECVERR is enabled. */
487 if (!sock_owned_by_user(sk) && np->recverr) {
489 sk->sk_error_report(sk);
491 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a request socket: route if needed,
 * construct the segment with tcp_make_synack(), checksum it and send
 * via ip6_xmit() with the listener's IPv6 tx options.
 */
500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
502 struct request_sock *req,
503 struct tcp_fastopen_cookie *foc,
504 enum tcp_synack_type synack_type,
505 struct sk_buff *syn_skb)
507 struct inet_request_sock *ireq = inet_rsk(req);
508 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
509 struct ipv6_txoptions *opt;
510 struct flowi6 *fl6 = &fl->u.ip6;
514 /* First, grab a route. */
515 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
516 IPPROTO_TCP)) == NULL)
519 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
522 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
523 &ireq->ir_v6_rmt_addr);
525 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the client's flow label when IPV6_FLOWINFO reflection is on. */
526 if (np->repflow && ireq->pktopts)
527 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
530 opt = ireq->ipv6_opt;
532 opt = rcu_dereference(np->opt);
533 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
536 err = net_xmit_eval(err);
/* Free the per-request IPv6 resources (cloned tx options and the
 * saved SYN pktoptions skb) when a request socket is destroyed.
 */
544 static void tcp_v6_reqsk_destructor(struct request_sock *req)
546 kfree(inet_rsk(req)->ipv6_opt);
547 kfree_skb(inet_rsk(req)->pktopts);
550 #ifdef CONFIG_TCP_MD5SIG
/* Look up a TCP-MD5 key for an IPv6 peer address (and L3 domain). */
551 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
552 const struct in6_addr *addr,
555 return tcp_md5_do_lookup(sk, l3index,
556 (union tcp_md5_addr *)addr, AF_INET6);
/* tcp_sock_af_ops hook: find the MD5 key for addr_sk's peer, resolving
 * the L3 master ifindex from the socket's bound device.
 */
559 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
560 const struct sock *addr_sk)
564 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
565 addr_sk->sk_bound_dev_if);
566 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the
 * user-supplied tcp_md5sig, resolve optional prefix length and
 * ifindex, then add or delete the key.  v4-mapped addresses are stored
 * as AF_INET keys using the embedded IPv4 address.
 */
570 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
571 sockptr_t optval, int optlen)
573 struct tcp_md5sig cmd;
574 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
578 if (optlen < sizeof(cmd))
581 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
584 if (sin6->sin6_family != AF_INET6)
/* Prefix-scoped keys (TCP_MD5SIG_EXT): bound-check the prefix. */
587 if (optname == TCP_MD5SIG_EXT &&
588 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
589 prefixlen = cmd.tcpm_prefixlen;
590 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
594 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
/* Device-scoped keys: the ifindex must refer to an L3 master. */
597 if (optname == TCP_MD5SIG_EXT &&
598 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
599 struct net_device *dev;
602 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
603 if (dev && netif_is_l3_master(dev))
604 l3index = dev->ifindex;
607 /* ok to reference set/not set outside of rcu;
608 * right now device MUST be an L3 master
610 if (!dev || !l3index)
/* Zero key length means delete. */
614 if (!cmd.tcpm_keylen) {
615 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
616 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
619 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
620 AF_INET6, prefixlen, l3index);
623 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
626 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
627 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
628 AF_INET, prefixlen, l3index,
629 cmd.tcpm_key, cmd.tcpm_keylen,
632 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
633 AF_INET6, prefixlen, l3index,
634 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the IPv6 pseudo-header plus a copy of the TCP header (with the
 * checksum field implicitly excluded per RFC 2385) into the MD5 hash
 * request.  NOTE(review): lines zeroing _th->check and filling bp's
 * addresses appear elided in this extraction.
 */
637 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
638 const struct in6_addr *daddr,
639 const struct in6_addr *saddr,
640 const struct tcphdr *th, int nbytes)
642 struct tcp6_pseudohdr *bp;
643 struct scatterlist sg;
647 /* 1. TCP pseudo-header (RFC2460) */
650 bp->protocol = cpu_to_be32(IPPROTO_TCP);
651 bp->len = cpu_to_be32(nbytes);
653 _th = (struct tcphdr *)(bp + 1);
654 memcpy(_th, th, sizeof(*th));
657 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
658 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
659 sizeof(*bp) + sizeof(*th));
660 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over pseudo-header + TCP header + key
 * (header-only variant, used for RST/ACK replies).  On any failure the
 * output digest is zeroed so callers never see stale bytes.
 */
663 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
664 const struct in6_addr *daddr, struct in6_addr *saddr,
665 const struct tcphdr *th)
667 struct tcp_md5sig_pool *hp;
668 struct ahash_request *req;
670 hp = tcp_get_md5sig_pool();
672 goto clear_hash_noput;
675 if (crypto_ahash_init(req))
677 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
679 if (tcp_md5_hash_key(hp, key))
681 ahash_request_set_crypt(req, NULL, md5_hash, 0);
682 if (crypto_ahash_final(req))
685 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
689 tcp_put_md5sig_pool();
691 memset(md5_hash, 0, 16);
/* Compute the MD5 signature over an entire segment (headers + payload
 * + key).  Addresses come from the socket when available (established/
 * request sockets), otherwise from the skb's IPv6 header.  Digest is
 * zeroed on failure.
 */
695 static int tcp_v6_md5_hash_skb(char *md5_hash,
696 const struct tcp_md5sig_key *key,
697 const struct sock *sk,
698 const struct sk_buff *skb)
700 const struct in6_addr *saddr, *daddr;
701 struct tcp_md5sig_pool *hp;
702 struct ahash_request *req;
703 const struct tcphdr *th = tcp_hdr(skb);
705 if (sk) { /* valid for establish/request sockets */
706 saddr = &sk->sk_v6_rcv_saddr;
707 daddr = &sk->sk_v6_daddr;
709 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
710 saddr = &ip6h->saddr;
711 daddr = &ip6h->daddr;
714 hp = tcp_get_md5sig_pool();
716 goto clear_hash_noput;
719 if (crypto_ahash_init(req))
722 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
724 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
726 if (tcp_md5_hash_key(hp, key))
728 ahash_request_set_crypt(req, NULL, md5_hash, 0);
729 if (crypto_ahash_final(req))
732 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
736 tcp_put_md5sig_pool();
738 memset(md5_hash, 0, 16);
/* Validate the MD5 option of an inbound segment against the key we
 * expect for this peer.  Returns true (drop) when the option and the
 * configured key disagree: option present without key, key configured
 * without option, or signature mismatch.
 */
744 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
745 const struct sk_buff *skb,
748 #ifdef CONFIG_TCP_MD5SIG
749 const __u8 *hash_location = NULL;
750 struct tcp_md5sig_key *hash_expected;
751 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
752 const struct tcphdr *th = tcp_hdr(skb);
753 int genhash, l3index;
756 /* sdif set, means packet ingressed via a device
757 * in an L3 domain and dif is set to the l3mdev
759 l3index = sdif ? dif : 0;
761 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
762 hash_location = tcp_parse_md5sig_option(th);
764 /* We've parsed the options - do we have a hash? */
765 if (!hash_expected && !hash_location)
768 if (hash_expected && !hash_location) {
769 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
773 if (!hash_expected && hash_location) {
774 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
778 /* check the signature */
779 genhash = tcp_v6_md5_hash_skb(newhash,
783 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
784 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
785 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
786 genhash ? "failed" : "mismatch",
787 &ip6h->saddr, ntohs(th->source),
788 &ip6h->daddr, ntohs(th->dest), l3index);
/* Initialize the IPv6 fields of a freshly minted request sock from the
 * incoming SYN: remote/local addresses, the incoming interface for
 * link-local peers, and (when relevant rx options are enabled) a
 * reference-counted copy of the SYN skb for later IPV6_PKTOPTIONS.
 */
795 static void tcp_v6_init_req(struct request_sock *req,
796 const struct sock *sk_listener,
799 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
800 struct inet_request_sock *ireq = inet_rsk(req);
801 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
803 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
804 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
806 /* So that link locals have meaning */
807 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
808 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
809 ireq->ir_iif = tcp_v6_iif(skb);
811 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
812 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
813 np->rxopt.bits.rxinfo ||
814 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
815 np->rxopt.bits.rxohlim || np->repflow)) {
816 refcount_inc(&skb->users);
/* route_req hook: route the request sock's flow via the IPv6
 * connection-sock helper.
 */
821 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
823 const struct request_sock *req)
825 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* request_sock_ops for IPv6 TCP: sizing plus the SYN-ACK retransmit,
 * ACK/RST reply and destructor callbacks defined in this file.
 */
828 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
830 .obj_size = sizeof(struct tcp6_request_sock),
831 .rtx_syn_ack = tcp_rtx_synack,
832 .send_ack = tcp_v6_reqsk_send_ack,
833 .destructor = tcp_v6_reqsk_destructor,
834 .send_reset = tcp_v6_send_reset,
835 .syn_ack_timeout = tcp_syn_ack_timeout,
/* AF-specific request-sock operations: MSS clamp for the IPv6 minimum
 * MTU, MD5 hooks (when configured), syncookie generation, routing,
 * ISN/ts-off derivation and SYN-ACK transmission.
 */
838 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
839 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
840 sizeof(struct ipv6hdr),
841 #ifdef CONFIG_TCP_MD5SIG
842 .req_md5_lookup = tcp_v6_md5_lookup,
843 .calc_md5_hash = tcp_v6_md5_hash_skb,
845 .init_req = tcp_v6_init_req,
846 #ifdef CONFIG_SYN_COOKIES
847 .cookie_init_seq = cookie_v6_init_sequence,
849 .route_req = tcp_v6_route_req,
850 .init_seq = tcp_v6_init_seq,
851 .init_ts_off = tcp_v6_init_ts_off,
852 .send_synack = tcp_v6_send_synack,
/* Build and send a bare reply segment (RST or ACK, no payload) for an
 * incoming skb, without requiring a full socket: allocate a minimal
 * skb, fill a TCP header with swapped addresses/ports, append optional
 * timestamp and MD5 options, route the reverse flow and transmit via
 * the per-netns control socket.
 * NOTE(review): a number of lines (rst/ack_seq selection, mark/check
 * branches, error handling) are elided in this extraction.
 */
855 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
856 u32 ack, u32 win, u32 tsval, u32 tsecr,
857 int oif, struct tcp_md5sig_key *key, int rst,
858 u8 tclass, __be32 label, u32 priority)
860 const struct tcphdr *th = tcp_hdr(skb);
862 struct sk_buff *buff;
864 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
865 struct sock *ctl_sk = net->ipv6.tcp_sk;
866 unsigned int tot_len = sizeof(struct tcphdr);
867 struct dst_entry *dst;
/* Account for the optional timestamp / MD5 option space. */
872 tot_len += TCPOLEN_TSTAMP_ALIGNED;
873 #ifdef CONFIG_TCP_MD5SIG
875 tot_len += TCPOLEN_MD5SIG_ALIGNED;
878 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
883 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
885 t1 = skb_push(buff, tot_len);
886 skb_reset_transport_header(buff);
888 /* Swap the send and the receive. */
889 memset(t1, 0, sizeof(*t1));
890 t1->dest = th->source;
891 t1->source = th->dest;
892 t1->doff = tot_len / 4;
893 t1->seq = htonl(seq);
894 t1->ack_seq = htonl(ack);
895 t1->ack = !rst || !th->ack;
897 t1->window = htons(win);
899 topt = (__be32 *)(t1 + 1);
/* Timestamp option (NOP NOP TS). */
902 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
903 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
904 *topt++ = htonl(tsval);
905 *topt++ = htonl(tsecr);
908 #ifdef CONFIG_TCP_MD5SIG
910 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
911 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
912 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
913 &ipv6_hdr(skb)->saddr,
914 &ipv6_hdr(skb)->daddr, t1);
/* Reverse flow: our daddr is the sender's saddr and vice versa. */
918 memset(&fl6, 0, sizeof(fl6));
919 fl6.daddr = ipv6_hdr(skb)->saddr;
920 fl6.saddr = ipv6_hdr(skb)->daddr;
921 fl6.flowlabel = label;
923 buff->ip_summed = CHECKSUM_PARTIAL;
926 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
928 fl6.flowi6_proto = IPPROTO_TCP;
929 if (rt6_need_strict(&fl6.daddr) && !oif)
930 fl6.flowi6_oif = tcp_v6_iif(skb);
932 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
935 fl6.flowi6_oif = oif;
/* TIME-WAIT sockets carry their own mark/txhash. */
939 if (sk->sk_state == TCP_TIME_WAIT) {
940 mark = inet_twsk(sk)->tw_mark;
941 /* autoflowlabel relies on buff->hash */
942 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
947 buff->tstamp = tcp_transmit_time(sk);
949 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
950 fl6.fl6_dport = t1->dest;
951 fl6.fl6_sport = t1->source;
952 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
953 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
955 /* Pass a socket to ip6_dst_lookup either it is for RST
956 * Underlying function will use this to retrieve the network
959 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
961 skb_dst_set(buff, dst);
962 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
964 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
966 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to skb.  When no socket matched, an MD5-signed
 * segment triggers a listener lookup by source port so the RST can be
 * signed with the right key (and no RST is sent if the incoming
 * signature doesn't verify).  seq/ack_seq, oif, flow label and
 * priority are derived from the segment and/or the (full or timewait)
 * socket.  NOTE(review): several lines (rcu locking, ack-flag branch,
 * out: label) are elided in this extraction.
 */
973 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
975 const struct tcphdr *th = tcp_hdr(skb);
976 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
977 u32 seq = 0, ack_seq = 0;
978 struct tcp_md5sig_key *key = NULL;
979 #ifdef CONFIG_TCP_MD5SIG
980 const __u8 *hash_location = NULL;
981 unsigned char newhash[16];
983 struct sock *sk1 = NULL;
993 /* If sk not NULL, it means we did a successful lookup and incoming
994 * route had to be correct. prequeue might have dropped our dst.
996 if (!sk && !ipv6_unicast_destination(skb))
999 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1000 #ifdef CONFIG_TCP_MD5SIG
1002 hash_location = tcp_parse_md5sig_option(th);
1003 if (sk && sk_fullsock(sk)) {
1006 /* sdif set, means packet ingressed via a device
1007 * in an L3 domain and inet_iif is set to it.
1009 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1010 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1011 } else if (hash_location) {
1012 int dif = tcp_v6_iif_l3_slave(skb);
1013 int sdif = tcp_v6_sdif(skb);
1017 * active side is lost. Try to find listening socket through
1018 * source port, and then find md5 key through listening socket.
1019 * we are not loose security here:
1020 * Incoming packet is checked with md5 hash with finding key,
1021 * no RST generated if md5 hash doesn't match.
1023 sk1 = inet6_lookup_listener(net,
1024 &tcp_hashinfo, NULL, 0,
1026 th->source, &ipv6h->daddr,
1027 ntohs(th->source), dif, sdif);
1031 /* sdif set, means packet ingressed via a device
1032 * in an L3 domain and dif is set to it.
1034 l3index = tcp_v6_sdif(skb) ? dif : 0;
1036 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
/* Verify the incoming signature before replying with a RST. */
1040 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1041 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* Sequence numbers for the RST per RFC 793. */
1047 seq = ntohl(th->ack_seq);
1049 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1053 oif = sk->sk_bound_dev_if;
1054 if (sk_fullsock(sk)) {
1055 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1057 trace_tcp_send_reset(sk, skb);
1059 label = ip6_flowlabel(ipv6h);
1060 priority = sk->sk_priority;
1062 if (sk->sk_state == TCP_TIME_WAIT) {
1063 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1064 priority = inet_twsk(sk)->tw_priority;
/* No socket: optionally reflect the flow label per sysctl. */
1067 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1068 label = ip6_flowlabel(ipv6h);
1071 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1074 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() for a bare ACK (rst=0). */
1080 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1081 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1082 struct tcp_md5sig_key *key, u8 tclass,
1083 __be32 label, u32 priority)
1085 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1086 tclass, label, priority);
/* ACK a segment received for a TIME-WAIT socket, using the state
 * (snd_nxt/rcv_nxt, scaled window, ts offset, MD5 key, tclass, flow
 * label, priority) preserved in the timewait sock.
 */
1089 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1091 struct inet_timewait_sock *tw = inet_twsk(sk);
1092 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1094 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1095 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1096 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1097 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1098 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
/* ACK on behalf of a request socket (SYN_RECV / Fast Open), deriving
 * seq/ack from the request and right-shifting the advertised window
 * per RFC 7323.
 */
1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1104 struct request_sock *req)
1108 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1110 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1111 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1114 * The window field (SEG.WND) of every outgoing segment, with the
1115 * exception of <SYN> segments, MUST be right-shifted by
1116 * Rcv.Wind.Shift bits:
1118 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1119 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1120 tcp_rsk(req)->rcv_nxt,
1121 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1122 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1123 req->ts_recent, sk->sk_bound_dev_if,
1124 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1125 0, 0, sk->sk_priority);
/* Validate a SYN cookie on an incoming ACK (no-op without
 * CONFIG_SYN_COOKIES).  NOTE(review): the guard checking th->syn /
 * cookie activity appears elided in this extraction.
 */
1129 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1131 #ifdef CONFIG_SYN_COOKIES
1132 const struct tcphdr *th = tcp_hdr(skb);
1135 sk = cookie_v6_check(sk, skb);
/* BPF/XDP helper: compute a SYN-cookie ISN and MSS for the given
 * headers, recording synq overflow on the listener.  Returns the mss
 * (0 when cookies are unavailable — lines elided in this extraction).
 */
1140 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1141 struct tcphdr *th, u32 *cookie)
1144 #ifdef CONFIG_SYN_COOKIES
1145 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1146 &tcp_request_sock_ipv6_ops, sk, th);
1148 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1149 tcp_synq_overflow(sk);
/* Handle an incoming SYN: delegate v4-mapped packets to the IPv4 path,
 * require a unicast destination, then run the generic connection
 * request machinery with the IPv6 ops tables.
 */
1155 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1157 if (skb->protocol == htons(ETH_P_IP))
1158 return tcp_v4_conn_request(sk, skb);
1160 if (!ipv6_unicast_destination(skb))
1163 return tcp_conn_request(&tcp6_request_sock_ops,
1164 &tcp_request_sock_ipv6_ops, sk, skb);
1168 return 0; /* don't send reset */
/* Restore the IP6CB at the front of skb->cb from the copy TCP keeps
 * in TCP_SKB_CB, so code expecting IP6CB works again.
 */
1171 static void tcp_v6_restore_cb(struct sk_buff *skb)
1173 /* We need to move header back to the beginning if xfrm6_policy_check()
1174 * and tcp_v6_fill_cb() are going to be called again.
1175 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1177 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1178 sizeof(struct inet6_skb_parm));
/* Create the child socket when the 3WHS completes.  Two paths:
 *  - v4-mapped SYN: delegate to tcp_v4_syn_recv_sock() then patch the
 *    child over to the mapped IPv6 operations;
 *  - native IPv6: route, clone the listener via
 *    tcp_create_openreq_child(), fill in IPv6 addressing/options,
 *    copy any MD5 key, inherit the port and hash the child.
 * NOTE(review): many lines (error labels, if/else framing, closing
 * braces) are elided in this extraction.
 */
1181 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1182 struct request_sock *req,
1183 struct dst_entry *dst,
1184 struct request_sock *req_unhash,
1187 struct inet_request_sock *ireq;
1188 struct ipv6_pinfo *newnp;
1189 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1190 struct ipv6_txoptions *opt;
1191 struct inet_sock *newinet;
1192 struct tcp_sock *newtp;
1194 #ifdef CONFIG_TCP_MD5SIG
1195 struct tcp_md5sig_key *key;
/* --- v4-mapped path --- */
1200 if (skb->protocol == htons(ETH_P_IP)) {
1205 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1206 req_unhash, own_req);
1211 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1213 newinet = inet_sk(newsk);
1214 newnp = tcp_inet6_sk(newsk);
1215 newtp = tcp_sk(newsk);
1217 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1219 newnp->saddr = newsk->sk_v6_rcv_saddr;
1221 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1222 if (sk_is_mptcp(newsk))
1223 mptcpv6_handle_mapped(newsk, true);
1224 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1225 #ifdef CONFIG_TCP_MD5SIG
1226 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
/* The child must not inherit the listener's IPv6 state lists. */
1229 newnp->ipv6_mc_list = NULL;
1230 newnp->ipv6_ac_list = NULL;
1231 newnp->ipv6_fl_list = NULL;
1232 newnp->pktoptions = NULL;
1234 newnp->mcast_oif = inet_iif(skb);
1235 newnp->mcast_hops = ip_hdr(skb)->ttl;
1236 newnp->rcv_flowinfo = 0;
1238 newnp->flow_label = 0;
1241 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1242 * here, tcp_create_openreq_child now does this for us, see the comment in
1243 * that function for the gory details. -acme
1246 /* It is tricky place. Until this moment IPv4 tcp
1247 worked with IPv6 icsk.icsk_af_ops.
1250 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path --- */
1255 ireq = inet_rsk(req);
1257 if (sk_acceptq_is_full(sk))
1261 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1266 newsk = tcp_create_openreq_child(sk, req, skb);
1271 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1272 * count here, tcp_create_openreq_child now does this for us, see the
1273 * comment in that function for the gory details. -acme
1276 newsk->sk_gso_type = SKB_GSO_TCPV6;
1277 ip6_dst_store(newsk, dst, NULL, NULL);
1278 inet6_sk_rx_dst_set(newsk, skb);
1280 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1282 newtp = tcp_sk(newsk);
1283 newinet = inet_sk(newsk);
1284 newnp = tcp_inet6_sk(newsk);
1286 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1288 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1289 newnp->saddr = ireq->ir_v6_loc_addr;
1290 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1291 newsk->sk_bound_dev_if = ireq->ir_iif;
1293 /* Now IPv6 options...
1295 First: no IPv4 options.
1297 newinet->inet_opt = NULL;
1298 newnp->ipv6_mc_list = NULL;
1299 newnp->ipv6_ac_list = NULL;
1300 newnp->ipv6_fl_list = NULL;
1303 newnp->rxopt.all = np->rxopt.all;
1305 newnp->pktoptions = NULL;
1307 newnp->mcast_oif = tcp_v6_iif(skb);
1308 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1309 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1311 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1313 /* Clone native IPv6 options from listening socket (if any)
1315 Yes, keeping reference count would be much more clever,
1316 but we make one more one thing there: reattach optmem
1319 opt = ireq->ipv6_opt;
1321 opt = rcu_dereference(np->opt);
1323 opt = ipv6_dup_options(newsk, opt);
1324 RCU_INIT_POINTER(newnp->opt, opt);
1326 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1328 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1331 tcp_ca_openreq_child(newsk, dst);
1333 tcp_sync_mss(newsk, dst_mtu(dst));
1334 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1336 tcp_initialize_rcv_mss(newsk);
1338 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1339 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1341 #ifdef CONFIG_TCP_MD5SIG
1342 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1344 /* Copy over the MD5 key from the original socket */
1345 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1347 /* We're using one, so create a matching key
1348 * on the newsk structure. If we fail to get
1349 * memory, then we end up not copying the key
1352 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1353 AF_INET6, 128, l3index, key->key, key->keylen,
1354 sk_gfp_mask(sk, GFP_ATOMIC));
1358 if (__inet_inherit_port(sk, newsk) < 0) {
1359 inet_csk_prepare_forced_close(newsk);
1363 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1365 tcp_move_syn(newtp, req);
1367 /* Clone pktoptions received with SYN, if we own the req */
1368 if (ireq->pktopts) {
1369 newnp->pktoptions = skb_clone(ireq->pktopts,
1370 sk_gfp_mask(sk, GFP_ATOMIC));
1371 consume_skb(ireq->pktopts);
1372 ireq->pktopts = NULL;
1373 if (newnp->pktoptions) {
1374 tcp_v6_restore_cb(newnp->pktoptions);
1375 skb_set_owner_r(newnp->pktoptions, newsk);
/* Listen-queue overflow accounting on the failure path. */
1383 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1391 /* The socket must have it's spinlock held when we get
1392 * here, unless it is a TCP_LISTEN socket.
1394 * We have a potential double-lock case here, so even when
1395 * doing backlog processing we use the BH locking scheme.
1396 * This is because we cannot sleep with the original spinlock
/* NOTE(review): this listing has gaps in the original line numbering
 * (e.g. braces, blank lines and goto labels such as the csum-error and
 * discard paths are not shown); the code lines below are kept verbatim.
 */
1399 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1401 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1402 struct sk_buff *opt_skb = NULL;
1403 struct tcp_sock *tp;
1405 /* Imagine: socket is IPv6. IPv4 packet arrives,
1406 goes to IPv4 receive handler and backlogged.
1407 From backlog it always goes here. Kerboom...
1408 Fortunately, tcp_rcv_established and rcv_established
1409 handle them correctly, but it is not case with
1410 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* A v4-mapped packet on a dual-stack socket is handed straight to the
 * IPv4 receive path; nothing below runs for it. */
1413 if (skb->protocol == htons(ETH_P_IP))
1414 return tcp_v4_do_rcv(sk, skb);
1417 * socket locking is here for SMP purposes as backlog rcv
1418 * is currently called with bh processing disabled.
1421 /* Do Stevens' IPV6_PKTOPTIONS.
1423 Yes, guys, it is the only place in our code, where we
1424 may make it not affecting IPv4.
1425 The rest of code is protocol independent,
1426 and I do not like idea to uglify IPv4.
1428 Actually, all the idea behind IPV6_PKTOPTIONS
1429 looks not very well thought. For now we latch
1430 options, received in the last packet, enqueued
1431 by tcp. Feel free to propose better solution.
/* Clone the skb up-front so the received ancillary data can be latched
 * into np->pktoptions later (see the ipv6_pktoptions section below).
 * GFP_ATOMIC: we may be in BH context. */
1435 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1437 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1438 struct dst_entry *dst = sk->sk_rx_dst;
1440 sock_rps_save_rxhash(sk, skb);
1441 sk_mark_napi_id(sk, skb);
/* Drop the cached rx route if the ingress device changed or the
 * cached dst no longer validates against its cookie. */
1443 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1444 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1446 sk->sk_rx_dst = NULL;
1450 tcp_rcv_established(sk, skb);
1452 goto ipv6_pktoptions;
1456 if (tcp_checksum_complete(skb))
/* Listening socket: may yield a new child socket from a (syn)cookie. */
1459 if (sk->sk_state == TCP_LISTEN) {
1460 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1466 if (tcp_child_process(sk, nsk, skb))
1469 __kfree_skb(opt_skb);
1473 sock_rps_save_rxhash(sk, skb);
1475 if (tcp_rcv_state_process(sk, skb))
1478 goto ipv6_pktoptions;
/* Error path: reset the peer and count checksum errors. */
1482 tcp_v6_send_reset(sk, skb);
1485 __kfree_skb(opt_skb);
1489 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1490 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1495 /* Do you ask, what is it?
1497 1. skb was enqueued by tcp.
1498 2. skb is added to tail of read queue, rather than out of order.
1499 3. socket is not in passive state.
1500 4. Finally, it really contains options, which user wants to receive.
1503 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1504 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
/* Latch per-packet metadata only for the option types the user
 * enabled via setsockopt (np->rxopt bits). */
1505 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1506 np->mcast_oif = tcp_v6_iif(opt_skb);
1507 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1508 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1509 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1510 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1512 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1513 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1514 skb_set_owner_r(opt_skb, sk);
1515 tcp_v6_restore_cb(opt_skb);
/* xchg(): atomically swap in the new pktoptions clone; the value we
 * get back (the previous clone, if any) is freed below. */
1516 opt_skb = xchg(&np->pktoptions, opt_skb);
1518 __kfree_skb(opt_skb);
1519 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB(skb) from the IPv6 and TCP headers.  Must be
 * called only after xfrm6_policy_check() (see comment below): it
 * overwrites the IP6CB() area that _decode_session6() still needs.
 */
1527 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1528 const struct tcphdr *th)
1530 /* This is tricky: we move IP6CB at its correct location into
1531 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1532 * _decode_session6() uses IP6CB().
1533 * barrier() makes sure compiler won't play aliasing games.
1535 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1536 sizeof(struct inet6_skb_parm));
1539 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one unit of sequence space each,
 * plus the payload length (skb->len minus the TCP header). */
1540 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1541 skb->len - th->doff*4);
1542 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1543 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1544 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1545 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1546 TCP_SKB_CB(skb)->sacked = 0;
/* True if either a software or a hardware rx timestamp is present. */
1547 TCP_SKB_CB(skb)->has_rxtstamp =
1548 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/* Main IPv6 TCP receive entry point (registered as tcpv6_protocol.handler).
 * NOTE(review): the original line numbering shows gaps here (goto labels
 * such as the discard/timewait targets and several braces are not in this
 * listing); code lines are kept verbatim.
 */
1551 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1553 struct sk_buff *skb_to_free;
1554 int sdif = inet6_sdif(skb);
1555 int dif = inet6_iif(skb);
1556 const struct tcphdr *th;
1557 const struct ipv6hdr *hdr;
1561 struct net *net = dev_net(skb->dev);
1563 if (skb->pkt_type != PACKET_HOST)
1567 * Count it even if it's bad.
1569 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Header sanity: linearize enough for the base header, validate doff,
 * then pull the full header including options. */
1571 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1574 th = (const struct tcphdr *)skb->data;
1576 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1578 if (!pskb_may_pull(skb, th->doff*4))
1581 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read th/hdr: pskb_may_pull() may have reallocated skb->data. */
1584 th = (const struct tcphdr *)skb->data;
1585 hdr = ipv6_hdr(skb);
1588 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1589 th->source, th->dest, inet6_iif(skb), sdif,
1595 if (sk->sk_state == TCP_TIME_WAIT)
/* A request socket was found: validate the packet against the
 * listener before letting tcp_check_req() create a child. */
1598 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1599 struct request_sock *req = inet_reqsk(sk);
1600 bool req_stolen = false;
1603 sk = req->rsk_listener;
1604 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1605 sk_drops_add(sk, skb);
1609 if (tcp_checksum_complete(skb)) {
1613 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1614 inet_csk_reqsk_queue_drop_and_put(sk, req);
1620 if (!tcp_filter(sk, skb)) {
1621 th = (const struct tcphdr *)skb->data;
1622 hdr = ipv6_hdr(skb);
1623 tcp_v6_fill_cb(skb, hdr, th);
1624 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1629 /* Another cpu got exclusive access to req
1630 * and created a full blown socket.
1631 * Try to feed this packet to this socket
1632 * instead of discarding it.
1634 tcp_v6_restore_cb(skb);
1638 goto discard_and_relse;
1642 tcp_v6_restore_cb(skb);
1643 } else if (tcp_child_process(sk, nsk, skb)) {
1644 tcp_v6_send_reset(nsk, skb);
1645 goto discard_and_relse;
/* Established/other full-socket path: per-socket policy checks. */
1651 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1652 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1653 goto discard_and_relse;
1656 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1657 goto discard_and_relse;
1659 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1660 goto discard_and_relse;
1662 if (tcp_filter(sk, skb))
1663 goto discard_and_relse;
/* tcp_filter() (BPF) may have trimmed/reallocated the skb: reload. */
1664 th = (const struct tcphdr *)skb->data;
1665 hdr = ipv6_hdr(skb);
1666 tcp_v6_fill_cb(skb, hdr, th);
1670 if (sk->sk_state == TCP_LISTEN) {
1671 ret = tcp_v6_do_rcv(sk, skb);
1672 goto put_and_return;
1675 sk_incoming_cpu_update(sk);
1677 bh_lock_sock_nested(sk);
1678 tcp_segs_in(tcp_sk(sk), skb);
/* Owned by user: queue to backlog instead of processing inline. */
1680 if (!sock_owned_by_user(sk)) {
1681 skb_to_free = sk->sk_rx_skb_cache;
1682 sk->sk_rx_skb_cache = NULL;
1683 ret = tcp_v6_do_rcv(sk, skb);
1685 if (tcp_add_backlog(sk, skb))
1686 goto discard_and_relse;
1691 __kfree_skb(skb_to_free);
1695 return ret ? -1 : 0;
/* No-socket path: policy check, then count errors / send RST. */
1698 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1701 tcp_v6_fill_cb(skb, hdr, th);
1703 if (tcp_checksum_complete(skb)) {
1705 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1707 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1709 tcp_v6_send_reset(NULL, skb);
1717 sk_drops_add(sk, skb);
/* TIME_WAIT path: revalidate, then let the timewait state machine
 * decide whether to match a new listener, ACK, or reset. */
1723 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1724 inet_twsk_put(inet_twsk(sk));
1728 tcp_v6_fill_cb(skb, hdr, th);
1730 if (tcp_checksum_complete(skb)) {
1731 inet_twsk_put(inet_twsk(sk));
1735 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN: a new SYN may be accepted by a current listener on
 * the same port; if found, dispose of the timewait sock and retry. */
1740 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1741 skb, __tcp_hdrlen(th),
1742 &ipv6_hdr(skb)->saddr, th->source,
1743 &ipv6_hdr(skb)->daddr,
1745 tcp_v6_iif_l3_slave(skb),
1748 struct inet_timewait_sock *tw = inet_twsk(sk);
1749 inet_twsk_deschedule_put(tw);
1751 tcp_v6_restore_cb(skb);
1759 tcp_v6_timewait_ack(sk, skb);
1762 tcp_v6_send_reset(sk, skb);
1763 inet_twsk_deschedule_put(inet_twsk(sk));
1765 case TCP_TW_SUCCESS:
/* Early demux: look up an established socket before routing, so the
 * socket's cached rx dst can be attached to the skb and the full route
 * lookup skipped.  Best-effort; any failure just falls through.
 */
1771 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1773 const struct ipv6hdr *hdr;
1774 const struct tcphdr *th;
1777 if (skb->pkt_type != PACKET_HOST)
1780 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1783 hdr = ipv6_hdr(skb);
1786 if (th->doff < sizeof(struct tcphdr) / 4)
1789 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1790 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1791 &hdr->saddr, th->source,
1792 &hdr->daddr, ntohs(th->dest),
1793 inet6_iif(skb), inet6_sdif(skb));
/* sock_edemux drops the socket reference when the skb is freed. */
1796 skb->destructor = sock_edemux;
1797 if (sk_fullsock(sk)) {
1798 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1801 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
/* Only reuse the cached dst when it is still valid and was learnt
 * on the same ingress interface. */
1803 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1804 skb_dst_set_noref(skb, dst);
/* Timewait-socket ops for TCPv6; hooks shared TCP timewait helpers. */
1809 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1810 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1811 .twsk_unique = tcp_twsk_unique,
1812 .twsk_destructor = tcp_twsk_destructor,
/* Compute the outgoing TCP checksum over the IPv6 pseudo-header for
 * this socket's source/destination addresses. */
1815 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1817 struct ipv6_pinfo *np = inet6_sk(sk);
1819 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
/* AF ops for native IPv6 TCP sockets (icsk->icsk_af_ops). */
1822 const struct inet_connection_sock_af_ops ipv6_specific = {
1823 .queue_xmit = inet6_csk_xmit,
1824 .send_check = tcp_v6_send_check,
1825 .rebuild_header = inet6_sk_rebuild_header,
1826 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1827 .conn_request = tcp_v6_conn_request,
1828 .syn_recv_sock = tcp_v6_syn_recv_sock,
1829 .net_header_len = sizeof(struct ipv6hdr),
1830 .net_frag_header_len = sizeof(struct frag_hdr),
1831 .setsockopt = ipv6_setsockopt,
1832 .getsockopt = ipv6_getsockopt,
1833 .addr2sockaddr = inet6_csk_addr2sockaddr,
1834 .sockaddr_len = sizeof(struct sockaddr_in6),
1835 .mtu_reduced = tcp_v6_mtu_reduced,
1838 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature ops for native IPv6 sockets. */
1839 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1840 .md5_lookup = tcp_v6_md5_lookup,
1841 .calc_md5_hash = tcp_v6_md5_hash_skb,
1842 .md5_parse = tcp_v6_parse_md5_keys,
1847 * TCP over IPv4 via INET6 API
/* AF ops for v4-mapped sockets: transmit/header handling use the IPv4
 * helpers, while sockopt/sockaddr handling stays IPv6. */
1849 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1850 .queue_xmit = ip_queue_xmit,
1851 .send_check = tcp_v4_send_check,
1852 .rebuild_header = inet_sk_rebuild_header,
1853 .sk_rx_dst_set = inet_sk_rx_dst_set,
1854 .conn_request = tcp_v6_conn_request,
1855 .syn_recv_sock = tcp_v6_syn_recv_sock,
1856 .net_header_len = sizeof(struct iphdr),
1857 .setsockopt = ipv6_setsockopt,
1858 .getsockopt = ipv6_getsockopt,
1859 .addr2sockaddr = inet6_csk_addr2sockaddr,
1860 .sockaddr_len = sizeof(struct sockaddr_in6),
1861 .mtu_reduced = tcp_v4_mtu_reduced,
1864 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature ops for v4-mapped sockets (IPv4 hash helpers). */
1865 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1866 .md5_lookup = tcp_v4_md5_lookup,
1867 .calc_md5_hash = tcp_v4_md5_hash_skb,
1868 .md5_parse = tcp_v6_parse_md5_keys,
1872 /* NOTE: A lot of things set to zero explicitly by call to
1873 * sk_alloc() so need not be done here.
/* Per-socket init for IPv6 TCP: install the IPv6 AF ops (and MD5 ops
 * when configured).  Shared TCP init is presumably done by a call not
 * visible in this listing — TODO confirm against the full source. */
1875 static int tcp_v6_init_sock(struct sock *sk)
1877 struct inet_connection_sock *icsk = inet_csk(sk);
1881 icsk->icsk_af_ops = &ipv6_specific;
1883 #ifdef CONFIG_TCP_MD5SIG
1884 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Destroy an IPv6 TCP socket: shared TCP teardown first, then the
 * IPv6-specific state. */
1890 static void tcp_v6_destroy_sock(struct sock *sk)
1892 tcp_v4_destroy_sock(sk);
1893 inet6_destroy_sock(sk);
1896 #ifdef CONFIG_PROC_FS
1897 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 row for a pending open request (SYN_RECV). */
1898 static void get_openreq6(struct seq_file *seq,
1899 const struct request_sock *req, int i)
/* ttd: remaining jiffies until the request's retransmit timer fires. */
1901 long ttd = req->rsk_timer.expires - jiffies;
1902 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1903 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1909 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1910 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1912 src->s6_addr32[0], src->s6_addr32[1],
1913 src->s6_addr32[2], src->s6_addr32[3],
1914 inet_rsk(req)->ir_num,
1915 dest->s6_addr32[0], dest->s6_addr32[1],
1916 dest->s6_addr32[2], dest->s6_addr32[3],
1917 ntohs(inet_rsk(req)->ir_rmt_port),
1919 0, 0, /* could print option size, but that is af dependent. */
1920 1, /* timers active (only the expire timer) */
1921 jiffies_to_clock_t(ttd),
1923 from_kuid_munged(seq_user_ns(seq),
1924 sock_i_uid(req->rsk_listener)),
1925 0, /* non standard timer */
1926 0, /* open_requests have no inode */
/* Emit one /proc/net/tcp6 row for a full socket.  Lockless readers:
 * all shared fields are read with READ_ONCE() or tolerate staleness. */
1930 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1932 const struct in6_addr *dest, *src;
1935 unsigned long timer_expires;
1936 const struct inet_sock *inet = inet_sk(sp);
1937 const struct tcp_sock *tp = tcp_sk(sp);
1938 const struct inet_connection_sock *icsk = inet_csk(sp);
1939 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1943 dest = &sp->sk_v6_daddr;
1944 src = &sp->sk_v6_rcv_saddr;
1945 destp = ntohs(inet->inet_dport);
1946 srcp = ntohs(inet->inet_sport);
/* Pick which pending timer (if any) to report, mirroring the classic
 * /proc/net/tcp "tr tm->when" columns. */
1948 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1949 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1950 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1952 timer_expires = icsk->icsk_timeout;
1953 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1955 timer_expires = icsk->icsk_timeout;
1956 } else if (timer_pending(&sp->sk_timer)) {
1958 timer_expires = sp->sk_timer.expires;
1961 timer_expires = jiffies;
1964 state = inet_sk_state_load(sp);
/* Listeners report the accept backlog as "rx_queue". */
1965 if (state == TCP_LISTEN)
1966 rx_queue = READ_ONCE(sp->sk_ack_backlog);
1968 /* Because we don't lock the socket,
1969 * we might find a transient negative value.
1971 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1972 READ_ONCE(tp->copied_seq), 0);
1975 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1976 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1978 src->s6_addr32[0], src->s6_addr32[1],
1979 src->s6_addr32[2], src->s6_addr32[3], srcp,
1980 dest->s6_addr32[0], dest->s6_addr32[1],
1981 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1983 READ_ONCE(tp->write_seq) - tp->snd_una,
1986 jiffies_delta_to_clock_t(timer_expires - jiffies),
1987 icsk->icsk_retransmits,
1988 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1989 icsk->icsk_probes_out,
1991 refcount_read(&sp->sk_refcnt), sp,
1992 jiffies_to_clock_t(icsk->icsk_rto),
1993 jiffies_to_clock_t(icsk->icsk_ack.ato),
1994 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
/* Last column: fastopen max_qlen for listeners, else ssthresh
 * (-1 while still in initial slow start). */
1996 state == TCP_LISTEN ?
1997 fastopenq->max_qlen :
1998 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Emit one /proc/net/tcp6 row for a TIME_WAIT socket. */
2002 static void get_timewait6_sock(struct seq_file *seq,
2003 struct inet_timewait_sock *tw, int i)
/* delta: remaining jiffies until the timewait timer expires. */
2005 long delta = tw->tw_timer.expires - jiffies;
2006 const struct in6_addr *dest, *src;
2009 dest = &tw->tw_v6_daddr;
2010 src = &tw->tw_v6_rcv_saddr;
2011 destp = ntohs(tw->tw_dport);
2012 srcp = ntohs(tw->tw_sport);
2015 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2016 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2018 src->s6_addr32[0], src->s6_addr32[1],
2019 src->s6_addr32[2], src->s6_addr32[3], srcp,
2020 dest->s6_addr32[0], dest->s6_addr32[1],
2021 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* "3" in the timer column denotes the timewait timer; most other
 * columns are fixed zeros for timewait sockets. */
2022 tw->tw_substate, 0, 0,
2023 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2024 refcount_read(&tw->tw_refcnt), tw);
/* seq_file .show callback: print the header row for the start token,
 * otherwise dispatch on socket state to the matching row formatter. */
2027 static int tcp6_seq_show(struct seq_file *seq, void *v)
2029 struct tcp_iter_state *st;
2030 struct sock *sk = v;
2032 if (v == SEQ_START_TOKEN) {
2037 "st tx_queue rx_queue tr tm->when retrnsmt"
2038 " uid timeout inode\n");
2043 if (sk->sk_state == TCP_TIME_WAIT)
2044 get_timewait6_sock(seq, v, st->num);
2045 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2046 get_openreq6(seq, v, st->num);
2048 get_tcp6_sock(seq, v, st->num);
/* seq_file iteration ops for /proc/net/tcp6 (iteration helpers are the
 * shared TCP ones; only .show is IPv6-specific). */
2053 static const struct seq_operations tcp6_seq_ops = {
2054 .show = tcp6_seq_show,
2055 .start = tcp_seq_start,
2056 .next = tcp_seq_next,
2057 .stop = tcp_seq_stop,
/* Per-AF data passed to proc_create_net_data() for /proc/net/tcp6. */
2060 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Per-netns init: create the /proc/net/tcp6 seq_file entry. */
2064 int __net_init tcp6_proc_init(struct net *net)
2066 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2067 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Per-netns teardown: remove the /proc/net/tcp6 entry. */
2072 void tcp6_proc_exit(struct net *net)
2074 remove_proc_entry("tcp6", net->proc_net);
/* Protocol descriptor for IPv6 TCP sockets.  Mostly shared TCP
 * implementations; only init/destroy/backlog_rcv are IPv6-specific. */
2078 struct proto tcpv6_prot = {
2080 .owner = THIS_MODULE,
2082 .pre_connect = tcp_v6_pre_connect,
2083 .connect = tcp_v6_connect,
2084 .disconnect = tcp_disconnect,
2085 .accept = inet_csk_accept,
2087 .init = tcp_v6_init_sock,
2088 .destroy = tcp_v6_destroy_sock,
2089 .shutdown = tcp_shutdown,
2090 .setsockopt = tcp_setsockopt,
2091 .getsockopt = tcp_getsockopt,
2092 .keepalive = tcp_set_keepalive,
2093 .recvmsg = tcp_recvmsg,
2094 .sendmsg = tcp_sendmsg,
2095 .sendpage = tcp_sendpage,
2096 .backlog_rcv = tcp_v6_do_rcv,
2097 .release_cb = tcp_release_cb,
2099 .unhash = inet_unhash,
2100 .get_port = inet_csk_get_port,
2101 .enter_memory_pressure = tcp_enter_memory_pressure,
2102 .leave_memory_pressure = tcp_leave_memory_pressure,
2103 .stream_memory_free = tcp_stream_memory_free,
2104 .sockets_allocated = &tcp_sockets_allocated,
2105 .memory_allocated = &tcp_memory_allocated,
2106 .memory_pressure = &tcp_memory_pressure,
2107 .orphan_count = &tcp_orphan_count,
2108 .sysctl_mem = sysctl_tcp_mem,
2109 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2110 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2111 .max_header = MAX_TCP_HEADER,
2112 .obj_size = sizeof(struct tcp6_sock),
/* SLAB_TYPESAFE_BY_RCU: sockets may be looked up under RCU while
 * being recycled; lookups must revalidate after taking a ref. */
2113 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2114 .twsk_prot = &tcp6_timewait_sock_ops,
2115 .rsk_prot = &tcp6_request_sock_ops,
2116 .h.hashinfo = &tcp_hashinfo,
2117 .no_autobind = true,
2118 .diag_destroy = tcp_abort,
2122 /* thinking of making this const? Don't.
2123 * early_demux can change based on sysctl.
/* inet6 protocol hooks: tcp_v6_rcv receives all IPPROTO_TCP packets. */
2125 static struct inet6_protocol tcpv6_protocol = {
2126 .early_demux = tcp_v6_early_demux,
2127 .early_demux_handler = tcp_v6_early_demux,
2128 .handler = tcp_v6_rcv,
2129 .err_handler = tcp_v6_err,
2130 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Registration entry mapping SOCK_STREAM/IPPROTO_TCP on AF_INET6 to
 * tcpv6_prot and the inet6 stream socket ops. */
2133 static struct inet_protosw tcpv6_protosw = {
2134 .type = SOCK_STREAM,
2135 .protocol = IPPROTO_TCP,
2136 .prot = &tcpv6_prot,
2137 .ops = &inet6_stream_ops,
2138 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the kernel control socket used for sending
 * RSTs/ACKs on behalf of this namespace. */
2142 static int __net_init tcpv6_net_init(struct net *net)
2144 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2145 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns teardown: destroy the namespace's control socket. */
2148 static void __net_exit tcpv6_net_exit(struct net *net)
2150 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batch teardown: purge all IPv6 timewait sockets for the exiting
 * namespaces in one pass. */
2153 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2155 inet_twsk_purge(&tcp_hashinfo, AF_INET6)
/* Per-network-namespace lifecycle hooks for TCPv6. */
2158 static struct pernet_operations tcpv6_net_ops = {
2159 .init = tcpv6_net_init,
2160 .exit = tcpv6_net_exit,
2161 .exit_batch = tcpv6_net_exit_batch,
/* Module/boot init: register the inet6 protocol handler, the protosw
 * entry, the pernet ops, and MPTCP-v6; unwind in reverse on failure
 * (goto-cleanup chain). */
2164 int __init tcpv6_init(void)
2168 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2172 /* register inet6 protocol */
2173 ret = inet6_register_protosw(&tcpv6_protosw);
2175 goto out_tcpv6_protocol;
2177 ret = register_pernet_subsys(&tcpv6_net_ops);
2179 goto out_tcpv6_protosw;
2181 ret = mptcpv6_init();
2183 goto out_tcpv6_pernet_subsys;
/* Error unwinding: each label undoes the registrations above it. */
2188 out_tcpv6_pernet_subsys:
2189 unregister_pernet_subsys(&tcpv6_net_ops);
2191 inet6_unregister_protosw(&tcpv6_protosw);
2193 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2197 void tcpv6_exit(void)
2199 unregister_pernet_subsys(&tcpv6_net_ops);
2200 inet6_unregister_protosw(&tcpv6_protosw);
2201 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);