/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI
 *	and Alexey Kuznetsov	:	support the IPV6_V6ONLY socket option,
 *					which allows both IPv4 and IPv6 sockets
 *					to bind a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
69 static void tcp_v6_send_reset(struct sk_buff *skb);
70 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb);
74 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
75 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
77 static struct tcp_func ipv6_mapped;
78 static struct tcp_func ipv6_specific;
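/*
 * Two af_specific operation tables are used per socket: ipv6_specific
 * carries the native IPv6 transmit/checksum/connect helpers, while
 * ipv6_mapped reuses the IPv4 routines for sockets connected to a
 * v4-mapped (::ffff:a.b.c.d) peer.  tcp_v6_connect() switches a socket
 * from one table to the other when it detects a mapped destination.
 */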
80 int inet6_csk_bind_conflict(const struct sock *sk,
81 const struct inet_bind_bucket *tb)
83 const struct sock *sk2;
84 const struct hlist_node *node;
86 /* We must walk the whole port owner list in this case. -DaveM */
87 sk_for_each_bound(sk2, node, &tb->owners) {
89 (!sk->sk_bound_dev_if ||
90 !sk2->sk_bound_dev_if ||
91 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
92 (!sk->sk_reuse || !sk2->sk_reuse ||
93 sk2->sk_state == TCP_LISTEN) &&
94 ipv6_rcv_saddr_equal(sk, sk2))
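/*
 * A bind conflict exists only when all of the following hold for some
 * other owner of the port: the sockets are bound to the same device
 * (or at least one of them is unbound), neither socket allows address
 * reuse (or the other one is already listening), and the IPv6 receive
 * addresses overlap according to ipv6_rcv_saddr_equal().
 */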
101 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
103 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
104 inet6_csk_bind_conflict);
107 static void tcp_v6_hash(struct sock *sk)
109 if (sk->sk_state != TCP_CLOSE) {
110 struct tcp_sock *tp = tcp_sk(sk);
112 if (tp->af_specific == &ipv6_mapped) {
117 __inet6_hash(&tcp_hashinfo, sk);
122 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
123 struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
127 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
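/*
 * The TCP checksum over IPv6 covers a pseudo-header of source address,
 * destination address, upper-layer length and next-header value
 * (RFC 2460, section 8.1).  csum_ipv6_magic() folds that pseudo-header
 * into the partial sum passed in via 'base'; conceptually:
 *
 *	check = ~fold(sum(saddr) + sum(daddr) + len + IPPROTO_TCP +
 *		      sum(TCP header and payload))
 */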
130 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
132 if (skb->protocol == htons(ETH_P_IPV6)) {
133 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
134 skb->nh.ipv6h->saddr.s6_addr32,
138 return secure_tcp_sequence_number(skb->nh.iph->daddr,
145 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
146 struct inet_timewait_sock **twp)
148 struct inet_sock *inet = inet_sk(sk);
149 const struct ipv6_pinfo *np = inet6_sk(sk);
150 const struct in6_addr *daddr = &np->rcv_saddr;
151 const struct in6_addr *saddr = &np->daddr;
152 const int dif = sk->sk_bound_dev_if;
153 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
154 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
155 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
157 const struct hlist_node *node;
158 struct inet_timewait_sock *tw;
160 prefetch(head->chain.first);
161 write_lock(&head->lock);
163 /* Check TIME-WAIT sockets first. */
164 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if(*((__u32 *)&(tw->tw_dport)) == ports &&
170 sk2->sk_family == PF_INET6 &&
171 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
172 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
173 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
174 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
175 struct tcp_sock *tp = tcp_sk(sk);
177 if (tcptw->tw_ts_recent_stamp &&
179 (sysctl_tcp_tw_reuse &&
180 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
181 /* See comment in tcp_ipv4.c */
182 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
185 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
186 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
195 /* And established part... */
196 sk_for_each(sk2, node, &head->chain) {
197 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
202 BUG_TRAP(sk_unhashed(sk));
203 __sk_add_node(sk, &head->chain);
205 sock_prot_inc_use(sk->sk_prot);
206 write_unlock(&head->lock);
210 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
212 /* Silly. Should hash-dance instead... */
213 inet_twsk_deschedule(tw, &tcp_death_row);
214 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
221 write_unlock(&head->lock);
222 return -EADDRNOTAVAIL;
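/*
 * The established-hash walk above guarantees that the 4-tuple chosen
 * for an outgoing connection is unique: a matching TIME-WAIT socket
 * may be recycled (subject to its timestamps and sysctl_tcp_tw_reuse),
 * a matching established socket makes the tuple unusable
 * (-EADDRNOTAVAIL), and otherwise the new socket is inserted into the
 * chain while the bucket lock is still held.
 */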
225 static inline u32 tcpv6_port_offset(const struct sock *sk)
227 const struct inet_sock *inet = inet_sk(sk);
228 const struct ipv6_pinfo *np = inet6_sk(sk);
230 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
235 static int tcp_v6_hash_connect(struct sock *sk)
237 unsigned short snum = inet_sk(sk)->num;
238 struct inet_bind_hashbucket *head;
239 struct inet_bind_bucket *tb;
243 int low = sysctl_local_port_range[0];
244 int high = sysctl_local_port_range[1];
245 int range = high - low;
249 u32 offset = hint + tcpv6_port_offset(sk);
250 struct hlist_node *node;
251 struct inet_timewait_sock *tw = NULL;
254 for (i = 1; i <= range; i++) {
255 port = low + (i + offset) % range;
256 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
257 spin_lock(&head->lock);
		/* Does not bother with rcv_saddr checks,
		 * because the established check is already
		 * unique enough.
		 */
263 inet_bind_bucket_for_each(tb, node, &head->chain) {
264 if (tb->port == port) {
265 BUG_TRAP(!hlist_empty(&tb->owners));
266 if (tb->fastreuse >= 0)
268 if (!__tcp_v6_check_established(sk,
276 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
278 spin_unlock(&head->lock);
285 spin_unlock(&head->lock);
289 return -EADDRNOTAVAIL;
294 /* Head lock still held and bh's disabled */
295 inet_bind_hash(sk, tb, port);
296 if (sk_unhashed(sk)) {
297 inet_sk(sk)->sport = htons(port);
298 __inet6_hash(&tcp_hashinfo, sk);
300 spin_unlock(&head->lock);
303 inet_twsk_deschedule(tw, &tcp_death_row);
311 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
312 tb = inet_csk(sk)->icsk_bind_hash;
313 spin_lock_bh(&head->lock);
315 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
316 __inet6_hash(&tcp_hashinfo, sk);
317 spin_unlock_bh(&head->lock);
320 spin_unlock(&head->lock);
321 /* No definite answer... Walk to established hash table */
322 ret = __tcp_v6_check_established(sk, snum, NULL);
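/*
 * Ephemeral port selection: when the socket has no local port yet, the
 * loop above probes ports in the sysctl_local_port_range window,
 * starting at a per-destination offset derived from
 * secure_tcpv6_port_ephemeral(), and takes the first port whose bind
 * bucket also passes __tcp_v6_check_established().  A socket that is
 * already bound may reuse its port directly only if it is the sole
 * owner of its bind bucket; otherwise the established hash must be
 * consulted as well.
 */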
329 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
332 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
333 struct inet_sock *inet = inet_sk(sk);
334 struct ipv6_pinfo *np = inet6_sk(sk);
335 struct tcp_sock *tp = tcp_sk(sk);
336 struct in6_addr *saddr = NULL, *final_p = NULL, final;
338 struct dst_entry *dst;
342 if (addr_len < SIN6_LEN_RFC2133)
345 if (usin->sin6_family != AF_INET6)
346 return(-EAFNOSUPPORT);
348 memset(&fl, 0, sizeof(fl));
351 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
352 IP6_ECN_flow_init(fl.fl6_flowlabel);
353 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
354 struct ip6_flowlabel *flowlabel;
355 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
356 if (flowlabel == NULL)
358 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
359 fl6_sock_release(flowlabel);
364 * connect() to INADDR_ANY means loopback (BSD'ism).
367 if(ipv6_addr_any(&usin->sin6_addr))
368 usin->sin6_addr.s6_addr[15] = 0x1;
370 addr_type = ipv6_addr_type(&usin->sin6_addr);
372 if(addr_type & IPV6_ADDR_MULTICAST)
375 if (addr_type&IPV6_ADDR_LINKLOCAL) {
376 if (addr_len >= sizeof(struct sockaddr_in6) &&
377 usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
381 if (sk->sk_bound_dev_if &&
382 sk->sk_bound_dev_if != usin->sin6_scope_id)
385 sk->sk_bound_dev_if = usin->sin6_scope_id;
388 /* Connect to link-local address requires an interface */
389 if (!sk->sk_bound_dev_if)
393 if (tp->rx_opt.ts_recent_stamp &&
394 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
395 tp->rx_opt.ts_recent = 0;
396 tp->rx_opt.ts_recent_stamp = 0;
400 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
401 np->flow_label = fl.fl6_flowlabel;
407 if (addr_type == IPV6_ADDR_MAPPED) {
408 u32 exthdrlen = tp->ext_header_len;
409 struct sockaddr_in sin;
411 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
413 if (__ipv6_only_sock(sk))
416 sin.sin_family = AF_INET;
417 sin.sin_port = usin->sin6_port;
418 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
420 tp->af_specific = &ipv6_mapped;
421 sk->sk_backlog_rcv = tcp_v4_do_rcv;
423 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
426 tp->ext_header_len = exthdrlen;
427 tp->af_specific = &ipv6_specific;
428 sk->sk_backlog_rcv = tcp_v6_do_rcv;
431 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
433 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
440 if (!ipv6_addr_any(&np->rcv_saddr))
441 saddr = &np->rcv_saddr;
443 fl.proto = IPPROTO_TCP;
444 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
445 ipv6_addr_copy(&fl.fl6_src,
446 (saddr ? saddr : &np->saddr));
447 fl.oif = sk->sk_bound_dev_if;
448 fl.fl_ip_dport = usin->sin6_port;
449 fl.fl_ip_sport = inet->sport;
451 if (np->opt && np->opt->srcrt) {
452 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
453 ipv6_addr_copy(&final, &fl.fl6_dst);
454 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
458 err = ip6_dst_lookup(sk, &dst, &fl);
462 ipv6_addr_copy(&fl.fl6_dst, final_p);
464 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
469 ipv6_addr_copy(&np->rcv_saddr, saddr);
472 /* set the source address */
473 ipv6_addr_copy(&np->saddr, saddr);
474 inet->rcv_saddr = LOOPBACK4_IPV6;
476 ip6_dst_store(sk, dst, NULL);
477 sk->sk_route_caps = dst->dev->features &
478 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
480 tp->ext_header_len = 0;
482 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
484 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
486 inet->dport = usin->sin6_port;
488 tcp_set_state(sk, TCP_SYN_SENT);
489 err = tcp_v6_hash_connect(sk);
494 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
499 err = tcp_connect(sk);
506 tcp_set_state(sk, TCP_CLOSE);
510 sk->sk_route_caps = 0;
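/*
 * Failure path of tcp_v6_connect(): the socket is moved back to
 * TCP_CLOSE, the cached route capabilities are cleared and the error
 * is returned.  The success path above picked a flow label, diverted
 * v4-mapped destinations to the IPv4 code, resolved a route (honouring
 * a type 0 routing header in np->opt, if present), clamped mss_clamp
 * to fit the IPv6 minimum MTU and started the handshake via
 * tcp_connect().
 */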
514 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
515 int type, int code, int offset, __u32 info)
517 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
518 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
519 struct ipv6_pinfo *np;
525 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
526 th->source, skb->dev->ifindex);
529 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
533 if (sk->sk_state == TCP_TIME_WAIT) {
534 inet_twsk_put((struct inet_timewait_sock *)sk);
539 if (sock_owned_by_user(sk))
540 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
542 if (sk->sk_state == TCP_CLOSE)
546 seq = ntohl(th->seq);
547 if (sk->sk_state != TCP_LISTEN &&
548 !between(seq, tp->snd_una, tp->snd_nxt)) {
549 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
555 if (type == ICMPV6_PKT_TOOBIG) {
556 struct dst_entry *dst = NULL;
558 if (sock_owned_by_user(sk))
560 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
563 /* icmp should have updated the destination cache entry */
564 dst = __sk_dst_check(sk, np->dst_cookie);
567 struct inet_sock *inet = inet_sk(sk);
			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle the rthdr case. Ignore this complexity
			   for now.
			 */
574 memset(&fl, 0, sizeof(fl));
575 fl.proto = IPPROTO_TCP;
576 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
577 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
578 fl.oif = sk->sk_bound_dev_if;
579 fl.fl_ip_dport = inet->dport;
580 fl.fl_ip_sport = inet->sport;
582 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
583 sk->sk_err_soft = -err;
587 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
588 sk->sk_err_soft = -err;
595 if (tp->pmtu_cookie > dst_mtu(dst)) {
596 tcp_sync_mss(sk, dst_mtu(dst));
597 tcp_simple_retransmit(sk);
598 } /* else let the usual retransmit timer handle it */
603 icmpv6_err_convert(type, code, &err);
	/* Might be for a request_sock */
606 switch (sk->sk_state) {
607 struct request_sock *req, **prev;
609 if (sock_owned_by_user(sk))
612 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
613 &hdr->saddr, inet6_iif(skb));
617 /* ICMPs are not backlogged, hence we cannot get
618 * an established socket here.
620 BUG_TRAP(req->sk == NULL);
622 if (seq != tcp_rsk(req)->snt_isn) {
623 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
627 inet_csk_reqsk_queue_drop(sk, req, prev);
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
633 if (!sock_owned_by_user(sk)) {
634 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
636 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
640 sk->sk_err_soft = err;
644 if (!sock_owned_by_user(sk) && np->recverr) {
646 sk->sk_error_report(sk);
648 sk->sk_err_soft = err;
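/*
 * tcp_v6_err() is the ICMPv6 error handler: it looks up the socket for
 * the offending segment, handles ICMPV6_PKT_TOOBIG by re-syncing the
 * MSS to the new path MTU and retransmitting, drops errors whose
 * sequence number falls outside the send window, and for sockets in
 * SYN-SENT/SYN-RECV reports the converted error to the user.
 */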
656 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
657 struct dst_entry *dst)
659 struct inet6_request_sock *treq = inet6_rsk(req);
660 struct ipv6_pinfo *np = inet6_sk(sk);
661 struct sk_buff * skb;
662 struct ipv6_txoptions *opt = NULL;
663 struct in6_addr * final_p = NULL, final;
667 memset(&fl, 0, sizeof(fl));
668 fl.proto = IPPROTO_TCP;
669 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
670 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
671 fl.fl6_flowlabel = 0;
673 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
674 fl.fl_ip_sport = inet_sk(sk)->sport;
679 np->rxopt.bits.osrcrt == 2 &&
681 struct sk_buff *pktopts = treq->pktopts;
682 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
684 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
687 if (opt && opt->srcrt) {
688 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
689 ipv6_addr_copy(&final, &fl.fl6_dst);
690 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
694 err = ip6_dst_lookup(sk, &dst, &fl);
698 ipv6_addr_copy(&fl.fl6_dst, final_p);
699 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
703 skb = tcp_make_synack(sk, dst, req);
705 struct tcphdr *th = skb->h.th;
707 th->check = tcp_v6_check(th, skb->len,
708 &treq->loc_addr, &treq->rmt_addr,
709 csum_partial((char *)th, skb->len, skb->csum));
711 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
712 err = ip6_xmit(sk, skb, &fl, opt, 0);
713 if (err == NET_XMIT_CN)
718 if (opt && opt != np->opt)
719 sock_kfree_s(sk, opt, opt->tot_len);
723 static void tcp_v6_reqsk_destructor(struct request_sock *req)
725 if (inet6_rsk(req)->pktopts)
726 kfree_skb(inet6_rsk(req)->pktopts);
729 static struct request_sock_ops tcp6_request_sock_ops = {
731 .obj_size = sizeof(struct tcp6_request_sock),
732 .rtx_syn_ack = tcp_v6_send_synack,
733 .send_ack = tcp_v6_reqsk_send_ack,
734 .destructor = tcp_v6_reqsk_destructor,
735 .send_reset = tcp_v6_send_reset
738 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
740 struct ipv6_pinfo *np = inet6_sk(sk);
741 struct inet6_skb_parm *opt = IP6CB(skb);
744 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
745 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
746 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
747 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
757 struct ipv6_pinfo *np = inet6_sk(sk);
759 if (skb->ip_summed == CHECKSUM_HW) {
760 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
761 skb->csum = offsetof(struct tcphdr, check);
763 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
764 csum_partial((char *)th, th->doff<<2,
770 static void tcp_v6_send_reset(struct sk_buff *skb)
772 struct tcphdr *th = skb->h.th, *t1;
773 struct sk_buff *buff;
779 if (!ipv6_unicast_destination(skb))
783 * We need to grab some memory, and put together an RST,
784 * and then put it into the queue to be sent.
787 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
792 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
794 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
796 /* Swap the send and the receive. */
797 memset(t1, 0, sizeof(*t1));
798 t1->dest = th->source;
799 t1->source = th->dest;
800 t1->doff = sizeof(*t1)/4;
804 t1->seq = th->ack_seq;
807 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
808 + skb->len - (th->doff<<2));
811 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
813 memset(&fl, 0, sizeof(fl));
814 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
815 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
817 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
818 sizeof(*t1), IPPROTO_TCP,
821 fl.proto = IPPROTO_TCP;
822 fl.oif = inet6_iif(skb);
823 fl.fl_ip_dport = t1->dest;
824 fl.fl_ip_sport = t1->source;
826 /* sk = NULL, but it is safe for now. RST socket required. */
827 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
829 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
830 ip6_xmit(NULL, buff, &fl, NULL, 0);
831 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
832 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
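/*
 * The RST above is built from scratch: addresses and ports are taken
 * from the offending segment with source and destination swapped, the
 * sequence/ack fields follow the RFC 793 reset-generation rules, and
 * the segment is routed and transmitted without an owning socket
 * (sk == NULL in both ip6_dst_lookup() and ip6_xmit()).
 */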
840 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
842 struct tcphdr *th = skb->h.th, *t1;
843 struct sk_buff *buff;
845 int tot_len = sizeof(struct tcphdr);
850 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
855 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
857 t1 = (struct tcphdr *) skb_push(buff,tot_len);
859 /* Swap the send and the receive. */
860 memset(t1, 0, sizeof(*t1));
861 t1->dest = th->source;
862 t1->source = th->dest;
863 t1->doff = tot_len/4;
864 t1->seq = htonl(seq);
865 t1->ack_seq = htonl(ack);
867 t1->window = htons(win);
870 u32 *ptr = (u32*)(t1 + 1);
871 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
872 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
873 *ptr++ = htonl(tcp_time_stamp);
877 buff->csum = csum_partial((char *)t1, tot_len, 0);
879 memset(&fl, 0, sizeof(fl));
880 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
881 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
883 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
884 tot_len, IPPROTO_TCP,
887 fl.proto = IPPROTO_TCP;
888 fl.oif = inet6_iif(skb);
889 fl.fl_ip_dport = t1->dest;
890 fl.fl_ip_sport = t1->source;
892 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
893 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
894 ip6_xmit(NULL, buff, &fl, NULL, 0);
895 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
903 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
905 struct inet_timewait_sock *tw = inet_twsk(sk);
906 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
908 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
909 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
910 tcptw->tw_ts_recent);
915 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
917 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
921 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
923 struct request_sock *req, **prev;
924 const struct tcphdr *th = skb->h.th;
927 /* Find possible connection requests. */
928 req = inet6_csk_search_req(sk, &prev, th->source,
929 &skb->nh.ipv6h->saddr,
930 &skb->nh.ipv6h->daddr, inet6_iif(skb));
932 return tcp_check_req(sk, skb, req, prev);
934 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
935 th->source, &skb->nh.ipv6h->daddr,
936 ntohs(th->dest), inet6_iif(skb));
939 if (nsk->sk_state != TCP_TIME_WAIT) {
943 inet_twsk_put((struct inet_timewait_sock *)nsk);
947 #if 0 /*def CONFIG_SYN_COOKIES*/
948 if (!th->rst && !th->syn && th->ack)
949 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
954 /* FIXME: this is substantially similar to the ipv4 code.
955 * Can some kind of merge be done? -- erics
957 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
959 struct inet6_request_sock *treq;
960 struct ipv6_pinfo *np = inet6_sk(sk);
961 struct tcp_options_received tmp_opt;
962 struct tcp_sock *tp = tcp_sk(sk);
963 struct request_sock *req = NULL;
964 __u32 isn = TCP_SKB_CB(skb)->when;
966 if (skb->protocol == htons(ETH_P_IP))
967 return tcp_v4_conn_request(sk, skb);
969 if (!ipv6_unicast_destination(skb))
	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
975 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
977 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
981 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
984 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
988 tcp_clear_options(&tmp_opt);
989 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
990 tmp_opt.user_mss = tp->rx_opt.user_mss;
992 tcp_parse_options(skb, &tmp_opt, 0);
994 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
995 tcp_openreq_init(req, &tmp_opt, skb);
997 treq = inet6_rsk(req);
998 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
999 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1000 TCP_ECN_create_request(req, skb->h.th);
1001 treq->pktopts = NULL;
1002 if (ipv6_opt_accepted(sk, skb) ||
1003 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1004 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1005 atomic_inc(&skb->users);
1006 treq->pktopts = skb;
1008 treq->iif = sk->sk_bound_dev_if;
1010 /* So that link locals have meaning */
1011 if (!sk->sk_bound_dev_if &&
1012 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1013 treq->iif = inet6_iif(skb);
1016 isn = tcp_v6_init_sequence(sk,skb);
1018 tcp_rsk(req)->snt_isn = isn;
1020 if (tcp_v6_send_synack(sk, req, NULL))
1023 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1030 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1031 return 0; /* don't send reset */
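/*
 * Summary of the SYN handling above: v4-mapped SYNs are diverted to
 * tcp_v4_conn_request(), the listener's accept and SYN queues are
 * checked for overflow, a request_sock is allocated and filled with
 * the peer addresses, received options and an initial sequence number,
 * a SYN-ACK is sent, and the request is hashed with the
 * TCP_TIMEOUT_INIT retransmit timer armed.
 */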
1034 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1035 struct request_sock *req,
1036 struct dst_entry *dst)
1038 struct inet6_request_sock *treq = inet6_rsk(req);
1039 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1040 struct tcp6_sock *newtcp6sk;
1041 struct inet_sock *newinet;
1042 struct tcp_sock *newtp;
1044 struct ipv6_txoptions *opt;
1046 if (skb->protocol == htons(ETH_P_IP)) {
1051 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1056 newtcp6sk = (struct tcp6_sock *)newsk;
1057 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1059 newinet = inet_sk(newsk);
1060 newnp = inet6_sk(newsk);
1061 newtp = tcp_sk(newsk);
1063 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1065 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1068 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1071 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1073 newtp->af_specific = &ipv6_mapped;
1074 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1075 newnp->pktoptions = NULL;
1077 newnp->mcast_oif = inet6_iif(skb);
1078 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1081 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1082 * here, tcp_create_openreq_child now does this for us, see the comment in
1083 * that function for the gory details. -acme
1086 /* It is tricky place. Until this moment IPv4 tcp
1087 worked with IPv6 af_tcp.af_specific.
1090 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1097 if (sk_acceptq_is_full(sk))
1100 if (np->rxopt.bits.osrcrt == 2 &&
1101 opt == NULL && treq->pktopts) {
1102 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1104 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1108 struct in6_addr *final_p = NULL, final;
1111 memset(&fl, 0, sizeof(fl));
1112 fl.proto = IPPROTO_TCP;
1113 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1114 if (opt && opt->srcrt) {
1115 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1116 ipv6_addr_copy(&final, &fl.fl6_dst);
1117 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1120 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1121 fl.oif = sk->sk_bound_dev_if;
1122 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1123 fl.fl_ip_sport = inet_sk(sk)->sport;
1125 if (ip6_dst_lookup(sk, &dst, &fl))
1129 ipv6_addr_copy(&fl.fl6_dst, final_p);
1131 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1135 newsk = tcp_create_openreq_child(sk, req, skb);
1140 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1141 * count here, tcp_create_openreq_child now does this for us, see the
1142 * comment in that function for the gory details. -acme
1145 ip6_dst_store(newsk, dst, NULL);
1146 newsk->sk_route_caps = dst->dev->features &
1147 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1149 newtcp6sk = (struct tcp6_sock *)newsk;
1150 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1152 newtp = tcp_sk(newsk);
1153 newinet = inet_sk(newsk);
1154 newnp = inet6_sk(newsk);
1156 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1158 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1159 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1160 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1161 newsk->sk_bound_dev_if = treq->iif;
1163 /* Now IPv6 options...
1165 First: no IPv4 options.
1167 newinet->opt = NULL;
1170 newnp->rxopt.all = np->rxopt.all;
1172 /* Clone pktoptions received with SYN */
1173 newnp->pktoptions = NULL;
1174 if (treq->pktopts != NULL) {
1175 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1176 kfree_skb(treq->pktopts);
1177 treq->pktopts = NULL;
1178 if (newnp->pktoptions)
1179 skb_set_owner_r(newnp->pktoptions, newsk);
1182 newnp->mcast_oif = inet6_iif(skb);
1183 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem to newsk.
	 */
1192 newnp->opt = ipv6_dup_options(newsk, opt);
1194 sock_kfree_s(sk, opt, opt->tot_len);
1197 newtp->ext_header_len = 0;
1199 newtp->ext_header_len = newnp->opt->opt_nflen +
1200 newnp->opt->opt_flen;
1202 tcp_sync_mss(newsk, dst_mtu(dst));
1203 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1204 tcp_initialize_rcv_mss(newsk);
1206 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1208 __inet6_hash(&tcp_hashinfo, newsk);
1209 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1214 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1216 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1217 if (opt && opt != np->opt)
1218 sock_kfree_s(sk, opt, opt->tot_len);
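/*
 * tcp_v6_syn_recv_sock() creates the child socket once the final ACK
 * of the handshake arrives: v4-mapped children are built by
 * tcp_v4_syn_recv_sock() and then pointed at the ipv6_mapped table,
 * while native IPv6 children get their own route, a copy of the
 * listener's ipv6_pinfo, cloned pktoptions and duplicated txoptions,
 * and a freshly synced MSS before being hashed and attached to the
 * listener's port.
 */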
1223 static int tcp_v6_checksum_init(struct sk_buff *skb)
1225 if (skb->ip_summed == CHECKSUM_HW) {
1226 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1227 &skb->nh.ipv6h->daddr,skb->csum)) {
1228 skb->ip_summed = CHECKSUM_UNNECESSARY;
1233 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1234 &skb->nh.ipv6h->daddr, 0);
1236 if (skb->len <= 76) {
1237 return __skb_checksum_complete(skb);
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1250 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1252 struct ipv6_pinfo *np = inet6_sk(sk);
1253 struct tcp_sock *tp;
1254 struct sk_buff *opt_skb = NULL;
	/* Imagine: the socket is IPv6. An IPv4 packet arrives,
	   goes to the IPv4 receive handler and is backlogged.
	   From the backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and tcp_rcv_state_process
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
1264 if (skb->protocol == htons(ETH_P_IP))
1265 return tcp_v4_do_rcv(sk, skb);
1267 if (sk_filter(sk, skb, 0))
1271 * socket locking is here for SMP purposes as backlog rcv
1272 * is currently called with bh processing disabled.
	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   may make it not affect IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   the options received in the last packet enqueued
	   by TCP. Feel free to propose a better solution.
	 */
1289 opt_skb = skb_clone(skb, GFP_ATOMIC);
1291 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1292 TCP_CHECK_TIMER(sk);
1293 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1295 TCP_CHECK_TIMER(sk);
1297 goto ipv6_pktoptions;
1301 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1304 if (sk->sk_state == TCP_LISTEN) {
1305 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket.
		 */
1315 if (tcp_child_process(sk, nsk, skb))
1318 __kfree_skb(opt_skb);
1323 TCP_CHECK_TIMER(sk);
1324 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1326 TCP_CHECK_TIMER(sk);
1328 goto ipv6_pktoptions;
1332 tcp_v6_send_reset(skb);
1335 __kfree_skb(opt_skb);
1339 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1344 /* Do you ask, what is it?
1346 1. skb was enqueued by tcp.
1347 2. skb is added to tail of read queue, rather than out of order.
1348 3. socket is not in passive state.
1349 4. Finally, it really contains options, which user wants to receive.
1352 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1353 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1354 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1355 np->mcast_oif = inet6_iif(opt_skb);
1356 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1357 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1358 if (ipv6_opt_accepted(sk, opt_skb)) {
1359 skb_set_owner_r(opt_skb, sk);
1360 opt_skb = xchg(&np->pktoptions, opt_skb);
1362 __kfree_skb(opt_skb);
1363 opt_skb = xchg(&np->pktoptions, NULL);
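/*
 * The ipv6_pktoptions label above implements the IPV6_PKTOPTIONS
 * latch: a clone of the most recently accepted in-order segment is
 * kept in np->pktoptions (charged to the socket's receive buffer via
 * skb_set_owner_r) so that the IPV6_PKTOPTIONS machinery can later
 * report the options that arrived with it.
 */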
1372 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1374 struct sk_buff *skb = *pskb;
1379 if (skb->pkt_type != PACKET_HOST)
1383 * Count it even if it's bad.
1385 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1387 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1392 if (th->doff < sizeof(struct tcphdr)/4)
1394 if (!pskb_may_pull(skb, th->doff*4))
1397 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1398 tcp_v6_checksum_init(skb)))
1402 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1403 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1404 skb->len - th->doff*4);
1405 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1406 TCP_SKB_CB(skb)->when = 0;
1407 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1408 TCP_SKB_CB(skb)->sacked = 0;
1410 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1411 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1418 if (sk->sk_state == TCP_TIME_WAIT)
1421 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1422 goto discard_and_relse;
1424 if (sk_filter(sk, skb, 0))
1425 goto discard_and_relse;
1431 if (!sock_owned_by_user(sk)) {
1432 if (!tcp_prequeue(sk, skb))
1433 ret = tcp_v6_do_rcv(sk, skb);
1435 sk_add_backlog(sk, skb);
1439 return ret ? -1 : 0;
1442 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1445 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1447 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1449 tcp_v6_send_reset(skb);
1466 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1467 inet_twsk_put((struct inet_timewait_sock *)sk);
1471 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1472 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1473 inet_twsk_put((struct inet_timewait_sock *)sk);
1477 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1483 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1484 &skb->nh.ipv6h->daddr,
1485 ntohs(th->dest), inet6_iif(skb));
1487 struct inet_timewait_sock *tw = inet_twsk(sk);
1488 inet_twsk_deschedule(tw, &tcp_death_row);
1493 /* Fall through to ACK */
1496 tcp_v6_timewait_ack(sk, skb);
1500 case TCP_TW_SUCCESS:;
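/*
 * TIME-WAIT handling in tcp_v6_rcv(): tcp_timewait_state_process()
 * decides whether the segment is a valid SYN for a new incarnation (in
 * which case a listener is looked up and the TIME-WAIT socket is
 * purged), deserves a duplicate ACK, should trigger a reset, or can
 * simply be dropped.
 */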
1505 static int tcp_v6_rebuild_header(struct sock *sk)
1508 struct dst_entry *dst;
1509 struct ipv6_pinfo *np = inet6_sk(sk);
1511 dst = __sk_dst_check(sk, np->dst_cookie);
1514 struct inet_sock *inet = inet_sk(sk);
1515 struct in6_addr *final_p = NULL, final;
1518 memset(&fl, 0, sizeof(fl));
1519 fl.proto = IPPROTO_TCP;
1520 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1521 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1522 fl.fl6_flowlabel = np->flow_label;
1523 fl.oif = sk->sk_bound_dev_if;
1524 fl.fl_ip_dport = inet->dport;
1525 fl.fl_ip_sport = inet->sport;
1527 if (np->opt && np->opt->srcrt) {
1528 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1529 ipv6_addr_copy(&final, &fl.fl6_dst);
1530 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1534 err = ip6_dst_lookup(sk, &dst, &fl);
1536 sk->sk_route_caps = 0;
1540 ipv6_addr_copy(&fl.fl6_dst, final_p);
1542 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1543 sk->sk_err_soft = -err;
1547 ip6_dst_store(sk, dst, NULL);
1548 sk->sk_route_caps = dst->dev->features &
1549 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1555 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1557 struct sock *sk = skb->sk;
1558 struct inet_sock *inet = inet_sk(sk);
1559 struct ipv6_pinfo *np = inet6_sk(sk);
1561 struct dst_entry *dst;
1562 struct in6_addr *final_p = NULL, final;
1564 memset(&fl, 0, sizeof(fl));
1565 fl.proto = IPPROTO_TCP;
1566 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1567 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1568 fl.fl6_flowlabel = np->flow_label;
1569 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1570 fl.oif = sk->sk_bound_dev_if;
1571 fl.fl_ip_sport = inet->sport;
1572 fl.fl_ip_dport = inet->dport;
1574 if (np->opt && np->opt->srcrt) {
1575 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1576 ipv6_addr_copy(&final, &fl.fl6_dst);
1577 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1581 dst = __sk_dst_check(sk, np->dst_cookie);
1584 int err = ip6_dst_lookup(sk, &dst, &fl);
1587 sk->sk_err_soft = -err;
1592 ipv6_addr_copy(&fl.fl6_dst, final_p);
1594 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1595 sk->sk_route_caps = 0;
1599 ip6_dst_store(sk, dst, NULL);
1600 sk->sk_route_caps = dst->dev->features &
1601 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1604 skb->dst = dst_clone(dst);
1606 /* Restore final destination back after routing done */
1607 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1609 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1612 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1614 struct ipv6_pinfo *np = inet6_sk(sk);
1615 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1617 sin6->sin6_family = AF_INET6;
1618 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1619 sin6->sin6_port = inet_sk(sk)->dport;
1620 /* We do not store received flowlabel for TCP */
1621 sin6->sin6_flowinfo = 0;
1622 sin6->sin6_scope_id = 0;
1623 if (sk->sk_bound_dev_if &&
1624 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1625 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1628 static int tcp_v6_remember_stamp(struct sock *sk)
1630 /* Alas, not yet... */
1634 static struct tcp_func ipv6_specific = {
1635 .queue_xmit = tcp_v6_xmit,
1636 .send_check = tcp_v6_send_check,
1637 .rebuild_header = tcp_v6_rebuild_header,
1638 .conn_request = tcp_v6_conn_request,
1639 .syn_recv_sock = tcp_v6_syn_recv_sock,
1640 .remember_stamp = tcp_v6_remember_stamp,
1641 .net_header_len = sizeof(struct ipv6hdr),
1643 .setsockopt = ipv6_setsockopt,
1644 .getsockopt = ipv6_getsockopt,
1645 .addr2sockaddr = v6_addr2sockaddr,
1646 .sockaddr_len = sizeof(struct sockaddr_in6)
1650 * TCP over IPv4 via INET6 API
1653 static struct tcp_func ipv6_mapped = {
1654 .queue_xmit = ip_queue_xmit,
1655 .send_check = tcp_v4_send_check,
1656 .rebuild_header = inet_sk_rebuild_header,
1657 .conn_request = tcp_v6_conn_request,
1658 .syn_recv_sock = tcp_v6_syn_recv_sock,
1659 .remember_stamp = tcp_v4_remember_stamp,
1660 .net_header_len = sizeof(struct iphdr),
1662 .setsockopt = ipv6_setsockopt,
1663 .getsockopt = ipv6_getsockopt,
1664 .addr2sockaddr = v6_addr2sockaddr,
1665 .sockaddr_len = sizeof(struct sockaddr_in6)
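/*
 * ipv6_mapped lets a single AF_INET6 socket serve IPv4 peers through
 * v4-mapped addresses unless IPV6_V6ONLY is set.  A rough userspace
 * sketch of opting out of that behaviour (illustrative only, error
 * handling omitted):
 *
 *	int on = 1;
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
 *
 * With the option set, __ipv6_only_sock() makes tcp_v6_connect()
 * refuse v4-mapped destinations, and an IPv4 socket may then bind the
 * same port, as noted in the file header.
 */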
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
1673 static int tcp_v6_init_sock(struct sock *sk)
1675 struct inet_connection_sock *icsk = inet_csk(sk);
1676 struct tcp_sock *tp = tcp_sk(sk);
1678 skb_queue_head_init(&tp->out_of_order_queue);
1679 tcp_init_xmit_timers(sk);
1680 tcp_prequeue_init(tp);
1682 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1683 tp->mdev = TCP_TIMEOUT_INIT;
1685 /* So many TCP implementations out there (incorrectly) count the
1686 * initial SYN frame in their delayed-ACK and congestion control
1687 * algorithms that we must have the following bandaid to talk
1688 * efficiently to them. -DaveM
1692 /* See draft-stevens-tcpca-spec-01 for discussion of the
1693 * initialization of these values.
1695 tp->snd_ssthresh = 0x7fffffff;
1696 tp->snd_cwnd_clamp = ~0;
1697 tp->mss_cache = 536;
1699 tp->reordering = sysctl_tcp_reordering;
1701 sk->sk_state = TCP_CLOSE;
1703 tp->af_specific = &ipv6_specific;
1704 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1705 sk->sk_write_space = sk_stream_write_space;
1706 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1708 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1709 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1711 atomic_inc(&tcp_sockets_allocated);
1716 static int tcp_v6_destroy_sock(struct sock *sk)
1718 tcp_v4_destroy_sock(sk);
1719 return inet6_destroy_sock(sk);
1722 /* Proc filesystem TCPv6 sock list dumping. */
1723 static void get_openreq6(struct seq_file *seq,
1724 struct sock *sk, struct request_sock *req, int i, int uid)
1726 int ttd = req->expires - jiffies;
1727 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1728 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1734 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1735 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1737 src->s6_addr32[0], src->s6_addr32[1],
1738 src->s6_addr32[2], src->s6_addr32[3],
1739 ntohs(inet_sk(sk)->sport),
1740 dest->s6_addr32[0], dest->s6_addr32[1],
1741 dest->s6_addr32[2], dest->s6_addr32[3],
1742 ntohs(inet_rsk(req)->rmt_port),
1744 0,0, /* could print option size, but that is af dependent. */
1745 1, /* timers active (only the expire timer) */
1746 jiffies_to_clock_t(ttd),
1749 0, /* non standard timer */
1750 0, /* open_requests have no inode */
1754 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1756 struct in6_addr *dest, *src;
1759 unsigned long timer_expires;
1760 struct inet_sock *inet = inet_sk(sp);
1761 struct tcp_sock *tp = tcp_sk(sp);
1762 const struct inet_connection_sock *icsk = inet_csk(sp);
1763 struct ipv6_pinfo *np = inet6_sk(sp);
	dest  = &np->daddr;
	src   = &np->rcv_saddr;
1767 destp = ntohs(inet->dport);
1768 srcp = ntohs(inet->sport);
1770 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1772 timer_expires = icsk->icsk_timeout;
1773 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1775 timer_expires = icsk->icsk_timeout;
1776 } else if (timer_pending(&sp->sk_timer)) {
1778 timer_expires = sp->sk_timer.expires;
1781 timer_expires = jiffies;
1785 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1786 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1788 src->s6_addr32[0], src->s6_addr32[1],
1789 src->s6_addr32[2], src->s6_addr32[3], srcp,
1790 dest->s6_addr32[0], dest->s6_addr32[1],
1791 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1793 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1795 jiffies_to_clock_t(timer_expires - jiffies),
1796 icsk->icsk_retransmits,
1798 icsk->icsk_probes_out,
1800 atomic_read(&sp->sk_refcnt), sp,
1803 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1804 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1808 static void get_timewait6_sock(struct seq_file *seq,
1809 struct inet_timewait_sock *tw, int i)
1811 struct in6_addr *dest, *src;
1813 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
1814 int ttd = tw->tw_ttd - jiffies;
1819 dest = &tcp6tw->tw_v6_daddr;
1820 src = &tcp6tw->tw_v6_rcv_saddr;
1821 destp = ntohs(tw->tw_dport);
1822 srcp = ntohs(tw->tw_sport);
1825 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1826 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1828 src->s6_addr32[0], src->s6_addr32[1],
1829 src->s6_addr32[2], src->s6_addr32[3], srcp,
1830 dest->s6_addr32[0], dest->s6_addr32[1],
1831 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1832 tw->tw_substate, 0, 0,
1833 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1834 atomic_read(&tw->tw_refcnt), tw);
1837 #ifdef CONFIG_PROC_FS
1838 static int tcp6_seq_show(struct seq_file *seq, void *v)
1840 struct tcp_iter_state *st;
1842 if (v == SEQ_START_TOKEN) {
1847 "st tx_queue rx_queue tr tm->when retrnsmt"
1848 " uid timeout inode\n");
1853 switch (st->state) {
1854 case TCP_SEQ_STATE_LISTENING:
1855 case TCP_SEQ_STATE_ESTABLISHED:
1856 get_tcp6_sock(seq, v, st->num);
1858 case TCP_SEQ_STATE_OPENREQ:
1859 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1861 case TCP_SEQ_STATE_TIME_WAIT:
1862 get_timewait6_sock(seq, v, st->num);
1869 static struct file_operations tcp6_seq_fops;
1870 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1871 .owner = THIS_MODULE,
1874 .seq_show = tcp6_seq_show,
1875 .seq_fops = &tcp6_seq_fops,
1878 int __init tcp6_proc_init(void)
1880 return tcp_proc_register(&tcp6_seq_afinfo);
1883 void tcp6_proc_exit(void)
1885 tcp_proc_unregister(&tcp6_seq_afinfo);
1889 struct proto tcpv6_prot = {
1891 .owner = THIS_MODULE,
1893 .connect = tcp_v6_connect,
1894 .disconnect = tcp_disconnect,
1895 .accept = inet_csk_accept,
1897 .init = tcp_v6_init_sock,
1898 .destroy = tcp_v6_destroy_sock,
1899 .shutdown = tcp_shutdown,
1900 .setsockopt = tcp_setsockopt,
1901 .getsockopt = tcp_getsockopt,
1902 .sendmsg = tcp_sendmsg,
1903 .recvmsg = tcp_recvmsg,
1904 .backlog_rcv = tcp_v6_do_rcv,
1905 .hash = tcp_v6_hash,
1906 .unhash = tcp_unhash,
1907 .get_port = tcp_v6_get_port,
1908 .enter_memory_pressure = tcp_enter_memory_pressure,
1909 .sockets_allocated = &tcp_sockets_allocated,
1910 .memory_allocated = &tcp_memory_allocated,
1911 .memory_pressure = &tcp_memory_pressure,
1912 .orphan_count = &tcp_orphan_count,
1913 .sysctl_mem = sysctl_tcp_mem,
1914 .sysctl_wmem = sysctl_tcp_wmem,
1915 .sysctl_rmem = sysctl_tcp_rmem,
1916 .max_header = MAX_TCP_HEADER,
1917 .obj_size = sizeof(struct tcp6_sock),
1918 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1919 .rsk_prot = &tcp6_request_sock_ops,
1922 static struct inet6_protocol tcpv6_protocol = {
1923 .handler = tcp_v6_rcv,
1924 .err_handler = tcp_v6_err,
1925 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1928 static struct inet_protosw tcpv6_protosw = {
1929 .type = SOCK_STREAM,
1930 .protocol = IPPROTO_TCP,
1931 .prot = &tcpv6_prot,
1932 .ops = &inet6_stream_ops,
1935 .flags = INET_PROTOSW_PERMANENT,
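/*
 * tcpv6_init() below wires everything up at boot: tcpv6_protocol
 * registers tcp_v6_rcv()/tcp_v6_err() as the IPPROTO_TCP handlers for
 * the IPv6 stack, and tcpv6_protosw registers tcpv6_prot together with
 * inet6_stream_ops so that socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP)
 * resolves to this implementation.
 */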
1938 void __init tcpv6_init(void)
1940 /* register inet6 protocol */
1941 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
1942 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
1943 inet6_register_protosw(&tcpv6_protosw);