2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
// ip6_finish_output2 - last step of IPv6 output: hand the packet to the
// neighbour layer of the egress device.
// @net: network namespace, used for the netfilter hook and the MIB counters
// @sk:  sending socket; sk_mc_loop() is consulted for multicast loopback
// @skb: packet to transmit; its dst entry supplies the device and next hop
//
// NOTE(review): this listing has dropped lines (braces, some declarations
// and statements), so branch bodies are described only as far as visible.
62 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
64 struct dst_entry *dst = skb_dst(skb);
65 struct net_device *dev = dst->dev;
66 struct neighbour *neigh;
67 struct in6_addr *nexthop;
// Multicast destination: loopback delivery and multicast accounting first.
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
// Loop a copy back when the device is not a loopback device, the socket
// allows multicast loopback, and either a multicast routing socket exists
// (and the packet was not forwarded) or the destination group/source pair
// is accepted by ipv6_chk_mcast_addr() on this device.
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
74 ((mroute6_is_socket(net, skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
// The looped-back copy is a clone that is run through NF_INET_POST_ROUTING.
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 net, sk, newskb, NULL, newskb->dev,
// A multicast packet with hop limit 0 is counted as an output discard.
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(net, idev,
90 IPSTATS_MIB_OUTDISCARDS);
// Account packet and byte count as outgoing multicast.
96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
// Destination scope at or below node-local on a non-loopback device; the
// handling of this case is not visible in this listing.
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
// Lightweight tunnel state may take over transmission. A negative result or
// LWTUNNEL_XMIT_DONE ends processing here (the statement that follows the
// test is not visible; presumably it returns res - TODO confirm).
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
// Resolve the next hop and look up its neighbour entry without taking a
// reference, creating the entry when none exists yet.
114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
// Usable neighbour: call sock_confirm_neigh() and transmit via neigh_output().
118 if (!IS_ERR(neigh)) {
119 sock_confirm_neigh(skb, neigh);
120 ret = neigh_output(neigh, skb);
121 rcu_read_unlock_bh();
// Reached when the neighbour pointer is an error value: leave the RCU-bh
// section and count the packet under OUTNOROUTES.
124 rcu_read_unlock_bh();
126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
// ip6_finish_output - post-netfilter output step that decides between
// re-routing, fragmentation and direct transmission.
// @net: network namespace
// @sk:  sending socket, passed to the cgroup BPF egress program
// @skb: packet to send
//
// Returns the result of dst_output(), ip6_fragment() or
// ip6_finish_output2(), depending on the branch taken. The handling of a
// non-zero result from the BPF egress program is not visible in this listing.
131 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
// Run the cgroup egress BPF program attached for this socket.
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
141 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
// NOTE(review): the flag is set through IPCB() with IPSKB_REROUTED, while
// ip6_output() tests IP6SKB_REROUTED through IP6CB(). Confirm that both
// macros address the same flag bit in skb->cb, otherwise the re-routed
// marker is not seen by the IPv6 check.
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
// Fragment when the packet exceeds the dst MTU and is not GSO, when the dst
// requests fragmentation of everything (dst_allfrag), or when the packet is
// longer than a recorded frag_max_size.
149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
150 dst_allfrag(skb_dst(skb)) ||
151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
152 return ip6_fragment(net, sk, skb, ip6_finish_output2);
154 return ip6_finish_output2(net, sk, skb);
// ip6_output - IPv6 output entry point for a routed packet.
// @net: network namespace
// @sk:  sending socket
// @skb: packet whose dst entry identifies the egress device
//
// Marks the skb as ETH_P_IPV6, counts an output discard when IPv6 is
// disabled on the egress inet6_dev, and otherwise passes the packet to the
// NF_INET_POST_ROUTING hook. The okfn argument of the hook call is not
// visible in this listing.
157 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
159 struct net_device *dev = skb_dst(skb)->dev;
160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
162 skb->protocol = htons(ETH_P_IPV6);
// IPv6 administratively disabled on the egress device: count a discard.
165 if (unlikely(idev->cnf.disable_ipv6)) {
166 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
// The last argument is the condition handed to NF_HOOK_COND: it is true
// unless the packet is already flagged IP6SKB_REROUTED.
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 net, sk, skb, NULL, dev,
174 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
// ip6_autoflowlabel - decide whether automatic flow labels apply to a socket.
// @net: network namespace providing the default policy
// @np:  IPv6 socket state
//
// Returns the per-socket autoflowlabel value when it was explicitly set
// (autoflowlabel_set), otherwise the namespace default obtained from
// ip6_default_np_autolabel().
177 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
182 return np->autoflowlabel;
186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
187 * Note : socket lock is not held for SYNACK packets, but might be modified
188 * by calls to skb_set_owner_w() and ipv6_local_error(),
189 * which are using proper atomic operations or spinlocks.
// ip6_xmit - build the IPv6 header in front of a transport payload and send it.
// @sk:     sending socket (const; cast to non-const where helpers need it)
// @skb:    payload; must already carry a dst entry
// @fl6:    flow describing addresses, protocol and flow label
// @mark:   packet mark (its use is not visible in this listing)
// @opt:    optional extension headers to push in front of the payload
// @tclass: traffic class for the IPv6 header
//
// Packets that fit the MTU, have ignore_df set, or are GSO are passed to the
// NF_INET_LOCAL_OUT hook. Anything else is reported to the socket with
// EMSGSIZE and counted under FRAGFAILS. Return statements on the error
// paths are not visible here.
191 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
192 __u32 mark, struct ipv6_txoptions *opt, int tclass)
194 struct net *net = sock_net(sk);
195 const struct ipv6_pinfo *np = inet6_sk(sk);
196 struct in6_addr *first_hop = &fl6->daddr;
197 struct dst_entry *dst = skb_dst(skb);
199 u8 proto = fl6->flowi6_proto;
200 int seg_len = skb->len;
205 unsigned int head_room;
207 /* First: exthdrs may take lots of space (~8K for now)
208 MAX_HEADER is not enough.
// Required headroom: extension headers, the fixed IPv6 header and the
// link-layer reserve of the egress device. The extension header bytes are
// also added to the payload length.
210 head_room = opt->opt_nflen + opt->opt_flen;
211 seg_len += head_room;
212 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
// Not enough headroom: reallocate. A discard is counted in this branch
// (the condition guarding the counter is not visible).
214 if (skb_headroom(skb) < head_room) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
217 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
218 IPSTATS_MIB_OUTDISCARDS);
224 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
225 * it is safe to call in our context (socket lock not held)
227 skb_set_owner_w(skb, (struct sock *)sk);
// Push extension headers; proto and first_hop may be rewritten by these
// helpers since they are passed by address.
230 ipv6_push_frag_opts(skb, opt, &proto);
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
236 skb_push(skb, sizeof(struct ipv6hdr));
237 skb_reset_network_header(skb);
241 * Fill in the IPv6 header
// Hop limit comes from the socket or from the route; the selecting
// condition is not visible in this listing.
244 hlimit = np->hop_limit;
246 hlimit = ip6_dst_hoplimit(dst);
248 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
249 ip6_autoflowlabel(net, np), fl6));
251 hdr->payload_len = htons(seg_len);
252 hdr->nexthdr = proto;
253 hdr->hop_limit = hlimit;
255 hdr->saddr = fl6->saddr;
256 hdr->daddr = *first_hop;
258 skb->protocol = htons(ETH_P_IPV6);
259 skb->priority = sk->sk_priority;
// Send path: the packet fits, may ignore DF, or is GSO.
263 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
264 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
265 IPSTATS_MIB_OUT, skb->len);
267 /* if egress device is enslaved to an L3 master device pass the
268 * skb to its handler for processing
270 skb = l3mdev_ip6_out((struct sock *)sk, skb);
274 /* hooks should never assume socket lock is held.
275 * we promote our socket to non const
277 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
278 net, (struct sock *)sk, skb, NULL, dst->dev,
// Too-big path: report EMSGSIZE with the MTU to the socket and count the
// failure.
283 /* ipv6_local_error() does not require socket lock,
284 * we promote our socket to non const
286 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
288 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
292 EXPORT_SYMBOL(ip6_xmit);
// ip6_call_ra_chain - deliver a Router Alert packet to registered raw sockets.
// @skb: packet to deliver
// @sel: Router Alert value used to select sockets on the chain
//
// Walks ip6_ra_chain under the ip6_ra_lock read lock. A socket matches when
// its selector equals @sel and it is either unbound or bound to the device
// the packet arrived on. Earlier matches receive clones; the original skb is
// handed to the socket held in last. Return values are not visible in this
// listing; the caller in ip6_forward() tests the result for non-zero.
294 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
296 struct ip6_ra_chain *ra;
297 struct sock *last = NULL;
299 read_lock(&ip6_ra_lock);
300 for (ra = ip6_ra_chain; ra; ra = ra->next) {
301 struct sock *sk = ra->sk;
302 if (sk && ra->sel == sel &&
303 (!sk->sk_bound_dev_if ||
304 sk->sk_bound_dev_if == skb->dev->ifindex)) {
// A previously matched socket gets a clone so that the original skb stays
// available for the final delivery.
306 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
308 rawv6_rcv(last, skb2);
// Final delivery of the original skb, then unlock.
315 rawv6_rcv(last, skb);
316 read_unlock(&ip6_ra_lock);
// Unlock on the path where the original skb was not delivered above.
319 read_unlock(&ip6_ra_lock);
// ip6_forward_proxy_check - classify a packet addressed to a proxied
// neighbour before it is forwarded.
// @skb: packet being forwarded
//
// Skips extension headers to find the upper-layer protocol, inspects
// ICMPv6 neighbour discovery message types, and signals a link failure for
// link-local destinations. Return values are not visible in this listing;
// the caller in ip6_forward() treats a negative result as a discard.
323 static int ip6_forward_proxy_check(struct sk_buff *skb)
325 struct ipv6hdr *hdr = ipv6_hdr(skb);
326 u8 nexthdr = hdr->nexthdr;
// Locate the upper-layer header: walk extension headers when present,
// otherwise it starts right after the fixed IPv6 header.
330 if (ipv6_ext_hdr(nexthdr)) {
331 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
335 offset = sizeof(struct ipv6hdr);
337 if (nexthdr == IPPROTO_ICMPV6) {
338 struct icmp6hdr *icmp6;
// Make sure the first ICMPv6 byte (the type field read below) is in the
// linear area before dereferencing it.
340 if (!pskb_may_pull(skb, (skb_network_header(skb) +
341 offset + 1 - skb->data)))
344 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
// Router and neighbour solicitation/advertisement messages share one case.
346 switch (icmp6->icmp6_type) {
347 case NDISC_ROUTER_SOLICITATION:
348 case NDISC_ROUTER_ADVERTISEMENT:
349 case NDISC_NEIGHBOUR_SOLICITATION:
350 case NDISC_NEIGHBOUR_ADVERTISEMENT:
352 /* For reaction involving unicast neighbor discovery
353 * message destined to the proxied address, pass it to
363 * The proxying router can't forward traffic sent to a link-local
364 * address, so signal the sender and discard the packet. This
365 * behavior is clarified by the MIPv6 specification.
367 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
368 dst_link_failure(skb);
// ip6_forward_finish - continuation used after the forward hook; the visible
// statement hands the packet to dst_output(). The rest of the parameter list
// and any statements before the return are not visible in this listing.
375 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
378 return dst_output(net, sk, skb);
// ip6_dst_mtu_forward - determine the MTU to apply when forwarding over @dst.
// @dst: route entry of the outgoing path
//
// A locked RTAX_MTU metric is read with dst_metric_raw(). Otherwise the
// visible code consults the mtu6 setting of the egress inet6_dev. The
// fallback value, the checks on the metric and on idev, and the return
// statement are not visible in this listing.
381 unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
384 struct inet6_dev *idev;
386 if (dst_metric_locked(dst, RTAX_MTU)) {
387 mtu = dst_metric_raw(dst, RTAX_MTU);
394 idev = __in6_dev_get(dst->dev);
396 mtu = idev->cnf.mtu6;
401 EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
// ip6_pkt_too_big - tell whether a forwarded packet exceeds @mtu.
// @skb: packet being forwarded
// @mtu: MTU of the outgoing path
//
// Visible checks: a recorded frag_max_size larger than @mtu, and a GSO
// packet tested with skb_gso_validate_network_len(). The plain length
// check and the return statements are not visible in this listing.
403 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
408 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
409 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
415 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
// ip6_forward - forward a received IPv6 packet towards its next hop.
// @skb: received packet with a dst entry already attached
//
// Visible stages, in order: eligibility checks (forwarding enabled,
// PACKET_HOST, no owning socket, no LRO, xfrm forward policy), Router Alert
// delivery, hop limit check, proxy NDP handling, xfrm route lookup, ICMPv6
// redirect generation or source address validation, MTU check, header
// copy-on-write, and finally the NF_INET_FORWARD hook. The goto/return
// statements and labels that connect these stages are not visible in this
// listing.
421 int ip6_forward(struct sk_buff *skb)
423 struct dst_entry *dst = skb_dst(skb);
424 struct ipv6hdr *hdr = ipv6_hdr(skb);
425 struct inet6_skb_parm *opt = IP6CB(skb);
426 struct net *net = dev_net(dst->dev);
// Forwarding must be enabled for the namespace.
429 if (net->ipv6.devconf_all->forwarding == 0)
// Only packets with pkt_type PACKET_HOST are candidates.
432 if (skb->pkt_type != PACKET_HOST)
// A packet that already belongs to a socket is not expected here.
435 if (unlikely(skb->sk))
438 if (skb_warn_if_lro(skb))
// IPsec forward policy check; a failure is counted under INDISCARDS.
441 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
442 __IP6_INC_STATS(net, ip6_dst_idev(dst),
443 IPSTATS_MIB_INDISCARDS);
447 skb_forward_csum(skb);
450 * We DO NOT make any processing on
451 * RA packets, pushing them to user level AS IS
452 * without ane WARRANTY that application will be able
453 * to interpret them. The reason is that we
454 * cannot make anything clever here.
456 * We are not end-node, so that if packet contains
457 * AH/ESP, we cannot make anything.
458 * Defragmentation also would be mistake, RA packets
459 * cannot be fragmented, because there is no warranty
460 * that different fragments will go along one path. --ANK
// Router Alert option present: offer the packet to the RA socket chain,
// using the alert value converted to host byte order as selector.
462 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
463 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
468 * check and decrement ttl
// Hop limit exhausted: send ICMPv6 Time Exceeded and count a header error.
470 if (hdr->hop_limit <= 1) {
471 /* Force OUTPUT device used as source address */
473 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
474 __IP6_INC_STATS(net, ip6_dst_idev(dst),
475 IPSTATS_MIB_INHDRERRORS);
// Proxy NDP: when enabled and the destination has a proxy neighbour entry,
// the proxy check result selects local input or discard (a negative result
// is counted under INDISCARDS).
481 /* XXX: idev->cnf.proxy_ndp? */
482 if (net->ipv6.devconf_all->proxy_ndp &&
483 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
484 int proxied = ip6_forward_proxy_check(skb);
486 return ip6_input(skb);
487 else if (proxied < 0) {
488 __IP6_INC_STATS(net, ip6_dst_idev(dst),
489 IPSTATS_MIB_INDISCARDS);
// IPsec route lookup for the forwarded packet; failure counts as a discard.
494 if (!xfrm6_route_forward(skb)) {
495 __IP6_INC_STATS(net, ip6_dst_idev(dst),
496 IPSTATS_MIB_INDISCARDS);
501 /* IPv6 specs say nothing about it, but it is clear that we cannot
502 send redirects to source routed frames.
503 We don't send redirects to frames decapsulated from IPsec.
// Redirect candidate: same input and output device, no source routing, and
// no IPsec security path on the skb.
505 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
506 struct in6_addr *target = NULL;
507 struct inet_peer *peer;
511 * incoming and outgoing devices are the same
// Redirect target: the gateway of a gateway route, otherwise the
// destination itself.
515 rt = (struct rt6_info *) dst;
516 if (rt->rt6i_flags & RTF_GATEWAY)
517 target = &rt->rt6i_gateway;
519 target = &hdr->daddr;
521 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
523 /* Limit redirects both by destination (here)
524 and by source (inside ndisc_send_redirect)
// Rate limit check with a timeout argument of 1*HZ on the per-peer state.
526 if (inet_peer_xrlim_allow(peer, 1*HZ))
527 ndisc_send_redirect(skb, target);
// Source address validation: unspecified, multicast and loopback sources
// are rejected; link-local sources trigger a Destination Unreachable
// (not a neighbour) message.
531 int addrtype = ipv6_addr_type(&hdr->saddr);
533 /* This check is security critical. */
534 if (addrtype == IPV6_ADDR_ANY ||
535 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
537 if (addrtype & IPV6_ADDR_LINKLOCAL) {
538 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
539 ICMPV6_NOT_NEIGHBOUR, 0);
// Forwarding MTU. Values below IPV6_MIN_MTU are handled by a statement
// that is not visible (presumably raised to IPV6_MIN_MTU - TODO confirm).
544 mtu = ip6_dst_mtu_forward(dst);
545 if (mtu < IPV6_MIN_MTU)
// Oversized packet: send Packet Too Big with the MTU and count both
// INTOOBIGERRORS and FRAGFAILS.
548 if (ip6_pkt_too_big(skb, mtu)) {
549 /* Again, force OUTPUT device used as source address */
551 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
552 __IP6_INC_STATS(net, ip6_dst_idev(dst),
553 IPSTATS_MIB_INTOOBIGERRORS);
554 __IP6_INC_STATS(net, ip6_dst_idev(dst),
555 IPSTATS_MIB_FRAGFAILS);
// Ensure a private, writable header with room for the link-layer header of
// the output device; failure is counted under OUTDISCARDS.
560 if (skb_cow(skb, dst->dev->hard_header_len)) {
561 __IP6_INC_STATS(net, ip6_dst_idev(dst),
562 IPSTATS_MIB_OUTDISCARDS);
568 /* Mangling hops number delayed to point after skb COW */
// Account the forwarded datagram and its octets, then run the FORWARD hook
// with the input and output devices.
572 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
573 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
574 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
575 net, NULL, skb, skb->dev, dst->dev,
// Error path that counts an address error (its label is not visible).
579 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
// ip6_copy_metadata - copy per-packet metadata from one skb to another.
// @to:   skb receiving the metadata (a new fragment in ip6_fragment())
// @from: skb the metadata is taken from
//
// Visible fields copied: pkt_type, priority, protocol, a cloned reference
// to the dst entry, mark, tc_index (under CONFIG_NET_SCHED) and the security
// mark. Other statements of the original function may be missing from this
// listing.
585 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
587 to->pkt_type = from->pkt_type;
588 to->priority = from->priority;
589 to->protocol = from->protocol;
// dst_clone() is applied to the source dst before attaching it to the target.
591 skb_dst_set(to, dst_clone(skb_dst(from)));
593 to->mark = from->mark;
595 #ifdef CONFIG_NET_SCHED
596 to->tc_index = from->tc_index;
599 skb_copy_secmark(to, from);
// ip6_fragment - split an IPv6 packet into fragments and send each one
// through @output.
// @net:    network namespace
// @sk:     socket passed through to @output
// @skb:    packet to fragment
// @output: transmit callback invoked once per fragment
//
// Two strategies are visible. When the skb carries a frag_list whose
// members already have suitable sizes, alignment and headroom, the list
// members are turned into fragments in place by pushing a copy of the
// unfragmentable headers plus a fragment header onto each. Otherwise a new
// skb is allocated per fragment and headers and data are copied into it.
// FRAGCREATES is counted per fragment, FRAGOKS on success and FRAGFAILS on
// failure; the too-big path sends ICMPv6 Packet Too Big. Labels, loops and
// several statements are not visible in this listing.
602 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
603 int (*output)(struct net *, struct sock *, struct sk_buff *))
605 struct sk_buff *frag;
606 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
// Socket IPv6 state is only used when the skb has a socket and the device
// recursion level is zero.
607 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
608 inet6_sk(skb->sk) : NULL;
609 struct ipv6hdr *tmp_hdr;
611 unsigned int mtu, hlen, left, len;
614 int ptr, offset = 0, err = 0;
615 u8 *prevhdr, nexthdr = 0;
// Locate where the fragment header has to be inserted; prevhdr receives the
// address of the next-header byte that will point at it.
617 err = ip6_find_1stfragopt(skb, &prevhdr);
623 mtu = ip6_skb_dst_mtu(skb);
625 /* We must not fragment if the socket is set to force MTU discovery
626 * or if the skb it not generated by a local socket.
628 if (unlikely(!skb->ignore_df && skb->len > mtu))
// A recorded maximum fragment size caps the MTU used for refragmentation.
631 if (IP6CB(skb)->frag_max_size) {
632 if (IP6CB(skb)->frag_max_size > mtu)
635 /* don't send fragments larger than what we received */
636 mtu = IP6CB(skb)->frag_max_size;
637 if (mtu < IPV6_MIN_MTU)
// A smaller per-socket frag_size is considered here (its effect is on a
// line that is not visible).
641 if (np && np->frag_size < mtu) {
// Each fragment must carry the headers, a fragment header and at least
// 8 bytes of data. After this point mtu is the data space per fragment.
645 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
647 mtu -= hlen + sizeof(struct frag_hdr);
// One identification value is shared by all fragments of this packet.
649 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
650 &ipv6_hdr(skb)->saddr);
// A pending partial checksum is resolved in software before splitting.
652 if (skb->ip_summed == CHECKSUM_PARTIAL &&
653 (err = skb_checksum_help(skb)))
656 hroom = LL_RESERVED_SPACE(rt->dst.dev);
// Fast path candidate: the packet already consists of a frag_list.
657 if (skb_has_frag_list(skb)) {
658 unsigned int first_len = skb_pagelen(skb);
659 struct sk_buff *frag2;
// The head must fit the per-fragment space, be 8-byte aligned in its data
// part and have room for the fragment header.
661 if (first_len - hlen > mtu ||
662 ((first_len - hlen) & 7) ||
664 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
667 skb_walk_frags(skb, frag) {
668 /* Correct geometry. */
669 if (frag->len > mtu ||
670 ((frag->len & 7) && frag->next) ||
671 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
672 goto slow_path_clean;
674 /* Partially cloned skb? */
675 if (skb_shared(frag))
676 goto slow_path_clean;
// Each list member is given the write-memory destructor and its truesize is
// removed from the head skb.
681 frag->destructor = sock_wfree;
683 skb->truesize -= frag->truesize;
// Mark the header chain as continuing with a fragment header and keep a
// copy of the unfragmentable part for reuse on every fragment.
690 *prevhdr = NEXTHDR_FRAGMENT;
691 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
696 frag = skb_shinfo(skb)->frag_list;
697 skb_frag_list_init(skb);
// Open a gap for the fragment header between the unfragmentable headers
// and the payload of the first fragment, then restore the headers.
699 __skb_pull(skb, hlen);
700 fh = __skb_push(skb, sizeof(struct frag_hdr));
701 __skb_push(skb, hlen);
702 skb_reset_network_header(skb);
703 memcpy(skb_network_header(skb), tmp_hdr, hlen);
// First fragment: More Fragments flag set, offset zero.
705 fh->nexthdr = nexthdr;
707 fh->frag_off = htons(IP6_MF);
708 fh->identification = frag_id;
710 first_len = skb_pagelen(skb);
711 skb->data_len = first_len - skb_headlen(skb);
712 skb->len = first_len;
713 ipv6_hdr(skb)->payload_len = htons(first_len -
714 sizeof(struct ipv6hdr));
717 /* Prepare header of the next frame,
718 * before previous one went down. */
720 frag->ip_summed = CHECKSUM_NONE;
721 skb_reset_transport_header(frag);
722 fh = __skb_push(frag, sizeof(struct frag_hdr));
723 __skb_push(frag, hlen);
724 skb_reset_network_header(frag);
725 memcpy(skb_network_header(frag), tmp_hdr,
// The running offset advances by the data bytes of the fragment just built.
727 offset += skb->len - hlen - sizeof(struct frag_hdr);
728 fh->nexthdr = nexthdr;
730 fh->frag_off = htons(offset);
732 fh->frag_off |= htons(IP6_MF);
733 fh->identification = frag_id;
734 ipv6_hdr(frag)->payload_len =
736 sizeof(struct ipv6hdr));
737 ip6_copy_metadata(frag, skb);
740 err = output(net, sk, skb);
742 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
743 IPSTATS_MIB_FRAGCREATES);
756 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
757 IPSTATS_MIB_FRAGOKS);
// Fast path failure: free the fragments that were not sent and count it.
761 kfree_skb_list(frag);
763 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
764 IPSTATS_MIB_FRAGFAILS);
// Undo the ownership changes made while checking the frag_list before
// falling back to the slow path.
768 skb_walk_frags(skb, frag2) {
772 frag2->destructor = NULL;
773 skb->truesize += frag2->truesize;
// Slow path: copy the packet piecewise into newly allocated skbs.
778 left = skb->len - hlen; /* Space per frame */
779 ptr = hlen; /* Where to start from */
782 * Fragment the datagram.
785 troom = rt->dst.dev->needed_tailroom;
788 * Keep copying data until we run out.
791 u8 *fragnexthdr_offset;
794 /* IF: it doesn't fit, use 'mtu' - the data space left */
797 /* IF: we are not sending up to and including the packet end
798 then align the next start on an eight byte boundary */
803 /* Allocate buffer */
804 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
805 hroom + troom, GFP_ATOMIC);
812 * Set up data on packet
815 ip6_copy_metadata(frag, skb);
816 skb_reserve(frag, hroom);
817 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
818 skb_reset_network_header(frag);
819 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
820 frag->transport_header = (frag->network_header + hlen +
821 sizeof(struct frag_hdr));
824 * Charge the memory for the fragment to any owner
828 skb_set_owner_w(frag, skb->sk);
831 * Copy the packet header into the new buffer.
833 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
// Patch the next-header byte in the copied headers, at the same offset as
// prevhdr has in the original, so that it announces the fragment header.
835 fragnexthdr_offset = skb_network_header(frag);
836 fragnexthdr_offset += prevhdr - skb_network_header(skb);
837 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
840 * Build fragment header.
842 fh->nexthdr = nexthdr;
844 fh->identification = frag_id;
847 * Copy a block of the IP datagram.
849 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
853 fh->frag_off = htons(offset);
855 fh->frag_off |= htons(IP6_MF);
856 ipv6_hdr(frag)->payload_len = htons(frag->len -
857 sizeof(struct ipv6hdr));
863 * Put this fragment into the sending queue.
865 err = output(net, sk, frag);
869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
870 IPSTATS_MIB_FRAGCREATES);
872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
873 IPSTATS_MIB_FRAGOKS);
// Too-big path: when the route demands fragmentation of everything, GSO
// capabilities are removed from the socket; the sender is told the MTU.
878 if (skb->sk && dst_allfrag(skb_dst(skb)))
879 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
881 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
885 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
886 IPSTATS_MIB_FRAGFAILS);
// ip6_rt_check - test whether a cached route key fails to cover a flow address.
// @rt_key:     destination or source key of the cached route
// @fl_addr:    address taken from the flow
// @addr_cache: last address used with the route, may be NULL
//
// Returns non-zero when both of these hold: the key is not a /128 host
// entry equal to @fl_addr, and @addr_cache is absent or differs from
// @fl_addr. Returns zero when either the host key or the cached address
// matches.
891 static inline int ip6_rt_check(const struct rt6key *rt_key,
892 const struct in6_addr *fl_addr,
893 const struct in6_addr *addr_cache)
895 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
896 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
// ip6_sk_dst_check - validate a socket cached dst entry against a flow.
// @sk:  socket owning the cache
// @dst: cached route entry to validate
// @fl6: flow about to be sent
//
// Visible checks: the dst must belong to AF_INET6, the route keys must
// cover the flow addresses (ip6_rt_check), and a requested output interface
// must match the dst device unless FLOWI_FLAG_SKIP_NH_OIF is set. What
// happens on a mismatch and the return statements are not visible in this
// listing (presumably the dst is released and NULL returned - TODO confirm).
899 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
900 struct dst_entry *dst,
901 const struct flowi6 *fl6)
903 struct ipv6_pinfo *np = inet6_sk(sk);
909 if (dst->ops->family != AF_INET6) {
914 rt = (struct rt6_info *)dst;
915 /* Yes, checking route validity in not connected
916 * case is not very simple. Take into account,
917 * that we do not support routing by source, TOS,
918 * and MSG_DONTROUTE --ANK (980726)
920 * 1. ip6_rt_check(): If route was host route,
921 * check that cached destination is current.
922 * If it is network route, we still may
923 * check its validity using saved pointer
924 * to the last used address: daddr_cache.
925 * We do not want to save whole address now,
926 * (because main consumer of this service
927 * is tcp, which has not this problem),
928 * so that the last trick works only on connected
930 * 2. oif also should be the same.
// The source key is only checked when CONFIG_IPV6_SUBTREES is enabled.
932 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
933 #ifdef CONFIG_IPV6_SUBTREES
934 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
936 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
937 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
// ip6_dst_lookup_tail - common route lookup used by the ip6_dst_lookup helpers.
// @net: network namespace
// @sk:  socket supplying source address preferences, may be NULL
// @dst: in/out pointer to the route entry
// @fl6: flow to look up; its source address may be filled in here
//
// Visible steps: when the flow has no source address, a route lookup is
// made first and ip6_route_get_saddr() picks the source address. A lookup
// with flags follows. Under CONFIG_IPV6_OPTIMISTIC_DAD a route whose next
// hop neighbour is not NUD_VALID may be replaced by a lookup with a zeroed
// destination when the source address is optimistic. A v4-mapped source
// with a destination that is neither v4-mapped nor unspecified yields
// -EAFNOSUPPORT. -ENETUNREACH is counted under OUTNOROUTES. Labels, error
// tests and return statements are not visible in this listing.
946 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
947 struct dst_entry **dst, struct flowi6 *fl6)
949 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
956 /* The correct way to handle this would be to do
957 * ip6_route_get_saddr, and then ip6_route_output; however,
958 * the route-specific preferred source forces the
959 * ip6_route_output call _before_ ip6_route_get_saddr.
961 * In source specific routing (no src=any default route),
962 * ip6_route_output will fail given src=any saddr, though, so
963 * that's why we try it again later.
// Source address selection is needed and no failed dst was passed in.
965 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
967 bool had_dst = *dst != NULL;
970 *dst = ip6_route_output(net, sk, fl6);
// A route in error state is not used as a hint for source selection.
971 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
972 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
973 sk ? inet6_sk(sk)->srcprefs : 0,
976 goto out_err_release;
978 /* If we had an erroneous initial result, pretend it
979 * never existed and let the SA-enabled version take
982 if (!had_dst && (*dst)->error) {
988 flags |= RT6_LOOKUP_F_IFACE;
992 *dst = ip6_route_output_flags(net, sk, fl6, flags);
996 goto out_err_release;
998 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1000 * Here if the dst entry we've looked up
1001 * has a neighbour entry that is in the INCOMPLETE
1002 * state and the src address from the flow is
1003 * marked as OPTIMISTIC, we release the found
1004 * dst entry and replace it instead with the
1005 * dst entry of the nexthop router
1007 rt = (struct rt6_info *) *dst;
// err becomes -EINVAL when a neighbour entry exists but is not NUD_VALID.
1009 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1010 rt6_nexthop(rt, &fl6->daddr));
1011 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1012 rcu_read_unlock_bh();
1015 struct inet6_ifaddr *ifp;
1016 struct flowi6 fl_gw6;
1019 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1022 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1028 * We need to get the dst entry for the
1029 * default router instead
// Repeat the lookup with a copy of the flow whose destination is zeroed.
1032 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1033 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1034 *dst = ip6_route_output(net, sk, &fl_gw6);
1035 err = (*dst)->error;
1037 goto out_err_release;
// Address family mix: v4-mapped source with a native IPv6 destination.
1041 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1042 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1043 err = -EAFNOSUPPORT;
1044 goto out_err_release;
1053 if (err == -ENETUNREACH)
1054 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1059 * ip6_dst_lookup - perform route lookup on flow
1060 * @sk: socket which provides route info
1061 * @dst: pointer to dst_entry * for result
1062 * @fl6: flow to lookup
1064 * This function performs a route lookup on the given flow.
1066 * It returns zero on success, or a standard errno code on error.
// ip6_dst_lookup - exported wrapper around ip6_dst_lookup_tail().
// The visible statement forwards net, sk, dst and fl6 unchanged and returns
// the result. The final parameter line and any statement before the return
// are not visible in this listing.
1068 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1072 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1074 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1077 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1078 * @sk: socket which provides route info
1079 * @fl6: flow to lookup
1080 * @final_dst: final destination address for ipsec lookup
1082 * This function performs a route lookup on the given flow.
1084 * It returns a valid dst pointer on success, or an ERR_PTR()-encoded error.
// ip6_dst_lookup_flow - route lookup followed by an IPsec (xfrm) lookup.
// @sk:        socket; its namespace is used for both lookups
// @fl6:       flow to look up; daddr is overwritten with @final_dst
// @final_dst: final destination used for the xfrm lookup
//
// Returns ERR_PTR(err) when the route lookup fails, otherwise the result of
// xfrm_lookup_route(). The conditions guarding the error return and the
// daddr assignment are not visible in this listing (presumably the
// assignment only happens when @final_dst is non-NULL - TODO confirm).
1087 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1088 const struct in6_addr *final_dst)
1090 struct dst_entry *dst = NULL;
1093 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1095 return ERR_PTR(err);
1097 fl6->daddr = *final_dst;
1099 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1101 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1104 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1105 * @sk: socket which provides the dst cache and route info
1106 * @fl6: flow to lookup
1107 * @final_dst: final destination address for ipsec lookup
1108 * @connected: whether @sk is connected or not
1110 * This function performs a route lookup on the given flow with the
1111 * possibility of using the cached route in the socket if it is valid.
1112 * It will take the socket dst lock when operating on the dst cache.
1113 * As a result, this function can only be used in process context.
1115 * In addition, for a connected socket, cache the dst in the socket
1116 * if the current cache is not valid.
1118 * It returns a valid dst pointer on success, or an ERR_PTR()-encoded error.
// ip6_sk_dst_lookup_flow - route lookup that first tries the socket dst cache.
// @sk:        socket providing the cached dst and its cookie
// @fl6:       flow to look up
// @final_dst: final destination for the IPsec lookup
// (a further parameter, tested below as connected, is declared on a line
// that is not visible in this listing)
//
// The cached dst is fetched with sk_dst_check() and validated with
// ip6_sk_dst_check(). A new lookup goes through ip6_dst_lookup_flow(), and
// for a connected socket a successful result is stored back with an extra
// reference taken by dst_clone(). The test that skips the new lookup when
// the cache is valid and the return statements are not visible here.
1121 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1122 const struct in6_addr *final_dst,
1125 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1127 dst = ip6_sk_dst_check(sk, dst, fl6);
1131 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1132 if (connected && !IS_ERR(dst))
1133 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1137 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
// ip6_opt_dup - duplicate an IPv6 options extension header.
// @src: header to copy, may be NULL
// (the allocation flags parameter, used below as gfp, is declared on a line
// that is not visible in this listing)
//
// Returns NULL for a NULL @src, otherwise the kmemdup() result, which may
// itself be NULL on allocation failure. The copy covers
// (hdrlen + 1) * 8 bytes, i.e. hdrlen counts 8-byte units beyond the first.
1139 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1142 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
// ip6_rthdr_dup - duplicate an IPv6 routing extension header.
// @src: header to copy, may be NULL
// (the allocation flags parameter, used below as gfp, is declared on a line
// that is not visible in this listing)
//
// Returns NULL for a NULL @src, otherwise the kmemdup() result, which may
// itself be NULL on allocation failure. The copy covers
// (hdrlen + 1) * 8 bytes, i.e. hdrlen counts 8-byte units beyond the first.
1145 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1148 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
// ip6_append_data_mtu - recompute the MTU and maximum fragment length while
// appending data.
// @mtu:           in/out MTU value
// (a maxfraglen output parameter, written below, is declared on a line that
// is not visible in this listing)
// @fragheaderlen: length of the headers repeated in every fragment
// @skb:           current tail skb
// @rt:            route in use
// @orig_mtu:      MTU before any header reservation
//
// Only applies when the route is not an xfrm tunnel. For the first fragment
// the dst header_len is subtracted from @orig_mtu; the branch for later
// fragments is not visible. maxfraglen is the data space rounded down to a
// multiple of 8, plus the fragment-invariant headers, minus the size of a
// fragment header.
1151 static void ip6_append_data_mtu(unsigned int *mtu,
1153 unsigned int fragheaderlen,
1154 struct sk_buff *skb,
1155 struct rt6_info *rt,
1156 unsigned int orig_mtu)
1158 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1160 /* first fragment, reserve header_len */
1161 *mtu = orig_mtu - rt->dst.header_len;
1165 * this fragment is not first, the headers
1166 * space is regarded as data space.
1170 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1171 + fragheaderlen - sizeof(struct frag_hdr);
// ip6_setup_cork - initialise corking state for a sequence of appended data.
// @sk:      sending socket
// @cork:    generic cork state (dst, flow, fragment size, flags, length)
// @v6_cork: IPv6 specific cork state (options, hop limit, traffic class)
// @ipc6:    per-call IPv6 control data
// @rt:      route to cork on
// @fl6:     flow to store in the cork
//
// Deep-copies the transmit options into the cork, stores route and flow,
// and derives the fragment size from the device or path MTU, capped by the
// socket frag_size. The error returns after failed allocations, after the
// WARN_ON and for an MTU below IPV6_MIN_MTU are not visible in this
// listing, nor is the final return.
1175 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1176 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1177 struct rt6_info *rt, struct flowi6 *fl6)
1179 struct ipv6_pinfo *np = inet6_sk(sk);
1181 struct ipv6_txoptions *opt = ipc6->opt;
// The cork is expected to hold no options yet.
1187 if (WARN_ON(v6_cork->opt))
1190 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1191 if (unlikely(!v6_cork->opt))
1194 v6_cork->opt->tot_len = sizeof(*opt);
1195 v6_cork->opt->opt_flen = opt->opt_flen;
1196 v6_cork->opt->opt_nflen = opt->opt_nflen;
// Each extension header is duplicated; a NULL copy of a non-NULL source is
// treated as an allocation failure.
1198 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1200 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1203 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1205 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1208 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1210 if (opt->hopopt && !v6_cork->opt->hopopt)
1213 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1215 if (opt->srcrt && !v6_cork->opt->srcrt)
1218 /* need source address above miyazawa*/
1221 cork->base.dst = &rt->dst;
1222 cork->fl.u.ip6 = *fl6;
1223 v6_cork->hop_limit = ipc6->hlimit;
1224 v6_cork->tclass = ipc6->tclass;
// MTU source: the device MTU when the socket pmtudisc setting is at least
// IPV6_PMTUDISC_PROBE, otherwise the dst MTU. For xfrm tunnel routes the
// dst itself is used; the other assignment uses the underlying path dst.
1225 if (rt->dst.flags & DST_XFRM_TUNNEL)
1226 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1227 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1229 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1230 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
// A smaller socket frag_size caps the MTU (an inner condition on a line
// that is not visible may further restrict this).
1231 if (np->frag_size < mtu) {
1233 mtu = np->frag_size;
1235 if (mtu < IPV6_MIN_MTU)
1237 cork->base.fragsize = mtu;
// Remember when the path requires every packet to be fragmented.
1238 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1239 cork->base.flags |= IPCORK_ALLFRAG;
1240 cork->base.length = 0;
1245 static int __ip6_append_data(struct sock *sk,
1247 struct sk_buff_head *queue,
1248 struct inet_cork *cork,
1249 struct inet6_cork *v6_cork,
1250 struct page_frag *pfrag,
1251 int getfrag(void *from, char *to, int offset,
1252 int len, int odd, struct sk_buff *skb),
1253 void *from, int length, int transhdrlen,
1254 unsigned int flags, struct ipcm6_cookie *ipc6,
1255 const struct sockcm_cookie *sockc)
1257 struct sk_buff *skb, *skb_prev = NULL;
1258 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1260 int dst_exthdrlen = 0;
1267 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1268 struct ipv6_txoptions *opt = v6_cork->opt;
1269 int csummode = CHECKSUM_NONE;
1270 unsigned int maxnonfragsize, headersize;
1271 unsigned int wmem_alloc_delta = 0;
1273 skb = skb_peek_tail(queue);
1275 exthdrlen = opt ? opt->opt_flen : 0;
1276 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1279 mtu = cork->fragsize;
1282 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1284 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1285 (opt ? opt->opt_nflen : 0);
1286 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1287 sizeof(struct frag_hdr);
1289 headersize = sizeof(struct ipv6hdr) +
1290 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1291 (dst_allfrag(&rt->dst) ?
1292 sizeof(struct frag_hdr) : 0) +
1293 rt->rt6i_nfheader_len;
1295 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1296 * the first fragment
1298 if (headersize + transhdrlen > mtu)
1301 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1302 (sk->sk_protocol == IPPROTO_UDP ||
1303 sk->sk_protocol == IPPROTO_RAW)) {
1304 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1305 sizeof(struct ipv6hdr));
1309 if (ip6_sk_ignore_df(sk))
1310 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1312 maxnonfragsize = mtu;
1314 if (cork->length + length > maxnonfragsize - headersize) {
1316 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1317 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1321 /* CHECKSUM_PARTIAL only with no extension headers and when
1322 * we are not going to fragment
1324 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1325 headersize == sizeof(struct ipv6hdr) &&
1326 length <= mtu - headersize &&
1327 !(flags & MSG_MORE) &&
1328 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1329 csummode = CHECKSUM_PARTIAL;
1331 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1332 sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1333 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1334 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1335 tskey = sk->sk_tskey++;
1339 * Let's try using as much space as possible.
1340 * Use MTU if total length of the message fits into the MTU.
1341 * Otherwise, we need to reserve fragment header and
1342 * fragment alignment (= 8-15 octets, in total).
1344 * Note that we may need to "move" the data from the tail
1345 * of the buffer to the new fragment when we split
1348 * FIXME: It may be fragmented into multiple chunks
1349 * at once if non-fragmentable extension headers
1354 cork->length += length;
1358 while (length > 0) {
1359 /* Check if the remaining data fits into current packet. */
1360 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1362 copy = maxfraglen - skb->len;
1366 unsigned int datalen;
1367 unsigned int fraglen;
1368 unsigned int fraggap;
1369 unsigned int alloclen;
1371 /* There's no room in the current skb */
1373 fraggap = skb->len - maxfraglen;
1376 /* update mtu and maxfraglen if necessary */
1377 if (!skb || !skb_prev)
1378 ip6_append_data_mtu(&mtu, &maxfraglen,
1379 fragheaderlen, skb, rt,
1385 * If remaining data exceeds the mtu,
1386 * we know we need more fragment(s).
1388 datalen = length + fraggap;
1390 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1391 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1392 if ((flags & MSG_MORE) &&
1393 !(rt->dst.dev->features&NETIF_F_SG))
1396 alloclen = datalen + fragheaderlen;
1398 alloclen += dst_exthdrlen;
1400 if (datalen != length + fraggap) {
1402 * this is not the last fragment, the trailer
1403 * space is regarded as data space.
1405 datalen += rt->dst.trailer_len;
1408 alloclen += rt->dst.trailer_len;
1409 fraglen = datalen + fragheaderlen;
1412 * We just reserve space for fragment header.
1413 * Note: this may be overallocation if the message
1414 * (without MSG_MORE) fits into the MTU.
1416 alloclen += sizeof(struct frag_hdr);
1418 copy = datalen - transhdrlen - fraggap;
1424 skb = sock_alloc_send_skb(sk,
1426 (flags & MSG_DONTWAIT), &err);
1429 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1431 skb = alloc_skb(alloclen + hh_len,
1439 * Fill in the control structures
1441 skb->protocol = htons(ETH_P_IPV6);
1442 skb->ip_summed = csummode;
1444 /* reserve for fragmentation and ipsec header */
1445 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1448 /* Only the initial fragment is time stamped */
1449 skb_shinfo(skb)->tx_flags = tx_flags;
1451 skb_shinfo(skb)->tskey = tskey;
1455 * Find where to start putting bytes
1457 data = skb_put(skb, fraglen);
1458 skb_set_network_header(skb, exthdrlen);
1459 data += fragheaderlen;
1460 skb->transport_header = (skb->network_header +
1463 skb->csum = skb_copy_and_csum_bits(
1464 skb_prev, maxfraglen,
1465 data + transhdrlen, fraggap, 0);
1466 skb_prev->csum = csum_sub(skb_prev->csum,
1469 pskb_trim_unique(skb_prev, maxfraglen);
1472 getfrag(from, data + transhdrlen, offset,
1473 copy, fraggap, skb) < 0) {
1480 length -= datalen - fraggap;
1485 if ((flags & MSG_CONFIRM) && !skb_prev)
1486 skb_set_dst_pending_confirm(skb, 1);
1489 * Put the packet on the pending queue
1491 if (!skb->destructor) {
1492 skb->destructor = sock_wfree;
1494 wmem_alloc_delta += skb->truesize;
1496 __skb_queue_tail(queue, skb);
1503 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1507 if (getfrag(from, skb_put(skb, copy),
1508 offset, copy, off, skb) < 0) {
1509 __skb_trim(skb, off);
1514 int i = skb_shinfo(skb)->nr_frags;
1517 if (!sk_page_frag_refill(sk, pfrag))
1520 if (!skb_can_coalesce(skb, i, pfrag->page,
1523 if (i == MAX_SKB_FRAGS)
1526 __skb_fill_page_desc(skb, i, pfrag->page,
1528 skb_shinfo(skb)->nr_frags = ++i;
1529 get_page(pfrag->page);
1531 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1533 page_address(pfrag->page) + pfrag->offset,
1534 offset, copy, skb->len, skb) < 0)
1537 pfrag->offset += copy;
1538 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1540 skb->data_len += copy;
1541 skb->truesize += copy;
1542 wmem_alloc_delta += copy;
1548 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1554 cork->length -= length;
1555 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1556 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
/*
 * ip6_append_data - add data to the packet pending on the socket.
 *
 * Front end for __ip6_append_data() that works on sk->sk_write_queue and
 * on the cork state kept in inet_sk(sk)->cork and inet6_sk(sk)->cork.
 *
 * @sk:          socket whose write queue and cork state are used
 * @getfrag:     copy function handed unchanged to __ip6_append_data()
 * @from:        pointer handed unchanged to __ip6_append_data()
 * @length:      length handed to __ip6_append_data(); grown by the option
 *               length when the write queue is empty
 * @transhdrlen: grown by the same option length as @length
 * @ipc6:        supplies ipc6->opt, whose opt_flen is the option length
 * @fl6:         flow; may be replaced by the flow stored in the cork
 * @rt:          route handed to ip6_setup_cork()
 * @flags:       MSG_* flags; MSG_PROBE is tested before anything else
 * @sockc:       handed unchanged to __ip6_append_data()
 *
 * The visible return path yields the result of __ip6_append_data().
 * NOTE(review): the MSG_PROBE result and the handling of an
 * ip6_setup_cork() failure are not visible here - confirm.
 */
1560 int ip6_append_data(struct sock *sk,
1561 int getfrag(void *from, char *to, int offset, int len,
1562 int odd, struct sk_buff *skb),
1563 void *from, int length, int transhdrlen,
1564 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1565 struct rt6_info *rt, unsigned int flags,
1566 const struct sockcm_cookie *sockc)
1568 struct inet_sock *inet = inet_sk(sk);
1569 struct ipv6_pinfo *np = inet6_sk(sk);
/* NOTE(review): the statement guarded by this test is not visible here. */
1573 if (flags&MSG_PROBE)
/* Nothing is queued yet: set up the cork state before appending. */
1575 if (skb_queue_empty(&sk->sk_write_queue)) {
1579 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
/* Add opt_flen of the options (0 without options) to both lengths. */
1584 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1585 length += exthdrlen;
1586 transhdrlen += exthdrlen;
/*
 * Use the flow stored in the cork instead of the caller's.
 * NOTE(review): presumably the non-empty-queue branch - confirm.
 */
1588 fl6 = &inet->cork.fl.u.ip6;
1592 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1593 &np->cork, sk_page_frag(sk), getfrag,
1594 from, length, transhdrlen, flags, ipc6, sockc);
1596 EXPORT_SYMBOL_GPL(ip6_append_data);
/*
 * ip6_cork_release - drop everything the cork state holds on to.
 *
 * Frees the option headers hanging off v6_cork->opt and the option block
 * itself, releases the destination cached in cork->base.dst, clears
 * IPCORK_ALLFRAG in cork->base.flags and zeroes the saved flow cork->fl.
 *
 * @cork:    generic cork state (cached dst, flags, saved flow)
 * @v6_cork: IPv6 cork state (owns the ->opt block freed here)
 */
1598 static void ip6_cork_release(struct inet_cork_full *cork,
1599 struct inet6_cork *v6_cork)
/*
 * NOTE(review): these lines dereference v6_cork->opt; any NULL guard
 * around them is not visible here - confirm.
 */
1602 kfree(v6_cork->opt->dst0opt);
1603 kfree(v6_cork->opt->dst1opt);
1604 kfree(v6_cork->opt->hopopt);
1605 kfree(v6_cork->opt->srcrt);
1606 kfree(v6_cork->opt);
/* Reset the pointer so the freed block cannot be reached again. */
1607 v6_cork->opt = NULL;
/* Release the cached destination only when one is held. */
1610 if (cork->base.dst) {
1611 dst_release(cork->base.dst);
1612 cork->base.dst = NULL;
1613 cork->base.flags &= ~IPCORK_ALLFRAG;
/* Wipe the saved flow. */
1615 memset(&cork->fl, 0, sizeof(cork->fl));
/*
 * __ip6_make_skb - collapse the queued buffers into one skb and prepend
 * the IPv6 header.
 *
 * The first buffer taken from @queue becomes the head; every further
 * buffer is linked through the head's frag_list and its len and truesize
 * are added to the head. Extension headers from v6_cork->opt and the IPv6
 * header are then pushed in front, the route from cork->base.dst is
 * attached, output statistics are updated and the cork is released.
 *
 * @sk:      socket supplying priority, mark, ignore_df and flow-label policy
 * @queue:   queue of pending buffers to collapse
 * @cork:    generic cork state (cached dst and saved flow)
 * @v6_cork: IPv6 cork state (options, traffic class, hop limit)
 *
 * NOTE(review): the return statement and the handling of an empty queue
 * are not visible here - presumably the head skb is returned; confirm.
 */
1618 struct sk_buff *__ip6_make_skb(struct sock *sk,
1619 struct sk_buff_head *queue,
1620 struct inet_cork_full *cork,
1621 struct inet6_cork *v6_cork)
1623 struct sk_buff *skb, *tmp_skb;
1624 struct sk_buff **tail_skb;
1625 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1626 struct ipv6_pinfo *np = inet6_sk(sk);
1627 struct net *net = sock_net(sk);
1628 struct ipv6hdr *hdr;
1629 struct ipv6_txoptions *opt = v6_cork->opt;
1630 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1631 struct flowi6 *fl6 = &cork->fl.u.ip6;
1632 unsigned char proto = fl6->flowi6_proto;
/* The first queued buffer becomes the head of the resulting packet. */
1634 skb = __skb_dequeue(queue);
/* Further buffers are chained starting at the head's frag_list. */
1637 tail_skb = &(skb_shinfo(skb)->frag_list);
1639 /* move skb->data to ip header from ext header */
1640 if (skb->data < skb_network_header(skb))
1641 __skb_pull(skb, skb_network_offset(skb));
/*
 * For every remaining buffer: pull it by the head's network header
 * length, link it behind the previous one, fold its len and truesize
 * into the head, and clear its destructor.
 */
1642 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1643 __skb_pull(tmp_skb, skb_network_header_len(skb));
1644 *tail_skb = tmp_skb;
1645 tail_skb = &(tmp_skb->next);
1646 skb->len += tmp_skb->len;
1647 skb->data_len += tmp_skb->len;
1648 skb->truesize += tmp_skb->truesize;
1649 tmp_skb->destructor = NULL;
1653 /* Allow local fragmentation. */
1654 skb->ignore_df = ip6_sk_ignore_df(sk);
/*
 * Start from the flow's destination; final_dst is passed by address to
 * ipv6_push_nfrag_opts() below, which may therefore change it.
 */
1656 *final_dst = fl6->daddr;
1657 __skb_pull(skb, skb_network_header_len(skb));
/* Push the extension headers; both helpers receive &proto. */
1658 if (opt && opt->opt_flen)
1659 ipv6_push_frag_opts(skb, opt, &proto);
1660 if (opt && opt->opt_nflen)
1661 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
/* Make room for the fixed IPv6 header and fill it in. */
1663 skb_push(skb, sizeof(struct ipv6hdr));
1664 skb_reset_network_header(skb);
1665 hdr = ipv6_hdr(skb);
1667 ip6_flow_hdr(hdr, v6_cork->tclass,
1668 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1669 ip6_autoflowlabel(net, np), fl6));
1670 hdr->hop_limit = v6_cork->hop_limit;
1671 hdr->nexthdr = proto;
1672 hdr->saddr = fl6->saddr;
1673 hdr->daddr = *final_dst;
/* Inherit priority and mark from the socket. */
1675 skb->priority = sk->sk_priority;
1676 skb->mark = sk->sk_mark;
/* Attach the corked route; dst_clone() is applied to it first. */
1678 skb_dst_set(skb, dst_clone(&rt->dst));
1679 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
/* ICMPv6 packets additionally bump the per-type and OUTMSGS counters. */
1680 if (proto == IPPROTO_ICMPV6) {
1681 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1683 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1684 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
/* Release the cork state now that the packet has been built. */
1687 ip6_cork_release(cork, v6_cork);
/*
 * ip6_send_skb - hand a finished packet to ip6_local_out().
 *
 * The namespace is taken from skb->sk and the route from skb_dst(skb).
 * The result of ip6_local_out() is passed through net_xmit_errno(), and
 * IPSTATS_MIB_OUTDISCARDS is incremented on the route's device.
 *
 * NOTE(review): the conditions guarding the errno translation and the
 * discard counter, and the return statement, are not visible here -
 * confirm before relying on exact error semantics.
 */
1692 int ip6_send_skb(struct sk_buff *skb)
1694 struct net *net = sock_net(skb->sk);
1695 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1698 err = ip6_local_out(net, skb->sk, skb);
1701 err = net_xmit_errno(err);
1703 IP6_INC_STATS(net, rt->rt6i_idev,
1704 IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - build the pending packet and send it.
 *
 * Obtains the packet with ip6_finish_skb(sk) and returns the result of
 * ip6_send_skb() on it.
 *
 * NOTE(review): handling of a NULL result from ip6_finish_skb() is not
 * visible here - confirm.
 */
1710 int ip6_push_pending_frames(struct sock *sk)
1712 struct sk_buff *skb;
1714 skb = ip6_finish_skb(sk);
1718 return ip6_send_skb(skb);
1720 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
/*
 * __ip6_flush_pending_frames - discard every buffer on @queue and release
 * the cork state.
 *
 * Buffers are removed from the tail of the queue; each one is counted as
 * IPSTATS_MIB_OUTDISCARDS against the device of its own dst. Afterwards
 * ip6_cork_release() is called on @cork / @v6_cork.
 *
 * NOTE(review): the conditions around the counter and the call that frees
 * each skb are not visible here - confirm.
 */
1722 static void __ip6_flush_pending_frames(struct sock *sk,
1723 struct sk_buff_head *queue,
1724 struct inet_cork_full *cork,
1725 struct inet6_cork *v6_cork)
1727 struct sk_buff *skb;
1729 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1731 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1732 IPSTATS_MIB_OUTDISCARDS);
1736 ip6_cork_release(cork, v6_cork);
/*
 * ip6_flush_pending_frames - drop the packet pending on the socket.
 *
 * Calls __ip6_flush_pending_frames() on sk->sk_write_queue with the cork
 * state stored in inet_sk(sk)->cork and inet6_sk(sk)->cork.
 */
1739 void ip6_flush_pending_frames(struct sock *sk)
1741 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1742 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1744 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/*
 * ip6_make_skb - build a complete packet without touching the socket's
 * own write queue or cork state.
 *
 * Uses a local queue and local cork structures: the cork is set up with
 * ip6_setup_cork(), the data is appended with __ip6_append_data() using
 * the calling task's page fragment (current->task_frag), and the queued
 * buffers are collapsed with __ip6_make_skb().
 *
 * @sk:          socket the packet is built for
 * @getfrag:     copy function handed unchanged to __ip6_append_data()
 * @from:        pointer handed unchanged to __ip6_append_data()
 * @length:      length; opt_flen of ipc6->opt (0 without options) is added
 * @transhdrlen: transport header length; the same amount is added
 * @ipc6:        supplies ->opt and ->dontfrag (a negative dontfrag is
 *               replaced by the socket's setting)
 * @fl6:         flow handed to ip6_setup_cork() and __ip6_append_data()
 * @rt:          route handed to ip6_setup_cork()
 * @flags:       MSG_* flags; MSG_PROBE is tested first
 * @sockc:       handed unchanged to __ip6_append_data()
 *
 * Returns the result of __ip6_make_skb(), or ERR_PTR(err) on the two
 * visible failure paths.
 * NOTE(review): the value returned for MSG_PROBE and the conditions
 * guarding the ERR_PTR() paths are not visible here - confirm.
 */
1746 struct sk_buff *ip6_make_skb(struct sock *sk,
1747 int getfrag(void *from, char *to, int offset,
1748 int len, int odd, struct sk_buff *skb),
1749 void *from, int length, int transhdrlen,
1750 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1751 struct rt6_info *rt, unsigned int flags,
1752 const struct sockcm_cookie *sockc)
1754 struct inet_cork_full cork;
1755 struct inet6_cork v6_cork;
1756 struct sk_buff_head queue;
1757 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
/* NOTE(review): the statement guarded by this test is not visible here. */
1760 if (flags & MSG_PROBE)
1763 __skb_queue_head_init(&queue);
/* The cork lives on the stack, so its fields are initialised by hand. */
1765 cork.base.flags = 0;
1767 cork.base.opt = NULL;
1768 cork.base.dst = NULL;
1770 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
/* Failure path: undo whatever ip6_setup_cork() acquired. */
1772 ip6_cork_release(&cork, &v6_cork);
1773 return ERR_PTR(err);
/* A negative dontfrag means "not specified": use the socket's value. */
1775 if (ipc6->dontfrag < 0)
1776 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1778 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1779 &current->task_frag, getfrag, from,
1780 length + exthdrlen, transhdrlen + exthdrlen,
1781 flags, ipc6, sockc);
/* Failure path: drop the queued buffers and release the cork. */
1783 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1784 return ERR_PTR(err);
1787 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);