net/ipv6/ip6_output.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *      IPv6 output functions
   4  *      Linux INET6 implementation
   5  *
   6  *      Authors:
   7  *      Pedro Roque             <roque@di.fc.ul.pt>
   8  *
   9  *      Based on linux/net/ipv4/ip_output.c
  10  *
  11  *      Changes:
   12  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
  13  *                              extension headers are implemented.
  14  *                              route changes now work.
  15  *                              ip6_forward does not confuse sniffers.
  16  *                              etc.
  17  *
  18  *      H. von Brand    :       Added missing #include <linux/string.h>
  19  *      Imran Patel     :       frag id should be in NBO
  20  *      Kazunori MIYAZAWA @USAGI
  21  *                      :       add ip6_append_data and related functions
  22  *                              for datagram xmit
  23  */
  24
  25 #include <linux/errno.h>
  26 #include <linux/kernel.h>
  27 #include <linux/string.h>
  28 #include <linux/socket.h>
  29 #include <linux/net.h>
  30 #include <linux/netdevice.h>
  31 #include <linux/if_arp.h>
  32 #include <linux/in6.h>
  33 #include <linux/tcp.h>
  34 #include <linux/route.h>
  35 #include <linux/module.h>
  36 #include <linux/slab.h>
  37
  38 #include <linux/bpf-cgroup.h>
  39 #include <linux/netfilter.h>
  40 #include <linux/netfilter_ipv6.h>
  41
  42 #include <net/sock.h>
  43 #include <net/snmp.h>
  44
  45 #include <net/ipv6.h>
  46 #include <net/ndisc.h>
  47 #include <net/protocol.h>
  48 #include <net/ip6_route.h>
  49 #include <net/addrconf.h>
  50 #include <net/rawv6.h>
  51 #include <net/icmp.h>
  52 #include <net/xfrm.h>
  53 #include <net/checksum.h>
  54 #include <linux/mroute6.h>
  55 #include <net/l3mdev.h>
  56 #include <net/lwtunnel.h>
  57
  58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  59 {
  60         struct dst_entry *dst = skb_dst(skb);
  61         struct net_device *dev = dst->dev;
  62         const struct in6_addr *nexthop;
  63         struct neighbour *neigh;
  64         int ret;
  65
  66         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  67                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  68
  69                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  70                     ((mroute6_is_socket(net, skb) &&
  71                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  72                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  73                                          &ipv6_hdr(skb)->saddr))) {
  74                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  75
  76                         /* Do not check for IFF_ALLMULTI; multicast routing
  77                            is not supported in any case.
  78                          */
  79                         if (newskb)
  80                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  81                                         net, sk, newskb, NULL, newskb->dev,
  82                                         dev_loopback_xmit);
  83
  84                         if (ipv6_hdr(skb)->hop_limit == 0) {
  85                                 IP6_INC_STATS(net, idev,
  86                                               IPSTATS_MIB_OUTDISCARDS);
  87                                 kfree_skb(skb);
  88                                 return 0;
  89                         }
  90                 }
  91
  92                 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  93
  94                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
  95                     IPV6_ADDR_SCOPE_NODELOCAL &&
  96                     !(dev->flags & IFF_LOOPBACK)) {
  97                         kfree_skb(skb);
  98                         return 0;
  99                 }
 100         }
 101
 102         if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 103                 int res = lwtunnel_xmit(skb);
 104
 105                 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 106                         return res;
 107         }
 108
 109         rcu_read_lock_bh();
 110         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 111         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 112         if (unlikely(!neigh))
 113                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 114         if (!IS_ERR(neigh)) {
 115                 sock_confirm_neigh(skb, neigh);
 116                 ret = neigh_output(neigh, skb, false);
 117                 rcu_read_unlock_bh();
 118                 return ret;
 119         }
 120         rcu_read_unlock_bh();
 121
 122         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 123         kfree_skb(skb);
 124         return -EINVAL;
 125 }
 126
 127 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 128 {
 129 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 130         /* Policy lookup after SNAT yielded a new policy */
 131         if (skb_dst(skb)->xfrm) {
 132                 IPCB(skb)->flags |= IPSKB_REROUTED;
 133                 return dst_output(net, sk, skb);
 134         }
 135 #endif
 136
 137         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 138             dst_allfrag(skb_dst(skb)) ||
 139             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 140                 return ip6_fragment(net, sk, skb, ip6_finish_output2);
 141         else
 142                 return ip6_finish_output2(net, sk, skb);
 143 }
 144
 145 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 146 {
 147         int ret;
 148
 149         ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 150         switch (ret) {
 151         case NET_XMIT_SUCCESS:
 152                 return __ip6_finish_output(net, sk, skb);
 153         case NET_XMIT_CN:
 154                 return __ip6_finish_output(net, sk, skb) ? : ret;
 155         default:
 156                 kfree_skb(skb);
 157                 return ret;
 158         }
 159 }
 160
 161 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 162 {
 163         struct net_device *dev = skb_dst(skb)->dev;
 164         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 165
 166         skb->protocol = htons(ETH_P_IPV6);
 167         skb->dev = dev;
 168
 169         if (unlikely(idev->cnf.disable_ipv6)) {
 170                 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 171                 kfree_skb(skb);
 172                 return 0;
 173         }
 174
 175         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 176                             net, sk, skb, NULL, dev,
 177                             ip6_finish_output,
 178                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 179 }
 180
 181 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 182 {
 183         if (!np->autoflowlabel_set)
 184                 return ip6_default_np_autolabel(net);
 185         else
 186                 return np->autoflowlabel;
 187 }
 188
 189 /*
 190  * xmit an sk_buff (used by TCP, SCTP and DCCP)
 191  * Note : socket lock is not held for SYNACK packets, but might be modified
 192  * by calls to skb_set_owner_w() and ipv6_local_error(),
 193  * which are using proper atomic operations or spinlocks.
 194  */
 195 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 196              __u32 mark, struct ipv6_txoptions *opt, int tclass)
 197 {
 198         struct net *net = sock_net(sk);
 199         const struct ipv6_pinfo *np = inet6_sk(sk);
 200         struct in6_addr *first_hop = &fl6->daddr;
 201         struct dst_entry *dst = skb_dst(skb);
 202         unsigned int head_room;
 203         struct ipv6hdr *hdr;
 204         u8  proto = fl6->flowi6_proto;
 205         int seg_len = skb->len;
 206         int hlimit = -1;
 207         u32 mtu;
 208
 209         head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 210         if (opt)
 211                 head_room += opt->opt_nflen + opt->opt_flen;
 212
 213         if (unlikely(skb_headroom(skb) < head_room)) {
 214                 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 215                 if (!skb2) {
 216                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 217                                       IPSTATS_MIB_OUTDISCARDS);
 218                         kfree_skb(skb);
 219                         return -ENOBUFS;
 220                 }
 221                 if (skb->sk)
 222                         skb_set_owner_w(skb2, skb->sk);
 223                 consume_skb(skb);
 224                 skb = skb2;
 225         }
 226
 227         if (opt) {
 228                 seg_len += opt->opt_nflen + opt->opt_flen;
 229
 230                 if (opt->opt_flen)
 231                         ipv6_push_frag_opts(skb, opt, &proto);
 232
 233                 if (opt->opt_nflen)
 234                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 235                                              &fl6->saddr);
 236         }
 237
 238         skb_push(skb, sizeof(struct ipv6hdr));
 239         skb_reset_network_header(skb);
 240         hdr = ipv6_hdr(skb);
 241
 242         /*
 243          *      Fill in the IPv6 header
 244          */
 245         if (np)
 246                 hlimit = np->hop_limit;
 247         if (hlimit < 0)
 248                 hlimit = ip6_dst_hoplimit(dst);
 249
 250         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 251                                 ip6_autoflowlabel(net, np), fl6));
 252
 253         hdr->payload_len = htons(seg_len);
 254         hdr->nexthdr = proto;
 255         hdr->hop_limit = hlimit;
 256
 257         hdr->saddr = fl6->saddr;
 258         hdr->daddr = *first_hop;
 259
 260         skb->protocol = htons(ETH_P_IPV6);
 261         skb->priority = sk->sk_priority;
 262         skb->mark = mark;
 263
 264         mtu = dst_mtu(dst);
 265         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 266                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 267                               IPSTATS_MIB_OUT, skb->len);
 268
 269                 /* if egress device is enslaved to an L3 master device pass the
 270                  * skb to its handler for processing
 271                  */
 272                 skb = l3mdev_ip6_out((struct sock *)sk, skb);
 273                 if (unlikely(!skb))
 274                         return 0;
 275
 276                 /* hooks should never assume socket lock is held.
 277                  * we promote our socket to non const
 278                  */
 279                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 280                                net, (struct sock *)sk, skb, NULL, dst->dev,
 281                                dst_output);
 282         }
 283
 284         skb->dev = dst->dev;
 285         /* ipv6_local_error() does not require socket lock,
 286          * we promote our socket to non const
 287          */
 288         ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 289
 290         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 291         kfree_skb(skb);
 292         return -EMSGSIZE;
 293 }
 294 EXPORT_SYMBOL(ip6_xmit);
 295
 296 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 297 {
 298         struct ip6_ra_chain *ra;
 299         struct sock *last = NULL;
 300
 301         read_lock(&ip6_ra_lock);
 302         for (ra = ip6_ra_chain; ra; ra = ra->next) {
 303                 struct sock *sk = ra->sk;
 304                 if (sk && ra->sel == sel &&
 305                     (!sk->sk_bound_dev_if ||
 306                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
 307                         struct ipv6_pinfo *np = inet6_sk(sk);
 308
 309                         if (np && np->rtalert_isolate &&
 310                             !net_eq(sock_net(sk), dev_net(skb->dev))) {
 311                                 continue;
 312                         }
 313                         if (last) {
 314                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 315                                 if (skb2)
 316                                         rawv6_rcv(last, skb2);
 317                         }
 318                         last = sk;
 319                 }
 320         }
 321
 322         if (last) {
 323                 rawv6_rcv(last, skb);
 324                 read_unlock(&ip6_ra_lock);
 325                 return 1;
 326         }
 327         read_unlock(&ip6_ra_lock);
 328         return 0;
 329 }
 330
 331 static int ip6_forward_proxy_check(struct sk_buff *skb)
 332 {
 333         struct ipv6hdr *hdr = ipv6_hdr(skb);
 334         u8 nexthdr = hdr->nexthdr;
 335         __be16 frag_off;
 336         int offset;
 337
 338         if (ipv6_ext_hdr(nexthdr)) {
 339                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 340                 if (offset < 0)
 341                         return 0;
 342         } else
 343                 offset = sizeof(struct ipv6hdr);
 344
 345         if (nexthdr == IPPROTO_ICMPV6) {
 346                 struct icmp6hdr *icmp6;
 347
 348                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
 349                                          offset + 1 - skb->data)))
 350                         return 0;
 351
 352                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 353
 354                 switch (icmp6->icmp6_type) {
 355                 case NDISC_ROUTER_SOLICITATION:
 356                 case NDISC_ROUTER_ADVERTISEMENT:
 357                 case NDISC_NEIGHBOUR_SOLICITATION:
 358                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
 359                 case NDISC_REDIRECT:
 360                         /* For reaction involving unicast neighbor discovery
 361                          * message destined to the proxied address, pass it to
 362                          * input function.
 363                          */
 364                         return 1;
 365                 default:
 366                         break;
 367                 }
 368         }
 369
 370         /*
 371          * The proxying router can't forward traffic sent to a link-local
 372          * address, so signal the sender and discard the packet. This
 373          * behavior is clarified by the MIPv6 specification.
 374          */
 375         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 376                 dst_link_failure(skb);
 377                 return -1;
 378         }
 379
 380         return 0;
 381 }
 382
 383 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 384                                      struct sk_buff *skb)
 385 {
 386         struct dst_entry *dst = skb_dst(skb);
 387
 388         __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 389         __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 390
 391 #ifdef CONFIG_NET_SWITCHDEV
 392         if (skb->offload_l3_fwd_mark) {
 393                 consume_skb(skb);
 394                 return 0;
 395         }
 396 #endif
 397
 398         skb->tstamp = 0;
 399         return dst_output(net, sk, skb);
 400 }
 401
 402 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 403 {
 404         if (skb->len <= mtu)
 405                 return false;
 406
 407         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 408         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 409                 return true;
 410
 411         if (skb->ignore_df)
 412                 return false;
 413
 414         if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 415                 return false;
 416
 417         return true;
 418 }
 419
/*
 * Forward a received IPv6 packet along the route attached to the skb.
 *
 * The packet is validated (forwarding enabled, addressed to this host at
 * link level, no owning socket, no LRO, xfrm forward policy), optionally
 * delivered to Router Alert listeners, checked for hop limit and MTU, and
 * finally passed through the FORWARD netfilter hook to
 * ip6_forward_finish().
 *
 * Returns:
 *   0           the packet was consumed by a Router Alert socket;
 *   -ETIMEDOUT  hop limit exhausted (ICMPv6 time exceeded is sent);
 *   -EMSGSIZE   packet too big for the outgoing MTU (ICMPv6 packet too big
 *               is sent);
 *   -EINVAL     the packet was dropped via the error/drop labels;
 *   otherwise   the result of ip6_input() (proxied neighbour discovery) or
 *               of the netfilter hook / ip6_forward_finish().
 */
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        /* Forwarding disabled: counted as an address error, then dropped. */
        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        /* A packet that already belongs to a socket is not forwarded. */
        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT make any processing on
         *      RA packets, pushing them to user level AS IS
         *      without any WARRANTY that application will be able
         *      to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not end-node, so that if packet contains
         *      AH/ESP, we cannot make anything.
         *      Defragmentation also would be mistake, RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                /* > 0: handle locally; < 0: not forwardable; 0: forward. */
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        /* Re-read the route after xfrm6_route_forward(). */
        dst = skb_dst(skb);

        /* NOTE(review): hdr was read on entry; if ip6_forward_proxy_check()
         * or xfrm6_route_forward() can reallocate the header data, hdr may
         * be stale in the block below - confirm.
         */

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                /* Redirect to the gateway if there is one, else to the
                 * destination itself.
                 */
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        /* Never use a forwarding MTU below the IPv6 minimum. */
        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        /* Get a private, writable header with room for the link-layer
         * header of the outgoing device.
         */
        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        /* skb_cow() may have moved the header; re-read it. */
        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        /* "error" additionally counts an input address error, then falls
         * through to the common drop path.
         */
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}
 578
 579 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 580 {
 581         to->pkt_type = from->pkt_type;
 582         to->priority = from->priority;
 583         to->protocol = from->protocol;
 584         skb_dst_drop(to);
 585         skb_dst_set(to, dst_clone(skb_dst(from)));
 586         to->dev = from->dev;
 587         to->mark = from->mark;
 588
 589         skb_copy_hash(to, from);
 590
 591 #ifdef CONFIG_NET_SCHED
 592         to->tc_index = from->tc_index;
 593 #endif
 594         nf_copy(to, from);
 595         skb_ext_copy(to, from);
 596         skb_copy_secmark(to, from);
 597 }
 598
 599 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
 600                       u8 nexthdr, __be32 frag_id,
 601                       struct ip6_fraglist_iter *iter)
 602 {
 603         unsigned int first_len;
 604         struct frag_hdr *fh;
 605
 606         /* BUILD HEADER */
 607         *prevhdr = NEXTHDR_FRAGMENT;
 608         iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 609         if (!iter->tmp_hdr)
 610                 return -ENOMEM;
 611
 612         iter->frag = skb_shinfo(skb)->frag_list;
 613         skb_frag_list_init(skb);
 614
 615         iter->offset = 0;
 616         iter->hlen = hlen;
 617         iter->frag_id = frag_id;
 618         iter->nexthdr = nexthdr;
 619
 620         __skb_pull(skb, hlen);
 621         fh = __skb_push(skb, sizeof(struct frag_hdr));
 622         __skb_push(skb, hlen);
 623         skb_reset_network_header(skb);
 624         memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
 625
 626         fh->nexthdr = nexthdr;
 627         fh->reserved = 0;
 628         fh->frag_off = htons(IP6_MF);
 629         fh->identification = frag_id;
 630
 631         first_len = skb_pagelen(skb);
 632         skb->data_len = first_len - skb_headlen(skb);
 633         skb->len = first_len;
 634         ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
 635
 636         return 0;
 637 }
 638 EXPORT_SYMBOL(ip6_fraglist_init);
 639
 640 void ip6_fraglist_prepare(struct sk_buff *skb,
 641                           struct ip6_fraglist_iter *iter)
 642 {
 643         struct sk_buff *frag = iter->frag;
 644         unsigned int hlen = iter->hlen;
 645         struct frag_hdr *fh;
 646
 647         frag->ip_summed = CHECKSUM_NONE;
 648         skb_reset_transport_header(frag);
 649         fh = __skb_push(frag, sizeof(struct frag_hdr));
 650         __skb_push(frag, hlen);
 651         skb_reset_network_header(frag);
 652         memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
 653         iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
 654         fh->nexthdr = iter->nexthdr;
 655         fh->reserved = 0;
 656         fh->frag_off = htons(iter->offset);
 657         if (frag->next)
 658                 fh->frag_off |= htons(IP6_MF);
 659         fh->identification = iter->frag_id;
 660         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 661         ip6_copy_metadata(frag, skb);
 662 }
 663 EXPORT_SYMBOL(ip6_fraglist_prepare);
 664
 665 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
 666                    unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
 667                    u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
 668 {
 669         state->prevhdr = prevhdr;
 670         state->nexthdr = nexthdr;
 671         state->frag_id = frag_id;
 672
 673         state->hlen = hlen;
 674         state->mtu = mtu;
 675
 676         state->left = skb->len - hlen;  /* Space per frame */
 677         state->ptr = hlen;              /* Where to start from */
 678
 679         state->hroom = hdr_room;
 680         state->troom = needed_tailroom;
 681
 682         state->offset = 0;
 683 }
 684 EXPORT_SYMBOL(ip6_frag_init);
 685
 686 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
 687 {
 688         u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
 689         struct sk_buff *frag;
 690         struct frag_hdr *fh;
 691         unsigned int len;
 692
 693         len = state->left;
 694         /* IF: it doesn't fit, use 'mtu' - the data space left */
 695         if (len > state->mtu)
 696                 len = state->mtu;
 697         /* IF: we are not sending up to and including the packet end
 698            then align the next start on an eight byte boundary */
 699         if (len < state->left)
 700                 len &= ~7;
 701
 702         /* Allocate buffer */
 703         frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
 704                          state->hroom + state->troom, GFP_ATOMIC);
 705         if (!frag)
 706                 return ERR_PTR(-ENOMEM);
 707
 708         /*
 709          *      Set up data on packet
 710          */
 711
 712         ip6_copy_metadata(frag, skb);
 713         skb_reserve(frag, state->hroom);
 714         skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
 715         skb_reset_network_header(frag);
 716         fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
 717         frag->transport_header = (frag->network_header + state->hlen +
 718                                   sizeof(struct frag_hdr));
 719
 720         /*
 721          *      Charge the memory for the fragment to any owner
 722          *      it might possess
 723          */
 724         if (skb->sk)
 725                 skb_set_owner_w(frag, skb->sk);
 726
 727         /*
 728          *      Copy the packet header into the new buffer.
 729          */
 730         skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
 731
 732         fragnexthdr_offset = skb_network_header(frag);
 733         fragnexthdr_offset += prevhdr - skb_network_header(skb);
 734         *fragnexthdr_offset = NEXTHDR_FRAGMENT;
 735
 736         /*
 737          *      Build fragment header.
 738          */
 739         fh->nexthdr = state->nexthdr;
 740         fh->reserved = 0;
 741         fh->identification = state->frag_id;
 742
 743         /*
 744          *      Copy a block of the IP datagram.
 745          */
 746         BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
 747                              len));
 748         state->left -= len;
 749
 750         fh->frag_off = htons(state->offset);
 751         if (state->left > 0)
 752                 fh->frag_off |= htons(IP6_MF);
 753         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 754
 755         state->ptr += len;
 756         state->offset += len;
 757
 758         return frag;
 759 }
 760 EXPORT_SYMBOL(ip6_frag_next);
 761
 762 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 763                  int (*output)(struct net *, struct sock *, struct sk_buff *))
 764 {
 765         struct sk_buff *frag;
 766         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 767         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 768                                 inet6_sk(skb->sk) : NULL;
 769         struct ip6_frag_state state;
 770         unsigned int mtu, hlen, nexthdr_offset;
 771         int hroom, err = 0;
 772         __be32 frag_id;
 773         u8 *prevhdr, nexthdr = 0;
 774
 775         err = ip6_find_1stfragopt(skb, &prevhdr);
 776         if (err < 0)
 777                 goto fail;
 778         hlen = err;
 779         nexthdr = *prevhdr;
 780         nexthdr_offset = prevhdr - skb_network_header(skb);
 781
 782         mtu = ip6_skb_dst_mtu(skb);
 783
 784         /* We must not fragment if the socket is set to force MTU discovery
 785          * or if the skb it not generated by a local socket.
 786          */
 787         if (unlikely(!skb->ignore_df && skb->len > mtu))
 788                 goto fail_toobig;
 789
 790         if (IP6CB(skb)->frag_max_size) {
 791                 if (IP6CB(skb)->frag_max_size > mtu)
 792                         goto fail_toobig;
 793
 794                 /* don't send fragments larger than what we received */
 795                 mtu = IP6CB(skb)->frag_max_size;
 796                 if (mtu < IPV6_MIN_MTU)
 797                         mtu = IPV6_MIN_MTU;
 798         }
 799
 800         if (np && np->frag_size < mtu) {
 801                 if (np->frag_size)
 802                         mtu = np->frag_size;
 803         }
 804         if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 805                 goto fail_toobig;
 806         mtu -= hlen + sizeof(struct frag_hdr);
 807
 808         frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 809                                     &ipv6_hdr(skb)->saddr);
 810
 811         if (skb->ip_summed == CHECKSUM_PARTIAL &&
 812             (err = skb_checksum_help(skb)))
 813                 goto fail;
 814
 815         prevhdr = skb_network_header(skb) + nexthdr_offset;
 816         hroom = LL_RESERVED_SPACE(rt->dst.dev);
 817         if (skb_has_frag_list(skb)) {
 818                 unsigned int first_len = skb_pagelen(skb);
 819                 struct ip6_fraglist_iter iter;
 820                 struct sk_buff *frag2;
 821
 822                 if (first_len - hlen > mtu ||
 823                     ((first_len - hlen) & 7) ||
 824                     skb_cloned(skb) ||
 825                     skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 826                         goto slow_path;
 827
 828                 skb_walk_frags(skb, frag) {
 829                         /* Correct geometry. */
 830                         if (frag->len > mtu ||
 831                             ((frag->len & 7) && frag->next) ||
 832                             skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 833                                 goto slow_path_clean;
 834
 835                         /* Partially cloned skb? */
 836                         if (skb_shared(frag))
 837                                 goto slow_path_clean;
 838
 839                         BUG_ON(frag->sk);
 840                         if (skb->sk) {
 841                                 frag->sk = skb->sk;
 842                                 frag->destructor = sock_wfree;
 843                         }
 844                         skb->truesize -= frag->truesize;
 845                 }
 846
 847                 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
 848                                         &iter);
 849                 if (err < 0)
 850                         goto fail;
 851
 852                 for (;;) {
 853                         /* Prepare header of the next frame,
 854                          * before previous one went down. */
 855                         if (iter.frag)
 856                                 ip6_fraglist_prepare(skb, &iter);
 857
 858                         err = output(net, sk, skb);
 859                         if (!err)
 860                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 861                                               IPSTATS_MIB_FRAGCREATES);
 862
 863                         if (err || !iter.frag)
 864                                 break;
 865
 866                         skb = ip6_fraglist_next(&iter);
 867                 }
 868
 869                 kfree(iter.tmp_hdr);
 870
 871                 if (err == 0) {
 872                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 873                                       IPSTATS_MIB_FRAGOKS);
 874                         return 0;
 875                 }
 876
 877                 kfree_skb_list(iter.frag);
 878
 879                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 880                               IPSTATS_MIB_FRAGFAILS);
 881                 return err;
 882
 883 slow_path_clean:
 884                 skb_walk_frags(skb, frag2) {
 885                         if (frag2 == frag)
 886                                 break;
 887                         frag2->sk = NULL;
 888                         frag2->destructor = NULL;
 889                         skb->truesize += frag2->truesize;
 890                 }
 891         }
 892
 893 slow_path:
 894         /*
 895          *      Fragment the datagram.
 896          */
 897
 898         ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
 899                       LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
 900                       &state);
 901
 902         /*
 903          *      Keep copying data until we run out.
 904          */
 905
 906         while (state.left > 0) {
 907                 frag = ip6_frag_next(skb, &state);
 908                 if (IS_ERR(frag)) {
 909                         err = PTR_ERR(frag);
 910                         goto fail;
 911                 }
 912
 913                 /*
 914                  *      Put this fragment into the sending queue.
 915                  */
 916                 err = output(net, sk, frag);
 917                 if (err)
 918                         goto fail;
 919
 920                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 921                               IPSTATS_MIB_FRAGCREATES);
 922         }
 923         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 924                       IPSTATS_MIB_FRAGOKS);
 925         consume_skb(skb);
 926         return err;
 927
 928 fail_toobig:
 929         if (skb->sk && dst_allfrag(skb_dst(skb)))
 930                 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 931
 932         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 933         err = -EMSGSIZE;
 934
 935 fail:
 936         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 937                       IPSTATS_MIB_FRAGFAILS);
 938         kfree_skb(skb);
 939         return err;
 940 }
 941
 942 static inline int ip6_rt_check(const struct rt6key *rt_key,
 943                                const struct in6_addr *fl_addr,
 944                                const struct in6_addr *addr_cache)
 945 {
 946         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 947                 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 948 }
 949
 950 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 951                                           struct dst_entry *dst,
 952                                           const struct flowi6 *fl6)
 953 {
 954         struct ipv6_pinfo *np = inet6_sk(sk);
 955         struct rt6_info *rt;
 956
 957         if (!dst)
 958                 goto out;
 959
 960         if (dst->ops->family != AF_INET6) {
 961                 dst_release(dst);
 962                 return NULL;
 963         }
 964
 965         rt = (struct rt6_info *)dst;
 966         /* Yes, checking route validity in not connected
 967          * case is not very simple. Take into account,
 968          * that we do not support routing by source, TOS,
 969          * and MSG_DONTROUTE            --ANK (980726)
 970          *
 971          * 1. ip6_rt_check(): If route was host route,
 972          *    check that cached destination is current.
 973          *    If it is network route, we still may
 974          *    check its validity using saved pointer
 975          *    to the last used address: daddr_cache.
 976          *    We do not want to save whole address now,
 977          *    (because main consumer of this service
 978          *    is tcp, which has not this problem),
 979          *    so that the last trick works only on connected
 980          *    sockets.
 981          * 2. oif also should be the same.
 982          */
 983         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 984 #ifdef CONFIG_IPV6_SUBTREES
 985             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 986 #endif
 987            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 988               (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 989                 dst_release(dst);
 990                 dst = NULL;
 991         }
 992
 993 out:
 994         return dst;
 995 }
 996
 997 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 998                                struct dst_entry **dst, struct flowi6 *fl6)
 999 {
1000 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1001         struct neighbour *n;
1002         struct rt6_info *rt;
1003 #endif
1004         int err;
1005         int flags = 0;
1006
1007         /* The correct way to handle this would be to do
1008          * ip6_route_get_saddr, and then ip6_route_output; however,
1009          * the route-specific preferred source forces the
1010          * ip6_route_output call _before_ ip6_route_get_saddr.
1011          *
1012          * In source specific routing (no src=any default route),
1013          * ip6_route_output will fail given src=any saddr, though, so
1014          * that's why we try it again later.
1015          */
1016         if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1017                 struct fib6_info *from;
1018                 struct rt6_info *rt;
1019                 bool had_dst = *dst != NULL;
1020
1021                 if (!had_dst)
1022                         *dst = ip6_route_output(net, sk, fl6);
1023                 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1024
1025                 rcu_read_lock();
1026                 from = rt ? rcu_dereference(rt->from) : NULL;
1027                 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1028                                           sk ? inet6_sk(sk)->srcprefs : 0,
1029                                           &fl6->saddr);
1030                 rcu_read_unlock();
1031
1032                 if (err)
1033                         goto out_err_release;
1034
1035                 /* If we had an erroneous initial result, pretend it
1036                  * never existed and let the SA-enabled version take
1037                  * over.
1038                  */
1039                 if (!had_dst && (*dst)->error) {
1040                         dst_release(*dst);
1041                         *dst = NULL;
1042                 }
1043
1044                 if (fl6->flowi6_oif)
1045                         flags |= RT6_LOOKUP_F_IFACE;
1046         }
1047
1048         if (!*dst)
1049                 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1050
1051         err = (*dst)->error;
1052         if (err)
1053                 goto out_err_release;
1054
1055 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1056         /*
1057          * Here if the dst entry we've looked up
1058          * has a neighbour entry that is in the INCOMPLETE
1059          * state and the src address from the flow is
1060          * marked as OPTIMISTIC, we release the found
1061          * dst entry and replace it instead with the
1062          * dst entry of the nexthop router
1063          */
1064         rt = (struct rt6_info *) *dst;
1065         rcu_read_lock_bh();
1066         n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1067                                       rt6_nexthop(rt, &fl6->daddr));
1068         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1069         rcu_read_unlock_bh();
1070
1071         if (err) {
1072                 struct inet6_ifaddr *ifp;
1073                 struct flowi6 fl_gw6;
1074                 int redirect;
1075
1076                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1077                                       (*dst)->dev, 1);
1078
1079                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1080                 if (ifp)
1081                         in6_ifa_put(ifp);
1082
1083                 if (redirect) {
1084                         /*
1085                          * We need to get the dst entry for the
1086                          * default router instead
1087                          */
1088                         dst_release(*dst);
1089                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1090                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1091                         *dst = ip6_route_output(net, sk, &fl_gw6);
1092                         err = (*dst)->error;
1093                         if (err)
1094                                 goto out_err_release;
1095                 }
1096         }
1097 #endif
1098         if (ipv6_addr_v4mapped(&fl6->saddr) &&
1099             !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1100                 err = -EAFNOSUPPORT;
1101                 goto out_err_release;
1102         }
1103
1104         return 0;
1105
1106 out_err_release:
1107         dst_release(*dst);
1108         *dst = NULL;
1109
1110         if (err == -ENETUNREACH)
1111                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1112         return err;
1113 }
1114
1115 /**
1116  *      ip6_dst_lookup - perform route lookup on flow
1117  *      @sk: socket which provides route info
1118  *      @dst: pointer to dst_entry * for result
1119  *      @fl6: flow to lookup
1120  *
1121  *      This function performs a route lookup on the given flow.
1122  *
1123  *      It returns zero on success, or a standard errno code on error.
1124  */
1125 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1126                    struct flowi6 *fl6)
1127 {
1128         *dst = NULL;
1129         return ip6_dst_lookup_tail(net, sk, dst, fl6);
1130 }
1131 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1132
1133 /**
1134  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1135  *      @sk: socket which provides route info
1136  *      @fl6: flow to lookup
1137  *      @final_dst: final destination address for ipsec lookup
1138  *
1139  *      This function performs a route lookup on the given flow.
1140  *
1141  *      It returns a valid dst pointer on success, or a pointer encoded
1142  *      error code.
1143  */
1144 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1145                                       const struct in6_addr *final_dst)
1146 {
1147         struct dst_entry *dst = NULL;
1148         int err;
1149
1150         err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1151         if (err)
1152                 return ERR_PTR(err);
1153         if (final_dst)
1154                 fl6->daddr = *final_dst;
1155
1156         return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1159
1160 /**
1161  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1162  *      @sk: socket which provides the dst cache and route info
1163  *      @fl6: flow to lookup
1164  *      @final_dst: final destination address for ipsec lookup
1165  *      @connected: whether @sk is connected or not
1166  *
1167  *      This function performs a route lookup on the given flow with the
1168  *      possibility of using the cached route in the socket if it is valid.
1169  *      It will take the socket dst lock when operating on the dst cache.
1170  *      As a result, this function can only be used in process context.
1171  *
1172  *      In addition, for a connected socket, cache the dst in the socket
1173  *      if the current cache is not valid.
1174  *
1175  *      It returns a valid dst pointer on success, or a pointer encoded
1176  *      error code.
1177  */
1178 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1179                                          const struct in6_addr *final_dst,
1180                                          bool connected)
1181 {
1182         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1183
1184         dst = ip6_sk_dst_check(sk, dst, fl6);
1185         if (dst)
1186                 return dst;
1187
1188         dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1189         if (connected && !IS_ERR(dst))
1190                 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1191
1192         return dst;
1193 }
1194 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1195
1196 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1197                                                gfp_t gfp)
1198 {
1199         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1200 }
1201
1202 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1203                                                 gfp_t gfp)
1204 {
1205         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1206 }
1207
1208 static void ip6_append_data_mtu(unsigned int *mtu,
1209                                 int *maxfraglen,
1210                                 unsigned int fragheaderlen,
1211                                 struct sk_buff *skb,
1212                                 struct rt6_info *rt,
1213                                 unsigned int orig_mtu)
1214 {
1215         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1216                 if (!skb) {
1217                         /* first fragment, reserve header_len */
1218                         *mtu = orig_mtu - rt->dst.header_len;
1219
1220                 } else {
1221                         /*
1222                          * this fragment is not first, the headers
1223                          * space is regarded as data space.
1224                          */
1225                         *mtu = orig_mtu;
1226                 }
1227                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1228                               + fragheaderlen - sizeof(struct frag_hdr);
1229         }
1230 }
1231
1232 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1233                           struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1234                           struct rt6_info *rt, struct flowi6 *fl6)
1235 {
1236         struct ipv6_pinfo *np = inet6_sk(sk);
1237         unsigned int mtu;
1238         struct ipv6_txoptions *opt = ipc6->opt;
1239
1240         /*
1241          * setup for corking
1242          */
1243         if (opt) {
1244                 if (WARN_ON(v6_cork->opt))
1245                         return -EINVAL;
1246
1247                 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1248                 if (unlikely(!v6_cork->opt))
1249                         return -ENOBUFS;
1250
1251                 v6_cork->opt->tot_len = sizeof(*opt);
1252                 v6_cork->opt->opt_flen = opt->opt_flen;
1253                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1254
1255                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1256                                                     sk->sk_allocation);
1257                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1258                         return -ENOBUFS;
1259
1260                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1261                                                     sk->sk_allocation);
1262                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1263                         return -ENOBUFS;
1264
1265                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1266                                                    sk->sk_allocation);
1267                 if (opt->hopopt && !v6_cork->opt->hopopt)
1268                         return -ENOBUFS;
1269
1270                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1271                                                     sk->sk_allocation);
1272                 if (opt->srcrt && !v6_cork->opt->srcrt)
1273                         return -ENOBUFS;
1274
1275                 /* need source address above miyazawa*/
1276         }
1277         dst_hold(&rt->dst);
1278         cork->base.dst = &rt->dst;
1279         cork->fl.u.ip6 = *fl6;
1280         v6_cork->hop_limit = ipc6->hlimit;
1281         v6_cork->tclass = ipc6->tclass;
1282         if (rt->dst.flags & DST_XFRM_TUNNEL)
1283                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1284                       READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1285         else
1286                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1287                         READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1288         if (np->frag_size < mtu) {
1289                 if (np->frag_size)
1290                         mtu = np->frag_size;
1291         }
1292         if (mtu < IPV6_MIN_MTU)
1293                 return -EINVAL;
1294         cork->base.fragsize = mtu;
1295         cork->base.gso_size = ipc6->gso_size;
1296         cork->base.tx_flags = 0;
1297         sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1298
1299         if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1300                 cork->base.flags |= IPCORK_ALLFRAG;
1301         cork->base.length = 0;
1302
1303         cork->base.transmit_time = ipc6->sockc.transmit_time;
1304
1305         return 0;
1306 }
1307
1308 static int __ip6_append_data(struct sock *sk,
1309                              struct flowi6 *fl6,
1310                              struct sk_buff_head *queue,
1311                              struct inet_cork *cork,
1312                              struct inet6_cork *v6_cork,
1313                              struct page_frag *pfrag,
1314                              int getfrag(void *from, char *to, int offset,
1315                                          int len, int odd, struct sk_buff *skb),
1316                              void *from, int length, int transhdrlen,
1317                              unsigned int flags, struct ipcm6_cookie *ipc6)
1318 {
1319         struct sk_buff *skb, *skb_prev = NULL;
1320         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1321         struct ubuf_info *uarg = NULL;
1322         int exthdrlen = 0;
1323         int dst_exthdrlen = 0;
1324         int hh_len;
1325         int copy;
1326         int err;
1327         int offset = 0;
1328         u32 tskey = 0;
1329         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1330         struct ipv6_txoptions *opt = v6_cork->opt;
1331         int csummode = CHECKSUM_NONE;
1332         unsigned int maxnonfragsize, headersize;
1333         unsigned int wmem_alloc_delta = 0;
1334         bool paged, extra_uref = false;
1335
1336         skb = skb_peek_tail(queue);
1337         if (!skb) {
1338                 exthdrlen = opt ? opt->opt_flen : 0;
1339                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1340         }
1341
1342         paged = !!cork->gso_size;
1343         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1344         orig_mtu = mtu;
1345
1346         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1347             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1348                 tskey = sk->sk_tskey++;
1349
1350         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1351
1352         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1353                         (opt ? opt->opt_nflen : 0);
1354         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1355                      sizeof(struct frag_hdr);
1356
1357         headersize = sizeof(struct ipv6hdr) +
1358                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1359                      (dst_allfrag(&rt->dst) ?
1360                       sizeof(struct frag_hdr) : 0) +
1361                      rt->rt6i_nfheader_len;
1362
1363         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1364          * the first fragment
1365          */
1366         if (headersize + transhdrlen > mtu)
1367                 goto emsgsize;
1368
1369         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1370             (sk->sk_protocol == IPPROTO_UDP ||
1371              sk->sk_protocol == IPPROTO_RAW)) {
1372                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1373                                 sizeof(struct ipv6hdr));
1374                 goto emsgsize;
1375         }
1376
1377         if (ip6_sk_ignore_df(sk))
1378                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1379         else
1380                 maxnonfragsize = mtu;
1381
1382         if (cork->length + length > maxnonfragsize - headersize) {
1383 emsgsize:
1384                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1385                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1386                 return -EMSGSIZE;
1387         }
1388
1389         /* CHECKSUM_PARTIAL only with no extension headers and when
1390          * we are not going to fragment
1391          */
1392         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1393             headersize == sizeof(struct ipv6hdr) &&
1394             length <= mtu - headersize &&
1395             (!(flags & MSG_MORE) || cork->gso_size) &&
1396             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1397                 csummode = CHECKSUM_PARTIAL;
1398
1399         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1400                 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1401                 if (!uarg)
1402                         return -ENOBUFS;
1403                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1404                 if (rt->dst.dev->features & NETIF_F_SG &&
1405                     csummode == CHECKSUM_PARTIAL) {
1406                         paged = true;
1407                 } else {
1408                         uarg->zerocopy = 0;
1409                         skb_zcopy_set(skb, uarg, &extra_uref);
1410                 }
1411         }
1412
1413         /*
1414          * Let's try using as much space as possible.
1415          * Use MTU if total length of the message fits into the MTU.
1416          * Otherwise, we need to reserve fragment header and
1417          * fragment alignment (= 8-15 octects, in total).
1418          *
1419          * Note that we may need to "move" the data from the tail of
1420          * of the buffer to the new fragment when we split
1421          * the message.
1422          *
1423          * FIXME: It may be fragmented into multiple chunks
1424          *        at once if non-fragmentable extension headers
1425          *        are too large.
1426          * --yoshfuji
1427          */
1428
1429         cork->length += length;
1430         if (!skb)
1431                 goto alloc_new_skb;
1432
1433         while (length > 0) {
1434                 /* Check if the remaining data fits into current packet. */
1435                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1436                 if (copy < length)
1437                         copy = maxfraglen - skb->len;
1438
1439                 if (copy <= 0) {
1440                         char *data;
1441                         unsigned int datalen;
1442                         unsigned int fraglen;
1443                         unsigned int fraggap;
1444                         unsigned int alloclen;
1445                         unsigned int pagedlen;
1446 alloc_new_skb:
1447                         /* There's no room in the current skb */
1448                         if (skb)
1449                                 fraggap = skb->len - maxfraglen;
1450                         else
1451                                 fraggap = 0;
1452                         /* update mtu and maxfraglen if necessary */
1453                         if (!skb || !skb_prev)
1454                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1455                                                     fragheaderlen, skb, rt,
1456                                                     orig_mtu);
1457
1458                         skb_prev = skb;
1459
1460                         /*
1461                          * If remaining data exceeds the mtu,
1462                          * we know we need more fragment(s).
1463                          */
1464                         datalen = length + fraggap;
1465
1466                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1467                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1468                         fraglen = datalen + fragheaderlen;
1469                         pagedlen = 0;
1470
1471                         if ((flags & MSG_MORE) &&
1472                             !(rt->dst.dev->features&NETIF_F_SG))
1473                                 alloclen = mtu;
1474                         else if (!paged)
1475                                 alloclen = fraglen;
1476                         else {
1477                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1478                                 pagedlen = fraglen - alloclen;
1479                         }
1480
1481                         alloclen += dst_exthdrlen;
1482
1483                         if (datalen != length + fraggap) {
1484                                 /*
1485                                  * this is not the last fragment, the trailer
1486                                  * space is regarded as data space.
1487                                  */
1488                                 datalen += rt->dst.trailer_len;
1489                         }
1490
1491                         alloclen += rt->dst.trailer_len;
1492                         fraglen = datalen + fragheaderlen;
1493
1494                         /*
1495                          * We just reserve space for fragment header.
1496                          * Note: this may be overallocation if the message
1497                          * (without MSG_MORE) fits into the MTU.
1498                          */
1499                         alloclen += sizeof(struct frag_hdr);
1500
1501                         copy = datalen - transhdrlen - fraggap - pagedlen;
1502                         if (copy < 0) {
1503                                 err = -EINVAL;
1504                                 goto error;
1505                         }
1506                         if (transhdrlen) {
1507                                 skb = sock_alloc_send_skb(sk,
1508                                                 alloclen + hh_len,
1509                                                 (flags & MSG_DONTWAIT), &err);
1510                         } else {
1511                                 skb = NULL;
1512                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1513                                     2 * sk->sk_sndbuf)
1514                                         skb = alloc_skb(alloclen + hh_len,
1515                                                         sk->sk_allocation);
1516                                 if (unlikely(!skb))
1517                                         err = -ENOBUFS;
1518                         }
1519                         if (!skb)
1520                                 goto error;
1521                         /*
1522                          *      Fill in the control structures
1523                          */
1524                         skb->protocol = htons(ETH_P_IPV6);
1525                         skb->ip_summed = csummode;
1526                         skb->csum = 0;
1527                         /* reserve for fragmentation and ipsec header */
1528                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1529                                     dst_exthdrlen);
1530
1531                         /*
1532                          *      Find where to start putting bytes
1533                          */
1534                         data = skb_put(skb, fraglen - pagedlen);
1535                         skb_set_network_header(skb, exthdrlen);
1536                         data += fragheaderlen;
1537                         skb->transport_header = (skb->network_header +
1538                                                  fragheaderlen);
1539                         if (fraggap) {
1540                                 skb->csum = skb_copy_and_csum_bits(
1541                                         skb_prev, maxfraglen,
1542                                         data + transhdrlen, fraggap, 0);
1543                                 skb_prev->csum = csum_sub(skb_prev->csum,
1544                                                           skb->csum);
1545                                 data += fraggap;
1546                                 pskb_trim_unique(skb_prev, maxfraglen);
1547                         }
1548                         if (copy > 0 &&
1549                             getfrag(from, data + transhdrlen, offset,
1550                                     copy, fraggap, skb) < 0) {
1551                                 err = -EFAULT;
1552                                 kfree_skb(skb);
1553                                 goto error;
1554                         }
1555
1556                         offset += copy;
1557                         length -= copy + transhdrlen;
1558                         transhdrlen = 0;
1559                         exthdrlen = 0;
1560                         dst_exthdrlen = 0;
1561
1562                         /* Only the initial fragment is time stamped */
1563                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1564                         cork->tx_flags = 0;
1565                         skb_shinfo(skb)->tskey = tskey;
1566                         tskey = 0;
1567                         skb_zcopy_set(skb, uarg, &extra_uref);
1568
1569                         if ((flags & MSG_CONFIRM) && !skb_prev)
1570                                 skb_set_dst_pending_confirm(skb, 1);
1571
1572                         /*
1573                          * Put the packet on the pending queue
1574                          */
1575                         if (!skb->destructor) {
1576                                 skb->destructor = sock_wfree;
1577                                 skb->sk = sk;
1578                                 wmem_alloc_delta += skb->truesize;
1579                         }
1580                         __skb_queue_tail(queue, skb);
1581                         continue;
1582                 }
1583
1584                 if (copy > length)
1585                         copy = length;
1586
1587                 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1588                     skb_tailroom(skb) >= copy) {
1589                         unsigned int off;
1590
1591                         off = skb->len;
1592                         if (getfrag(from, skb_put(skb, copy),
1593                                                 offset, copy, off, skb) < 0) {
1594                                 __skb_trim(skb, off);
1595                                 err = -EFAULT;
1596                                 goto error;
1597                         }
1598                 } else if (!uarg || !uarg->zerocopy) {
1599                         int i = skb_shinfo(skb)->nr_frags;
1600
1601                         err = -ENOMEM;
1602                         if (!sk_page_frag_refill(sk, pfrag))
1603                                 goto error;
1604
1605                         if (!skb_can_coalesce(skb, i, pfrag->page,
1606                                               pfrag->offset)) {
1607                                 err = -EMSGSIZE;
1608                                 if (i == MAX_SKB_FRAGS)
1609                                         goto error;
1610
1611                                 __skb_fill_page_desc(skb, i, pfrag->page,
1612                                                      pfrag->offset, 0);
1613                                 skb_shinfo(skb)->nr_frags = ++i;
1614                                 get_page(pfrag->page);
1615                         }
1616                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1617                         if (getfrag(from,
1618                                     page_address(pfrag->page) + pfrag->offset,
1619                                     offset, copy, skb->len, skb) < 0)
1620                                 goto error_efault;
1621
1622                         pfrag->offset += copy;
1623                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1624                         skb->len += copy;
1625                         skb->data_len += copy;
1626                         skb->truesize += copy;
1627                         wmem_alloc_delta += copy;
1628                 } else {
1629                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1630                         if (err < 0)
1631                                 goto error;
1632                 }
1633                 offset += copy;
1634                 length -= copy;
1635         }
1636
1637         if (wmem_alloc_delta)
1638                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1639         return 0;
1640
1641 error_efault:
1642         err = -EFAULT;
1643 error:
1644         if (uarg)
1645                 sock_zerocopy_put_abort(uarg, extra_uref);
1646         cork->length -= length;
1647         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1648         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1649         return err;
1650 }
1651
/*
 *      ip6_append_data - append payload to the socket's pending write queue
 *
 *      Returns 0 straight away when MSG_PROBE is set in @flags.
 *
 *      If sk->sk_write_queue is empty, the cork state is initialised with
 *      ip6_setup_cork() (its error, if any, is returned unchanged) and the
 *      opt_flen of ipc6->opt, when options are present, is added to both
 *      @length and @transhdrlen.  If skbs are already queued, the flow
 *      saved in inet->cork.fl.u.ip6 replaces the caller's @fl6 and
 *      @transhdrlen is forced to 0.
 *
 *      The result of __ip6_append_data() on sk->sk_write_queue is returned.
 */
1652 int ip6_append_data(struct sock *sk,
1653                     int getfrag(void *from, char *to, int offset, int len,
1654                                 int odd, struct sk_buff *skb),
1655                     void *from, int length, int transhdrlen,
1656                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1657                     struct rt6_info *rt, unsigned int flags)
1658 {
1659         struct inet_sock *inet = inet_sk(sk);
1660         struct ipv6_pinfo *np = inet6_sk(sk);
1661         int exthdrlen;
1662         int err;
1663
1664         if (flags&MSG_PROBE)
1665                 return 0;
1666         if (skb_queue_empty(&sk->sk_write_queue)) {
1667                 /*
1668                  * setup for corking
1669                  */
1670                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1671                                      ipc6, rt, fl6);
1672                 if (err)
1673                         return err;
1674
                     /* fragmentable extension headers count as payload and
                      * as part of the transport header on this first call */
1675                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1676                 length += exthdrlen;
1677                 transhdrlen += exthdrlen;
1678         } else {
                     /* data already queued: reuse the corked flow and do not
                      * account a transport header again */
1679                 fl6 = &inet->cork.fl.u.ip6;
1680                 transhdrlen = 0;
1681         }
1682
1683         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1684                                  &np->cork, sk_page_frag(sk), getfrag,
1685                                  from, length, transhdrlen, flags, ipc6);
1686 }
1687 EXPORT_SYMBOL_GPL(ip6_append_data);
1688
/*
 *      ip6_cork_release - drop everything the cork state is holding
 *
 *      Frees the option block in v6_cork->opt together with its dst0opt,
 *      dst1opt, hopopt and srcrt members, releases cork->base.dst and
 *      clears IPCORK_ALLFRAG when a dst was held, and zeroes cork->fl.
 *      The opt and dst pointers are reset to NULL after being released.
 */
1689 static void ip6_cork_release(struct inet_cork_full *cork,
1690                              struct inet6_cork *v6_cork)
1691 {
1692         if (v6_cork->opt) {
1693                 kfree(v6_cork->opt->dst0opt);
1694                 kfree(v6_cork->opt->dst1opt);
1695                 kfree(v6_cork->opt->hopopt);
1696                 kfree(v6_cork->opt->srcrt);
1697                 kfree(v6_cork->opt);
1698                 v6_cork->opt = NULL;
1699         }
1700
1701         if (cork->base.dst) {
1702                 dst_release(cork->base.dst);
1703                 cork->base.dst = NULL;
1704                 cork->base.flags &= ~IPCORK_ALLFRAG;
1705         }
             /* the saved flow is wiped whether or not a dst was held */
1706         memset(&cork->fl, 0, sizeof(cork->fl));
1707 }
1708
/*
 *      __ip6_make_skb - collapse a queue of pending skbs into one packet
 *
 *      The first skb taken off @queue becomes the head; every further skb
 *      is linked onto the head's frag_list and its len/truesize are folded
 *      into the head.  Extension headers from v6_cork->opt are pushed, the
 *      IPv6 header is filled in from the corked flow (cork->fl.u.ip6) and
 *      @v6_cork, a reference on cork->base.dst is attached to the skb,
 *      output statistics are updated and the cork state is released.
 *
 *      Returns the head skb, or NULL when @queue is empty (in which case
 *      the cork state is left untouched).
 */
1709 struct sk_buff *__ip6_make_skb(struct sock *sk,
1710                                struct sk_buff_head *queue,
1711                                struct inet_cork_full *cork,
1712                                struct inet6_cork *v6_cork)
1713 {
1714         struct sk_buff *skb, *tmp_skb;
1715         struct sk_buff **tail_skb;
1716         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1717         struct ipv6_pinfo *np = inet6_sk(sk);
1718         struct net *net = sock_net(sk);
1719         struct ipv6hdr *hdr;
1720         struct ipv6_txoptions *opt = v6_cork->opt;
1721         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1722         struct flowi6 *fl6 = &cork->fl.u.ip6;
1723         unsigned char proto = fl6->flowi6_proto;
1724
1725         skb = __skb_dequeue(queue);
1726         if (!skb)
1727                 goto out;
1728         tail_skb = &(skb_shinfo(skb)->frag_list);
1729
1730         /* move skb->data to ip header from ext header */
1731         if (skb->data < skb_network_header(skb))
1732                 __skb_pull(skb, skb_network_offset(skb));
             /* append every remaining skb to the head's frag_list */
1733         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1734                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1735                 *tail_skb = tmp_skb;
1736                 tail_skb = &(tmp_skb->next);
1737                 skb->len += tmp_skb->len;
1738                 skb->data_len += tmp_skb->len;
1739                 skb->truesize += tmp_skb->truesize;
                     /* truesize was folded into the head skb just above, so
                      * the chained skb no longer carries its own owner */
1740                 tmp_skb->destructor = NULL;
1741                 tmp_skb->sk = NULL;
1742         }
1743
1744         /* Allow local fragmentation. */
1745         skb->ignore_df = ip6_sk_ignore_df(sk);
1746
             /* proto and final_dst are handed to the option helpers by
              * reference; whatever they leave behind is what ends up in
              * hdr->nexthdr and hdr->daddr below */
1747         *final_dst = fl6->daddr;
1748         __skb_pull(skb, skb_network_header_len(skb));
1749         if (opt && opt->opt_flen)
1750                 ipv6_push_frag_opts(skb, opt, &proto);
1751         if (opt && opt->opt_nflen)
1752                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1753
1754         skb_push(skb, sizeof(struct ipv6hdr));
1755         skb_reset_network_header(skb);
1756         hdr = ipv6_hdr(skb);
1757
1758         ip6_flow_hdr(hdr, v6_cork->tclass,
1759                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1760                                         ip6_autoflowlabel(net, np), fl6));
1761         hdr->hop_limit = v6_cork->hop_limit;
1762         hdr->nexthdr = proto;
1763         hdr->saddr = fl6->saddr;
1764         hdr->daddr = *final_dst;
1765
1766         skb->priority = sk->sk_priority;
1767         skb->mark = sk->sk_mark;
1768
1769         skb->tstamp = cork->base.transmit_time;
1770
             /* the skb takes its own dst reference; the cork's reference is
              * dropped by ip6_cork_release() at the end */
1771         skb_dst_set(skb, dst_clone(&rt->dst));
1772         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1773         if (proto == IPPROTO_ICMPV6) {
1774                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1775
1776                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1777                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1778         }
1779
1780         ip6_cork_release(cork, v6_cork);
1781 out:
1782         return skb;
1783 }
1784
/*
 *      ip6_send_skb - hand a finished packet to ip6_local_out()
 *
 *      A positive return code from ip6_local_out() is translated with
 *      net_xmit_errno().  If an error remains after that translation,
 *      IPSTATS_MIB_OUTDISCARDS is incremented on the route's inet6_dev.
 *
 *      Returns the (possibly translated) error code, 0 if there is
 *      nothing to report.
 */
1785 int ip6_send_skb(struct sk_buff *skb)
1786 {
1787         struct net *net = sock_net(skb->sk);
1788         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1789         int err;
1790
             /*
              * rt was read from the skb, and ip6_local_out() consumes the
              * skb: once it returns, the dst reference the skb carried may
              * already have been dropped.  Stay inside an RCU read-side
              * section so that rt and rt->rt6i_idev cannot be freed before
              * the statistics update below has dereferenced them.
              */
             rcu_read_lock();
1791         err = ip6_local_out(net, skb->sk, skb);
1792         if (err) {
1793                 if (err > 0)
1794                         err = net_xmit_errno(err);
1795                 if (err)
1796                         IP6_INC_STATS(net, rt->rt6i_idev,
1797                                       IPSTATS_MIB_OUTDISCARDS);
1798         }
1799
             rcu_read_unlock();
1800         return err;
1801 }
1802
/*
 *      ip6_push_pending_frames - transmit whatever is pending on the socket
 *
 *      Obtains the packet from ip6_finish_skb() and passes it to
 *      ip6_send_skb().  Returns 0 when ip6_finish_skb() yields no skb,
 *      otherwise the return value of ip6_send_skb().
 */
1803 int ip6_push_pending_frames(struct sock *sk)
1804 {
1805         struct sk_buff *skb;
1806
1807         skb = ip6_finish_skb(sk);
1808         if (!skb)
1809                 return 0;
1810
1811         return ip6_send_skb(skb);
1812 }
1813 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1814
/*
 *      __ip6_flush_pending_frames - throw away all skbs on @queue
 *
 *      Skbs are removed from the tail of @queue and freed.  For each skb
 *      that has a dst attached, IPSTATS_MIB_OUTDISCARDS is incremented on
 *      that dst's inet6_dev.  The cork state is released afterwards, even
 *      if the queue was empty.
 */
1815 static void __ip6_flush_pending_frames(struct sock *sk,
1816                                        struct sk_buff_head *queue,
1817                                        struct inet_cork_full *cork,
1818                                        struct inet6_cork *v6_cork)
1819 {
1820         struct sk_buff *skb;
1821
1822         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1823                 if (skb_dst(skb))
1824                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1825                                       IPSTATS_MIB_OUTDISCARDS);
1826                 kfree_skb(skb);
1827         }
1828
1829         ip6_cork_release(cork, v6_cork);
1830 }
1831
/*
 *      ip6_flush_pending_frames - discard the socket's pending output
 *
 *      Runs __ip6_flush_pending_frames() on sk->sk_write_queue with the
 *      cork state kept in inet_sk(sk)->cork and inet6_sk(sk)->cork.
 */
1832 void ip6_flush_pending_frames(struct sock *sk)
1833 {
1834         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1835                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1836 }
1837 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1838
/*
 *      ip6_make_skb - build a complete packet without touching the
 *      socket's write queue
 *
 *      Works on an on-stack skb queue and an on-stack inet6_cork, together
 *      with the caller-supplied @cork.
 *
 *      Returns NULL when MSG_PROBE is set in @flags, ERR_PTR(err) when
 *      ip6_setup_cork() or __ip6_append_data() fails (the cork state is
 *      released, respectively the queued skbs are flushed, before
 *      returning), and otherwise the skb produced by __ip6_make_skb().
 *
 *      Side effect: a negative ipc6->dontfrag is overwritten with the
 *      socket's inet6_sk(sk)->dontfrag value.
 */
1839 struct sk_buff *ip6_make_skb(struct sock *sk,
1840                              int getfrag(void *from, char *to, int offset,
1841                                          int len, int odd, struct sk_buff *skb),
1842                              void *from, int length, int transhdrlen,
1843                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1844                              struct rt6_info *rt, unsigned int flags,
1845                              struct inet_cork_full *cork)
1846 {
1847         struct inet6_cork v6_cork;
1848         struct sk_buff_head queue;
1849         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1850         int err;
1851
1852         if (flags & MSG_PROBE)
1853                 return NULL;
1854
1855         __skb_queue_head_init(&queue);
1856
             /* start from a clean cork so that ip6_cork_release() is safe
              * to call on the error path right below */
1857         cork->base.flags = 0;
1858         cork->base.addr = 0;
1859         cork->base.opt = NULL;
1860         cork->base.dst = NULL;
1861         v6_cork.opt = NULL;
1862         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1863         if (err) {
1864                 ip6_cork_release(cork, &v6_cork);
1865                 return ERR_PTR(err);
1866         }
1867         if (ipc6->dontfrag < 0)
1868                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1869
             /* fragmentable extension headers are added to both the payload
              * length and the transport header length */
1870         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1871                                 &current->task_frag, getfrag, from,
1872                                 length + exthdrlen, transhdrlen + exthdrlen,
1873                                 flags, ipc6);
1874         if (err) {
1875                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1876                 return ERR_PTR(err);
1877         }
1878
1879         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1880 }