net: ip: avoid OOM kills with large UDP sends over loopback
net/ipv6/ip6_output.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      IPv6 output functions
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on linux/net/ipv4/ip_output.c
10  *
11  *      Changes:
12  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
13  *                              extension headers are implemented.
14  *                              route changes now work.
15  *                              ip6_forward does not confuse sniffers.
16  *                              etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *      Imran Patel     :       frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *                      :       add ip6_append_data and related functions
22  *                              for datagram xmit
23  */
24
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41
42 #include <net/sock.h>
43 #include <net/snmp.h>
44
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58
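/* Resolve the L2 neighbour for the packet's nexthop and hand the skb to it
 * for transmission. Multicast packets are additionally looped back to local
 * listeners (via dev_loopback_xmit) when the socket asks for it, and
 * node-local scoped multicast is never put on the wire.
 */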
59 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
60 {
61         struct dst_entry *dst = skb_dst(skb);
62         struct net_device *dev = dst->dev;
63         const struct in6_addr *nexthop;
64         struct neighbour *neigh;
65         int ret;
66
67         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
68                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
69
70                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
71                     ((mroute6_is_socket(net, skb) &&
72                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
73                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
74                                          &ipv6_hdr(skb)->saddr))) {
75                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
76
77                         /* Do not check for IFF_ALLMULTI; multicast routing
78                            is not supported in any case.
79                          */
80                         if (newskb)
81                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
82                                         net, sk, newskb, NULL, newskb->dev,
83                                         dev_loopback_xmit);
84
85                         if (ipv6_hdr(skb)->hop_limit == 0) {
86                                 IP6_INC_STATS(net, idev,
87                                               IPSTATS_MIB_OUTDISCARDS);
88                                 kfree_skb(skb);
89                                 return 0;
90                         }
91                 }
92
93                 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
94
95                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
96                     IPV6_ADDR_SCOPE_NODELOCAL &&
97                     !(dev->flags & IFF_LOOPBACK)) {
98                         kfree_skb(skb);
99                         return 0;
100                 }
101         }
102
103         if (lwtunnel_xmit_redirect(dst->lwtstate)) {
104                 int res = lwtunnel_xmit(skb);
105
106                 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
107                         return res;
108         }
109
110         rcu_read_lock_bh();
111         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
112         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
113         if (unlikely(!neigh))
114                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
115         if (!IS_ERR(neigh)) {
116                 sock_confirm_neigh(skb, neigh);
117                 ret = neigh_output(neigh, skb, false);
118                 rcu_read_unlock_bh();
119                 return ret;
120         }
121         rcu_read_unlock_bh();
122
123         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
124         kfree_skb(skb);
125         return -EINVAL;
126 }
127
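/* Segment a GSO packet in software when its segments would not fit the
 * egress MTU, then run each resulting segment through ip6_fragment(). See
 * the comment in ip_finish_output_gso (referenced below) for how such
 * packets can reach the output path.
 */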
128 static int
129 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
130                                     struct sk_buff *skb, unsigned int mtu)
131 {
132         struct sk_buff *segs, *nskb;
133         netdev_features_t features;
134         int ret = 0;
135
136         /* Please see corresponding comment in ip_finish_output_gso
137          * describing the cases where GSO segment length exceeds the
138          * egress MTU.
139          */
140         features = netif_skb_features(skb);
141         segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
142         if (IS_ERR_OR_NULL(segs)) {
143                 kfree_skb(skb);
144                 return -ENOMEM;
145         }
146
147         consume_skb(skb);
148
149         skb_list_walk_safe(segs, segs, nskb) {
150                 int err;
151
152                 skb_mark_not_on_list(segs);
153                 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
154                 if (err && ret == 0)
155                         ret = err;
156         }
157
158         return ret;
159 }
160
161 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162 {
163         unsigned int mtu;
164
165 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
166         /* Policy lookup after SNAT yielded a new policy */
167         if (skb_dst(skb)->xfrm) {
168                 IP6CB(skb)->flags |= IP6SKB_REROUTED;
169                 return dst_output(net, sk, skb);
170         }
171 #endif
172
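        /* Fragment if the packet exceeds the path MTU, if the route requires
         * fragmenting all packets (dst_allfrag), or if conntrack defrag
         * recorded a smaller incoming fragment size; GSO packets whose
         * segments do not fit are software-segmented first.
         */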
173         mtu = ip6_skb_dst_mtu(skb);
174         if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
175                 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
176
177         if ((skb->len > mtu && !skb_is_gso(skb)) ||
178             dst_allfrag(skb_dst(skb)) ||
179             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
180                 return ip6_fragment(net, sk, skb, ip6_finish_output2);
181         else
182                 return ip6_finish_output2(net, sk, skb);
183 }
184
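/* Run the cgroup BPF egress hook before doing the actual output work. A
 * NET_XMIT_CN verdict still transmits the packet but is reported to the
 * caller if transmission itself succeeds; any other non-success verdict
 * drops the packet.
 */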
185 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
186 {
187         int ret;
188
189         ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
190         switch (ret) {
191         case NET_XMIT_SUCCESS:
192                 return __ip6_finish_output(net, sk, skb);
193         case NET_XMIT_CN:
194                 return __ip6_finish_output(net, sk, skb) ? : ret;
195         default:
196                 kfree_skb(skb);
197                 return ret;
198         }
199 }
200
201 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
202 {
203         struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
204         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
205
206         skb->protocol = htons(ETH_P_IPV6);
207         skb->dev = dev;
208
209         if (unlikely(idev->cnf.disable_ipv6)) {
210                 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
211                 kfree_skb(skb);
212                 return 0;
213         }
214
215         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
216                             net, sk, skb, indev, dev,
217                             ip6_finish_output,
218                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
219 }
220 EXPORT_SYMBOL(ip6_output);
221
222 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
223 {
224         if (!np->autoflowlabel_set)
225                 return ip6_default_np_autolabel(net);
226         else
227                 return np->autoflowlabel;
228 }
229
230 /*
231  * xmit an sk_buff (used by TCP, SCTP and DCCP)
232  * Note : socket lock is not held for SYNACK packets, but the socket might
233  * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
234  * which use proper atomic operations or spinlocks.
235  */
236 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
237              __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
238 {
239         struct net *net = sock_net(sk);
240         const struct ipv6_pinfo *np = inet6_sk(sk);
241         struct in6_addr *first_hop = &fl6->daddr;
242         struct dst_entry *dst = skb_dst(skb);
243         unsigned int head_room;
244         struct ipv6hdr *hdr;
245         u8  proto = fl6->flowi6_proto;
246         int seg_len = skb->len;
247         int hlimit = -1;
248         u32 mtu;
249
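        /* Make sure the skb has room for the IPv6 header, any extension
         * headers, and the link-layer header; reallocate the headroom if the
         * caller left too little.
         */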
250         head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
251         if (opt)
252                 head_room += opt->opt_nflen + opt->opt_flen;
253
254         if (unlikely(skb_headroom(skb) < head_room)) {
255                 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
256                 if (!skb2) {
257                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
258                                       IPSTATS_MIB_OUTDISCARDS);
259                         kfree_skb(skb);
260                         return -ENOBUFS;
261                 }
262                 if (skb->sk)
263                         skb_set_owner_w(skb2, skb->sk);
264                 consume_skb(skb);
265                 skb = skb2;
266         }
267
268         if (opt) {
269                 seg_len += opt->opt_nflen + opt->opt_flen;
270
271                 if (opt->opt_flen)
272                         ipv6_push_frag_opts(skb, opt, &proto);
273
274                 if (opt->opt_nflen)
275                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
276                                              &fl6->saddr);
277         }
278
279         skb_push(skb, sizeof(struct ipv6hdr));
280         skb_reset_network_header(skb);
281         hdr = ipv6_hdr(skb);
282
283         /*
284          *      Fill in the IPv6 header
285          */
286         if (np)
287                 hlimit = np->hop_limit;
288         if (hlimit < 0)
289                 hlimit = ip6_dst_hoplimit(dst);
290
291         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
292                                 ip6_autoflowlabel(net, np), fl6));
293
294         hdr->payload_len = htons(seg_len);
295         hdr->nexthdr = proto;
296         hdr->hop_limit = hlimit;
297
298         hdr->saddr = fl6->saddr;
299         hdr->daddr = *first_hop;
300
301         skb->protocol = htons(ETH_P_IPV6);
302         skb->priority = priority;
303         skb->mark = mark;
304
305         mtu = dst_mtu(dst);
306         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
307                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
308                               IPSTATS_MIB_OUT, skb->len);
309
310                 /* if egress device is enslaved to an L3 master device pass the
311                  * skb to its handler for processing
312                  */
313                 skb = l3mdev_ip6_out((struct sock *)sk, skb);
314                 if (unlikely(!skb))
315                         return 0;
316
317                 /* hooks should never assume socket lock is held.
318                  * we promote our socket to non const
319                  */
320                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
321                                net, (struct sock *)sk, skb, NULL, dst->dev,
322                                dst_output);
323         }
324
325         skb->dev = dst->dev;
326         /* ipv6_local_error() does not require socket lock,
327          * we promote our socket to non const
328          */
329         ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
330
331         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
332         kfree_skb(skb);
333         return -EMSGSIZE;
334 }
335 EXPORT_SYMBOL(ip6_xmit);
336
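/* Deliver a Router Alert packet to every raw socket that registered for this
 * alert value via the IPV6_ROUTER_ALERT socket option. All matching sockets
 * but the last receive a clone; the last one consumes the original skb.
 */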
337 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
338 {
339         struct ip6_ra_chain *ra;
340         struct sock *last = NULL;
341
342         read_lock(&ip6_ra_lock);
343         for (ra = ip6_ra_chain; ra; ra = ra->next) {
344                 struct sock *sk = ra->sk;
345                 if (sk && ra->sel == sel &&
346                     (!sk->sk_bound_dev_if ||
347                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
348                         struct ipv6_pinfo *np = inet6_sk(sk);
349
350                         if (np && np->rtalert_isolate &&
351                             !net_eq(sock_net(sk), dev_net(skb->dev))) {
352                                 continue;
353                         }
354                         if (last) {
355                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
356                                 if (skb2)
357                                         rawv6_rcv(last, skb2);
358                         }
359                         last = sk;
360                 }
361         }
362
363         if (last) {
364                 rawv6_rcv(last, skb);
365                 read_unlock(&ip6_ra_lock);
366                 return 1;
367         }
368         read_unlock(&ip6_ra_lock);
369         return 0;
370 }
371
372 static int ip6_forward_proxy_check(struct sk_buff *skb)
373 {
374         struct ipv6hdr *hdr = ipv6_hdr(skb);
375         u8 nexthdr = hdr->nexthdr;
376         __be16 frag_off;
377         int offset;
378
379         if (ipv6_ext_hdr(nexthdr)) {
380                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
381                 if (offset < 0)
382                         return 0;
383         } else
384                 offset = sizeof(struct ipv6hdr);
385
386         if (nexthdr == IPPROTO_ICMPV6) {
387                 struct icmp6hdr *icmp6;
388
389                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
390                                          offset + 1 - skb->data)))
391                         return 0;
392
393                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
394
395                 switch (icmp6->icmp6_type) {
396                 case NDISC_ROUTER_SOLICITATION:
397                 case NDISC_ROUTER_ADVERTISEMENT:
398                 case NDISC_NEIGHBOUR_SOLICITATION:
399                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
400                 case NDISC_REDIRECT:
401                         /* For reaction involving unicast neighbor discovery
402                         /* For a reaction involving a unicast neighbor
403                          * discovery message destined to the proxied address,
404                          * pass it to the input function.
405                         return 1;
406                 default:
407                         break;
408                 }
409         }
410
411         /*
412          * The proxying router can't forward traffic sent to a link-local
413          * address, so signal the sender and discard the packet. This
414          * behavior is clarified by the MIPv6 specification.
415          */
416         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
417                 dst_link_failure(skb);
418                 return -1;
419         }
420
421         return 0;
422 }
423
424 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
425                                      struct sk_buff *skb)
426 {
427         struct dst_entry *dst = skb_dst(skb);
428
429         __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
430         __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
431
432 #ifdef CONFIG_NET_SWITCHDEV
433         if (skb->offload_l3_fwd_mark) {
434                 consume_skb(skb);
435                 return 0;
436         }
437 #endif
438
439         skb->tstamp = 0;
440         return dst_output(net, sk, skb);
441 }
442
443 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
444 {
445         if (skb->len <= mtu)
446                 return false;
447
448         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
449         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
450                 return true;
451
452         if (skb->ignore_df)
453                 return false;
454
455         if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
456                 return false;
457
458         return true;
459 }
460
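/* The forwarding path proper: check that forwarding is enabled and the packet
 * is eligible (hop limit, xfrm policy, proxy NDP), emit a redirect when the
 * packet leaves through the interface it arrived on, enforce the path MTU,
 * then decrement the hop limit and hand the packet to NF_INET_FORWARD.
 */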
461 int ip6_forward(struct sk_buff *skb)
462 {
463         struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
464         struct dst_entry *dst = skb_dst(skb);
465         struct ipv6hdr *hdr = ipv6_hdr(skb);
466         struct inet6_skb_parm *opt = IP6CB(skb);
467         struct net *net = dev_net(dst->dev);
468         u32 mtu;
469
470         if (net->ipv6.devconf_all->forwarding == 0)
471                 goto error;
472
473         if (skb->pkt_type != PACKET_HOST)
474                 goto drop;
475
476         if (unlikely(skb->sk))
477                 goto drop;
478
479         if (skb_warn_if_lro(skb))
480                 goto drop;
481
482         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
483                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
484                 goto drop;
485         }
486
487         skb_forward_csum(skb);
488
489         /*
490          *      We do NOT do any processing on RA packets,
491          *      pushing them to user level AS IS without any
492          *      WARRANTY that the application will be able to
493          *      interpret them. The reason is that we cannot
494          *      make anything clever here.
495          *
496          *      We are not the end-node, so if the packet contains
497          *      AH/ESP we cannot do anything with it.
498          *      Defragmentation would also be a mistake; RA packets
499          *      cannot be fragmented, because there is no guarantee
500          *      that different fragments will go along one path. --ANK
501          */
502         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
503                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
504                         return 0;
505         }
506
507         /*
508          *      check and decrement the hop limit
509          */
510         if (hdr->hop_limit <= 1) {
511                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
512                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
513
514                 kfree_skb(skb);
515                 return -ETIMEDOUT;
516         }
517
518         /* XXX: idev->cnf.proxy_ndp? */
519         if (net->ipv6.devconf_all->proxy_ndp &&
520             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
521                 int proxied = ip6_forward_proxy_check(skb);
522                 if (proxied > 0)
523                         return ip6_input(skb);
524                 else if (proxied < 0) {
525                         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
526                         goto drop;
527                 }
528         }
529
530         if (!xfrm6_route_forward(skb)) {
531                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
532                 goto drop;
533         }
534         dst = skb_dst(skb);
535
536         /* IPv6 specs say nothing about it, but it is clear that we cannot
537            send redirects to source routed frames.
538            We don't send redirects to frames decapsulated from IPsec.
539          */
540         if (IP6CB(skb)->iif == dst->dev->ifindex &&
541             opt->srcrt == 0 && !skb_sec_path(skb)) {
542                 struct in6_addr *target = NULL;
543                 struct inet_peer *peer;
544                 struct rt6_info *rt;
545
546                 /*
547                  *      incoming and outgoing devices are the same;
548                  *      send a redirect.
549                  */
550
551                 rt = (struct rt6_info *) dst;
552                 if (rt->rt6i_flags & RTF_GATEWAY)
553                         target = &rt->rt6i_gateway;
554                 else
555                         target = &hdr->daddr;
556
557                 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
558
559                 /* Limit redirects both by destination (here)
560                    and by source (inside ndisc_send_redirect)
561                  */
562                 if (inet_peer_xrlim_allow(peer, 1*HZ))
563                         ndisc_send_redirect(skb, target);
564                 if (peer)
565                         inet_putpeer(peer);
566         } else {
567                 int addrtype = ipv6_addr_type(&hdr->saddr);
568
569                 /* This check is security critical. */
570                 if (addrtype == IPV6_ADDR_ANY ||
571                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
572                         goto error;
573                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
574                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
575                                     ICMPV6_NOT_NEIGHBOUR, 0);
576                         goto error;
577                 }
578         }
579
580         mtu = ip6_dst_mtu_forward(dst);
581         if (mtu < IPV6_MIN_MTU)
582                 mtu = IPV6_MIN_MTU;
583
584         if (ip6_pkt_too_big(skb, mtu)) {
585                 /* Again, force the OUTPUT device to be used as the source-address device */
586                 skb->dev = dst->dev;
587                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
588                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
589                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
590                                 IPSTATS_MIB_FRAGFAILS);
591                 kfree_skb(skb);
592                 return -EMSGSIZE;
593         }
594
595         if (skb_cow(skb, dst->dev->hard_header_len)) {
596                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
597                                 IPSTATS_MIB_OUTDISCARDS);
598                 goto drop;
599         }
600
601         hdr = ipv6_hdr(skb);
602
603         /* Mangling the hop limit is delayed until after the skb COW */
604
605         hdr->hop_limit--;
606
607         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
608                        net, NULL, skb, skb->dev, dst->dev,
609                        ip6_forward_finish);
610
611 error:
612         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
613 drop:
614         kfree_skb(skb);
615         return -EINVAL;
616 }
617
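/* Copy per-packet metadata (dst, device, mark, priority, netfilter and
 * security state, etc.) from the original packet to a fragment.
 */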
618 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
619 {
620         to->pkt_type = from->pkt_type;
621         to->priority = from->priority;
622         to->protocol = from->protocol;
623         skb_dst_drop(to);
624         skb_dst_set(to, dst_clone(skb_dst(from)));
625         to->dev = from->dev;
626         to->mark = from->mark;
627
628         skb_copy_hash(to, from);
629
630 #ifdef CONFIG_NET_SCHED
631         to->tc_index = from->tc_index;
632 #endif
633         nf_copy(to, from);
634         skb_ext_copy(to, from);
635         skb_copy_secmark(to, from);
636 }
637
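/* Fast-path fragmentation helpers: when the packet already carries a suitably
 * sized frag_list, each list member becomes a fragment in place.
 * ip6_fraglist_init() turns the head skb into the first fragment and
 * ip6_fraglist_prepare() fixes up each following one.
 */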
638 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
639                       u8 nexthdr, __be32 frag_id,
640                       struct ip6_fraglist_iter *iter)
641 {
642         unsigned int first_len;
643         struct frag_hdr *fh;
644
645         /* BUILD HEADER */
646         *prevhdr = NEXTHDR_FRAGMENT;
647         iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
648         if (!iter->tmp_hdr)
649                 return -ENOMEM;
650
651         iter->frag = skb_shinfo(skb)->frag_list;
652         skb_frag_list_init(skb);
653
654         iter->offset = 0;
655         iter->hlen = hlen;
656         iter->frag_id = frag_id;
657         iter->nexthdr = nexthdr;
658
659         __skb_pull(skb, hlen);
660         fh = __skb_push(skb, sizeof(struct frag_hdr));
661         __skb_push(skb, hlen);
662         skb_reset_network_header(skb);
663         memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
664
665         fh->nexthdr = nexthdr;
666         fh->reserved = 0;
667         fh->frag_off = htons(IP6_MF);
668         fh->identification = frag_id;
669
670         first_len = skb_pagelen(skb);
671         skb->data_len = first_len - skb_headlen(skb);
672         skb->len = first_len;
673         ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
674
675         return 0;
676 }
677 EXPORT_SYMBOL(ip6_fraglist_init);
678
679 void ip6_fraglist_prepare(struct sk_buff *skb,
680                           struct ip6_fraglist_iter *iter)
681 {
682         struct sk_buff *frag = iter->frag;
683         unsigned int hlen = iter->hlen;
684         struct frag_hdr *fh;
685
686         frag->ip_summed = CHECKSUM_NONE;
687         skb_reset_transport_header(frag);
688         fh = __skb_push(frag, sizeof(struct frag_hdr));
689         __skb_push(frag, hlen);
690         skb_reset_network_header(frag);
691         memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
692         iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
693         fh->nexthdr = iter->nexthdr;
694         fh->reserved = 0;
695         fh->frag_off = htons(iter->offset);
696         if (frag->next)
697                 fh->frag_off |= htons(IP6_MF);
698         fh->identification = iter->frag_id;
699         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
700         ip6_copy_metadata(frag, skb);
701 }
702 EXPORT_SYMBOL(ip6_fraglist_prepare);
703
704 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
705                    unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
706                    u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
707 {
708         state->prevhdr = prevhdr;
709         state->nexthdr = nexthdr;
710         state->frag_id = frag_id;
711
712         state->hlen = hlen;
713         state->mtu = mtu;
714
715         state->left = skb->len - hlen;  /* Space per frame */
716         state->ptr = hlen;              /* Where to start from */
717
718         state->hroom = hdr_room;
719         state->troom = needed_tailroom;
720
721         state->offset = 0;
722 }
723 EXPORT_SYMBOL(ip6_frag_init);
724
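/* Slow-path fragmentation: allocate a fresh skb for the next fragment, copy
 * the network headers and up to state->mtu bytes of payload into it, and
 * build its fragment header. Non-final fragments are trimmed to a multiple
 * of 8 bytes, as required by the fragment offset encoding.
 */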
725 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
726 {
727         u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
728         struct sk_buff *frag;
729         struct frag_hdr *fh;
730         unsigned int len;
731
732         len = state->left;
733         /* IF: it doesn't fit, use 'mtu' - the data space left */
734         if (len > state->mtu)
735                 len = state->mtu;
736         /* IF: we are not sending up to and including the packet end
737            then align the next start on an eight byte boundary */
738         if (len < state->left)
739                 len &= ~7;
740
741         /* Allocate buffer */
742         frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
743                          state->hroom + state->troom, GFP_ATOMIC);
744         if (!frag)
745                 return ERR_PTR(-ENOMEM);
746
747         /*
748          *      Set up data on packet
749          */
750
751         ip6_copy_metadata(frag, skb);
752         skb_reserve(frag, state->hroom);
753         skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
754         skb_reset_network_header(frag);
755         fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
756         frag->transport_header = (frag->network_header + state->hlen +
757                                   sizeof(struct frag_hdr));
758
759         /*
760          *      Charge the memory for the fragment to any owner
761          *      it might possess
762          */
763         if (skb->sk)
764                 skb_set_owner_w(frag, skb->sk);
765
766         /*
767          *      Copy the packet header into the new buffer.
768          */
769         skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
770
771         fragnexthdr_offset = skb_network_header(frag);
772         fragnexthdr_offset += prevhdr - skb_network_header(skb);
773         *fragnexthdr_offset = NEXTHDR_FRAGMENT;
774
775         /*
776          *      Build fragment header.
777          */
778         fh->nexthdr = state->nexthdr;
779         fh->reserved = 0;
780         fh->identification = state->frag_id;
781
782         /*
783          *      Copy a block of the IP datagram.
784          */
785         BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
786                              len));
787         state->left -= len;
788
789         fh->frag_off = htons(state->offset);
790         if (state->left > 0)
791                 fh->frag_off |= htons(IP6_MF);
792         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
793
794         state->ptr += len;
795         state->offset += len;
796
797         return frag;
798 }
799 EXPORT_SYMBOL(ip6_frag_next);
800
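/* Fragment an oversized packet and feed each fragment to @output. A packet
 * that already carries a well-formed frag_list is split along the existing
 * list members (fast path); otherwise each fragment is allocated and copied
 * individually (slow path).
 */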
801 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
802                  int (*output)(struct net *, struct sock *, struct sk_buff *))
803 {
804         struct sk_buff *frag;
805         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
806         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
807                                 inet6_sk(skb->sk) : NULL;
808         struct ip6_frag_state state;
809         unsigned int mtu, hlen, nexthdr_offset;
810         ktime_t tstamp = skb->tstamp;
811         int hroom, err = 0;
812         __be32 frag_id;
813         u8 *prevhdr, nexthdr = 0;
814
815         err = ip6_find_1stfragopt(skb, &prevhdr);
816         if (err < 0)
817                 goto fail;
818         hlen = err;
819         nexthdr = *prevhdr;
820         nexthdr_offset = prevhdr - skb_network_header(skb);
821
822         mtu = ip6_skb_dst_mtu(skb);
823
824         /* We must not fragment if the socket is set to force MTU discovery
825          * or if the skb is not generated by a local socket.
826          */
827         if (unlikely(!skb->ignore_df && skb->len > mtu))
828                 goto fail_toobig;
829
830         if (IP6CB(skb)->frag_max_size) {
831                 if (IP6CB(skb)->frag_max_size > mtu)
832                         goto fail_toobig;
833
834                 /* don't send fragments larger than what we received */
835                 mtu = IP6CB(skb)->frag_max_size;
836                 if (mtu < IPV6_MIN_MTU)
837                         mtu = IPV6_MIN_MTU;
838         }
839
840         if (np && np->frag_size < mtu) {
841                 if (np->frag_size)
842                         mtu = np->frag_size;
843         }
844         if (mtu < hlen + sizeof(struct frag_hdr) + 8)
845                 goto fail_toobig;
846         mtu -= hlen + sizeof(struct frag_hdr);
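        /* Example: with a 1500 byte MTU and a plain 40 byte IPv6 header
         * (hlen == 40), this leaves 1500 - 40 - 8 = 1452 bytes of payload per
         * fragment; ip6_frag_next() rounds non-final fragments down to a
         * multiple of 8 (1448 bytes).
         */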
847
848         frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
849                                     &ipv6_hdr(skb)->saddr);
850
851         if (skb->ip_summed == CHECKSUM_PARTIAL &&
852             (err = skb_checksum_help(skb)))
853                 goto fail;
854
855         prevhdr = skb_network_header(skb) + nexthdr_offset;
856         hroom = LL_RESERVED_SPACE(rt->dst.dev);
857         if (skb_has_frag_list(skb)) {
858                 unsigned int first_len = skb_pagelen(skb);
859                 struct ip6_fraglist_iter iter;
860                 struct sk_buff *frag2;
861
862                 if (first_len - hlen > mtu ||
863                     ((first_len - hlen) & 7) ||
864                     skb_cloned(skb) ||
865                     skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
866                         goto slow_path;
867
868                 skb_walk_frags(skb, frag) {
869                         /* Correct geometry. */
870                         if (frag->len > mtu ||
871                             ((frag->len & 7) && frag->next) ||
872                             skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
873                                 goto slow_path_clean;
874
875                         /* Partially cloned skb? */
876                         if (skb_shared(frag))
877                                 goto slow_path_clean;
878
879                         BUG_ON(frag->sk);
880                         if (skb->sk) {
881                                 frag->sk = skb->sk;
882                                 frag->destructor = sock_wfree;
883                         }
884                         skb->truesize -= frag->truesize;
885                 }
886
887                 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
888                                         &iter);
889                 if (err < 0)
890                         goto fail;
891
892                 for (;;) {
893                         /* Prepare the header of the next frame,
894                          * before the previous one goes down. */
895                         if (iter.frag)
896                                 ip6_fraglist_prepare(skb, &iter);
897
898                         skb->tstamp = tstamp;
899                         err = output(net, sk, skb);
900                         if (!err)
901                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
902                                               IPSTATS_MIB_FRAGCREATES);
903
904                         if (err || !iter.frag)
905                                 break;
906
907                         skb = ip6_fraglist_next(&iter);
908                 }
909
910                 kfree(iter.tmp_hdr);
911
912                 if (err == 0) {
913                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
914                                       IPSTATS_MIB_FRAGOKS);
915                         return 0;
916                 }
917
918                 kfree_skb_list(iter.frag);
919
920                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
921                               IPSTATS_MIB_FRAGFAILS);
922                 return err;
923
924 slow_path_clean:
925                 skb_walk_frags(skb, frag2) {
926                         if (frag2 == frag)
927                                 break;
928                         frag2->sk = NULL;
929                         frag2->destructor = NULL;
930                         skb->truesize += frag2->truesize;
931                 }
932         }
933
934 slow_path:
935         /*
936          *      Fragment the datagram.
937          */
938
939         ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
940                       LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
941                       &state);
942
943         /*
944          *      Keep copying data until we run out.
945          */
946
947         while (state.left > 0) {
948                 frag = ip6_frag_next(skb, &state);
949                 if (IS_ERR(frag)) {
950                         err = PTR_ERR(frag);
951                         goto fail;
952                 }
953
954                 /*
955                  *      Put this fragment into the sending queue.
956                  */
957                 frag->tstamp = tstamp;
958                 err = output(net, sk, frag);
959                 if (err)
960                         goto fail;
961
962                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
963                               IPSTATS_MIB_FRAGCREATES);
964         }
965         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
966                       IPSTATS_MIB_FRAGOKS);
967         consume_skb(skb);
968         return err;
969
970 fail_toobig:
971         if (skb->sk && dst_allfrag(skb_dst(skb)))
972                 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
973
974         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
975         err = -EMSGSIZE;
976
977 fail:
978         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
979                       IPSTATS_MIB_FRAGFAILS);
980         kfree_skb(skb);
981         return err;
982 }
983
984 static inline int ip6_rt_check(const struct rt6key *rt_key,
985                                const struct in6_addr *fl_addr,
986                                const struct in6_addr *addr_cache)
987 {
988         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
989                 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
990 }
991
992 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
993                                           struct dst_entry *dst,
994                                           const struct flowi6 *fl6)
995 {
996         struct ipv6_pinfo *np = inet6_sk(sk);
997         struct rt6_info *rt;
998
999         if (!dst)
1000                 goto out;
1001
1002         if (dst->ops->family != AF_INET6) {
1003                 dst_release(dst);
1004                 return NULL;
1005         }
1006
1007         rt = (struct rt6_info *)dst;
1008         /* Yes, checking route validity in the not-connected
1009          * case is not very simple. Take into account
1010          * that we do not support routing by source, TOS,
1011          * and MSG_DONTROUTE            --ANK (980726)
1012          *
1013          * 1. ip6_rt_check(): If route was host route,
1014          *    check that cached destination is current.
1015          *    If it is network route, we still may
1016          *    check its validity using saved pointer
1017          *    to the last used address: daddr_cache.
1018          *    We do not want to save the whole address now
1019          *    (because the main consumer of this service
1020          *    is tcp, which does not have this problem),
1021          *    so the last trick works only on connected
1022          *    sockets.
1023          * 2. oif also should be the same.
1024          */
1025         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1026 #ifdef CONFIG_IPV6_SUBTREES
1027             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1028 #endif
1029            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1030               (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1031                 dst_release(dst);
1032                 dst = NULL;
1033         }
1034
1035 out:
1036         return dst;
1037 }
1038
1039 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1040                                struct dst_entry **dst, struct flowi6 *fl6)
1041 {
1042 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1043         struct neighbour *n;
1044         struct rt6_info *rt;
1045 #endif
1046         int err;
1047         int flags = 0;
1048
1049         /* The correct way to handle this would be to do
1050          * ip6_route_get_saddr, and then ip6_route_output; however,
1051          * the route-specific preferred source forces the
1052          * ip6_route_output call _before_ ip6_route_get_saddr.
1053          *
1054          * In source specific routing (no src=any default route),
1055          * ip6_route_output will fail given src=any saddr, though, so
1056          * that's why we try it again later.
1057          */
1058         if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1059                 struct fib6_info *from;
1060                 struct rt6_info *rt;
1061                 bool had_dst = *dst != NULL;
1062
1063                 if (!had_dst)
1064                         *dst = ip6_route_output(net, sk, fl6);
1065                 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1066
1067                 rcu_read_lock();
1068                 from = rt ? rcu_dereference(rt->from) : NULL;
1069                 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1070                                           sk ? inet6_sk(sk)->srcprefs : 0,
1071                                           &fl6->saddr);
1072                 rcu_read_unlock();
1073
1074                 if (err)
1075                         goto out_err_release;
1076
1077                 /* If we had an erroneous initial result, pretend it
1078                  * never existed and let the SA-enabled version take
1079                  * over.
1080                  */
1081                 if (!had_dst && (*dst)->error) {
1082                         dst_release(*dst);
1083                         *dst = NULL;
1084                 }
1085
1086                 if (fl6->flowi6_oif)
1087                         flags |= RT6_LOOKUP_F_IFACE;
1088         }
1089
1090         if (!*dst)
1091                 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1092
1093         err = (*dst)->error;
1094         if (err)
1095                 goto out_err_release;
1096
1097 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1098         /*
1099          * Here, if the dst entry we've looked up
1100          * has a neighbour entry that is in the INCOMPLETE
1101          * state and the src address from the flow is
1102          * marked as OPTIMISTIC, we release the found
1103          * dst entry and replace it instead with the
1104          * dst entry of the nexthop router.
1105          */
1106         rt = (struct rt6_info *) *dst;
1107         rcu_read_lock_bh();
1108         n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1109                                       rt6_nexthop(rt, &fl6->daddr));
1110         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1111         rcu_read_unlock_bh();
1112
1113         if (err) {
1114                 struct inet6_ifaddr *ifp;
1115                 struct flowi6 fl_gw6;
1116                 int redirect;
1117
1118                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1119                                       (*dst)->dev, 1);
1120
1121                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1122                 if (ifp)
1123                         in6_ifa_put(ifp);
1124
1125                 if (redirect) {
1126                         /*
1127                          * We need to get the dst entry for the
1128                          * default router instead
1129                          */
1130                         dst_release(*dst);
1131                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1132                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1133                         *dst = ip6_route_output(net, sk, &fl_gw6);
1134                         err = (*dst)->error;
1135                         if (err)
1136                                 goto out_err_release;
1137                 }
1138         }
1139 #endif
1140         if (ipv6_addr_v4mapped(&fl6->saddr) &&
1141             !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1142                 err = -EAFNOSUPPORT;
1143                 goto out_err_release;
1144         }
1145
1146         return 0;
1147
1148 out_err_release:
1149         dst_release(*dst);
1150         *dst = NULL;
1151
1152         if (err == -ENETUNREACH)
1153                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1154         return err;
1155 }
1156
1157 /**
1158  *      ip6_dst_lookup - perform route lookup on flow
1159  *      @net: Network namespace to perform lookup in
1160  *      @sk: socket which provides route info
1161  *      @dst: pointer to dst_entry * for result
1162  *      @fl6: flow to lookup
1163  *
1164  *      This function performs a route lookup on the given flow.
1165  *
1166  *      It returns zero on success, or a standard errno code on error.
1167  */
1168 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1169                    struct flowi6 *fl6)
1170 {
1171         *dst = NULL;
1172         return ip6_dst_lookup_tail(net, sk, dst, fl6);
1173 }
1174 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1175
1176 /**
1177  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1178  *      @net: Network namespace to perform lookup in
1179  *      @sk: socket which provides route info
1180  *      @fl6: flow to lookup
1181  *      @final_dst: final destination address for ipsec lookup
1182  *
1183  *      This function performs a route lookup on the given flow.
1184  *
1185  *      It returns a valid dst pointer on success, or a pointer encoded
1186  *      error code.
1187  */
1188 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1189                                       const struct in6_addr *final_dst)
1190 {
1191         struct dst_entry *dst = NULL;
1192         int err;
1193
1194         err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1195         if (err)
1196                 return ERR_PTR(err);
1197         if (final_dst)
1198                 fl6->daddr = *final_dst;
1199
1200         return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1201 }
1202 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
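/* A minimal (hypothetical) caller sketch: build a flowi6, resolve the route,
 * and bail out on a pointer-encoded error:
 *
 *        struct dst_entry *dst;
 *
 *        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
 *        if (IS_ERR(dst))
 *                return PTR_ERR(dst);
 */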
1203
1204 /**
1205  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1206  *      @sk: socket which provides the dst cache and route info
1207  *      @fl6: flow to lookup
1208  *      @final_dst: final destination address for ipsec lookup
1209  *      @connected: whether @sk is connected or not
1210  *
1211  *      This function performs a route lookup on the given flow with the
1212  *      possibility of using the cached route in the socket if it is valid.
1213  *      It will take the socket dst lock when operating on the dst cache.
1214  *      As a result, this function can only be used in process context.
1215  *
1216  *      In addition, for a connected socket, cache the dst in the socket
1217  *      if the current cache is not valid.
1218  *
1219  *      It returns a valid dst pointer on success, or a pointer encoded
1220  *      error code.
1221  */
1222 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1223                                          const struct in6_addr *final_dst,
1224                                          bool connected)
1225 {
1226         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1227
1228         dst = ip6_sk_dst_check(sk, dst, fl6);
1229         if (dst)
1230                 return dst;
1231
1232         dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1233         if (connected && !IS_ERR(dst))
1234                 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1235
1236         return dst;
1237 }
1238 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1239
1240 /**
1241  *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1242  *      @skb: Packet for which lookup is done
1243  *      @dev: Tunnel device
1244  *      @net: Network namespace of tunnel device
1245  *      @sock: Socket which provides route info
1246  *      @saddr: Memory to store the src ip address
1247  *      @info: Tunnel information
1248  *      @protocol: IP protocol
1249  *      @use_cache: Flag to enable cache usage
1250  *      This function performs a route lookup on a tunnel.
1251  *
1252  *      On success it returns a valid dst pointer and stores the src address
1253  *      to be used in the tunnel in @saddr; on failure, a pointer encoded error code.
1254  */
1255
1256 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1257                                         struct net_device *dev,
1258                                         struct net *net,
1259                                         struct socket *sock,
1260                                         struct in6_addr *saddr,
1261                                         const struct ip_tunnel_info *info,
1262                                         u8 protocol,
1263                                         bool use_cache)
1264 {
1265         struct dst_entry *dst = NULL;
1266 #ifdef CONFIG_DST_CACHE
1267         struct dst_cache *dst_cache;
1268 #endif
1269         struct flowi6 fl6;
1270         __u8 prio;
1271
1272 #ifdef CONFIG_DST_CACHE
1273         dst_cache = (struct dst_cache *)&info->dst_cache;
1274         if (use_cache) {
1275                 dst = dst_cache_get_ip6(dst_cache, saddr);
1276                 if (dst)
1277                         return dst;
1278         }
1279 #endif
1280         memset(&fl6, 0, sizeof(fl6));
1281         fl6.flowi6_mark = skb->mark;
1282         fl6.flowi6_proto = protocol;
1283         fl6.daddr = info->key.u.ipv6.dst;
1284         fl6.saddr = info->key.u.ipv6.src;
1285         prio = info->key.tos;
1286         fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1287                                           info->key.label);
1288
1289         dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1290                                               NULL);
1291         if (IS_ERR(dst)) {
1292                 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1293                 return ERR_PTR(-ENETUNREACH);
1294         }
1295         if (dst->dev == dev) { /* is this necessary? */
1296                 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1297                 dst_release(dst);
1298                 return ERR_PTR(-ELOOP);
1299         }
1300 #ifdef CONFIG_DST_CACHE
1301         if (use_cache)
1302                 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1303 #endif
1304         *saddr = fl6.saddr;
1305         return dst;
1306 }
1307 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1308
1309 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1310                                                gfp_t gfp)
1311 {
1312         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1313 }
1314
1315 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1316                                                 gfp_t gfp)
1317 {
1318         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1319 }
1320
1321 static void ip6_append_data_mtu(unsigned int *mtu,
1322                                 int *maxfraglen,
1323                                 unsigned int fragheaderlen,
1324                                 struct sk_buff *skb,
1325                                 struct rt6_info *rt,
1326                                 unsigned int orig_mtu)
1327 {
1328         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1329                 if (!skb) {
1330                         /* first fragment, reserve header_len */
1331                         *mtu = orig_mtu - rt->dst.header_len;
1332
1333                 } else {
1334                         /*
1335                          * this fragment is not the first; the header
1336                          * space is regarded as data space.
1337                          */
1338                         *mtu = orig_mtu;
1339                 }
1340                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1341                               + fragheaderlen - sizeof(struct frag_hdr);
1342         }
1343 }
1344
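/* Prepare the per-socket cork for ip6_append_data(): take a reference on the
 * route, deep-copy the tx options so they outlive the current call, and pin
 * down the MTU that will govern how the queued data is fragmented.
 */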
1345 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1346                           struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1347                           struct rt6_info *rt, struct flowi6 *fl6)
1348 {
1349         struct ipv6_pinfo *np = inet6_sk(sk);
1350         unsigned int mtu;
1351         struct ipv6_txoptions *opt = ipc6->opt;
1352
1353         /*
1354          * setup for corking
1355          */
1356         if (opt) {
1357                 if (WARN_ON(v6_cork->opt))
1358                         return -EINVAL;
1359
1360                 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1361                 if (unlikely(!v6_cork->opt))
1362                         return -ENOBUFS;
1363
1364                 v6_cork->opt->tot_len = sizeof(*opt);
1365                 v6_cork->opt->opt_flen = opt->opt_flen;
1366                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1367
1368                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1369                                                     sk->sk_allocation);
1370                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1371                         return -ENOBUFS;
1372
1373                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1374                                                     sk->sk_allocation);
1375                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1376                         return -ENOBUFS;
1377
1378                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1379                                                    sk->sk_allocation);
1380                 if (opt->hopopt && !v6_cork->opt->hopopt)
1381                         return -ENOBUFS;
1382
1383                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1384                                                     sk->sk_allocation);
1385                 if (opt->srcrt && !v6_cork->opt->srcrt)
1386                         return -ENOBUFS;
1387
1388                 /* need source address above --miyazawa */
1389         }
1390         dst_hold(&rt->dst);
1391         cork->base.dst = &rt->dst;
1392         cork->fl.u.ip6 = *fl6;
1393         v6_cork->hop_limit = ipc6->hlimit;
1394         v6_cork->tclass = ipc6->tclass;
1395         if (rt->dst.flags & DST_XFRM_TUNNEL)
1396                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1397                       READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1398         else
1399                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1400                         READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1401         if (np->frag_size < mtu) {
1402                 if (np->frag_size)
1403                         mtu = np->frag_size;
1404         }
1405         if (mtu < IPV6_MIN_MTU)
1406                 return -EINVAL;
1407         cork->base.fragsize = mtu;
1408         cork->base.gso_size = ipc6->gso_size;
1409         cork->base.tx_flags = 0;
1410         cork->base.mark = ipc6->sockc.mark;
1411         sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1412
1413         if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1414                 cork->base.flags |= IPCORK_ALLFRAG;
1415         cork->base.length = 0;
1416
1417         cork->base.transmit_time = ipc6->sockc.transmit_time;
1418
1419         return 0;
1420 }
1421
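/* Core of ip6_append_data(): copy user data into the socket's cork queue,
 * growing the tail skb or starting a new one whenever the fragment size
 * boundary is reached, so that the final transmit can send ready-made
 * fragments.
 */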
1422 static int __ip6_append_data(struct sock *sk,
1423                              struct flowi6 *fl6,
1424                              struct sk_buff_head *queue,
1425                              struct inet_cork *cork,
1426                              struct inet6_cork *v6_cork,
1427                              struct page_frag *pfrag,
1428                              int getfrag(void *from, char *to, int offset,
1429                                          int len, int odd, struct sk_buff *skb),
1430                              void *from, int length, int transhdrlen,
1431                              unsigned int flags, struct ipcm6_cookie *ipc6)
1432 {
1433         struct sk_buff *skb, *skb_prev = NULL;
1434         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1435         struct ubuf_info *uarg = NULL;
1436         int exthdrlen = 0;
1437         int dst_exthdrlen = 0;
1438         int hh_len;
1439         int copy;
1440         int err;
1441         int offset = 0;
1442         u32 tskey = 0;
1443         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1444         struct ipv6_txoptions *opt = v6_cork->opt;
1445         int csummode = CHECKSUM_NONE;
1446         unsigned int maxnonfragsize, headersize;
1447         unsigned int wmem_alloc_delta = 0;
1448         bool paged, extra_uref = false;
1449
1450         skb = skb_peek_tail(queue);
1451         if (!skb) {
1452                 exthdrlen = opt ? opt->opt_flen : 0;
1453                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1454         }
1455
1456         paged = !!cork->gso_size;
1457         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1458         orig_mtu = mtu;
1459
1460         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1461             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1462                 tskey = sk->sk_tskey++;
1463
1464         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1465
1466         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1467                         (opt ? opt->opt_nflen : 0);
1468         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1469                      sizeof(struct frag_hdr);
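
        /* Worked example (an illustration, not from the original source):
         * with an Ethernet-sized mtu of 1500 and no extension headers,
         * fragheaderlen = 40 (bare IPv6 header), so
         * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1456 + 40 - 8 = 1488.
         * Each non-final fragment then carries 1448 bytes of payload,
         * keeping the fragmentable part a multiple of 8 octets as the
         * fragment header format requires.
         */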

        headersize = sizeof(struct ipv6hdr) +
                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
                     (dst_allfrag(&rt->dst) ?
                      sizeof(struct frag_hdr) : 0) +
                     rt->rt6i_nfheader_len;

        /* As per RFC 7112 section 5, the entire IPv6 Header Chain must fit
         * in the first fragment.
         */
        if (headersize + transhdrlen > mtu)
                goto emsgsize;

        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
            (sk->sk_protocol == IPPROTO_UDP ||
             sk->sk_protocol == IPPROTO_RAW)) {
                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
                                sizeof(struct ipv6hdr));
                goto emsgsize;
        }

        if (ip6_sk_ignore_df(sk))
                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
        else
                maxnonfragsize = mtu;

        if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
                return -EMSGSIZE;
        }

        /* CHECKSUM_PARTIAL only with no extension headers and when
         * we are not going to fragment
         */
        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            headersize == sizeof(struct ipv6hdr) &&
            length <= mtu - headersize &&
            (!(flags & MSG_MORE) || cork->gso_size) &&
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;
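
        /* In other words: checksum offload is only requested on the fast
         * path - a fresh transport header (new corked queue), a bare IPv6
         * header with no extension headers, no fragmentation ahead (the
         * data fits in one packet, or GSO is in use), and a device that
         * advertises IPv6 or generic checksum offload.  Anything else
         * falls back to a software checksum (CHECKSUM_NONE).
         */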

        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
                uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
                if (!uarg)
                        return -ENOBUFS;
                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
                if (rt->dst.dev->features & NETIF_F_SG &&
                    csummode == CHECKSUM_PARTIAL) {
                        paged = true;
                } else {
                        uarg->zerocopy = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);
                }
        }
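
        /* For reference, the userspace side of this path (a minimal
         * sketch assuming a connected UDP socket "fd"; see
         * Documentation/networking/msg_zerocopy.rst):
         *
         *	int one = 1;
         *
         *	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
         *	send(fd, buf, len, MSG_ZEROCOPY);
         *
         * Completion notifications then arrive on the socket error queue
         * via recvmsg() with MSG_ERRQUEUE.
         */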

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (!skb)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu &&
                        !(cork->flags & IPCORK_ALLFRAG) ?
                        mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen, alloc_extra;
                        unsigned int pagedlen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (!skb || !skb_prev)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt,
                                                    orig_mtu);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu &&
                                       !(cork->flags & IPCORK_ALLFRAG) ?
                                       mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen -
                                          rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;
                        pagedlen = 0;

                        alloc_extra = hh_len;
                        alloc_extra += dst_exthdrlen;
                        alloc_extra += rt->dst.trailer_len;

                        /* We just reserve space for the fragment header.
                         * Note: this may over-allocate if the message
                         * (sent without MSG_MORE) fits into the MTU.
                         */
                        alloc_extra += sizeof(struct frag_hdr);

                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else if (!paged &&
                                 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
                                  !(rt->dst.dev->features & NETIF_F_SG)))
                                alloclen = fraglen;
                        else {
                                alloclen = min_t(int, fraglen, MAX_HEADER);
                                pagedlen = fraglen - alloclen;
                        }
                        alloclen += alloc_extra;
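
                        /* Why the SKB_MAX_ALLOC cutoff above matters: on a
                         * scatter/gather device, a fragment whose linear
                         * buffer would exceed SKB_MAX_ALLOC gets only a
                         * small linear part (at most MAX_HEADER bytes);
                         * the remaining "pagedlen" bytes are fed into page
                         * frags by the copy loop below.  This keeps large
                         * sends over big-MTU devices such as loopback
                         * (MTU 65536) from kmalloc'ing ~64KB per skb - the
                         * allocation pattern behind the OOM kills this
                         * change avoids.
                         */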

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment, the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        fraglen = datalen + fragheaderlen;

                        copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
                        }
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk, alloclen,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
                                    2 * sk->sk_sndbuf)
                                        skb = alloc_skb(alloclen,
                                                        sk->sk_allocation);
                                if (unlikely(!skb))
                                        err = -ENOBUFS;
                        }
                        if (!skb)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->protocol = htons(ETH_P_IPV6);
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        if (copy > 0 &&
                            getfrag(from, data + transhdrlen, offset,
                                    copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /* Only the initial fragment is time stamped */
                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
                        cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);

                        if ((flags & MSG_CONFIRM) && !skb_prev)
                                skb_set_dst_pending_confirm(skb, 1);

                        /*
                         * Put the packet on the pending queue
                         */
                        if (!skb->destructor) {
                                skb->destructor = sock_wfree;
                                skb->sk = sk;
                                wmem_alloc_delta += skb->truesize;
                        }
                        __skb_queue_tail(queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG) &&
                    skb_tailroom(skb) >= copy) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                                offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else if (!uarg || !uarg->zerocopy) {
                        int i = skb_shinfo(skb)->nr_frags;

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        wmem_alloc_delta += copy;
                } else {
                        err = skb_zerocopy_iter_dgram(skb, from, copy);
                        if (err < 0)
                                goto error;
                }
                offset += copy;
                length -= copy;
        }

        if (wmem_alloc_delta)
                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return 0;

error_efault:
        err = -EFAULT;
error:
        net_zcopy_put_abort(uarg, extra_uref);
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return err;
}
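
/*
 * A note on the write-memory accounting above: skbs allocated with
 * alloc_skb() (sock_alloc_send_skb() charges the first one itself) and
 * all page-frag bytes are accumulated in the local wmem_alloc_delta and
 * charged to sk->sk_wmem_alloc with a single refcount_add() on the way
 * out - on the error path too, since queued skbs are uncharged later by
 * their sock_wfree() destructor.  Batching avoids an atomic op per copy.
 */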

int ip6_append_data(struct sock *sk,
                    int getfrag(void *from, char *to, int offset, int len,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        int exthdrlen;
        int err;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
                                     ipc6, rt, fl6);
                if (err)
                        return err;

                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                fl6 = &inet->cork.fl.u.ip6;
                transhdrlen = 0;
        }

        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
                                 &np->cork, sk_page_frag(sk), getfrag,
                                 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
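
/*
 * Typical use of the corking API (a minimal sketch modelled on
 * udpv6_sendmsg(); "ulen" and "dst" stand in for the caller's own state,
 * and ip_generic_getfrag() copies from the user iovec):
 *
 *	err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen,
 *			      sizeof(struct udphdr), &ipc6, &fl6,
 *			      (struct rt6_info *)dst, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * (UDP itself flushes and pushes through wrappers that also build the
 * UDP header, but the shape of the calls is the same.)
 */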

static void ip6_cork_release(struct inet_cork_full *cork,
                             struct inet6_cork *v6_cork)
{
        if (v6_cork->opt) {
                kfree(v6_cork->opt->dst0opt);
                kfree(v6_cork->opt->dst1opt);
                kfree(v6_cork->opt->hopopt);
                kfree(v6_cork->opt->srcrt);
                kfree(v6_cork->opt);
                v6_cork->opt = NULL;
        }

        if (cork->base.dst) {
                dst_release(cork->base.dst);
                cork->base.dst = NULL;
                cork->base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
                               struct sk_buff_head *queue,
                               struct inet_cork_full *cork,
                               struct inet6_cork *v6_cork)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = v6_cork->opt;
        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
        struct flowi6 *fl6 = &cork->fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;

        skb = __skb_dequeue(queue);
        if (!skb)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data from the extension header area up to the IP header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        skb->ignore_df = ip6_sk_ignore_df(sk);

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                        ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = cork->base.mark;

        skb->tstamp = cork->base.transmit_time;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }

        ip6_cork_release(cork, v6_cork);
out:
        return skb;
}
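
/*
 * Resulting layout (illustrative): the pending queue collapses into one
 * skb whose frag_list chains the remaining fragments, fronted by a
 * freshly built IPv6 header:
 *
 *	[ IPv6 hdr | nfrag opts | payload ] -> frag_list: [frag] -> [frag]
 *
 * If on-the-wire fragmentation turns out to be needed, ip6_fragment()
 * later reuses this frag_list on its fast path.
 */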

int ip6_send_skb(struct sk_buff *skb)
{
        struct net *net = sock_net(skb->sk);
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;

        err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        IP6_INC_STATS(net, rt->rt6i_idev,
                                      IPSTATS_MIB_OUTDISCARDS);
        }

        return err;
}
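
/*
 * The error mapping above: ip6_local_out() may return positive NET_XMIT_*
 * codes from the queueing layer.  net_xmit_errno() turns NET_XMIT_CN
 * (congestion notification) into 0, since the packet may well have been
 * sent, and other positive codes such as NET_XMIT_DROP into -ENOBUFS,
 * which is what the sender sees.
 */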

int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        skb = ip6_finish_skb(sk);
        if (!skb)
                return 0;

        return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
                                       struct sk_buff_head *queue,
                                       struct inet_cork_full *cork,
                                       struct inet6_cork *v6_cork)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
                             struct inet_cork_full *cork)
{
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
        int err;

        if (flags & MSG_PROBE)
                return NULL;

        __skb_queue_head_init(&queue);

        cork->base.flags = 0;
        cork->base.addr = 0;
        cork->base.opt = NULL;
        cork->base.dst = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
                ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
        }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;

        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6);
        if (err) {
                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
        }

        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
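
/*
 * ip6_make_skb() is the uncorked single-shot path: it works on a private
 * queue and caller-supplied cork state instead of sk_write_queue, so no
 * pending state lingers on the socket.  A caller sketch in the style of
 * udpv6_sendmsg() ("ulen", "dst" and udp_v6_send_skb() stand in for the
 * caller's own state and transmit helper):
 *
 *	struct inet_cork_full cork;
 *
 *	skb = ip6_make_skb(sk, ip_generic_getfrag, msg, ulen,
 *			   sizeof(struct udphdr), &ipc6, &fl6,
 *			   (struct rt6_info *)dst, msg->msg_flags, &cork);
 *	err = PTR_ERR(skb);
 *	if (!IS_ERR_OR_NULL(skb))
 *		err = udp_v6_send_skb(skb, &fl6, &cork);
 */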