net/ipv6/ip6_output.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        const struct in6_addr *daddr, *nexthop;
        struct ipv6hdr *hdr;
        struct neighbour *neigh;
        int ret;

        /* Be paranoid, rather than too clever. */
        if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
                skb = skb_expand_head(skb, hh_len);
                if (!skb) {
                        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOMEM;
                }
        }

        hdr = ipv6_hdr(skb);
        daddr = &hdr->daddr;
        if (ipv6_addr_is_multicast(daddr)) {
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (hdr->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
                if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

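        /* Resolve the next hop to a neighbour entry under RCU-BH.  If no
         * cached entry exists yet, create one; neigh_output() then either
         * transmits the frame or queues it until neighbour discovery
         * resolves the link-layer address.
         */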
        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
        neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
                                    struct sk_buff *skb, unsigned int mtu)
{
        struct sk_buff *segs, *nskb;
        netdev_features_t features;
        int ret = 0;

        /* Please see corresponding comment in ip_finish_output_gso
         * describing the cases where GSO segment length exceeds the
         * egress MTU.
         */
        features = netif_skb_features(skb);
        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
        if (IS_ERR_OR_NULL(segs)) {
                kfree_skb(skb);
                return -ENOMEM;
        }

        consume_skb(skb);

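        /* Fragment each resulting segment individually; remember the first
         * error but keep sending the remaining segments.
         */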
        skb_list_walk_safe(segs, segs, nskb) {
                int err;

                skb_mark_not_on_list(segs);
                err = ip6_fragment(net, sk, segs, ip6_finish_output2);
                if (err && ret == 0)
                        ret = err;
        }

        return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IP6CB(skb)->flags |= IP6SKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        mtu = ip6_skb_dst_mtu(skb);
        if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
                return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

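        /* Fragment when the packet exceeds the MTU and cannot be segmented,
         * when the route demands fragmentation of all packets (dst_allfrag),
         * or when connection tracking saw incoming fragments smaller than
         * the packet we are about to send.
         */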
        if ((skb->len > mtu && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

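        /* Give any attached cgroup BPF egress program a chance to veto the
         * packet.  NET_XMIT_CN lets the packet through but reports
         * congestion to the caller; the "?:" below preserves the CN verdict
         * unless transmission itself fails.
         */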
        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

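/* ip6_output() is installed as the dst output callback for IPv6 routes:
 * both locally generated and forwarded packets pass through here.  It runs
 * the netfilter POST_ROUTING hook unless an XFRM reroute already did so
 * (IP6SKB_REROUTED).
 */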
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the socket
 * might still be modified by calls to skb_set_owner_w() and
 * ipv6_local_error(), which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(head_room > skb_headroom(skb))) {
                skb = skb_expand_head(skb, head_room);
                if (!skb) {
                        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOBUFS;
                }
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = priority;
        skb->mark = mark;

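        /* No fragmentation on this path: a packet that exceeds the MTU and
         * may not ignore it is bounced back to the caller with EMSGSIZE
         * below instead of being fragmented here.
         */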
        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dev,
                               dst_output);
        }

        skb->dev = dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

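/* A minimal call sketch (not from this file): a transport protocol that
 * has already built the packet and a matching flowi6 would transmit it
 * with something like
 *
 *	err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, tclass, prio);
 *
 * where opt is an optional ipv6_txoptions; the skb is always consumed,
 * whether the call succeeds or fails.
 */

/* Deliver a Router Alert packet to every raw socket registered for this
 * alert value: each matching socket but the last receives a clone, the
 * last one receives the original skb.  Returns 1 when the packet was
 * delivered (and thus consumed), 0 otherwise.
 */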
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* A unicast neighbour discovery message destined
                         * to the proxied address is passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

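/* Forwarding path: validate the packet (hop limit, source address class,
 * MTU), emit whatever ICMPv6 errors or redirects the checks call for,
 * decrement the hop limit and hand the packet to the netfilter FORWARD
 * hook, with ip6_forward_finish() doing the final dst_output().
 */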
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        struct inet6_dev *idev;
        u32 mtu;

        idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!net->ipv6.devconf_all->disable_policy &&
            !idev->cnf.disable_policy &&
            !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT do any processing on RA packets:
         *      we push them to user level AS IS, without any
         *      WARRANTY that the application will be able to
         *      interpret them. The reason is that we cannot
         *      do anything clever here.
         *
         *      We are not an end node, so if the packet
         *      contains AH/ESP we cannot do anything with it.
         *      Defragmentation would also be a mistake; RA packets
         *      cannot be fragmented, because there is no guarantee
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0) {
                        hdr->hop_limit--;
                        return ip6_input(skb);
                } else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same:
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_maybe_forward(dst, true);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Decrementing the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

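/* Fast-path fragmentation: when the skb already carries a list of
 * suitably sized buffers in frag_list, each list member becomes one
 * fragment.  ip6_fraglist_init() saves a copy of the unfragmentable
 * header chain and turns the head skb into the first fragment;
 * ip6_fraglist_prepare() then grafts that header plus a fragment
 * header onto each following buffer.
 */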
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

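/* Slow-path fragmentation: allocate a fresh skb for each fragment and
 * copy the header chain and the next chunk of payload into it.
 */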
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end
         * then align the next start on an eight byte boundary
         */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb was not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
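        /* Try the fast path first: it is usable only if the head and every
         * buffer on the frag_list already have fragment-friendly geometry
         * (sizes within the MTU, multiples of 8 bytes except for the tail,
         * enough headroom) and none of them are shared or cloned.
         */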
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one has gone down.
                         */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_gso_disable(skb->sk);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

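/* Returns nonzero when the cached route cannot be validated for this flow
 * address: the route is not a host route matching the address, and the
 * socket's saved address cache does not vouch for it either.
 */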
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the not connected
         * case is not very simple. Take into account that
         * we do not support routing by source, TOS, or
         * MSG_DONTROUTE		--ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route,
         *    check that the cached destination is current.
         *    If it is a network route, we still may
         *    check its validity using the saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save the whole address now
         *    (because the main consumer of this service
         *    is TCP, which does not have this problem),
         *    so the last trick works only on connected
         *    sockets.
         * 2. oif should also be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr)) {
                struct fib6_info *from;
                struct rt6_info *rt;

                *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if ((*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router.
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
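
/* A minimal call sketch (not from this file): a datagram sendmsg path
 * would typically do
 *
 *	dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *
 * reusing the socket's cached route when it is still valid for fl6.
 */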

/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: Packet for which lookup is done
 *	@dev: Tunnel device
 *	@net: Network namespace of tunnel device
 *	@sock: Socket which provides route info
 *	@saddr: Memory to store the src ip address
 *	@info: Tunnel information
 *	@protocol: IP protocol
 *	@use_cache: Flag to enable cache usage
 *
 *	This function performs a route lookup on a tunnel.
 *
 *	It returns a valid dst pointer and stores the src address to be
 *	used in the tunnel in *saddr on success, else a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

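/* Extension headers encode their length in 8-octet units, not counting
 * the first 8 octets, so a header with hdrlen h occupies (h + 1) * 8
 * bytes; that is the size duplicated below.
 */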
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first; the header
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above --miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        cork->base.mark = ipc6->sockc.mark;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

static int __ip6_append_data(struct sock *sk,
                             struct flowi6 *fl6,
                             struct sk_buff_head *queue,
                             struct inet_cork *cork,
                             struct inet6_cork *v6_cork,
                             struct page_frag *pfrag,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             unsigned int flags, struct ipcm6_cookie *ipc6)
{
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
        struct ubuf_info *uarg = NULL;
        int exthdrlen = 0;
        int dst_exthdrlen = 0;
        int hh_len;
        int copy;
        int err;
        int offset = 0;
        u32 tskey = 0;
        struct rt6_info *rt = (struct rt6_info *)cork->dst;
        struct ipv6_txoptions *opt = v6_cork->opt;
        int csummode = CHECKSUM_NONE;
        unsigned int maxnonfragsize, headersize;
        unsigned int wmem_alloc_delta = 0;
        bool paged, extra_uref = false;

        skb = skb_peek_tail(queue);
        if (!skb) {
                exthdrlen = opt ? opt->opt_flen : 0;
                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        }

        paged = !!cork->gso_size;
        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
        orig_mtu = mtu;

        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
                tskey = sk->sk_tskey++;

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

1472         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1473                         (opt ? opt->opt_nflen : 0);
1474         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1475                      sizeof(struct frag_hdr);
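	/* Worked example, assuming a 1500-byte MTU and no extension
	 * headers: fragheaderlen = 40 (bare IPv6 header), so
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1456 + 32 = 1488.
	 * Adding the 8-byte fragment header back, each non-final
	 * fragment is 1496 bytes on the wire and its fragmentable part
	 * (1448 bytes) is a multiple of 8, as the fragment offset field
	 * requires.
	 */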
1476
1477         headersize = sizeof(struct ipv6hdr) +
1478                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1479                      (dst_allfrag(&rt->dst) ?
1480                       sizeof(struct frag_hdr) : 0) +
1481                      rt->rt6i_nfheader_len;
1482
1483         /* As per RFC 7112 section 5, the entire IPv6 header chain must
1484          * fit in the first fragment.
1485          */
1486         if (headersize + transhdrlen > mtu)
1487                 goto emsgsize;
1488
1489         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1490             (sk->sk_protocol == IPPROTO_UDP ||
1491              sk->sk_protocol == IPPROTO_RAW)) {
1492                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1493                                 sizeof(struct ipv6hdr));
1494                 goto emsgsize;
1495         }
1496
1497         if (ip6_sk_ignore_df(sk))
1498                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1499         else
1500                 maxnonfragsize = mtu;
1501
1502         if (cork->length + length > maxnonfragsize - headersize) {
1503 emsgsize:
1504                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1505                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1506                 return -EMSGSIZE;
1507         }
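	/* The MTU advertised with the EMSGSIZE error is what would fit
	 * once the extension headers are accounted for: e.g. with
	 * mtu = 1500 and a 24-byte routing header (headersize = 64),
	 * the application is told 1500 - 64 + 40 = 1476.
	 */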
1508
1509         /* CHECKSUM_PARTIAL is only possible with no extension headers
1510          * and when we are not going to fragment.
1511          */
1512         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1513             headersize == sizeof(struct ipv6hdr) &&
1514             length <= mtu - headersize &&
1515             (!(flags & MSG_MORE) || cork->gso_size) &&
1516             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1517                 csummode = CHECKSUM_PARTIAL;
1518
1519         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1520                 uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
1521                 if (!uarg)
1522                         return -ENOBUFS;
1523                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1524                 if (rt->dst.dev->features & NETIF_F_SG &&
1525                     csummode == CHECKSUM_PARTIAL) {
1526                         paged = true;
1527                 } else {
1528                         uarg->zerocopy = 0;
1529                         skb_zcopy_set(skb, uarg, &extra_uref);
1530                 }
1531         }
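	/* Note on the zerocopy path above: the copy is only truly
	 * avoided when the device supports scatter-gather and the
	 * packet can use CHECKSUM_PARTIAL; otherwise uarg is kept for
	 * the completion notification, but uarg->zerocopy = 0 forces
	 * the data to be copied as usual.
	 */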
1532
1533         /*
1534          * Let's try using as much space as possible.
1535          * Use MTU if total length of the message fits into the MTU.
1536          * Otherwise, we need to reserve fragment header and
1537          * fragment alignment (= 8-15 octets, in total).
1538          *
1539          * Note that we may need to "move" the data from the tail
1540          * of the buffer to the new fragment when we split
1541          * the message.
1542          *
1543          * FIXME: It may be fragmented into multiple chunks
1544          *        at once if non-fragmentable extension headers
1545          *        are too large.
1546          * --yoshfuji
1547          */
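	/* Worked example of the tail move mentioned above: if the
	 * first skb was filled to the full 1500-byte MTU in the hope
	 * that the message would fit in a single packet and more data
	 * then arrives, fraggap = 1500 - 1488 = 12; those 12 bytes are
	 * checksum-copied into the head of the new fragment and the
	 * previous skb is trimmed back to maxfraglen (see the fraggap
	 * handling below).
	 */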
1548
1549         cork->length += length;
1550         if (!skb)
1551                 goto alloc_new_skb;
1552
1553         while (length > 0) {
1554                 /* Check if the remaining data fits into the current packet. */
1555                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1556                 if (copy < length)
1557                         copy = maxfraglen - skb->len;
1558
1559                 if (copy <= 0) {
1560                         char *data;
1561                         unsigned int datalen;
1562                         unsigned int fraglen;
1563                         unsigned int fraggap;
1564                         unsigned int alloclen, alloc_extra;
1565                         unsigned int pagedlen;
1566 alloc_new_skb:
1567                         /* There's no room in the current skb */
1568                         if (skb)
1569                                 fraggap = skb->len - maxfraglen;
1570                         else
1571                                 fraggap = 0;
1572                         /* update mtu and maxfraglen if necessary */
1573                         if (!skb || !skb_prev)
1574                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1575                                                     fragheaderlen, skb, rt,
1576                                                     orig_mtu);
1577
1578                         skb_prev = skb;
1579
1580                         /*
1581                          * If the remaining data exceeds the MTU,
1582                          * we know we need more fragment(s).
1583                          */
1584                         datalen = length + fraggap;
1585
1586                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1587                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1588                         fraglen = datalen + fragheaderlen;
1589                         pagedlen = 0;
1590
1591                         alloc_extra = hh_len;
1592                         alloc_extra += dst_exthdrlen;
1593                         alloc_extra += rt->dst.trailer_len;
1594
1595                         /* We just reserve space for the fragment header.
1596                          * Note: this may be an overallocation if the message
1597                          * (without MSG_MORE) fits into the MTU.
1598                          */
1599                         alloc_extra += sizeof(struct frag_hdr);
1600
1601                         if ((flags & MSG_MORE) &&
1602                             !(rt->dst.dev->features & NETIF_F_SG))
1603                                 alloclen = mtu;
1604                         else if (!paged &&
1605                                  (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1606                                   !(rt->dst.dev->features & NETIF_F_SG)))
1607                                 alloclen = fraglen;
1608                         else {
1609                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1610                                 pagedlen = fraglen - alloclen;
1611                         }
1612                         alloclen += alloc_extra;
1613
1614                         if (datalen != length + fraggap) {
1615                                 /*
1616                                  * This is not the last fragment; the trailer
1617                                  * space is regarded as data space.
1618                                  */
1619                                 datalen += rt->dst.trailer_len;
1620                         }
1621
1622                         fraglen = datalen + fragheaderlen;
1623
1624                         copy = datalen - transhdrlen - fraggap - pagedlen;
1625                         if (copy < 0) {
1626                                 err = -EINVAL;
1627                                 goto error;
1628                         }
1629                         if (transhdrlen) {
1630                                 skb = sock_alloc_send_skb(sk, alloclen,
1631                                                 (flags & MSG_DONTWAIT), &err);
1632                         } else {
1633                                 skb = NULL;
1634                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1635                                     2 * sk->sk_sndbuf)
1636                                         skb = alloc_skb(alloclen,
1637                                                         sk->sk_allocation);
1638                                 if (unlikely(!skb))
1639                                         err = -ENOBUFS;
1640                         }
1641                         if (!skb)
1642                                 goto error;
1643                         /*
1644                          *      Fill in the control structures
1645                          */
1646                         skb->protocol = htons(ETH_P_IPV6);
1647                         skb->ip_summed = csummode;
1648                         skb->csum = 0;
1649                         /* reserve room for the fragment and IPsec headers */
1650                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1651                                     dst_exthdrlen);
1652
1653                         /*
1654                          *      Find where to start putting bytes
1655                          */
1656                         data = skb_put(skb, fraglen - pagedlen);
1657                         skb_set_network_header(skb, exthdrlen);
1658                         data += fragheaderlen;
1659                         skb->transport_header = (skb->network_header +
1660                                                  fragheaderlen);
1661                         if (fraggap) {
1662                                 skb->csum = skb_copy_and_csum_bits(
1663                                         skb_prev, maxfraglen,
1664                                         data + transhdrlen, fraggap);
1665                                 skb_prev->csum = csum_sub(skb_prev->csum,
1666                                                           skb->csum);
1667                                 data += fraggap;
1668                                 pskb_trim_unique(skb_prev, maxfraglen);
1669                         }
1670                         if (copy > 0 &&
1671                             getfrag(from, data + transhdrlen, offset,
1672                                     copy, fraggap, skb) < 0) {
1673                                 err = -EFAULT;
1674                                 kfree_skb(skb);
1675                                 goto error;
1676                         }
1677
1678                         offset += copy;
1679                         length -= copy + transhdrlen;
1680                         transhdrlen = 0;
1681                         exthdrlen = 0;
1682                         dst_exthdrlen = 0;
1683
1684                         /* Only the initial fragment is timestamped */
1685                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1686                         cork->tx_flags = 0;
1687                         skb_shinfo(skb)->tskey = tskey;
1688                         tskey = 0;
1689                         skb_zcopy_set(skb, uarg, &extra_uref);
1690
1691                         if ((flags & MSG_CONFIRM) && !skb_prev)
1692                                 skb_set_dst_pending_confirm(skb, 1);
1693
1694                         /*
1695                          * Put the packet on the pending queue
1696                          */
1697                         if (!skb->destructor) {
1698                                 skb->destructor = sock_wfree;
1699                                 skb->sk = sk;
1700                                 wmem_alloc_delta += skb->truesize;
1701                         }
1702                         __skb_queue_tail(queue, skb);
1703                         continue;
1704                 }
1705
1706                 if (copy > length)
1707                         copy = length;
1708
1709                 if (!(rt->dst.dev->features & NETIF_F_SG) &&
1710                     skb_tailroom(skb) >= copy) {
1711                         unsigned int off;
1712
1713                         off = skb->len;
1714                         if (getfrag(from, skb_put(skb, copy),
1715                                                 offset, copy, off, skb) < 0) {
1716                                 __skb_trim(skb, off);
1717                                 err = -EFAULT;
1718                                 goto error;
1719                         }
1720                 } else if (!uarg || !uarg->zerocopy) {
1721                         int i = skb_shinfo(skb)->nr_frags;
1722
1723                         err = -ENOMEM;
1724                         if (!sk_page_frag_refill(sk, pfrag))
1725                                 goto error;
1726
1727                         if (!skb_can_coalesce(skb, i, pfrag->page,
1728                                               pfrag->offset)) {
1729                                 err = -EMSGSIZE;
1730                                 if (i == MAX_SKB_FRAGS)
1731                                         goto error;
1732
1733                                 __skb_fill_page_desc(skb, i, pfrag->page,
1734                                                      pfrag->offset, 0);
1735                                 skb_shinfo(skb)->nr_frags = ++i;
1736                                 get_page(pfrag->page);
1737                         }
1738                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1739                         if (getfrag(from,
1740                                     page_address(pfrag->page) + pfrag->offset,
1741                                     offset, copy, skb->len, skb) < 0)
1742                                 goto error_efault;
1743
1744                         pfrag->offset += copy;
1745                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1746                         skb->len += copy;
1747                         skb->data_len += copy;
1748                         skb->truesize += copy;
1749                         wmem_alloc_delta += copy;
1750                 } else {
1751                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1752                         if (err < 0)
1753                                 goto error;
1754                 }
1755                 offset += copy;
1756                 length -= copy;
1757         }
1758
1759         if (wmem_alloc_delta)
1760                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1761         return 0;
1762
1763 error_efault:
1764         err = -EFAULT;
1765 error:
1766         net_zcopy_put_abort(uarg, extra_uref);
1767         cork->length -= length;
1768         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1769         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1770         return err;
1771 }
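/* The getfrag() callback supplies the payload: it copies 'len' bytes,
 * starting at 'offset' within 'from', into 'to', and folds them into
 * skb->csum when the skb is not CHECKSUM_PARTIAL ('odd' is the byte
 * offset of 'to' within the skb, needed to rotate the checksum).  A
 * minimal sketch for a flat kernel buffer -- flat_getfrag() is
 * hypothetical, for illustration only:
 *
 *	static int flat_getfrag(void *from, char *to, int offset,
 *				int len, int odd, struct sk_buff *skb)
 *	{
 *		memcpy(to, (char *)from + offset, len);
 *		if (skb->ip_summed != CHECKSUM_PARTIAL)
 *			skb->csum = csum_block_add(skb->csum,
 *						   csum_partial(to, len, 0),
 *						   odd);
 *		return 0;
 *	}
 *
 * Callers copying from userspace normally pass ip_generic_getfrag(),
 * which does the same from a struct msghdr iterator.
 */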
1772
1773 int ip6_append_data(struct sock *sk,
1774                     int getfrag(void *from, char *to, int offset, int len,
1775                                 int odd, struct sk_buff *skb),
1776                     void *from, int length, int transhdrlen,
1777                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1778                     struct rt6_info *rt, unsigned int flags)
1779 {
1780         struct inet_sock *inet = inet_sk(sk);
1781         struct ipv6_pinfo *np = inet6_sk(sk);
1782         int exthdrlen;
1783         int err;
1784
1785         if (flags & MSG_PROBE)
1786                 return 0;
1787         if (skb_queue_empty(&sk->sk_write_queue)) {
1788                 /*
1789                  * Set up for corking.
1790                  */
1791                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1792                                      ipc6, rt, fl6);
1793                 if (err)
1794                         return err;
1795
1796                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1797                 length += exthdrlen;
1798                 transhdrlen += exthdrlen;
1799         } else {
1800                 fl6 = &inet->cork.fl.u.ip6;
1801                 transhdrlen = 0;
1802         }
1803
1804         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1805                                  &np->cork, sk_page_frag(sk), getfrag,
1806                                  from, length, transhdrlen, flags, ipc6);
1807 }
1808 EXPORT_SYMBOL_GPL(ip6_append_data);
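/* Typical corked usage, modeled loosely on rawv6_sendmsg() (a sketch
 * with error handling and protocol-header details elided):
 *
 *	err = ip6_append_data(sk, ip_generic_getfrag, msg, len, 0,
 *			      &ipc6, &fl6, (struct rt6_info *)dst,
 *			      msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * The first call sets up the cork; subsequent MSG_MORE calls keep
 * appending to sk->sk_write_queue until the frames are pushed (or
 * flushed on error).
 */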
1809
1810 static void ip6_cork_release(struct inet_cork_full *cork,
1811                              struct inet6_cork *v6_cork)
1812 {
1813         if (v6_cork->opt) {
1814                 kfree(v6_cork->opt->dst0opt);
1815                 kfree(v6_cork->opt->dst1opt);
1816                 kfree(v6_cork->opt->hopopt);
1817                 kfree(v6_cork->opt->srcrt);
1818                 kfree(v6_cork->opt);
1819                 v6_cork->opt = NULL;
1820         }
1821
1822         if (cork->base.dst) {
1823                 dst_release(cork->base.dst);
1824                 cork->base.dst = NULL;
1825                 cork->base.flags &= ~IPCORK_ALLFRAG;
1826         }
1827         memset(&cork->fl, 0, sizeof(cork->fl));
1828 }
1829
1830 struct sk_buff *__ip6_make_skb(struct sock *sk,
1831                                struct sk_buff_head *queue,
1832                                struct inet_cork_full *cork,
1833                                struct inet6_cork *v6_cork)
1834 {
1835         struct sk_buff *skb, *tmp_skb;
1836         struct sk_buff **tail_skb;
1837         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1838         struct ipv6_pinfo *np = inet6_sk(sk);
1839         struct net *net = sock_net(sk);
1840         struct ipv6hdr *hdr;
1841         struct ipv6_txoptions *opt = v6_cork->opt;
1842         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1843         struct flowi6 *fl6 = &cork->fl.u.ip6;
1844         unsigned char proto = fl6->flowi6_proto;
1845
1846         skb = __skb_dequeue(queue);
1847         if (!skb)
1848                 goto out;
1849         tail_skb = &(skb_shinfo(skb)->frag_list);
1850
1851         /* Move skb->data up to the IP header, past the extension-header space */
1852         if (skb->data < skb_network_header(skb))
1853                 __skb_pull(skb, skb_network_offset(skb));
1854         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1855                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1856                 *tail_skb = tmp_skb;
1857                 tail_skb = &(tmp_skb->next);
1858                 skb->len += tmp_skb->len;
1859                 skb->data_len += tmp_skb->len;
1860                 skb->truesize += tmp_skb->truesize;
1861                 tmp_skb->destructor = NULL;
1862                 tmp_skb->sk = NULL;
1863         }
1864
1865         /* Allow local fragmentation. */
1866         skb->ignore_df = ip6_sk_ignore_df(sk);
1867
1868         *final_dst = fl6->daddr;
1869         __skb_pull(skb, skb_network_header_len(skb));
1870         if (opt && opt->opt_flen)
1871                 ipv6_push_frag_opts(skb, opt, &proto);
1872         if (opt && opt->opt_nflen)
1873                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1874
1875         skb_push(skb, sizeof(struct ipv6hdr));
1876         skb_reset_network_header(skb);
1877         hdr = ipv6_hdr(skb);
1878
1879         ip6_flow_hdr(hdr, v6_cork->tclass,
1880                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1881                                         ip6_autoflowlabel(net, np), fl6));
1882         hdr->hop_limit = v6_cork->hop_limit;
1883         hdr->nexthdr = proto;
1884         hdr->saddr = fl6->saddr;
1885         hdr->daddr = *final_dst;
1886
1887         skb->priority = sk->sk_priority;
1888         skb->mark = cork->base.mark;
1889
1890         skb->tstamp = cork->base.transmit_time;
1891
1892         skb_dst_set(skb, dst_clone(&rt->dst));
1893         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1894         if (proto == IPPROTO_ICMPV6) {
1895                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1896
1897                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1898                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1899         }
1900
1901         ip6_cork_release(cork, v6_cork);
1902 out:
1903         return skb;
1904 }
1905
1906 int ip6_send_skb(struct sk_buff *skb)
1907 {
1908         struct net *net = sock_net(skb->sk);
1909         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1910         int err;
1911
1912         err = ip6_local_out(net, skb->sk, skb);
1913         if (err) {
1914                 if (err > 0)
1915                         err = net_xmit_errno(err);
1916                 if (err)
1917                         IP6_INC_STATS(net, rt->rt6i_idev,
1918                                       IPSTATS_MIB_OUTDISCARDS);
1919         }
1920
1921         return err;
1922 }
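/* ip6_local_out() can return positive NET_XMIT_* codes from the
 * queueing layer; net_xmit_errno() maps NET_XMIT_CN (a soft congestion
 * indication) to 0 and other positive codes to -ENOBUFS, so only real
 * drops bump OUTDISCARDS here.
 */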
1923
1924 int ip6_push_pending_frames(struct sock *sk)
1925 {
1926         struct sk_buff *skb;
1927
1928         skb = ip6_finish_skb(sk);
1929         if (!skb)
1930                 return 0;
1931
1932         return ip6_send_skb(skb);
1933 }
1934 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
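/* ip6_finish_skb() (a static inline in include/net/ipv6.h) simply runs
 * __ip6_make_skb() on the socket's own write queue and cork state,
 * making ip6_push_pending_frames() the per-socket counterpart of the
 * ip6_make_skb() path below.
 */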
1935
1936 static void __ip6_flush_pending_frames(struct sock *sk,
1937                                        struct sk_buff_head *queue,
1938                                        struct inet_cork_full *cork,
1939                                        struct inet6_cork *v6_cork)
1940 {
1941         struct sk_buff *skb;
1942
1943         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1944                 if (skb_dst(skb))
1945                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1946                                       IPSTATS_MIB_OUTDISCARDS);
1947                 kfree_skb(skb);
1948         }
1949
1950         ip6_cork_release(cork, v6_cork);
1951 }
1952
1953 void ip6_flush_pending_frames(struct sock *sk)
1954 {
1955         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1956                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1957 }
1958 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1959
1960 struct sk_buff *ip6_make_skb(struct sock *sk,
1961                              int getfrag(void *from, char *to, int offset,
1962                                          int len, int odd, struct sk_buff *skb),
1963                              void *from, int length, int transhdrlen,
1964                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1965                              struct rt6_info *rt, unsigned int flags,
1966                              struct inet_cork_full *cork)
1967 {
1968         struct inet6_cork v6_cork;
1969         struct sk_buff_head queue;
1970         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1971         int err;
1972
1973         if (flags & MSG_PROBE)
1974                 return NULL;
1975
1976         __skb_queue_head_init(&queue);
1977
1978         cork->base.flags = 0;
1979         cork->base.addr = 0;
1980         cork->base.opt = NULL;
1981         cork->base.dst = NULL;
1982         v6_cork.opt = NULL;
1983         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1984         if (err) {
1985                 ip6_cork_release(cork, &v6_cork);
1986                 return ERR_PTR(err);
1987         }
1988         if (ipc6->dontfrag < 0)
1989                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1990
1991         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1992                                 &current->task_frag, getfrag, from,
1993                                 length + exthdrlen, transhdrlen + exthdrlen,
1994                                 flags, ipc6);
1995         if (err) {
1996                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1997                 return ERR_PTR(err);
1998         }
1999
2000         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2001 }
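/* Unlike the ip6_append_data()/ip6_push_pending_frames() pair, which
 * corks on sk->sk_write_queue under the socket lock, ip6_make_skb()
 * builds the whole datagram on a private queue with a caller-provided
 * cork, so UDPv6 can keep the cork on the stack for the un-corked fast
 * path and hand the result straight to ip6_send_skb().
 */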