// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

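/* Hand the packet to the neighbour layer: loop a copy back for
 * multicast where required, honour lightweight tunnel xmit redirects,
 * and resolve (or create) the neighbour entry for the next hop before
 * calling neigh_output().
 */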
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        const struct in6_addr *nexthop;
        struct neighbour *neigh;
        int ret;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

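/* Re-route if an xfrm policy was attached after SNAT, fragment when the
 * packet exceeds the path MTU (and is not GSO), otherwise hand it
 * straight to ip6_finish_output2().
 */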
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

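/* Run the egress cgroup BPF program before finishing output; a
 * NET_XMIT_CN verdict still transmits but reports congestion to the
 * caller.
 */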
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

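/* Output entry point behind the NF_INET_POST_ROUTING hook; the hook is
 * skipped when the skb has already been rerouted (IP6SKB_REROUTED).
 */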
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but they might
 * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);

                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

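/* Deliver a Router Alert packet to every raw socket registered for this
 * alert value, cloning the skb for all but the last listener. Returns 1
 * if at least one socket took the packet.
 */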
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;

                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

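/* Decide what to do with a packet destined to a proxied (NDP proxy)
 * address: 1 = deliver to local input, 0 = forward, -1 = drop.
 */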
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else {
                offset = sizeof(struct ipv6hdr);
        }

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reactions involving unicast neighbor
                         * discovery messages destined to the proxied
                         * address, pass them to the input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

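/* True if the packet cannot be forwarded at the given MTU without
 * fragmentation, taking conntrack defrag and GSO into account.
 */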
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

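/* Forward one packet: check hop limit, Router Alert and NDP proxy
 * handling, send redirects where appropriate, and enforce the path MTU
 * before passing the skb through NF_INET_FORWARD.
 */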
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT do any processing on
         *      RA packets, pushing them to user level AS IS
         *      without any WARRANTY that the application will be able
         *      to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not an end-node, so if the packet contains
         *      AH/ESP we cannot do anything.
         *      Defragmentation would also be a mistake; RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);

                if (proxied > 0) {
                        return ip6_input(skb);
                } else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
         * send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

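/* Set up fast-path fragmentation over an existing frag_list: save the
 * original header chain in iter->tmp_hdr and insert the fragment header
 * into the first fragment.
 */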
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

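/* Slow-path fragmentation: allocate the next fragment and copy up to
 * state->mtu bytes of payload into it from the original skb. Returns
 * the new fragment, or an ERR_PTR() on allocation failure.
 */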
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end
         * then align the next start on an eight byte boundary
         */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

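/* Fragment an IPv6 datagram, sending each fragment through @output.
 * Packets with a suitably laid out frag_list are sent in place (fast
 * path); everything else is copied fragment by fragment via
 * ip6_frag_next() (slow path).
 */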
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down.
                         */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

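/* Validate a dst cached on the socket against the current flow; release
 * it and return NULL when it no longer matches.
 */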
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route,
         *    check that the cached destination is current.
         *    If it is a network route, we still may
         *    check its validity using a saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save the whole address now
         *    (because the main consumer of this service
         *    is tcp, which does not have this problem),
         *    so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

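/* Common core of the dst lookup helpers: resolve a route and, if still
 * unset, a source address for @fl6, retrying for source-specific routes
 * and falling back to the default router when the source address is
 * still undergoing optimistic DAD.
 */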
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
                struct fib6_info *from;
                struct rt6_info *rt;
                bool had_dst = *dst != NULL;

                if (!had_dst)
                        *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if (!had_dst && (*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @connected: whether @sk is connected or not
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      In addition, for a connected socket, cache the dst in the socket
 *      if the current cache is not valid.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *      @skb: Packet for which lookup is done
 *      @dev: Tunnel device
 *      @net: Network namespace of tunnel device
 *      @sock: Socket which provides route info
 *      @saddr: Memory to store the src ip address
 *      @info: Tunnel information
 *      @protocol: IP protocol
 *      @use_cache: Flag to enable cache usage
 *
 *      This function performs a route lookup on a tunnel.
 *
 *      It returns a valid dst pointer and stores the src address to be used
 *      in the tunnel in param saddr on success, else a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first, so the headers
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

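/* Initialise cork state for append-data: duplicate the tx options, pin
 * the route, and derive the fragment size from the path (or device)
 * MTU.
 */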
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        cork->base.mark = ipc6->sockc.mark;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

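/* Workhorse behind ip6_append_data(): grow the tail skb on the queue or
 * allocate new ones sized to the (GSO-aware) MTU, handling zerocopy,
 * checksum offload and the corking limits.
 */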
1384 static int __ip6_append_data(struct sock *sk,
1385                              struct flowi6 *fl6,
1386                              struct sk_buff_head *queue,
1387                              struct inet_cork *cork,
1388                              struct inet6_cork *v6_cork,
1389                              struct page_frag *pfrag,
1390                              int getfrag(void *from, char *to, int offset,
1391                                          int len, int odd, struct sk_buff *skb),
1392                              void *from, int length, int transhdrlen,
1393                              unsigned int flags, struct ipcm6_cookie *ipc6)
1394 {
1395         struct sk_buff *skb, *skb_prev = NULL;
1396         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1397         struct ubuf_info *uarg = NULL;
1398         int exthdrlen = 0;
1399         int dst_exthdrlen = 0;
1400         int hh_len;
1401         int copy;
1402         int err;
1403         int offset = 0;
1404         u32 tskey = 0;
1405         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1406         struct ipv6_txoptions *opt = v6_cork->opt;
1407         int csummode = CHECKSUM_NONE;
1408         unsigned int maxnonfragsize, headersize;
1409         unsigned int wmem_alloc_delta = 0;
1410         bool paged, extra_uref = false;
1411
1412         skb = skb_peek_tail(queue);
1413         if (!skb) {
1414                 exthdrlen = opt ? opt->opt_flen : 0;
1415                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1416         }
1417
1418         paged = !!cork->gso_size;
1419         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1420         orig_mtu = mtu;
1421
1422         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1423             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1424                 tskey = sk->sk_tskey++;
1425
1426         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1427
1428         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1429                         (opt ? opt->opt_nflen : 0);
1430         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1431                      sizeof(struct frag_hdr);
1432
1433         headersize = sizeof(struct ipv6hdr) +
1434                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1435                      (dst_allfrag(&rt->dst) ?
1436                       sizeof(struct frag_hdr) : 0) +
1437                      rt->rt6i_nfheader_len;
1438
1439         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1440          * the first fragment
1441          */
1442         if (headersize + transhdrlen > mtu)
1443                 goto emsgsize;
1444
        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
            (sk->sk_protocol == IPPROTO_UDP ||
             sk->sk_protocol == IPPROTO_RAW)) {
                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
                                sizeof(struct ipv6hdr));
                goto emsgsize;
        }

        if (ip6_sk_ignore_df(sk))
                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
        else
                maxnonfragsize = mtu;

        if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
                return -EMSGSIZE;
        }

        /* CHECKSUM_PARTIAL only with no extension headers and when
         * we are not going to fragment
         */
        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            headersize == sizeof(struct ipv6hdr) &&
            length <= mtu - headersize &&
            (!(flags & MSG_MORE) || cork->gso_size) &&
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;

        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
                uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
                if (!uarg)
                        return -ENOBUFS;
                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
                if (rt->dst.dev->features & NETIF_F_SG &&
                    csummode == CHECKSUM_PARTIAL) {
                        paged = true;
                } else {
                        /* Device cannot take user pages directly: degrade
                         * to copying, but keep the uarg attached so the
                         * completion notification is still delivered.
                         */
                        uarg->zerocopy = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (!skb)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        unsigned int pagedlen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (!skb || !skb_prev)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt,
                                                    orig_mtu);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;
                        pagedlen = 0;

                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else if (!paged)
                                alloclen = fraglen;
                        else {
                                alloclen = min_t(int, fraglen, MAX_HEADER);
                                pagedlen = fraglen - alloclen;
                        }

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * This is not the last fragment; the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        alloclen += rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;

                        /*
                         * We just reserve space for the fragment header.
                         * Note: this may be an overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
                        }
                        if (transhdrlen) {
                                /* First skb of the message: charge it to the
                                 * socket's send buffer, possibly sleeping.
                                 */
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                /* Follow-on fragments are accounted manually
                                 * via wmem_alloc_delta and committed in one
                                 * go when the loop finishes.
                                 */
                                skb = NULL;
                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
                                    2 * sk->sk_sndbuf)
                                        skb = alloc_skb(alloclen + hh_len,
                                                        sk->sk_allocation);
                                if (unlikely(!skb))
                                        err = -ENOBUFS;
                        }
                        if (!skb)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->protocol = htons(ETH_P_IPV6);
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                /* Move the tail of the previous fragment here
                                 * so its payload stays a multiple of 8 octets,
                                 * fixing up the checksums of both skbs.
                                 */
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        if (copy > 0 &&
                            getfrag(from, data + transhdrlen, offset,
                                    copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /* Only the initial fragment is time stamped */
                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
                        cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);

                        if ((flags & MSG_CONFIRM) && !skb_prev)
                                skb_set_dst_pending_confirm(skb, 1);

                        /*
                         * Put the packet on the pending queue
                         */
                        if (!skb->destructor) {
                                skb->destructor = sock_wfree;
                                skb->sk = sk;
                                wmem_alloc_delta += skb->truesize;
                        }
                        __skb_queue_tail(queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG) &&
                    skb_tailroom(skb) >= copy) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                                offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else if (!uarg || !uarg->zerocopy) {
                        int i = skb_shinfo(skb)->nr_frags;

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        /* Append to the socket's page frag, coalescing with
                         * the previous frag where possible, and account the
                         * copied bytes against the send buffer.
                         */
                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        wmem_alloc_delta += copy;
                } else {
                        err = skb_zerocopy_iter_dgram(skb, from, copy);
                        if (err < 0)
                                goto error;
                }
                offset += copy;
                length -= copy;
        }

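        /* All data is queued; commit the accumulated truesize to the
         * socket's write allocation once, rather than atomically per
         * fragment.
         */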
        if (wmem_alloc_delta)
                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return 0;

error_efault:
        err = -EFAULT;
error:
        if (uarg)
                sock_zerocopy_put_abort(uarg, extra_uref);
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return err;
}

int ip6_append_data(struct sock *sk,
                    int getfrag(void *from, char *to, int offset, int len,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        int exthdrlen;
        int err;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * Set up for corking.
                 */
                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
                                     ipc6, rt, fl6);
                if (err)
                        return err;

                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                /* Already corked: reuse the flow stored in the cork and
                 * treat the data as pure payload.
                 */
                fl6 = &inet->cork.fl.u.ip6;
                transhdrlen = 0;
        }

        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
                                 &np->cork, sk_page_frag(sk), getfrag,
                                 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
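
/*
 * A minimal sketch of how a datagram protocol might drive the corked
 * path above, modeled loosely on udpv6_sendmsg().  "example_sendmsg" is
 * hypothetical; the flowi6, route and ipcm6_cookie are assumed to have
 * been resolved by the caller, and ip_generic_getfrag() is the stock
 * getfrag for data still in the user's msghdr.  Real callers hold the
 * socket lock around append/push so the cork is not corrupted by
 * concurrent senders.
 */
#if 0   /* illustrative only, not built */
static int example_sendmsg(struct sock *sk, struct msghdr *msg, size_t len,
                           struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                           struct rt6_info *rt)
{
        int err;

        lock_sock(sk);
        err = ip6_append_data(sk, ip_generic_getfrag, msg, len, 0,
                              ipc6, fl6, rt, msg->msg_flags);
        if (err)
                ip6_flush_pending_frames(sk);
        else if (!(msg->msg_flags & MSG_MORE))
                err = ip6_push_pending_frames(sk);
        release_sock(sk);
        return err;
}
#endif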

/*
 * Free the option blocks and drop the route pinned by ip6_setup_cork().
 */
static void ip6_cork_release(struct inet_cork_full *cork,
                             struct inet6_cork *v6_cork)
{
        if (v6_cork->opt) {
                kfree(v6_cork->opt->dst0opt);
                kfree(v6_cork->opt->dst1opt);
                kfree(v6_cork->opt->hopopt);
                kfree(v6_cork->opt->srcrt);
                kfree(v6_cork->opt);
                v6_cork->opt = NULL;
        }

        if (cork->base.dst) {
                dst_release(cork->base.dst);
                cork->base.dst = NULL;
                cork->base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
                               struct sk_buff_head *queue,
                               struct inet_cork_full *cork,
                               struct inet6_cork *v6_cork)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = v6_cork->opt;
        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
        struct flowi6 *fl6 = &cork->fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;

        skb = __skb_dequeue(queue);
        if (!skb)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* Move skb->data to the IP header, past any extension headers. */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        /* Chain any further queued skbs onto the first one's frag_list,
         * transferring their length and truesize to the head skb.
         */
        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        skb->ignore_df = ip6_sk_ignore_df(sk);

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                        ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = cork->base.mark;

        skb->tstamp = cork->base.transmit_time;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }

        ip6_cork_release(cork, v6_cork);
out:
        return skb;
}

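/*
 * Hand a completed datagram to the output path.  Positive return values
 * from ip6_local_out() are qdisc verdicts; net_xmit_errno() maps them to
 * 0 or -ENOBUFS as seen by the sender.
 */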
int ip6_send_skb(struct sk_buff *skb)
{
        struct net *net = sock_net(skb->sk);
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;

        err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        IP6_INC_STATS(net, rt->rt6i_idev,
                                      IPSTATS_MIB_OUTDISCARDS);
        }

        return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        skb = ip6_finish_skb(sk);
        if (!skb)
                return 0;

        return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
                                       struct sk_buff_head *queue,
                                       struct inet_cork_full *cork,
                                       struct inet6_cork *v6_cork)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
                             struct inet_cork_full *cork)
{
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
        int err;

        if (flags & MSG_PROBE)
                return NULL;

        __skb_queue_head_init(&queue);

        /* The cork is caller-provided and the queue is local, so this
         * builds the whole datagram in one shot without touching the
         * socket's write queue.
         */
        cork->base.flags = 0;
        cork->base.addr = 0;
        cork->base.opt = NULL;
        cork->base.dst = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
                ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
        }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;

        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6);
        if (err) {
                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
        }

        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
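
/*
 * A minimal sketch of the uncorked single-shot path, loosely modeled on
 * the UDP GSO fast path: build the whole datagram with ip6_make_skb()
 * using a stack-resident cork, then hand it to ip6_send_skb().
 * "example_send_oneshot" and its resolved fl6/rt/ipc6 arguments are
 * hypothetical placeholders.
 */
#if 0   /* illustrative only, not built */
static int example_send_oneshot(struct sock *sk, struct msghdr *msg,
                                size_t len, struct ipcm6_cookie *ipc6,
                                struct flowi6 *fl6, struct rt6_info *rt)
{
        struct inet_cork_full cork;
        struct sk_buff *skb;

        skb = ip6_make_skb(sk, ip_generic_getfrag, msg, len, 0,
                           ipc6, fl6, rt, msg->msg_flags, &cork);
        if (IS_ERR_OR_NULL(skb))
                return PTR_ERR(skb);    /* NULL (MSG_PROBE) maps to 0 */
        return ip6_send_skb(skb);
}
#endif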