// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

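/* Finish transmit: hand the skb to the neighbour layer. Loops multicast
 * copies back to the local stack where required, honours lightweight
 * tunnel redirects, and resolves (or creates) the neighbour entry for
 * the next hop before calling neigh_output().
 */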
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        const struct in6_addr *nexthop;
        struct neighbour *neigh;
        int ret;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

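/* Decide between direct transmit and fragmentation. Packets that exceed
 * the path MTU (and are not GSO), dsts that require fragmentation on all
 * output, and packets bigger than a conntrack-recorded frag_max_size all
 * go through ip6_fragment(); everything else goes straight to
 * ip6_finish_output2().
 */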
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                /* Use the IPv6 control block here; IPCB()/IPSKB_REROUTED
                 * are the IPv4 variants, and ip6_output() tests
                 * IP6SKB_REROUTED.
                 */
                IP6CB(skb)->flags |= IP6SKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

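/* Run the cgroup BPF egress program before finishing output. NET_XMIT_CN
 * keeps the packet flowing but preserves the congestion-notification
 * return code; any other non-success verdict drops the skb.
 */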
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

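/* Output entry point used as dst_output() for IPv6: sets up skb metadata
 * and traverses the NF_INET_POST_ROUTING hook unless the packet was
 * already rerouted by netfilter.
 */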
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the skb might
 * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);

                /* If the egress device is enslaved to an L3 master device,
                 * pass the skb to its handler for processing.
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* Hooks should never assume the socket lock is held;
                 * we promote our socket to non-const.
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require the socket lock,
         * so we promote our socket to non-const.
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

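/* Deliver a Router Alert packet to every raw socket that registered for
 * this alert value (IPV6_ROUTER_ALERT). Returns 1 if at least one socket
 * consumed the skb, 0 if the caller still owns it.
 */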
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined to
                         * the proxied address are passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

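/* Does this packet exceed the forwarding MTU? conntrack-defragmented
 * packets are judged by their original fragment size, ignore_df skips
 * the check, and GSO packets pass if each segment fits the network MTU.
 */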
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We do NOT do any processing on Router Alert packets;
         *      we push them to user level AS IS without any guarantee
         *      that the application will be able to interpret them.
         *      The reason is that we cannot do anything clever here:
         *
         *      We are not an end node, so if the packet contains
         *      AH/ESP we cannot do anything. Defragmentation would
         *      also be a mistake: RA packets must not be fragmented,
         *      because there is no guarantee that different fragments
         *      will follow the same path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force the OUTPUT device to be used for the source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects for source routed frames. We also don't
         * send redirects for frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same;
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect).
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force the OUTPUT device to be used for the
                 * source address.
                 */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Decrementing the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

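/* Propagate per-packet metadata (dst, device, mark, hash, tc index,
 * netfilter and security state) from the original skb to a fragment.
 */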
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

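/* Fast-path fragmentation setup: turn an skb whose frag_list already
 * carries correctly sized fragments into the first fragment by inserting
 * a fragment header, and prime the iterator for the remaining fragments.
 */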
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

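/* Attach the saved network headers and a fragment header to the next
 * fragment on the list, updating the running fragment offset and the
 * MF (more fragments) flag.
 */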
void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

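/* Slow path: carve the next fragment out of the original skb. Each
 * fragment gets a copy of the network headers, a fragment header and up
 * to 'mtu' bytes of payload, 8-byte aligned except for the last one.
 */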
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end
         * then align the next start on an eight byte boundary.
         */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

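/* Fragment an IPv6 datagram and hand each fragment to 'output'. Uses the
 * frag_list fast path when the queued fragments already have the right
 * geometry, otherwise falls back to copying via ip6_frag_next().
 */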
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare the header of the next frame,
                         * before the previous one is sent.
                         */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

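/* Nonzero when the cached route can no longer be trusted for this flow:
 * it is not a host route matching fl_addr and the cached address
 * (addr_cache) does not match either.
 */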
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the not-connected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is current. If it is a network
         *    route, we still may check its validity using the saved
         *    pointer to the last used address: daddr_cache. We do not
         *    want to save the whole address now (because the main
         *    consumer of this service is TCP, which does not have this
         *    problem), so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

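/* Core route lookup: resolve a source address if the flow has none, do
 * the routing table lookup, and (optionally) detour via the default
 * router while an optimistic-DAD source address is still tentative.
 */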
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, which is
         * why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
                struct fib6_info *from;
                struct rt6_info *rt;
                bool had_dst = *dst != NULL;

                if (!had_dst)
                        *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if (!had_dst && (*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is in the INCOMPLETE state and the src address from the flow
         * is marked as OPTIMISTIC, we release the found dst entry and
         * replace it instead with the dst entry of the nexthop router.
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @connected: whether @sk is connected or not
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      In addition, for a connected socket, cache the dst in the socket
 *      if the current cache is not valid.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *      @skb: Packet for which lookup is done
 *      @dev: Tunnel device
 *      @net: Network namespace of tunnel device
 *      @sock: Socket which provides route info
 *      @saddr: Memory to store the src ip address
 *      @info: Tunnel information
 *      @protocol: IP protocol
 *      @use_cache: Flag to enable cache usage
 *
 *      This function performs a route lookup on a tunnel.
 *
 *      It returns a valid dst pointer and stores the src address to be
 *      used in the tunnel in param saddr on success, else a pointer
 *      encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

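/* Recompute mtu/maxfraglen while appending: only the first fragment has
 * to reserve the dst's header_len; later fragments may use that space
 * for data. XFRM tunnel dsts keep the values chosen at cork setup.
 */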
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first; the header
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

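/* Set up the cork for a sendmsg sequence: duplicate the tx options so
 * they outlive the caller, pin the route, and derive the fragment size
 * from path MTU discovery settings and the socket's frag_size.
 */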
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        cork->base.mark = ipc6->sockc.mark;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

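/* Workhorse behind ip6_append_data()/ip6_make_skb(): grow the queue of
 * pending skbs, copying data via 'getfrag' and honouring MTU, corking,
 * checksum offload and zerocopy constraints.
 */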
1382 static int __ip6_append_data(struct sock *sk,
1383                              struct flowi6 *fl6,
1384                              struct sk_buff_head *queue,
1385                              struct inet_cork *cork,
1386                              struct inet6_cork *v6_cork,
1387                              struct page_frag *pfrag,
1388                              int getfrag(void *from, char *to, int offset,
1389                                          int len, int odd, struct sk_buff *skb),
1390                              void *from, int length, int transhdrlen,
1391                              unsigned int flags, struct ipcm6_cookie *ipc6)
1392 {
1393         struct sk_buff *skb, *skb_prev = NULL;
1394         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1395         struct ubuf_info *uarg = NULL;
1396         int exthdrlen = 0;
1397         int dst_exthdrlen = 0;
1398         int hh_len;
1399         int copy;
1400         int err;
1401         int offset = 0;
1402         u32 tskey = 0;
1403         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1404         struct ipv6_txoptions *opt = v6_cork->opt;
1405         int csummode = CHECKSUM_NONE;
1406         unsigned int maxnonfragsize, headersize;
1407         unsigned int wmem_alloc_delta = 0;
1408         bool paged, extra_uref = false;
1409
1410         skb = skb_peek_tail(queue);
1411         if (!skb) {
1412                 exthdrlen = opt ? opt->opt_flen : 0;
1413                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1414         }
1415
1416         paged = !!cork->gso_size;
1417         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1418         orig_mtu = mtu;
1419
1420         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1421             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1422                 tskey = sk->sk_tskey++;
1423
1424         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1425
1426         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1427                         (opt ? opt->opt_nflen : 0);
1428         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1429                      sizeof(struct frag_hdr);
1430
1431         headersize = sizeof(struct ipv6hdr) +
1432                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1433                      (dst_allfrag(&rt->dst) ?
1434                       sizeof(struct frag_hdr) : 0) +
1435                      rt->rt6i_nfheader_len;
1436
1437         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1438          * the first fragment
1439          */
        if (headersize + transhdrlen > mtu)
                goto emsgsize;

        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
            (sk->sk_protocol == IPPROTO_UDP ||
             sk->sk_protocol == IPPROTO_RAW)) {
                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
                                sizeof(struct ipv6hdr));
                goto emsgsize;
        }

        if (ip6_sk_ignore_df(sk))
                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
        else
                maxnonfragsize = mtu;

        if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
                return -EMSGSIZE;
        }
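
        /* Note: ipv6_local_error() above reports the usable MTU on the
         * socket's error queue (when IPV6_RECVERR is enabled), letting
         * the caller shrink the datagram and retry.
         */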

        /* CHECKSUM_PARTIAL only with no extension headers and when
         * we are not going to fragment
         */
        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            headersize == sizeof(struct ipv6hdr) &&
            length <= mtu - headersize &&
            (!(flags & MSG_MORE) || cork->gso_size) &&
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;

        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
                uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
                if (!uarg)
                        return -ENOBUFS;
                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
                if (rt->dst.dev->features & NETIF_F_SG &&
                    csummode == CHECKSUM_PARTIAL) {
                        paged = true;
                } else {
                        uarg->zerocopy = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);
                }
        }
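
        /* When the device lacks scatter-gather or checksum offload for
         * this packet, zerocopy is disabled above (uarg->zerocopy = 0):
         * data is copied as usual, but the uarg stays attached so the
         * sender still gets its completion notification, flagged as
         * having been copied.
         */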

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (!skb)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        unsigned int pagedlen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (!skb || !skb_prev)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt,
                                                    orig_mtu);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;
                        pagedlen = 0;

                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else if (!paged)
                                alloclen = fraglen;
                        else {
                                alloclen = min_t(int, fraglen, MAX_HEADER);
                                pagedlen = fraglen - alloclen;
                        }

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * This is not the last fragment; the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        alloclen += rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;

                        /*
                         * We just reserve space for the fragment header.
                         * Note: this may be an overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
                        }
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
                                    2 * sk->sk_sndbuf)
                                        skb = alloc_skb(alloclen + hh_len,
                                                        sk->sk_allocation);
                                if (unlikely(!skb))
                                        err = -ENOBUFS;
                        }
                        if (!skb)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->protocol = htons(ETH_P_IPV6);
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        if (copy > 0 &&
                            getfrag(from, data + transhdrlen, offset,
                                    copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /* Only the initial fragment is time stamped */
                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
                        cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);

                        if ((flags & MSG_CONFIRM) && !skb_prev)
                                skb_set_dst_pending_confirm(skb, 1);

                        /*
                         * Put the packet on the pending queue
                         */
                        if (!skb->destructor) {
                                skb->destructor = sock_wfree;
                                skb->sk = sk;
                                wmem_alloc_delta += skb->truesize;
                        }
                        __skb_queue_tail(queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features&NETIF_F_SG) &&
                    skb_tailroom(skb) >= copy) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                                offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else if (!uarg || !uarg->zerocopy) {
                        int i = skb_shinfo(skb)->nr_frags;

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        wmem_alloc_delta += copy;
                } else {
                        err = skb_zerocopy_iter_dgram(skb, from, copy);
                        if (err < 0)
                                goto error;
                }
                offset += copy;
                length -= copy;
        }

        if (wmem_alloc_delta)
                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return 0;

error_efault:
        err = -EFAULT;
error:
        if (uarg)
                sock_zerocopy_put_abort(uarg, extra_uref);
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return err;
}

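/*
 * Queue data on sk->sk_write_queue without sending it, setting up the
 * cork on the first call.  The queued data is later turned into packets
 * by ip6_push_pending_frames() or discarded by ip6_flush_pending_frames().
 */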
int ip6_append_data(struct sock *sk,
                    int getfrag(void *from, char *to, int offset, int len,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        int exthdrlen;
        int err;

        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
                                     ipc6, rt, fl6);
                if (err)
                        return err;

                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                fl6 = &inet->cork.fl.u.ip6;
                transhdrlen = 0;
        }

        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
                                 &np->cork, sk_page_frag(sk), getfrag,
                                 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
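
/*
 * Typical calling pattern, simplified from datagram senders such as
 * l2tp_ip6_sendmsg() (the surrounding locals are illustrative):
 *
 *      err = ip6_append_data(sk, ip_generic_getfrag, msg, len, 0,
 *                            &ipc6, &fl6, rt, msg->msg_flags);
 *      if (err)
 *              ip6_flush_pending_frames(sk);
 *      else if (!(msg->msg_flags & MSG_MORE))
 *              err = ip6_push_pending_frames(sk);
 */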

static void ip6_cork_release(struct inet_cork_full *cork,
                             struct inet6_cork *v6_cork)
{
        if (v6_cork->opt) {
                kfree(v6_cork->opt->dst0opt);
                kfree(v6_cork->opt->dst1opt);
                kfree(v6_cork->opt->hopopt);
                kfree(v6_cork->opt->srcrt);
                kfree(v6_cork->opt);
                v6_cork->opt = NULL;
        }

        if (cork->base.dst) {
                dst_release(cork->base.dst);
                cork->base.dst = NULL;
                cork->base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&cork->fl, 0, sizeof(cork->fl));
}

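/*
 * Collapse the queued skbs into one: the first skb becomes the head and
 * each later one is chained onto its frag_list, then the IPv6 header and
 * any extension headers are pushed in front and the stats are bumped.
 */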
struct sk_buff *__ip6_make_skb(struct sock *sk,
                               struct sk_buff_head *queue,
                               struct inet_cork_full *cork,
                               struct inet6_cork *v6_cork)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = v6_cork->opt;
        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
        struct flowi6 *fl6 = &cork->fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;

        skb = __skb_dequeue(queue);
        if (!skb)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to the IP header from the ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        skb->ignore_df = ip6_sk_ignore_df(sk);

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                        ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = cork->base.mark;

        skb->tstamp = cork->base.transmit_time;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }

        ip6_cork_release(cork, v6_cork);
out:
        return skb;
}

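/*
 * Hand a finished skb to ip6_local_out().  Positive NET_XMIT codes are
 * folded into errno style via net_xmit_errno(): NET_XMIT_CN (congestion
 * notified, packet still queued) counts as success, anything else
 * becomes -ENOBUFS.
 */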
int ip6_send_skb(struct sk_buff *skb)
{
        struct net *net = sock_net(skb->sk);
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;

        err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        IP6_INC_STATS(net, rt->rt6i_idev,
                                      IPSTATS_MIB_OUTDISCARDS);
        }

        return err;
}

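/*
 * ip6_finish_skb() (an inline wrapper in include/net/ipv6.h) runs
 * __ip6_make_skb() on sk->sk_write_queue, so this transmits everything
 * queued so far by ip6_append_data() as a single datagram.
 */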
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        skb = ip6_finish_skb(sk);
        if (!skb)
                return 0;

        return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
                                       struct sk_buff_head *queue,
                                       struct inet_cork_full *cork,
                                       struct inet6_cork *v6_cork)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

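/*
 * One-shot variant of append + make: build the whole datagram on a
 * private queue with a caller-provided cork, leaving no state behind on
 * the socket.  Sketch of the uncorked UDP fast path (after
 * udpv6_sendmsg(); udp_v6_send_skb() is UDP-specific):
 *
 *      skb = ip6_make_skb(sk, ip_generic_getfrag, msg, ulen,
 *                         sizeof(struct udphdr), &ipc6, &fl6,
 *                         (struct rt6_info *)dst, msg->msg_flags, &cork);
 *      err = PTR_ERR(skb);
 *      if (!IS_ERR_OR_NULL(skb))
 *              err = udp_v6_send_skb(skb, &fl6, &cork.base);
 */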
struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
                             struct inet_cork_full *cork)
{
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
        int err;

        if (flags & MSG_PROBE)
                return NULL;

        __skb_queue_head_init(&queue);

        cork->base.flags = 0;
        cork->base.addr = 0;
        cork->base.opt = NULL;
        cork->base.dst = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
                ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
        }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;

        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6);
        if (err) {
                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
        }

        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}