/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  David Lebrun <david.lebrun@uclouvain.be>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *        modify it under the terms of the GNU General Public License
 *        as published by the Free Software Foundation; either version
 *        2 of the License, or (at your option) any later version.
 */
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/string.h>
#include <net/ip.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_local.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <linux/etherdevice.h>
struct seg6_local_lwt;

/*
 * Descriptor of one seg6local action.
 *
 * @action:          SEG6_LOCAL_ACTION_* identifier
 * @attrs:           bitmask of (1 << SEG6_LOCAL_*) attributes the action
 *                   requires
 * @input:           packet handler invoked from seg6_local_input()
 * @static_headroom: fixed headroom added to the tunnel state's headroom
 */
struct seg6_action_desc {
	int action;
	unsigned long attrs;
	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	int static_headroom;
};
44 struct seg6_local_lwt {
46 struct ipv6_sr_hdr *srh;
54 struct seg6_action_desc *desc;
57 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
59 return (struct seg6_local_lwt *)lwt->data;
62 static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
64 struct ipv6_sr_hdr *srh;
67 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
70 if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
73 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
75 len = (srh->hdrlen + 1) << 3;
77 if (!pskb_may_pull(skb, srhoff + len))
80 if (!seg6_validate_srh(srh, len))
86 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
88 struct ipv6_sr_hdr *srh;
94 if (srh->segments_left == 0)
97 #ifdef CONFIG_IPV6_SEG6_HMAC
98 if (!seg6_hmac_validate_skb(skb))
105 static bool decap_and_validate(struct sk_buff *skb, int proto)
107 struct ipv6_sr_hdr *srh;
108 unsigned int off = 0;
111 if (srh && srh->segments_left > 0)
114 #ifdef CONFIG_IPV6_SEG6_HMAC
115 if (srh && !seg6_hmac_validate_skb(skb))
119 if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
122 if (!pskb_pull(skb, off))
125 skb_postpull_rcsum(skb, skb_network_header(skb), off);
127 skb_reset_network_header(skb);
128 skb_reset_transport_header(skb);
129 skb->encapsulation = 0;
134 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
136 struct in6_addr *addr;
138 srh->segments_left--;
139 addr = srh->segments + srh->segments_left;
143 static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
146 struct net *net = dev_net(skb->dev);
147 struct ipv6hdr *hdr = ipv6_hdr(skb);
148 int flags = RT6_LOOKUP_F_HAS_SADDR;
149 struct dst_entry *dst = NULL;
153 fl6.flowi6_iif = skb->dev->ifindex;
154 fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
155 fl6.saddr = hdr->saddr;
156 fl6.flowlabel = ip6_flowinfo(hdr);
157 fl6.flowi6_mark = skb->mark;
158 fl6.flowi6_proto = hdr->nexthdr;
161 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
164 dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
166 struct fib6_table *table;
168 table = fib6_get_table(net, tbl_id);
172 rt = ip6_pol_route(net, table, 0, &fl6, flags);
176 if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
183 rt = net->ipv6.ip6_blk_hole_entry;
189 skb_dst_set(skb, dst);
192 /* regular endpoint function */
193 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
195 struct ipv6_sr_hdr *srh;
197 srh = get_and_validate_srh(skb);
201 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
203 lookup_nexthop(skb, NULL, 0);
205 return dst_input(skb);
212 /* regular endpoint, and forward to specified nexthop */
213 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
215 struct ipv6_sr_hdr *srh;
217 srh = get_and_validate_srh(skb);
221 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
223 lookup_nexthop(skb, &slwt->nh6, 0);
225 return dst_input(skb);
232 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
234 struct ipv6_sr_hdr *srh;
236 srh = get_and_validate_srh(skb);
240 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
242 lookup_nexthop(skb, NULL, slwt->table);
244 return dst_input(skb);
251 /* decapsulate and forward inner L2 frame on specified interface */
252 static int input_action_end_dx2(struct sk_buff *skb,
253 struct seg6_local_lwt *slwt)
255 struct net *net = dev_net(skb->dev);
256 struct net_device *odev;
259 if (!decap_and_validate(skb, NEXTHDR_NONE))
262 if (!pskb_may_pull(skb, ETH_HLEN))
265 skb_reset_mac_header(skb);
266 eth = (struct ethhdr *)skb->data;
268 /* To determine the frame's protocol, we assume it is 802.3. This avoids
269 * a call to eth_type_trans(), which is not really relevant for our
272 if (!eth_proto_is_802_3(eth->h_proto))
275 odev = dev_get_by_index_rcu(net, slwt->oif);
279 /* As we accept Ethernet frames, make sure the egress device is of
282 if (odev->type != ARPHRD_ETHER)
285 if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
290 if (skb_warn_if_lro(skb))
293 skb_forward_csum(skb);
295 if (skb->len - ETH_HLEN > odev->mtu)
299 skb->protocol = eth->h_proto;
301 return dev_queue_xmit(skb);
308 /* decapsulate and forward to specified nexthop */
309 static int input_action_end_dx6(struct sk_buff *skb,
310 struct seg6_local_lwt *slwt)
312 struct in6_addr *nhaddr = NULL;
314 /* this function accepts IPv6 encapsulated packets, with either
315 * an SRH with SL=0, or no SRH.
318 if (!decap_and_validate(skb, IPPROTO_IPV6))
321 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
324 /* The inner packet is not associated to any local interface,
325 * so we do not call netif_rx().
327 * If slwt->nh6 is set to ::, then lookup the nexthop for the
328 * inner packet's DA. Otherwise, use the specified nexthop.
331 if (!ipv6_addr_any(&slwt->nh6))
334 lookup_nexthop(skb, nhaddr, 0);
336 return dst_input(skb);
342 static int input_action_end_dx4(struct sk_buff *skb,
343 struct seg6_local_lwt *slwt)
349 if (!decap_and_validate(skb, IPPROTO_IPIP))
352 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
355 skb->protocol = htons(ETH_P_IP);
359 nhaddr = slwt->nh4.s_addr ?: iph->daddr;
363 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
367 return dst_input(skb);
374 static int input_action_end_dt6(struct sk_buff *skb,
375 struct seg6_local_lwt *slwt)
377 if (!decap_and_validate(skb, IPPROTO_IPV6))
380 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
383 lookup_nexthop(skb, NULL, slwt->table);
385 return dst_input(skb);
392 /* push an SRH on top of the current one */
393 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
395 struct ipv6_sr_hdr *srh;
398 srh = get_and_validate_srh(skb);
402 err = seg6_do_srh_inline(skb, slwt->srh);
406 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
407 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
409 lookup_nexthop(skb, NULL, 0);
411 return dst_input(skb);
418 /* encapsulate within an outer IPv6 header and a specified SRH */
419 static int input_action_end_b6_encap(struct sk_buff *skb,
420 struct seg6_local_lwt *slwt)
422 struct ipv6_sr_hdr *srh;
425 srh = get_and_validate_srh(skb);
429 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
431 skb_reset_inner_headers(skb);
432 skb->encapsulation = 1;
434 err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
438 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
439 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
441 lookup_nexthop(skb, NULL, 0);
443 return dst_input(skb);
450 static struct seg6_action_desc seg6_action_table[] = {
452 .action = SEG6_LOCAL_ACTION_END,
454 .input = input_action_end,
457 .action = SEG6_LOCAL_ACTION_END_X,
458 .attrs = (1 << SEG6_LOCAL_NH6),
459 .input = input_action_end_x,
462 .action = SEG6_LOCAL_ACTION_END_T,
463 .attrs = (1 << SEG6_LOCAL_TABLE),
464 .input = input_action_end_t,
467 .action = SEG6_LOCAL_ACTION_END_DX2,
468 .attrs = (1 << SEG6_LOCAL_OIF),
469 .input = input_action_end_dx2,
472 .action = SEG6_LOCAL_ACTION_END_DX6,
473 .attrs = (1 << SEG6_LOCAL_NH6),
474 .input = input_action_end_dx6,
477 .action = SEG6_LOCAL_ACTION_END_DX4,
478 .attrs = (1 << SEG6_LOCAL_NH4),
479 .input = input_action_end_dx4,
482 .action = SEG6_LOCAL_ACTION_END_DT6,
483 .attrs = (1 << SEG6_LOCAL_TABLE),
484 .input = input_action_end_dt6,
487 .action = SEG6_LOCAL_ACTION_END_B6,
488 .attrs = (1 << SEG6_LOCAL_SRH),
489 .input = input_action_end_b6,
492 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
493 .attrs = (1 << SEG6_LOCAL_SRH),
494 .input = input_action_end_b6_encap,
495 .static_headroom = sizeof(struct ipv6hdr),
499 static struct seg6_action_desc *__get_action_desc(int action)
501 struct seg6_action_desc *desc;
504 count = ARRAY_SIZE(seg6_action_table);
505 for (i = 0; i < count; i++) {
506 desc = &seg6_action_table[i];
507 if (desc->action == action)
514 static int seg6_local_input(struct sk_buff *skb)
516 struct dst_entry *orig_dst = skb_dst(skb);
517 struct seg6_action_desc *desc;
518 struct seg6_local_lwt *slwt;
520 if (skb->protocol != htons(ETH_P_IPV6)) {
525 slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
528 return desc->input(skb, slwt);
531 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
532 [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
533 [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
534 [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
535 [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
536 .len = sizeof(struct in_addr) },
537 [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
538 .len = sizeof(struct in6_addr) },
539 [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
540 [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
543 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
545 struct ipv6_sr_hdr *srh;
548 srh = nla_data(attrs[SEG6_LOCAL_SRH]);
549 len = nla_len(attrs[SEG6_LOCAL_SRH]);
551 /* SRH must contain at least one segment */
552 if (len < sizeof(*srh) + sizeof(struct in6_addr))
555 if (!seg6_validate_srh(srh, len))
558 slwt->srh = kmalloc(len, GFP_KERNEL);
562 memcpy(slwt->srh, srh, len);
564 slwt->headroom += len;
569 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
571 struct ipv6_sr_hdr *srh;
576 len = (srh->hdrlen + 1) << 3;
578 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
582 memcpy(nla_data(nla), srh, len);
587 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
589 int len = (a->srh->hdrlen + 1) << 3;
591 if (len != ((b->srh->hdrlen + 1) << 3))
594 return memcmp(a->srh, b->srh, len);
597 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
599 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
604 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
606 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
612 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
614 if (a->table != b->table)
620 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
622 memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
623 sizeof(struct in_addr));
628 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
632 nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
636 memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
641 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
643 return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
646 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
648 memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
649 sizeof(struct in6_addr));
654 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
658 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
662 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
667 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
669 return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
672 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
674 slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
679 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
681 if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
687 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
689 if (a->iif != b->iif)
695 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
697 slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
702 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
704 if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
710 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
712 if (a->oif != b->oif)
/*
 * Per-attribute callbacks.
 *
 * @parse: read the attribute from netlink into the tunnel state
 * @put:   dump the attribute from the tunnel state into a netlink message
 * @cmp:   compare the attribute of two states; 0 means equal
 */
struct seg6_action_param {
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
};
724 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
725 [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
727 .cmp = cmp_nla_srh },
729 [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
730 .put = put_nla_table,
731 .cmp = cmp_nla_table },
733 [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
735 .cmp = cmp_nla_nh4 },
737 [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
739 .cmp = cmp_nla_nh6 },
741 [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
743 .cmp = cmp_nla_iif },
745 [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
747 .cmp = cmp_nla_oif },
750 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
752 struct seg6_action_param *param;
753 struct seg6_action_desc *desc;
756 desc = __get_action_desc(slwt->action);
764 slwt->headroom += desc->static_headroom;
766 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
767 if (desc->attrs & (1 << i)) {
771 param = &seg6_action_params[i];
773 err = param->parse(attrs, slwt);
782 static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
783 const void *cfg, struct lwtunnel_state **ts,
784 struct netlink_ext_ack *extack)
786 struct nlattr *tb[SEG6_LOCAL_MAX + 1];
787 struct lwtunnel_state *newts;
788 struct seg6_local_lwt *slwt;
791 if (family != AF_INET6)
794 err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
800 if (!tb[SEG6_LOCAL_ACTION])
803 newts = lwtunnel_state_alloc(sizeof(*slwt));
807 slwt = seg6_local_lwtunnel(newts);
808 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
810 err = parse_nla_action(tb, slwt);
814 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
815 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
816 newts->headroom = slwt->headroom;
828 static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
830 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
835 static int seg6_local_fill_encap(struct sk_buff *skb,
836 struct lwtunnel_state *lwt)
838 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
839 struct seg6_action_param *param;
842 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
845 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
846 if (slwt->desc->attrs & (1 << i)) {
847 param = &seg6_action_params[i];
848 err = param->put(skb, slwt);
857 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
859 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
863 nlsize = nla_total_size(4); /* action */
865 attrs = slwt->desc->attrs;
867 if (attrs & (1 << SEG6_LOCAL_SRH))
868 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
870 if (attrs & (1 << SEG6_LOCAL_TABLE))
871 nlsize += nla_total_size(4);
873 if (attrs & (1 << SEG6_LOCAL_NH4))
874 nlsize += nla_total_size(4);
876 if (attrs & (1 << SEG6_LOCAL_NH6))
877 nlsize += nla_total_size(16);
879 if (attrs & (1 << SEG6_LOCAL_IIF))
880 nlsize += nla_total_size(4);
882 if (attrs & (1 << SEG6_LOCAL_OIF))
883 nlsize += nla_total_size(4);
888 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
889 struct lwtunnel_state *b)
891 struct seg6_local_lwt *slwt_a, *slwt_b;
892 struct seg6_action_param *param;
895 slwt_a = seg6_local_lwtunnel(a);
896 slwt_b = seg6_local_lwtunnel(b);
898 if (slwt_a->action != slwt_b->action)
901 if (slwt_a->desc->attrs != slwt_b->desc->attrs)
904 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
905 if (slwt_a->desc->attrs & (1 << i)) {
906 param = &seg6_action_params[i];
907 if (param->cmp(slwt_a, slwt_b))
915 static const struct lwtunnel_encap_ops seg6_local_ops = {
916 .build_state = seg6_local_build_state,
917 .destroy_state = seg6_local_destroy_state,
918 .input = seg6_local_input,
919 .fill_encap = seg6_local_fill_encap,
920 .get_encap_size = seg6_local_get_encap_size,
921 .cmp_encap = seg6_local_cmp_encap,
922 .owner = THIS_MODULE,
925 int __init seg6_local_init(void)
927 return lwtunnel_encap_add_ops(&seg6_local_ops,
928 LWTUNNEL_ENCAP_SEG6_LOCAL);
931 void seg6_local_exit(void)
933 lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);