1 // SPDX-License-Identifier: GPL-2.0+
3 * IPv6 IOAM Lightweight Tunnel implementation
6 * Justin Iurman <justin.iurman@uliege.be>
9 #include <linux/kernel.h>
10 #include <linux/skbuff.h>
11 #include <linux/net.h>
12 #include <linux/in6.h>
13 #include <linux/ioam6.h>
14 #include <linux/ioam6_iptunnel.h>
17 #include <net/lwtunnel.h>
18 #include <net/ioam6.h>
19 #include <net/netlink.h>
21 #include <net/dst_cache.h>
22 #include <net/ip6_route.h>
23 #include <net/addrconf.h>
/*
 * Masks applied to the host-order trace type in ioam6_validate_trace_hdr():
 * every set bit under SHORT adds sizeof(__be32) / 4 to nodelen, every set
 * bit under WIDE adds sizeof(__be64) / 4.
 */
25 #define IOAM6_MASK_SHORT_FIELDS 0xff100000
26 #define IOAM6_MASK_WIDE_FIELDS 0xe00000
/*
 * Pre-built header block kept in the tunnel state: Hop-by-Hop options
 * header, two padding octets, IOAM option header, then the trace header.
 * ioam6_do_inline() and ioam6_do_encap() memcpy() this block to the
 * packet's transport header position.
 */
28 struct ioam6_lwt_encap {
29 struct ipv6_hopopt_hdr eh;
30 u8 pad[2]; /* 2-octet padding for 4n-alignment */
31 struct ioam6_hdr ioamh;
32 struct ioam6_trace_hdr traceh;
/*
 * Members of the state that ioam6_lwt_state() returns as struct ioam6_lwt.
 * NOTE(review): the struct header and the ->mode member read elsewhere in
 * this file are not visible in this view.
 */
/* Route cache consulted by ioam6_output() when the destination changed. */
36 struct dst_cache cache;
/* Tunnel destination, copied from IOAM6_IPTUNNEL_DST when provided. */
38 struct in6_addr tundst;
/* Header template filled in by ioam6_build_state(). */
39 struct ioam6_lwt_encap tuninfo;
/* Return the IOAM6 state stored in the data area of @lwt. */
42 static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
44 return (struct ioam6_lwt *)lwt->data;
/* Return the header template (tuninfo) embedded in the IOAM6 state of @lwt. */
47 static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
49 return &ioam6_lwt_state(lwt)->tuninfo;
/* Return the trace header inside the header template of @lwt. */
52 static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
54 return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
/*
 * Netlink attribute policy for the nested IOAM6 tunnel attributes:
 * MODE is a u8 within [IOAM6_IPTUNNEL_MODE_MIN, IOAM6_IPTUNNEL_MODE_MAX],
 * DST must be exactly sizeof(struct in6_addr) bytes and TRACE exactly
 * sizeof(struct ioam6_trace_hdr) bytes.
 */
57 static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
58 [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
59 IOAM6_IPTUNNEL_MODE_MIN,
60 IOAM6_IPTUNNEL_MODE_MAX),
61 [IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
62 [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
/*
 * Check a trace header and accumulate its per-node length in @trace->nodelen.
 *
 * The visible part of the condition tests for: a zero trace type, a zero
 * remlen, a remlen above IOAM6_TRACE_DATA_SIZE_MAX / 4, and any of the
 * type bits 12 to 20 being set. The caller in ioam6_build_state() rejects
 * the attribute when this function returns false.
 *
 * NOTE(review): the end of the condition, the initialisation of nodelen
 * and the return statements are not visible in this view.
 */
65 static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
69 if (!trace->type_be32 || !trace->remlen ||
70 trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
71 trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
72 trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
73 trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
78 fields = be32_to_cpu(trace->type_be32);
/*
 * Count the selected fields with hweight32() and weight them by their
 * size divided by 4, so nodelen is accumulated in multiples of 4 octets.
 */
80 trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
81 * (sizeof(__be32) / 4);
82 trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
83 * (sizeof(__be64) / 4);
/*
 * Build the lwtunnel state of an IOAM6 route from nested netlink attributes.
 *
 * @nla:    nested attributes, parsed against ioam6_iptunnel_policy
 * @family: compared against AF_INET6
 * @extack: receives the error messages set below
 * @ts:     NOTE(review): presumably receives the new state; the assignment
 *          is not visible in this view
 * @net and @cfg are not referenced in the visible lines.
 *
 * NOTE(review): the error returns and cleanup paths are not visible here.
 */
88 static int ioam6_build_state(struct net *net, struct nlattr *nla,
89 unsigned int family, const void *cfg,
90 struct lwtunnel_state **ts,
91 struct netlink_ext_ack *extack)
93 struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
94 struct ioam6_lwt_encap *tuninfo;
95 struct ioam6_trace_hdr *trace;
96 struct lwtunnel_state *lwt;
97 struct ioam6_lwt *ilwt;
101 if (family != AF_INET6)
104 err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
105 ioam6_iptunnel_policy, extack);
/* Without a MODE attribute the mode is IOAM6_IPTUNNEL_MODE_INLINE. */
109 if (!tb[IOAM6_IPTUNNEL_MODE])
110 mode = IOAM6_IPTUNNEL_MODE_INLINE;
112 mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
/* Every mode other than INLINE requires a tunnel destination. */
114 if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
115 NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
119 if (!tb[IOAM6_IPTUNNEL_TRACE]) {
120 NL_SET_ERR_MSG(extack, "missing trace");
124 trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
125 if (!ioam6_validate_trace_hdr(trace)) {
126 NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
127 "invalid trace validation");
/*
 * The trace data area is remlen * 4 octets, rounded up to a multiple of 8;
 * it is allocated in addition to struct ioam6_lwt.
 */
131 len_aligned = ALIGN(trace->remlen * 4, 8);
132 lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
136 ilwt = ioam6_lwt_state(lwt);
137 err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
144 if (tb[IOAM6_IPTUNNEL_DST])
145 ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
147 tuninfo = ioam6_lwt_info(lwt);
/*
 * hdrlen is the template size in 8-octet units minus one; the output
 * helpers recover the size in octets with (hdrlen + 1) << 3.
 */
148 tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
149 tuninfo->pad[0] = IPV6_TLV_PADN;
150 tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
151 tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
152 tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
155 memcpy(&tuninfo->traceh, trace, sizeof(*trace));
/*
 * If the alignment added octets after the trace data, start them with a
 * PadN option whose length octet is 2.
 */
157 if (len_aligned - trace->remlen * 4) {
158 tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
159 tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
162 lwt->type = LWTUNNEL_ENCAP_IOAM6;
163 lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
/*
 * Locate the trace header in the packet and have it filled in.
 *
 * The trace header sits at the transport header plus the Hop-by-Hop
 * options header, 2 octets (matching pad[2] in struct ioam6_lwt_encap)
 * and the IOAM option header. Its namespace is looked up with
 * ioam6_namespace() and passed to ioam6_fill_trace_data().
 *
 * NOTE(review): the handling of the lookup result and the return value
 * are not visible in this view.
 */
170 static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
172 struct ioam6_trace_hdr *trace;
173 struct ioam6_namespace *ns;
175 trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
176 + sizeof(struct ipv6_hopopt_hdr) + 2
177 + sizeof(struct ioam6_hdr));
179 ns = ioam6_namespace(net, trace->namespace_id);
181 ioam6_fill_trace_data(skb, ns, trace, false);
/*
 * Insert the pre-built Hop-by-Hop/IOAM block right behind the packet's
 * existing IPv6 header, then fill the trace via ioam6_do_fill().
 *
 * @tuninfo: header template; its eh.nexthdr is overwritten here.
 *
 * NOTE(review): error handling after skb_cow_head() is not visible in
 * this view.
 */
186 static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
187 struct ioam6_lwt_encap *tuninfo)
189 struct ipv6hdr *oldhdr, *hdr;
/* Size of the block in octets, derived from the Hdr Ext Len field. */
192 hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
/* Request headroom for the new block plus the MAC header. */
194 err = skb_cow_head(skb, hdrlen + skb->mac_len);
198 oldhdr = ipv6_hdr(skb);
/* Pull the IPv6 header, then push room for IPv6 header + new block. */
199 skb_pull(skb, sizeof(*oldhdr));
200 skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
202 skb_push(skb, sizeof(*oldhdr) + hdrlen);
203 skb_reset_network_header(skb);
204 skb_mac_header_rebuild(skb);
/* Move the IPv6 header to the new front of the packet. */
207 memmove(hdr, oldhdr, sizeof(*oldhdr));
/*
 * The inserted header takes over the packet's next-header value.
 * NOTE(review): this writes per-packet data into the template taken from
 * the lwtunnel state before it is copied; confirm concurrent callers
 * cannot interleave here.
 */
208 tuninfo->eh.nexthdr = hdr->nexthdr;
210 skb_set_transport_header(skb, sizeof(*hdr));
211 skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
213 memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
215 hdr->nexthdr = NEXTHDR_HOP;
216 hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
218 return ioam6_do_fill(net, skb);
/*
 * Encapsulate the packet: prepend an outer IPv6 header followed by the
 * pre-built Hop-by-Hop/IOAM block, then fill the trace via ioam6_do_fill().
 *
 * @tuninfo: header template; its eh.nexthdr is set to NEXTHDR_IPV6 here.
 * @tundst:  destination address written into the outer header.
 *
 * NOTE(review): the skb_push() that makes room for the outer header and
 * the error handling after skb_cow_head() are not visible in this view.
 */
221 static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
222 struct ioam6_lwt_encap *tuninfo,
223 struct in6_addr *tundst)
225 struct dst_entry *dst = skb_dst(skb);
226 struct ipv6hdr *hdr, *inner_hdr;
227 int hdrlen, len, err;
/* len covers the outer IPv6 header plus the option block. */
229 hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
230 len = sizeof(*hdr) + hdrlen;
232 err = skb_cow_head(skb, len + skb->mac_len);
236 inner_hdr = ipv6_hdr(skb);
239 skb_reset_network_header(skb);
240 skb_mac_header_rebuild(skb);
241 skb_set_transport_header(skb, sizeof(*hdr));
/*
 * NOTE(review): as in ioam6_do_inline(), the template from the lwtunnel
 * state is modified before being copied into the packet.
 */
243 tuninfo->eh.nexthdr = NEXTHDR_IPV6;
244 memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
/* Start the outer header as a copy of the inner one, then override. */
247 memcpy(hdr, inner_hdr, sizeof(*hdr));
249 hdr->nexthdr = NEXTHDR_HOP;
250 hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
251 hdr->daddr = *tundst;
/* Select the outer source address for the output device and new daddr. */
252 ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
253 IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
255 skb_postpush_rcsum(skb, hdr, len);
257 return ioam6_do_fill(net, skb);
/*
 * lwtunnel output hook for IOAM6 routes.
 *
 * The destination address of the packet is saved first. Depending on
 * ilwt->mode the pre-built IOAM block is then inserted inline (only when
 * the packet does not already start with a Hop-by-Hop header) or the
 * packet is encapsulated towards ilwt->tundst. Afterwards headroom of
 * LL_RESERVED_SPACE(dst->dev) is requested.
 *
 * If the destination address now differs from the saved one, a route is
 * taken from the dst cache or, failing that, looked up with
 * ip6_route_output() and stored in the cache; it is attached to the skb
 * and the packet is passed to dst_output(). Otherwise the original output
 * handler of the lwtunnel state is called.
 *
 * NOTE(review): the error/drop paths, the handling of non-IPv6 packets
 * and the dispatch for AUTO mode are not visible in this view.
 */
260 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
262 struct dst_entry *dst = skb_dst(skb);
263 struct in6_addr orig_daddr;
264 struct ioam6_lwt *ilwt;
267 if (skb->protocol != htons(ETH_P_IPV6))
270 ilwt = ioam6_lwt_state(dst->lwtstate);
271 orig_daddr = ipv6_hdr(skb)->daddr;
273 switch (ilwt->mode) {
274 case IOAM6_IPTUNNEL_MODE_INLINE:
276 /* Direct insertion - if there is no Hop-by-Hop yet */
277 if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
280 err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
285 case IOAM6_IPTUNNEL_MODE_ENCAP:
287 /* Encapsulation (ip6ip6) */
288 err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
293 case IOAM6_IPTUNNEL_MODE_AUTO:
294 /* Automatic (RFC8200 compliant):
295 * - local packets -> INLINE mode
296 * - in-transit packets -> ENCAP mode
306 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
310 if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
312 dst = dst_cache_get(&ilwt->cache);
315 if (unlikely(!dst)) {
316 struct ipv6hdr *hdr = ipv6_hdr(skb);
319 memset(&fl6, 0, sizeof(fl6));
320 fl6.daddr = hdr->daddr;
321 fl6.saddr = hdr->saddr;
322 fl6.flowlabel = ip6_flowinfo(hdr);
323 fl6.flowi6_mark = skb->mark;
324 fl6.flowi6_proto = hdr->nexthdr;
326 dst = ip6_route_output(net, NULL, &fl6);
334 dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
339 skb_dst_set(skb, dst);
341 return dst_output(net, sk, skb);
344 return dst->lwtstate->orig_output(net, sk, skb);
/* Tear down the dst cache that ioam6_build_state() initialised for @lwt. */
350 static void ioam6_destroy_state(struct lwtunnel_state *lwt)
352 dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
/*
 * Dump the tunnel configuration into @skb as netlink attributes: the mode,
 * the tunnel destination for modes other than INLINE, and the trace header.
 *
 * NOTE(review): the checks on err and the return statements are not
 * visible in this view.
 */
355 static int ioam6_fill_encap_info(struct sk_buff *skb,
356 struct lwtunnel_state *lwtstate)
358 struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
361 err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
365 if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
366 err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
371 err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
372 &ilwt->tuninfo.traceh);
/*
 * Compute the netlink attribute space for the attributes emitted by
 * ioam6_fill_encap_info(): mode and trace header, plus the tunnel
 * destination for modes other than INLINE.
 */
377 static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
379 struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
382 nlsize = nla_total_size(sizeof(ilwt->mode)) +
383 nla_total_size(sizeof(ilwt->tuninfo.traceh));
385 if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
386 nlsize += nla_total_size(sizeof(ilwt->tundst));
/*
 * Compare two IOAM6 tunnel states. The result is non-zero when they differ
 * in mode, in tunnel destination (only checked for modes other than
 * INLINE), or in the namespace id of their trace headers.
 *
 * NOTE(review): other trace header fields (e.g. type, remlen) are not
 * part of the comparison; confirm that this is intended.
 */
391 static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
393 struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
394 struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
395 struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
396 struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
398 return (ilwt_a->mode != ilwt_b->mode ||
399 (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
400 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
401 trace_a->namespace_id != trace_b->namespace_id);
/* Callback table registered for LWTUNNEL_ENCAP_IOAM6 in ioam6_iptunnel_init(). */
404 static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
405 .build_state = ioam6_build_state,
406 .destroy_state = ioam6_destroy_state,
407 .output = ioam6_output,
408 .fill_encap = ioam6_fill_encap_info,
409 .get_encap_size = ioam6_encap_nlsize,
410 .cmp_encap = ioam6_encap_cmp,
411 .owner = THIS_MODULE,
/*
 * Register the IOAM6 encap ops under LWTUNNEL_ENCAP_IOAM6.
 * Returns the result of lwtunnel_encap_add_ops().
 */
414 int __init ioam6_iptunnel_init(void)
416 return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
/* Unregister the IOAM6 encap ops registered by ioam6_iptunnel_init(). */
419 void ioam6_iptunnel_exit(void)
421 lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);