// SPDX-License-Identifier: GPL-2.0+
/*
 *  IPv6 IOAM Lightweight Tunnel implementation
 *
 *  Author:
 *  Justin Iurman <justin.iurman@uliege.be>
 */

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/ioam6.h>
#include <linux/ioam6_iptunnel.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/lwtunnel.h>
#include <net/ioam6.h>
#include <net/netlink.h>
#include <net/ipv6.h>
#include <net/dst_cache.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>

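/* Bits of the pre-allocated trace type that select 4-octet ("short") and
 * 8-octet ("wide") node data fields, with bit 0 of the 24-bit trace type
 * as the most significant bit of the host-order word.
 */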
#define IOAM6_MASK_SHORT_FIELDS 0xff100000
#define IOAM6_MASK_WIDE_FIELDS 0xe00000

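/* Pre-built encap block: a Hop-by-Hop header, a 2-octet PadN, the IOAM
 * option header and the pre-allocated trace header. The trace data area
 * follows this struct in memory (see ioam6_build_state()).
 */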
struct ioam6_lwt_encap {
        struct ipv6_hopopt_hdr eh;
        u8 pad[2];                      /* 2-octet padding for 4n-alignment */
        struct ioam6_hdr ioamh;
        struct ioam6_trace_hdr traceh;
} __packed;

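/* Per-route lwtunnel state: insertion mode, tunnel destination (only used
 * by the encap modes), a dst cache for the re-routed path, and the encap
 * block above.
 */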
struct ioam6_lwt {
        struct dst_cache cache;
        u8 mode;
        struct in6_addr tundst;
        struct ioam6_lwt_encap  tuninfo;
};

static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
{
        return (struct ioam6_lwt *)lwt->data;
}

static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
{
        return &ioam6_lwt_state(lwt)->tuninfo;
}

static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
{
        return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
}

static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
        [IOAM6_IPTUNNEL_MODE]   = NLA_POLICY_RANGE(NLA_U8,
                                                   IOAM6_IPTUNNEL_MODE_MIN,
                                                   IOAM6_IPTUNNEL_MODE_MAX),
        [IOAM6_IPTUNNEL_DST]    = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
        [IOAM6_IPTUNNEL_TRACE]  = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
};

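/* Reject empty or oversized traces, as well as trace types that request
 * fields we cannot pre-allocate (bits 12-21 are unsupported), then derive
 * nodelen, i.e. the per-node data length in 4-octet units: one unit per
 * short (4-octet) field, two per wide (8-octet) field.
 */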
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
{
        u32 fields;

        if (!trace->type_be32 || !trace->remlen ||
            trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
            trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
            trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
            trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
            trace->type.bit21)
                return false;

        trace->nodelen = 0;
        fields = be32_to_cpu(trace->type_be32);

        trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
                                * (sizeof(__be32) / 4);
        trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
                                * (sizeof(__be64) / 4);

        return true;
}

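/* Parse and validate the netlink configuration, then build the lwtunnel
 * state. The encap block is pre-computed here (extension header length,
 * option type/length, trailing PadN for 8n-alignment of the trace data)
 * so the output path only has to copy it in front of each packet.
 *
 * Typically configured through iproute2, e.g. (exact syntax depends on
 * the iproute2 version):
 *
 *   ip -6 route add db02::/64 encap ioam6 mode inline \
 *           trace prealloc type 0x800000 ns 1 size 12 dev eth0
 */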
static int ioam6_build_state(struct net *net, struct nlattr *nla,
                             unsigned int family, const void *cfg,
                             struct lwtunnel_state **ts,
                             struct netlink_ext_ack *extack)
{
        struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
        struct ioam6_lwt_encap *tuninfo;
        struct ioam6_trace_hdr *trace;
        struct lwtunnel_state *lwt;
        struct ioam6_lwt *ilwt;
        int len_aligned, err;
        u8 mode;

        if (family != AF_INET6)
                return -EINVAL;

        err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
                               ioam6_iptunnel_policy, extack);
        if (err < 0)
                return err;

        if (!tb[IOAM6_IPTUNNEL_MODE])
                mode = IOAM6_IPTUNNEL_MODE_INLINE;
        else
                mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);

        if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
                NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
                return -EINVAL;
        }

        if (!tb[IOAM6_IPTUNNEL_TRACE]) {
                NL_SET_ERR_MSG(extack, "missing trace");
                return -EINVAL;
        }

        trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
        if (!ioam6_validate_trace_hdr(trace)) {
                NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
                                    "invalid trace");
                return -EINVAL;
        }

        len_aligned = ALIGN(trace->remlen * 4, 8);
        lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
        if (!lwt)
                return -ENOMEM;

        ilwt = ioam6_lwt_state(lwt);
        err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
        if (err) {
                kfree(lwt);
                return err;
        }

        ilwt->mode = mode;
        if (tb[IOAM6_IPTUNNEL_DST])
                ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);

        tuninfo = ioam6_lwt_info(lwt);
        tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
        tuninfo->pad[0] = IPV6_TLV_PADN;
        tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
        tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
        tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
                                        + trace->remlen * 4;

        memcpy(&tuninfo->traceh, trace, sizeof(*trace));

        if (len_aligned - trace->remlen * 4) {
                tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
                tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
        }

        lwt->type = LWTUNNEL_ENCAP_IOAM6;
        lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;

        *ts = lwt;

        return 0;
}

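/* Fill the pre-allocated trace data for the current node. The trace
 * header sits right behind the Hop-by-Hop header, the 2-octet PadN and
 * the IOAM option header.
 */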
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
{
        struct ioam6_trace_hdr *trace;
        struct ioam6_namespace *ns;

        trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
                                           + sizeof(struct ipv6_hopopt_hdr) + 2
                                           + sizeof(struct ioam6_hdr));

        ns = ioam6_namespace(net, trace->namespace_id);
        if (ns)
                ioam6_fill_trace_data(skb, ns, trace, false);

        return 0;
}

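/* Inline insertion: grow the headroom and slide the IPv6 header forward,
 * then copy the pre-built Hop-by-Hop block right behind it and fix up
 * nexthdr, payload length and the skb checksum.
 */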
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
                           struct ioam6_lwt_encap *tuninfo)
{
        struct ipv6hdr *oldhdr, *hdr;
        int hdrlen, err;

        hdrlen = (tuninfo->eh.hdrlen + 1) << 3;

        err = skb_cow_head(skb, hdrlen + skb->mac_len);
        if (unlikely(err))
                return err;

        oldhdr = ipv6_hdr(skb);
        skb_pull(skb, sizeof(*oldhdr));
        skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));

        skb_push(skb, sizeof(*oldhdr) + hdrlen);
        skb_reset_network_header(skb);
        skb_mac_header_rebuild(skb);

        hdr = ipv6_hdr(skb);
        memmove(hdr, oldhdr, sizeof(*oldhdr));
        tuninfo->eh.nexthdr = hdr->nexthdr;

        skb_set_transport_header(skb, sizeof(*hdr));
        skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);

        memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);

        hdr->nexthdr = NEXTHDR_HOP;
        hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));

        return ioam6_do_fill(net, skb);
}

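/* ip6ip6 encapsulation: prepend a new outer IPv6 header carrying the
 * Hop-by-Hop block. The outer header is copied from the inner one, then
 * its destination is rewritten to the tunnel endpoint and a matching
 * source address is selected on the egress device.
 */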
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
                          struct ioam6_lwt_encap *tuninfo,
                          struct in6_addr *tundst)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr, *inner_hdr;
        int hdrlen, len, err;

        hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
        len = sizeof(*hdr) + hdrlen;

        err = skb_cow_head(skb, len + skb->mac_len);
        if (unlikely(err))
                return err;

        inner_hdr = ipv6_hdr(skb);

        skb_push(skb, len);
        skb_reset_network_header(skb);
        skb_mac_header_rebuild(skb);
        skb_set_transport_header(skb, sizeof(*hdr));

        tuninfo->eh.nexthdr = NEXTHDR_IPV6;
        memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);

        hdr = ipv6_hdr(skb);
        memcpy(hdr, inner_hdr, sizeof(*hdr));

        hdr->nexthdr = NEXTHDR_HOP;
        hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
        hdr->daddr = *tundst;
        ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
                           IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);

        skb_postpush_rcsum(skb, hdr, len);

        return ioam6_do_fill(net, skb);
}

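/* lwtunnel output hook: insert the IOAM option according to the mode,
 * then hand the packet back to the original output path. If encapsulation
 * changed the destination, re-route through the cached (or freshly
 * looked-up) dst first.
 */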
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct in6_addr orig_daddr;
        struct ioam6_lwt *ilwt;
        int err = -EINVAL;

        if (skb->protocol != htons(ETH_P_IPV6))
                goto drop;

        ilwt = ioam6_lwt_state(dst->lwtstate);
        orig_daddr = ipv6_hdr(skb)->daddr;

        switch (ilwt->mode) {
        case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
                /* Direct insertion - if there is no Hop-by-Hop yet */
                if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
                        goto out;

                err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
                if (unlikely(err))
                        goto drop;

                break;
        case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
                /* Encapsulation (ip6ip6) */
                err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
                if (unlikely(err))
                        goto drop;

                break;
        case IOAM6_IPTUNNEL_MODE_AUTO:
                /* Automatic (RFC8200 compliant):
                 *  - local packets -> INLINE mode
                 *  - in-transit packets -> ENCAP mode
                 */
                if (!skb->dev)
                        goto do_inline;

                goto do_encap;
        default:
                goto drop;
        }

        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
        if (unlikely(err))
                goto drop;

        if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
                preempt_disable();
                dst = dst_cache_get(&ilwt->cache);
                preempt_enable();

                if (unlikely(!dst)) {
                        struct ipv6hdr *hdr = ipv6_hdr(skb);
                        struct flowi6 fl6;

                        memset(&fl6, 0, sizeof(fl6));
                        fl6.daddr = hdr->daddr;
                        fl6.saddr = hdr->saddr;
                        fl6.flowlabel = ip6_flowinfo(hdr);
                        fl6.flowi6_mark = skb->mark;
                        fl6.flowi6_proto = hdr->nexthdr;

                        dst = ip6_route_output(net, NULL, &fl6);
                        if (dst->error) {
                                err = dst->error;
                                dst_release(dst);
                                goto drop;
                        }

                        preempt_disable();
                        dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
                        preempt_enable();
                }

                skb_dst_drop(skb);
                skb_dst_set(skb, dst);

                return dst_output(net, sk, skb);
        }
out:
        return dst->lwtstate->orig_output(net, sk, skb);
drop:
        kfree_skb(skb);
        return err;
}

static void ioam6_destroy_state(struct lwtunnel_state *lwt)
{
        dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
}

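/* Dump the configuration back to user space; the tunnel destination is
 * only reported for modes that actually use it.
 */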
static int ioam6_fill_encap_info(struct sk_buff *skb,
                                 struct lwtunnel_state *lwtstate)
{
        struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
        int err;

        err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
        if (err)
                goto ret;

        if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
                err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
                if (err)
                        goto ret;
        }

        err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
                      &ilwt->tuninfo.traceh);
ret:
        return err;
}

static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
        struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
        int nlsize;

        nlsize = nla_total_size(sizeof(ilwt->mode)) +
                  nla_total_size(sizeof(ilwt->tuninfo.traceh));

        if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
                nlsize += nla_total_size(sizeof(ilwt->tundst));

        return nlsize;
}

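/* Two states are equal when mode, tunnel destination (if relevant) and
 * trace namespace all match; returns non-zero when they differ.
 */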
static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
        struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
        struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
        struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
        struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);

        return (ilwt_a->mode != ilwt_b->mode ||
                (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
                 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
                trace_a->namespace_id != trace_b->namespace_id);
}

static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
        .build_state    = ioam6_build_state,
        .destroy_state  = ioam6_destroy_state,
        .output         = ioam6_output,
        .fill_encap     = ioam6_fill_encap_info,
        .get_encap_size = ioam6_encap_nlsize,
        .cmp_encap      = ioam6_encap_cmp,
        .owner          = THIS_MODULE,
};

int __init ioam6_iptunnel_init(void)
{
        return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
}

void ioam6_iptunnel_exit(void)
{
        lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
}