1 // SPDX-License-Identifier: GPL-2.0
3 * Management Component Transport Protocol (MCTP) - routing
6 * This is currently based on a simple routing table, with no dst cache. The
7 * number of routes should stay fairly small, so the lookup cost is small.
9 * Copyright (c) 2021 Code Construct
10 * Copyright (c) 2021 Google
13 #include <linux/idr.h>
14 #include <linux/mctp.h>
15 #include <linux/netdevice.h>
16 #include <linux/rtnetlink.h>
17 #include <linux/skbuff.h>
19 #include <uapi/linux/if_arp.h>
22 #include <net/mctpdevice.h>
23 #include <net/netlink.h>
26 /* route output callbacks */
/* Route output handler that takes the route and the skb handed to it.
 *
 * mctp_route_alloc() installs this as the initial output handler of every
 * new route, before mctp_route_add() selects the input or output handler.
 * NOTE(review): the body is not visible here; presumably it frees the skb
 * and returns 0 - confirm.
 */
27 static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
/* Find a bound socket in @net that is willing to receive @skb.
 *
 * Must be called under rcu_read_lock(); the bind list is walked with
 * sk_for_each_rcu(). The message type is taken from the low seven bits of
 * the first byte at skb->data, so the skb must have linear data.
 * A bind is rejected when its network (unless MCTP_NET_ANY), its type, or
 * its address (unless MCTP_ADDR_ANY) differs from the packet.
 * NOTE(review): the return statements are not visible here; presumably the
 * first bind passing all three checks is returned and NULL otherwise -
 * confirm.
 */
33 static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
35 struct mctp_skb_cb *cb = mctp_cb(skb);
40 WARN_ON(!rcu_read_lock_held());
42 /* TODO: look up in skb->cb? */
45 if (!skb_headlen(skb))
48 type = (*(u8 *)skb->data) & 0x7f;
50 sk_for_each_rcu(sk, &net->mctp.binds) {
51 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
53 if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
56 if (msk->bind_type != type)
59 if (msk->bind_addr != MCTP_ADDR_ANY &&
60 msk->bind_addr != mh->dest)
/* Test whether @key describes the (local, peer, tag) tuple.
 *
 * The local and peer addresses of the key are compared against @local and
 * @peer for exact equality.
 * NOTE(review): the @tag comparison and the return values are not visible
 * here; presumably any mismatch yields false - confirm.
 */
69 static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
70 mctp_eid_t peer, u8 tag)
72 if (key->local_addr != local)
75 if (key->peer_addr != peer)
/* Search the per-net key list for a key matching an incoming packet.
 *
 * Must be called under rcu_read_lock(). The lookup tuple passed to
 * mctp_key_match() is the packet destination EID as the local address, the
 * caller-supplied peer, and the header tag bits together with the
 * MCTP_HDR_FLAG_TO bit.
 * NOTE(review): the return path is not visible here; presumably the first
 * matching key is returned and NULL otherwise - confirm.
 */
84 static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
87 struct mctp_sk_key *key, *ret;
91 WARN_ON(!rcu_read_lock_held());
94 tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
98 hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
99 if (mctp_key_match(key, mh->dest, peer, tag)) {
/* Route output handler for local routes: deliver @skb to a socket.
 *
 * Installed by mctp_route_add() when the route is local. The packet must
 * hold at least an MCTP header plus one type byte; the header is then
 * stripped with skb_pull(). The SOM and EOM flags are both tested, since
 * reassembly is still a TODO. The receiving socket comes from a key lookup
 * on (src, dest, tag), then on (MCTP_ADDR_ANY, dest, tag), and finally from
 * the bind table for packets with SOM set; the skb is queued with
 * sock_queue_rcv_skb().
 * NOTE(review): the return value of sock_queue_rcv_skb() is not used on the
 * visible line; check whether a failed queue leaks the skb.
 * NOTE(review): the drop paths and return values are not visible here.
 */
108 static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
110 struct net *net = dev_net(skb->dev);
111 struct mctp_sk_key *key;
112 struct mctp_sock *msk;
117 /* we may be receiving a locally-routed packet; drop source sk
122 /* ensure we have enough data for a header and a type */
123 if (skb->len < sizeof(struct mctp_hdr) + 1)
126 /* grab header, advance data ptr */
128 skb_pull(skb, sizeof(struct mctp_hdr));
133 /* TODO: reassembly */
134 if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
135 != (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
139 /* 1. lookup socket matching (src,dest,tag) */
140 key = mctp_lookup_key(net, skb, mh->src);
142 /* 2. lookup socket matching (BCAST,dest,tag) */
144 key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
146 /* 3. SOM? -> lookup bound socket, conditionally (!EOM) create
147 * mapping for future (1)/(2).
150 msk = container_of(key->sk, struct mctp_sock, sk);
151 else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM))
152 msk = mctp_lookup_bind(net, skb);
157 sock_queue_rcv_skb(&msk->sk, skb);
/* Route output handler for non-local routes: transmit @skb on skb->dev.
 *
 * Marks the skb as ETH_P_MCTP, compares its length against the device MTU
 * (read with READ_ONCE()), builds the link-layer header with
 * dev_hard_header() without any addresses (see the TODO) and hands the skb
 * to dev_queue_xmit(), whose result is converted by net_xmit_errno().
 * -EHOSTUNREACH is one of the error returns.
 * NOTE(review): the handling of an over-MTU skb and the conditions guarding
 * the error returns are not visible here - confirm.
 */
170 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
175 skb->protocol = htons(ETH_P_MCTP);
177 mtu = READ_ONCE(skb->dev->mtu);
178 if (skb->len > mtu) {
183 /* TODO: daddr (from rt->neigh), saddr (from device?) */
184 rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
185 NULL, NULL, skb->len);
188 return -EHOSTUNREACH;
191 rc = dev_queue_xmit(skb);
193 rc = net_xmit_errno(rc);
198 /* route alloc/release */
/* Drop one reference on @rt.
 *
 * When the count reaches zero, the hold on the underlying net_device that
 * mctp_route_add() took with dev_hold() is released with dev_put().
 * NOTE(review): the freeing of @rt itself is not visible here - confirm.
 */
199 static void mctp_route_release(struct mctp_route *rt)
201 if (refcount_dec_and_test(&rt->refs)) {
202 dev_put(rt->dev->dev);
/* Allocate a zeroed route with kzalloc(GFP_KERNEL).
 *
 * The new route has an initialised list head, a reference count of one and
 * mctp_route_discard as its output handler; the caller fills in the rest.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible here; presumably NULL is returned on failure - confirm.
 */
207 /* returns a route with the refcount at 1 */
208 static struct mctp_route *mctp_route_alloc(void)
210 struct mctp_route *rt;
212 rt = kzalloc(sizeof(*rt), GFP_KERNEL);
216 INIT_LIST_HEAD(&rt->list);
217 refcount_set(&rt->refs, 1);
218 rt->output = mctp_route_discard;
/* Publish @key on both the per-net key list and the key list of @msk.
 *
 * The caller must hold the per-net keys_lock (asserted via lockdep). Both
 * insertions use hlist_add_head_rcu(), which fits readers such as
 * mctp_lookup_key() that walk the per-net list under RCU.
 */
224 static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
225 struct mctp_sock *msk)
227 struct netns_mctp *mns = &net->mctp;
229 lockdep_assert_held(&mns->keys_lock);
233 /* we hold the net->key_lock here, allowing updates to both
236 hlist_add_head_rcu(&key->hlist, &mns->keys);
237 hlist_add_head_rcu(&key->sklist, &msk->keys);
240 /* Allocate a locally-owned tag value for (saddr, daddr), and reserve
241 * it for the socket msk
/* Pick a free tag value for the (saddr, daddr) pair and bind it to @msk.
 *
 * A key is allocated up front, before keys_lock is taken. Under the lock
 * the existing keys are scanned: keys with MCTP_HDR_FLAG_TO set cannot
 * conflict, and each remaining key with the same local address and a
 * matching (or MCTP_ADDR_ANY) peer removes its tag from the tagbits
 * candidate mask. The lowest bit still set becomes the new tag and the key
 * is published with mctp_reserve_tag().
 * NOTE(review): the initial tagbits value, the handling of an exhausted
 * mask, the write to @tagp and the return codes are not visible here -
 * confirm.
 */
243 static int mctp_alloc_local_tag(struct mctp_sock *msk,
244 mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
246 struct net *net = sock_net(&msk->sk);
247 struct netns_mctp *mns = &net->mctp;
248 struct mctp_sk_key *key, *tmp;
253 /* be optimistic, alloc now */
254 key = kzalloc(sizeof(*key), GFP_KERNEL);
257 key->local_addr = saddr;
258 key->peer_addr = daddr;
260 /* 8 possible tag values */
263 spin_lock_irqsave(&mns->keys_lock, flags);
265 /* Walk through the existing keys, looking for potential conflicting
266 * tags. If we find a conflict, clear that bit from tagbits
268 hlist_for_each_entry(tmp, &mns->keys, hlist) {
269 /* if we don't own the tag, it can't conflict */
270 if (tmp->tag & MCTP_HDR_FLAG_TO)
273 if ((tmp->peer_addr == daddr ||
274 tmp->peer_addr == MCTP_ADDR_ANY) &&
275 tmp->local_addr == saddr)
276 tagbits &= ~(1 << tmp->tag);
283 key->tag = __ffs(tagbits);
284 mctp_reserve_tag(net, key, msk);
289 spin_unlock_irqrestore(&mns->keys_lock, flags);
297 /* routing lookups */
/* Routing lookup predicate: does @rt cover @eid on network @net?
 *
 * True when the network id of the route device (read with READ_ONCE())
 * equals @net and @eid lies in the inclusive range rt->min..rt->max.
 */
298 static bool mctp_rt_match_eid(struct mctp_route *rt,
299 unsigned int net, mctp_eid_t eid)
301 return READ_ONCE(rt->dev->net) == net &&
302 rt->min <= eid && rt->max >= eid;
/* Two routes compare equal when their devices share a network id and both
 * the min and max EIDs are identical. mctp_route_add() uses this to refuse
 * a route that duplicates an existing one.
 */
305 /* compares match, used for duplicate prevention */
306 static bool mctp_rt_compare_exact(struct mctp_route *rt1,
307 struct mctp_route *rt2)
310 return rt1->dev->net == rt2->dev->net &&
311 rt1->min == rt2->min &&
312 rt1->max == rt2->max;
/* Find a route in @net for destination network @dnet and EID daddr.
 *
 * The per-net route list is walked with list_for_each_entry_rcu(). A
 * reference is taken on a matching route with refcount_inc_not_zero(), so a
 * route whose count has already reached zero is not handed out.
 * NOTE(review): the RCU read-side locking and the return statement are not
 * visible here; presumably the first usable match is returned with the
 * reference held and NULL otherwise - confirm.
 */
315 struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
318 struct mctp_route *tmp, *rt = NULL;
320 list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
321 /* TODO: add metrics */
322 if (mctp_rt_match_eid(tmp, dnet, daddr)) {
323 if (refcount_inc_not_zero(&tmp->refs)) {
/* Run the output handler of @rt on @skb, then drop the route reference
 * that the caller holds (for example one taken by mctp_route_lookup()).
 * NOTE(review): the return statement is not visible here; presumably the
 * handler result rc is returned - confirm.
 */
333 /* sends a skb to rt and releases the route. */
334 int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
338 rc = rt->output(rt, skb);
339 mctp_route_release(rt);
/* Send a locally-generated message in @skb to @daddr via @rt.
 *
 * The source EID is the first address of the route device, read under
 * addrs_lock. When @req_tag has MCTP_HDR_FLAG_TO set, a tag is allocated
 * for (saddr, daddr) with mctp_alloc_local_tag() and the TO flag is set on
 * it. An MCTP header is then pushed in front of the payload and marked with
 * both SOM and EOM (packetisation is still a TODO), and the skb is passed
 * to mctp_do_route(), which also releases @rt.
 * NOTE(review): the error paths (missing device, no addresses, tag
 * allocation failure) and the tag used when TO is not requested are not
 * visible here - confirm, in particular that @rt and @skb are released on
 * each of them.
 */
343 int mctp_local_output(struct sock *sk, struct mctp_route *rt,
344 struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
346 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
347 struct mctp_skb_cb *cb = mctp_cb(skb);
348 struct mctp_hdr *hdr;
354 if (WARN_ON(!rt->dev))
357 spin_lock_irqsave(&rt->dev->addrs_lock, flags);
358 if (rt->dev->num_addrs == 0) {
361 /* use the outbound interface's first address as our source */
362 saddr = rt->dev->addrs[0];
365 spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
370 if (req_tag & MCTP_HDR_FLAG_TO) {
371 rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
374 tag |= MCTP_HDR_FLAG_TO;
379 /* TODO: we have the route MTU here; packetise */
381 skb_reset_transport_header(skb);
382 skb_push(skb, sizeof(struct mctp_hdr));
383 skb_reset_network_header(skb);
388 hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */
391 skb->dev = rt->dev->dev;
392 skb->protocol = htons(ETH_P_MCTP);
395 /* cb->net will have been set on initial ingress */
398 return mctp_do_route(rt, skb);
401 /* route management */
/* Add a route on @mdev covering the EIDs from @daddr_start to
 * @daddr_start + @daddr_extent inclusive.
 *
 * The start address is validated with mctp_address_ok() and the end of the
 * range must stay below 255. A reference on the net_device is taken with
 * dev_hold(). Local routes get mctp_route_input as their handler, all
 * others mctp_route_output. A route identical to an existing one (same
 * network id, min and max) is released instead of being inserted;
 * otherwise it is added to the per-net list with list_add_rcu().
 * NOTE(review): the locking around the plain list_for_each_entry() walk and
 * the error codes are not visible here; presumably callers hold RTNL -
 * confirm.
 */
402 static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
403 unsigned int daddr_extent, unsigned int mtu,
406 struct net *net = dev_net(mdev->dev);
407 struct mctp_route *rt, *ert;
409 if (!mctp_address_ok(daddr_start))
412 if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
415 rt = mctp_route_alloc();
419 rt->min = daddr_start;
420 rt->max = daddr_start + daddr_extent;
423 dev_hold(rt->dev->dev);
424 rt->output = is_local ? mctp_route_input : mctp_route_output;
427 /* Prevent duplicate identical routes. */
428 list_for_each_entry(ert, &net->mctp.routes, list) {
429 if (mctp_rt_compare_exact(rt, ert)) {
430 mctp_route_release(rt);
435 list_add_rcu(&rt->list, &net->mctp.routes);
/* Remove the routes on @mdev whose range is exactly @daddr_start to
 * @daddr_start + @daddr_extent.
 *
 * Each matching route is unlinked with list_del_rcu() and the reference
 * held by the list is dropped. The final return yields 0 when something
 * was dropped and -ENOENT otherwise.
 * NOTE(review): the return code for a rejected extent, the update of the
 * dropped flag and the locking are not visible here - confirm.
 */
440 static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
441 unsigned int daddr_extent)
443 struct net *net = dev_net(mdev->dev);
444 struct mctp_route *rt, *tmp;
445 mctp_eid_t daddr_end;
448 if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
451 daddr_end = daddr_start + daddr_extent;
456 list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
457 if (rt->dev == mdev &&
458 rt->min == daddr_start && rt->max == daddr_end) {
459 list_del_rcu(&rt->list);
460 /* TODO: immediate RTM_DELROUTE */
461 mctp_route_release(rt);
466 return dropped ? 0 : -ENOENT;
/* Add a local route for the single EID @addr on @mdev: extent 0, mtu 0 and
 * the local flag set, so mctp_route_add() selects mctp_route_input.
 */
469 int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
471 return mctp_route_add(mdev, addr, 0, 0, true);
/* Remove the single-EID route (extent 0) for @addr on @mdev; returns the
 * result of mctp_route_remove().
 */
474 int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
476 return mctp_route_remove(mdev, addr, 0);
/* Unlink every route that refers to @mdev from the per-net route list with
 * list_del_rcu() and drop the reference held by the list.
 * NOTE(review): locking is not visible here; presumably RTNL is held by the
 * caller - confirm.
 */
479 /* removes all entries for a given device */
480 void mctp_route_remove_dev(struct mctp_dev *mdev)
482 struct net *net = dev_net(mdev->dev);
483 struct mctp_route *rt, *tmp;
486 list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
487 if (rt->dev == mdev) {
488 list_del_rcu(&rt->list);
489 /* TODO: immediate RTM_DELROUTE */
490 mctp_route_release(rt);
495 /* Incoming packet-handling */
/* packet_type receive handler for ETH_P_MCTP frames.
 *
 * Checks that the device type is ARPHRD_MCTP and that a full MCTP header
 * can be pulled, resets the transport and network header offsets, and
 * checks the header version against MCTP_VER_MIN..MCTP_VER_MAX. The network
 * id of the receiving device is stored in the skb control block, a route is
 * looked up for the destination EID on that network and the skb is handed
 * to mctp_do_route().
 * NOTE(review): the result of __mctp_dev_get(dev) is dereferenced directly
 * on the visible line; confirm it cannot be NULL for an ARPHRD_MCTP device.
 * NOTE(review): the drop path and the handling of a failed route lookup are
 * not visible here.
 */
497 static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
498 struct packet_type *pt,
499 struct net_device *orig_dev)
501 struct net *net = dev_net(dev);
502 struct mctp_skb_cb *cb;
503 struct mctp_route *rt;
506 /* basic non-data sanity checks */
507 if (dev->type != ARPHRD_MCTP)
510 if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
513 skb_reset_transport_header(skb);
514 skb_reset_network_header(skb);
516 /* We have enough for a header; decode and route */
518 if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
523 cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
526 rt = mctp_route_lookup(net, cb->net, mh->dest);
530 mctp_do_route(rt, skb);
532 return NET_RX_SUCCESS;
/* Receive hook for ETH_P_MCTP frames, handled by mctp_pkttype_receive().
 * Registered with dev_add_pack() in mctp_routes_init() and removed again in
 * mctp_routes_exit().
 */
539 static struct packet_type mctp_packet_type = {
540 .type = cpu_to_be16(ETH_P_MCTP),
541 .func = mctp_pkttype_receive,
544 /* netlink interface */
/* Attribute policy used by mctp_route_nlparse(): an 8-bit destination EID,
 * a nested metrics container and a 32-bit output interface index.
 */
546 static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
547 [RTA_DST] = { .type = NLA_U8 },
548 [RTA_METRICS] = { .type = NLA_NESTED },
549 [RTA_OIF] = { .type = NLA_U32 },
552 /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
553 * tb must hold RTA_MAX+1 elements.
/* Parse and validate the parts shared by route add and delete requests.
 *
 * Attributes are parsed into @tb against rta_mctp_policy. The outputs are
 * the rtmsg header in *rtm, the RTA_DST EID in *daddr_start and the MCTP
 * device of the RTA_OIF interface in *mdev. A message is left in @extack
 * when parsing fails, RTA_DST or RTA_OIF is absent, the family is not
 * AF_MCTP, the ifindex is unknown or the device is a loopback.
 * The device is found with __dev_get_by_index() and mctp_dev_get_rtnl(),
 * which presumably requires RTNL to be held - confirm.
 * NOTE(review): the conditions guarding each message and the error codes
 * are not visible here.
 */
555 static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
556 struct netlink_ext_ack *extack,
557 struct nlattr **tb, struct rtmsg **rtm,
558 struct mctp_dev **mdev, mctp_eid_t *daddr_start)
560 struct net *net = sock_net(skb->sk);
561 struct net_device *dev;
562 unsigned int ifindex;
565 rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
566 rta_mctp_policy, extack);
568 NL_SET_ERR_MSG(extack, "incorrect format");
573 NL_SET_ERR_MSG(extack, "dst EID missing");
576 *daddr_start = nla_get_u8(tb[RTA_DST]);
579 NL_SET_ERR_MSG(extack, "ifindex missing");
582 ifindex = nla_get_u32(tb[RTA_OIF]);
584 *rtm = nlmsg_data(nlh);
585 if ((*rtm)->rtm_family != AF_MCTP) {
586 NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
590 dev = __dev_get_by_index(net, ifindex);
592 NL_SET_ERR_MSG(extack, "bad ifindex");
595 *mdev = mctp_dev_get_rtnl(dev);
599 if (dev->flags & IFF_LOOPBACK) {
600 NL_SET_ERR_MSG(extack, "no routes to loopback");
/* RTM_NEWROUTE handler: add a non-local route from a netlink request.
 *
 * After the shared parsing in mctp_route_nlparse(), the request type is
 * checked against RTN_UNICAST. The rtm_dst_len field is passed to
 * mctp_route_add() as the EID extent, and the local flag is false.
 * NOTE(review): the source of the mtu value is not visible here; the TODO
 * suggests it is not yet parsed from the request - confirm.
 */
607 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
608 struct netlink_ext_ack *extack)
610 struct nlattr *tb[RTA_MAX + 1];
611 mctp_eid_t daddr_start;
612 struct mctp_dev *mdev;
617 rc = mctp_route_nlparse(skb, nlh, extack, tb,
618 &rtm, &mdev, &daddr_start);
622 if (rtm->rtm_type != RTN_UNICAST) {
623 NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
627 /* TODO: parse mtu from nlparse */
630 rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, false);
/* RTM_DELROUTE handler: remove the route described by a netlink request.
 *
 * Uses the same parsing as mctp_newroute(). The request type is checked
 * against RTN_UNICAST, and rtm_dst_len is passed to mctp_route_remove() as
 * the EID extent.
 * NOTE(review): the error codes are not visible here.
 */
634 static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
635 struct netlink_ext_ack *extack)
637 struct nlattr *tb[RTA_MAX + 1];
638 mctp_eid_t daddr_start;
639 struct mctp_dev *mdev;
643 rc = mctp_route_nlparse(skb, nlh, extack, tb,
644 &rtm, &mdev, &daddr_start);
648 /* we only have unicast routes */
649 if (rtm->rtm_type != RTN_UNICAST)
652 rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
/* Append one route message describing @rt to @skb.
 *
 * The rtmsg header uses AF_MCTP, with rtm_dst_len set to rt->max - rt->min.
 * Attributes written: RTA_DST (rt->min), a nested RTA_METRICS holding
 * RTAX_MTU (rt->mtu), and RTA_OIF (ifindex of the route device).
 * NOTE(review): rtm_type is reported as RTN_ANYCAST although
 * mctp_newroute() checks requests against RTN_UNICAST; see the TODO on
 * that line.
 * NOTE(review): the failure path ends in nlmsg_cancel(); the return codes
 * are not visible here.
 */
656 static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
657 u32 portid, u32 seq, int event, unsigned int flags)
659 struct nlmsghdr *nlh;
663 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
667 hdr = nlmsg_data(nlh);
668 hdr->rtm_family = AF_MCTP;
670 /* we use the _len fields as a number of EIDs, rather than
671 * a number of bits in the address
673 hdr->rtm_dst_len = rt->max - rt->min;
674 hdr->rtm_src_len = 0;
676 hdr->rtm_table = RT_TABLE_DEFAULT;
677 hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
678 hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
679 hdr->rtm_type = RTN_ANYCAST; /* TODO: type from route */
681 if (nla_put_u8(skb, RTA_DST, rt->min))
684 metrics = nla_nest_start_noflag(skb, RTA_METRICS);
689 if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
693 nla_nest_end(skb, metrics);
696 if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
700 /* TODO: conditional neighbour physaddr? */
707 nlmsg_cancel(skb, nlh);
/* Dump callback registered for RTM_GETROUTE: walks the routes of the
 * namespace of the requesting socket and emits one RTM_NEWROUTE message
 * with NLM_F_MULTI per route via mctp_fill_rtinfo().
 * NOTE(review): the resume bookkeeping between dump calls, the locking for
 * the RCU list walk and the return value are not visible here - confirm.
 */
711 static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
713 struct net *net = sock_net(skb->sk);
714 struct mctp_route *rt;
717 /* TODO: allow filtering on route data, possibly under
721 /* TODO: change to struct overlay */
726 list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
729 if (mctp_fill_rtinfo(skb, rt,
730 NETLINK_CB(cb->skb).portid,
732 RTM_NEWROUTE, NLM_F_MULTI) < 0)
742 /* net namespace implementation */
/* Per-namespace setup: initialise the route list, the bind list with its
 * mutex, and the key list with its spinlock.
 */
743 static int __net_init mctp_routes_net_init(struct net *net)
745 struct netns_mctp *ns = &net->mctp;
747 INIT_LIST_HEAD(&ns->routes);
748 INIT_HLIST_HEAD(&ns->binds);
749 mutex_init(&ns->bind_lock);
750 INIT_HLIST_HEAD(&ns->keys);
751 spin_lock_init(&ns->keys_lock);
/* Per-namespace teardown: drop a reference on every route still on the
 * per-net list.
 * NOTE(review): the entries are released while walking the list with
 * list_for_each_entry_rcu() and are not unlinked on the visible lines;
 * confirm that the walk stays valid if a release frees the route, and that
 * the required locking is held.
 */
755 static void __net_exit mctp_routes_net_exit(struct net *net)
757 struct mctp_route *rt;
759 list_for_each_entry_rcu(rt, &net->mctp.routes, list)
760 mctp_route_release(rt);
/* Per-namespace init and exit hooks for the routing state; registered in
 * mctp_routes_init() and unregistered in mctp_routes_exit().
 */
763 static struct pernet_operations mctp_net_ops = {
764 .init = mctp_routes_net_init,
765 .exit = mctp_routes_net_exit,
/* Routing init: register the ETH_P_MCTP packet handler, the GETROUTE (dump),
 * NEWROUTE and DELROUTE rtnetlink handlers, and the per-namespace hooks.
 * Returns the result of register_pernet_subsys().
 * NOTE(review): the results of rtnl_register_module() are not checked, and
 * a failure of register_pernet_subsys() does not undo the earlier
 * registrations on the visible lines.
 */
768 int __init mctp_routes_init(void)
770 dev_add_pack(&mctp_packet_type);
772 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
773 NULL, mctp_dump_rtinfo, 0);
774 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
775 mctp_newroute, NULL, 0);
776 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
777 mctp_delroute, NULL, 0);
779 return register_pernet_subsys(&mctp_net_ops);
/* Routing teardown: undo the registrations of mctp_routes_init() in reverse
 * order (per-namespace hooks, the three rtnetlink handlers, then the packet
 * handler).
 */
782 void __exit mctp_routes_exit(void)
784 unregister_pernet_subsys(&mctp_net_ops);
785 rtnl_unregister(PF_MCTP, RTM_DELROUTE);
786 rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
787 rtnl_unregister(PF_MCTP, RTM_GETROUTE);
788 dev_remove_pack(&mctp_packet_type);