1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/list.h>
4 #include <linux/netdevice.h>
5 #include <linux/rtnetlink.h>
6 #include <linux/skbuff.h>
8 #include <net/switchdev.h>
10 #include "br_private.h"
// Static key tested with static_branch_unlikely() by the TX forwarding
// offload helpers in this file. It is incremented in nbp_switchdev_add() and
// decremented in nbp_switchdev_del(), together with the BR_TX_FWD_OFFLOAD
// port flag.
12 static struct static_key_false br_switchdev_tx_fwd_offload;
// Decide whether @skb may use TX forwarding offload when egressing port @p.
// The visible return expression requires BR_TX_FWD_OFFLOAD in p->flags and a
// port hwdom that differs from the src_hwdom stored in the skb control block.
// NOTE(review): the statement guarded by the static key check is not visible
// in this listing.
14 static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
15 const struct sk_buff *skb)
17 if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
20 return (p->flags & BR_TX_FWD_OFFLOAD) &&
21 (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
// Report the tx_fwd_offload flag stored in the control block of @skb.
// The static key is tested first.
// NOTE(review): the statement guarded by that test is not visible in this
// listing.
24 bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
26 if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
29 return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
// Store the result of br_switchdev_frame_uses_tx_fwd_offload() for @skb in
// skb->offload_fwd_mark.
32 void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
34 skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
37 /* Mark the frame for TX forwarding offload if this egress port supports it */
// @p: egress bridge port.
// Sets tx_fwd_offload in the skb control block only when
// nbp_switchdev_can_offload_tx_fwd() accepts this port and skb.
// NOTE(review): the skb parameter line is not visible in this listing.
38 void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
41 if (nbp_switchdev_can_offload_tx_fwd(p, skb))
42 BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
// Sets bit p->hwdom in the fwd_hwdoms mask of the skb control block when
// nbp_switchdev_can_offload_tx_fwd() allows it. The same bit is tested by
// nbp_switchdev_allowed_egress(), which rejects a port whose hwdom bit is set.
// NOTE(review): the original comment below is cut off mid-sentence and its
// terminator is missing in this listing; the skb parameter line is also not
// visible.
45 /* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
46 * that the skb has been already forwarded to, to avoid further cloning to
47 * other ports in the same hwdom by making nbp_switchdev_allowed_egress()
50 void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
53 if (nbp_switchdev_can_offload_tx_fwd(p, skb))
54 set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
// Record the hwdom of port @p as src_hwdom in the skb control block.
// NOTE(review): only the assignment is visible in this listing; the skb
// parameter line and any condition guarding the assignment cannot be
// confirmed from here.
57 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
61 BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
// Return true when @skb may egress on port @p.
// Two conditions must both hold: the hwdom bit of @p is not set in
// cb->fwd_hwdoms, and either skb->offload_fwd_mark is clear or the source
// hwdom recorded in the control block differs from the hwdom of @p.
64 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
65 const struct sk_buff *skb)
67 struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
69 return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
70 (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
// Mask of bridge port flags kept by br_switchdev_set_port_flag() when it
// filters the caller supplied mask (mask &= BR_PORT_FLAGS_HW_OFFLOAD).
73 /* Flags that can be offloaded to hardware */
74 #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
75 BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED)
// Offload a bridge port flag change to switchdev in the two steps visible
// below: a SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS attribute sent through
// call_switchdev_notifiers(), then a SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
// attribute flagged SWITCHDEV_F_DEFER and sent through
// switchdev_port_attr_set().
// @p: bridge port; p->dev is the device both steps are addressed to.
// @extack: netlink extended ack. A message is stored on the visible error
//          paths only when extack is non-NULL and has no message yet.
// NOTE(review): the flags and mask parameter lines, the struct initializers,
// the error checks around both steps and the return statements are not
// visible in this listing.
77 int br_switchdev_set_port_flag(struct net_bridge_port *p,
80 struct netlink_ext_ack *extack)
82 struct switchdev_attr attr = {
85 struct switchdev_notifier_port_attr_info info = {
// Restrict the request to flags listed in BR_PORT_FLAGS_HW_OFFLOAD.
90 mask &= BR_PORT_FLAGS_HW_OFFLOAD;
// Step one: the PRE_BRIDGE_FLAGS attribute carrying the value and mask.
94 attr.id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS;
95 attr.u.brport_flags.val = flags;
96 attr.u.brport_flags.mask = mask;
98 /* We run from atomic context here */
99 err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev,
101 err = notifier_to_errno(err);
// -EOPNOTSUPP is singled out; its handling is not visible in this listing.
102 if (err == -EOPNOTSUPP)
106 if (extack && !extack->_msg)
107 NL_SET_ERR_MSG_MOD(extack,
108 "bridge flag offload is not supported");
// Step two: the BRIDGE_FLAGS attribute, marked for deferred processing.
112 attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
113 attr.flags = SWITCHDEV_F_DEFER;
115 err = switchdev_port_attr_set(p->dev, &attr, extack);
117 if (extack && !extack->_msg)
118 NL_SET_ERR_MSG_MOD(extack,
119 "error setting offload flag on port");
// Fill a switchdev FDB notifier item from bridge FDB entry @fdb.
// @br: bridge; br->dev becomes the notified device for local entries and for
//      entries without a destination port.
// @item: notifier info to populate (addr, vid, added_by_user, offloaded,
//        is_local, info.dev and info.ctx are written).
// @fdb: source entry; fdb->dst is sampled once with READ_ONCE().
// NOTE(review): the declaration line of the ctx parameter stored in
// item->info.ctx is not visible in this listing.
126 static void br_switchdev_fdb_populate(struct net_bridge *br,
127 struct switchdev_notifier_fdb_info *item,
128 const struct net_bridge_fdb_entry *fdb,
131 const struct net_bridge_port *p = READ_ONCE(fdb->dst);
133 item->addr = fdb->key.addr.addr;
134 item->vid = fdb->key.vlan_id;
135 item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
136 item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
137 item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
// Local entries and entries with no port are reported on the bridge device.
138 item->info.dev = (!p || item->is_local) ? br->dev : p->dev;
139 item->info.ctx = ctx;
// Notify switchdev about a change to FDB entry @fdb of bridge @br.
// The item is populated with a NULL ctx, then SWITCHDEV_FDB_DEL_TO_DEVICE or
// SWITCHDEV_FDB_ADD_TO_DEVICE is raised on item.info.dev. The return values
// of call_switchdev_notifiers() are not used.
// NOTE(review): the return type line and the statements that select between
// the two calls (presumably based on @type) are not visible in this listing.
143 br_switchdev_fdb_notify(struct net_bridge *br,
144 const struct net_bridge_fdb_entry *fdb, int type)
146 struct switchdev_notifier_fdb_info item;
148 br_switchdev_fdb_populate(br, &item, fdb, NULL);
152 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
153 item.info.dev, &item.info, NULL);
156 call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
157 item.info.dev, &item.info, NULL);
// Add a SWITCHDEV_OBJ_ID_PORT_VLAN object on @dev through
// switchdev_port_obj_add() and return its result.
// @extack: passed through to switchdev_port_obj_add().
// NOTE(review): how @vid, @flags and @changed populate the object is not
// visible in this listing.
162 int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
163 bool changed, struct netlink_ext_ack *extack)
165 struct switchdev_obj_port_vlan v = {
167 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
173 return switchdev_port_obj_add(dev, &v.obj, extack);
// Delete a SWITCHDEV_OBJ_ID_PORT_VLAN object from @dev through
// switchdev_port_obj_del() and return its result.
// NOTE(review): how @vid populates the object is not visible in this listing.
176 int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
178 struct switchdev_obj_port_vlan v = {
180 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
184 return switchdev_port_obj_del(dev, &v.obj);
// Assign a hardware domain to the @joining port.
// A port already on br->port_list with the same ppid shares its hwdom with
// @joining. Otherwise the first zero bit of br->busy_hwdoms, searched from
// bit 1 and below BR_HWDOM_MAX, is marked busy and assigned.
// NOTE(review): the hwdom declaration and all return statements, including
// the one for the case where no free domain is found, are not visible in
// this listing.
187 static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
189 struct net_bridge *br = joining->br;
190 struct net_bridge_port *p;
193 /* joining is yet to be added to the port list. */
194 list_for_each_entry(p, &br->port_list, list) {
195 if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
196 joining->hwdom = p->hwdom;
// No existing port shares the ppid: look for an unused domain number.
201 hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
202 if (hwdom >= BR_HWDOM_MAX)
205 set_bit(hwdom, &br->busy_hwdoms);
206 joining->hwdom = hwdom;
// Release the hardware domain held by the @leaving port.
// Walks br->port_list looking for a port with the same hwdom, and clears the
// hwdom bit in br->busy_hwdoms at the end of the visible code.
// NOTE(review): the statement executed when a matching port is found is not
// visible in this listing; presumably it leaves the domain marked busy.
210 static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
212 struct net_bridge *br = leaving->br;
213 struct net_bridge_port *p;
215 /* leaving is no longer in the port list. */
216 list_for_each_entry(p, &br->port_list, list) {
217 if (p->hwdom == leaving->hwdom)
221 clear_bit(leaving->hwdom, &br->busy_hwdoms);
// Register an offload of bridge port @p by the switch identified by @ppid.
// @extack: receives an error message when @p is already offloaded and @ppid
//          differs from the ppid recorded on the port.
// On the first offload the count is set to 1 and a hardware domain is
// assigned through nbp_switchdev_hwdom_set(). When tx_fwd_offload is set,
// BR_TX_FWD_OFFLOAD is added to p->flags and the static key is incremented.
// NOTE(review): the tx_fwd_offload parameter line, the error returns, the
// handling of repeated offloads of the same port and the terminators of both
// block comments are not visible in this listing.
224 static int nbp_switchdev_add(struct net_bridge_port *p,
225 struct netdev_phys_item_id ppid,
227 struct netlink_ext_ack *extack)
// A non-zero offload_count means the port is already offloaded.
231 if (p->offload_count) {
232 /* Prevent unsupported configurations such as a bridge port
233 * which is a bonding interface, and the member ports are from
234 * different hardware switches.
236 if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
237 NL_SET_ERR_MSG_MOD(extack,
238 "Same bridge port cannot be offloaded by two physical switches");
242 /* Tolerate drivers that call switchdev_bridge_port_offload()
243 * more than once for the same bridge port, such as when the
244 * bridge port is an offloaded bonding/team interface.
252 p->offload_count = 1;
254 err = nbp_switchdev_hwdom_set(p);
258 if (tx_fwd_offload) {
259 p->flags |= BR_TX_FWD_OFFLOAD;
260 static_branch_inc(&br_switchdev_tx_fwd_offload);
// Undo one nbp_switchdev_add() for port @p.
// Warns through WARN_ON() when offload_count is already zero. The visible
// tail releases the hardware domain and, when BR_TX_FWD_OFFLOAD is set,
// clears the flag and decrements the static key.
// NOTE(review): the statements guarded by the WARN_ON() check and by the
// second offload_count check, and any update of offload_count between them,
// are not visible in this listing.
266 static void nbp_switchdev_del(struct net_bridge_port *p)
268 if (WARN_ON(!p->offload_count))
273 if (p->offload_count)
277 nbp_switchdev_hwdom_put(p);
279 if (p->flags & BR_TX_FWD_OFFLOAD) {
280 p->flags &= ~BR_TX_FWD_OFFLOAD;
281 static_branch_dec(&br_switchdev_tx_fwd_offload);
// Replay one FDB entry to a single notifier block.
// Populates a switchdev FDB item from @fdb with @ctx, calls
// nb->notifier_call() directly with @action, and returns the result converted
// by notifier_to_errno().
// NOTE(review): the return type line and the err declaration are not visible
// in this listing.
286 br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
287 const struct net_bridge_fdb_entry *fdb,
288 unsigned long action, const void *ctx)
290 struct switchdev_notifier_fdb_info item;
293 br_switchdev_fdb_populate(br, &item, fdb, ctx);
295 err = nb->notifier_call(nb, action, &item);
296 return notifier_to_errno(err);
// Replay the FDB of a bridge to notifier block @nb.
// @br_dev: checked with netif_is_bridge_master() before its private data is
//          used as the bridge.
// @ctx: forwarded with every replayed entry.
// @adding: presumably selects SWITCHDEV_FDB_ADD_TO_DEVICE versus
//          SWITCHDEV_FDB_DEL_TO_DEVICE; the branch lines are not visible.
// Entries are visited with hlist_for_each_entry_rcu() over br->fdb_list and
// handed to br_switchdev_fdb_replay_one().
// NOTE(review): the return type line, any locking around the walk, the error
// handling inside the loop and the return statements are not visible in this
// listing.
300 br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx,
301 bool adding, struct notifier_block *nb)
303 struct net_bridge_fdb_entry *fdb;
304 struct net_bridge *br;
305 unsigned long action;
311 if (!netif_is_bridge_master(br_dev))
314 br = netdev_priv(br_dev);
317 action = SWITCHDEV_FDB_ADD_TO_DEVICE;
319 action = SWITCHDEV_FDB_DEL_TO_DEVICE;
323 hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
324 err = br_switchdev_fdb_replay_one(br, nb, fdb, action, ctx);
// Replay per-VLAN MSTI attributes of bridge @br_dev to notifier block @nb.
// For VLANs on the vlan_list of the bridge vlan group, an attribute with id
// SWITCHDEV_ATTR_ID_VLAN_MSTI carrying v->vid and v->msti is delivered by
// calling nb->notifier_call() with SWITCHDEV_PORT_ATTR_SET.
// NOTE(review): the ctx parameter line, the attr_info initializer, any
// filtering inside the loop, the remaining notifier_call arguments and the
// error handling are not visible in this listing.
334 static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
336 struct notifier_block *nb,
337 struct netlink_ext_ack *extack)
339 struct switchdev_notifier_port_attr_info attr_info = {
346 struct net_bridge *br = netdev_priv(br_dev);
347 struct net_bridge_vlan_group *vg;
348 struct switchdev_attr attr;
349 struct net_bridge_vlan *v;
// The notifier payload points at the on-stack attribute reused per VLAN.
352 attr_info.attr = &attr;
353 attr.orig_dev = br_dev;
355 vg = br_vlan_group(br);
359 list_for_each_entry(v, &vg->vlan_list, vlist) {
361 attr.id = SWITCHDEV_ATTR_ID_VLAN_MSTI;
362 attr.u.vlan_msti.vid = v->vid;
363 attr.u.vlan_msti.msti = v->msti;
365 err = nb->notifier_call(nb, SWITCHDEV_PORT_ATTR_SET,
367 err = notifier_to_errno(err);
// Deliver one VLAN object to notifier block @nb.
// Wraps the request in a switchdev_notifier_port_obj_info, calls
// nb->notifier_call() with @action and returns the result converted by
// notifier_to_errno().
// NOTE(review): the return type line and the obj_info initializer (and so the
// use of @dev, @vlan, @ctx and @extack) are not visible in this listing.
377 br_switchdev_vlan_replay_one(struct notifier_block *nb,
378 struct net_device *dev,
379 struct switchdev_obj_port_vlan *vlan,
380 const void *ctx, unsigned long action,
381 struct netlink_ext_ack *extack)
383 struct switchdev_notifier_port_obj_info obj_info = {
393 err = nb->notifier_call(nb, action, &obj_info);
394 return notifier_to_errno(err);
// Replay the VLANs of group @vg towards @dev through notifier block @nb.
// The pvid is read once with br_get_pvid(vg). Each VLAN on vg->vlan_list is
// described by a SWITCHDEV_OBJ_ID_PORT_VLAN object whose flags come from
// br_vlan_flags(v, pvid) and is passed to br_switchdev_vlan_replay_one().
// NOTE(review): the statement run when br_vlan_should_use(v) is false
// (presumably skipping the VLAN), the remaining object fields, the error
// handling and the return statements are not visible in this listing.
397 static int br_switchdev_vlan_replay_group(struct notifier_block *nb,
398 struct net_device *dev,
399 struct net_bridge_vlan_group *vg,
400 const void *ctx, unsigned long action,
401 struct netlink_ext_ack *extack)
403 struct net_bridge_vlan *v;
410 pvid = br_get_pvid(vg);
412 list_for_each_entry(v, &vg->vlan_list, vlist) {
413 struct switchdev_obj_port_vlan vlan = {
415 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
416 .flags = br_vlan_flags(v, pvid),
420 if (!br_vlan_should_use(v))
423 err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
// Replay the VLAN state of bridge @br_dev to notifier block @nb.
// Visible order: the bridge vlan group first, then one group replay per port
// on br->port_list addressed to the port device, then
// br_switchdev_vlan_attr_replay().
// @br_dev: checked with netif_is_bridge_master().
// @adding: presumably selects SWITCHDEV_PORT_OBJ_ADD versus
//          SWITCHDEV_PORT_OBJ_DEL; the branch lines are not visible.
// NOTE(review): the error checks between the steps, any condition on the
// attribute replay, the vlan group argument of the per-port call and the
// return statements are not visible in this listing.
432 static int br_switchdev_vlan_replay(struct net_device *br_dev,
433 const void *ctx, bool adding,
434 struct notifier_block *nb,
435 struct netlink_ext_ack *extack)
437 struct net_bridge *br = netdev_priv(br_dev);
438 struct net_bridge_port *p;
439 unsigned long action;
447 if (!netif_is_bridge_master(br_dev))
451 action = SWITCHDEV_PORT_OBJ_ADD;
453 action = SWITCHDEV_PORT_OBJ_DEL;
455 err = br_switchdev_vlan_replay_group(nb, br_dev, br_vlan_group(br),
456 ctx, action, extack);
460 list_for_each_entry(p, &br->port_list, list) {
461 struct net_device *dev = p->dev;
463 err = br_switchdev_vlan_replay_group(nb, dev,
465 ctx, action, extack);
471 err = br_switchdev_vlan_attr_replay(br_dev, ctx, nb, extack);
// Context handed to br_switchdev_mdb_complete() through obj.complete_priv.
// port: bridge port the MDB object was added for.
// NOTE(review): the remaining members and the closing brace are not visible
// in this listing; code further down reads and writes a member named ip.
479 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
480 struct br_switchdev_mdb_complete_info {
481 struct net_bridge_port *port;
// Completion callback installed as mdb.obj.complete by
// br_switchdev_mdb_notify() for a port MDB add.
// @priv: pointer to a struct br_switchdev_mdb_complete_info.
// With br->multicast_lock held (spin_lock_bh), the MDB entry for data->ip is
// looked up with br_mdb_ip_get() and its port group list is walked.
// MDB_PG_FLAGS_OFFLOAD is set in p->flags of a port group.
// NOTE(review): the use of @err, the check of the lookup result, the loop
// increment, the statement run when p->key.port differs from the recorded
// port (presumably skipping that group) and the release of @priv are not
// visible in this listing.
485 static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *priv)
487 struct br_switchdev_mdb_complete_info *data = priv;
488 struct net_bridge_port_group __rcu **pp;
489 struct net_bridge_port_group *p;
490 struct net_bridge_mdb_entry *mp;
491 struct net_bridge_port *port = data->port;
492 struct net_bridge *br = port->br;
497 spin_lock_bh(&br->multicast_lock);
498 mp = br_mdb_ip_get(br, &data->ip);
501 for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
503 if (p->key.port != port)
505 p->flags |= MDB_PG_FLAGS_OFFLOAD;
508 spin_unlock_bh(&br->multicast_lock);
// Fill mdb->addr and mdb->vid from MDB entry @mp.
// IPv4 group addresses are mapped with ip_eth_mc_map(). IPv6 group addresses
// are mapped with ipv6_eth_mc_map() when CONFIG_IPV6 is enabled. The
// ether_addr_copy() line copies the MAC address stored in the entry.
// mdb->vid is taken from mp->addr.vid.
// NOTE(review): the preprocessor terminator and the else line in front of
// ether_addr_copy() are not visible in this listing; presumably the copy
// handles every other protocol.
513 static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
514 const struct net_bridge_mdb_entry *mp)
516 if (mp->addr.proto == htons(ETH_P_IP))
517 ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
518 #if IS_ENABLED(CONFIG_IPV6)
519 else if (mp->addr.proto == htons(ETH_P_IPV6))
520 ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
523 ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
525 mdb->vid = mp->addr.vid;
// Add or delete a host MDB object on @lower_dev for MDB entry @mp.
// The object uses id SWITCHDEV_OBJ_ID_HOST_MDB and the SWITCHDEV_F_DEFER
// flag, and is filled by br_switchdev_mdb_populate(). The add call passes a
// NULL extack, and neither return value is used.
// NOTE(review): the type parameter line, the remaining initializer fields
// (and so the use of @dev) and the statements choosing between
// switchdev_port_obj_add() and switchdev_port_obj_del() are not visible in
// this listing.
528 static void br_switchdev_host_mdb_one(struct net_device *dev,
529 struct net_device *lower_dev,
530 struct net_bridge_mdb_entry *mp,
533 struct switchdev_obj_port_mdb mdb = {
535 .id = SWITCHDEV_OBJ_ID_HOST_MDB,
536 .flags = SWITCHDEV_F_DEFER,
541 br_switchdev_mdb_populate(&mdb, mp);
545 switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
548 switchdev_port_obj_del(lower_dev, &mdb.obj);
// Apply br_switchdev_host_mdb_one() for MDB entry @mp and @type to every
// lower device of @dev, as enumerated by netdev_for_each_lower_dev().
553 static void br_switchdev_host_mdb(struct net_device *dev,
554 struct net_bridge_mdb_entry *mp, int type)
556 struct net_device *lower_dev;
557 struct list_head *iter;
559 netdev_for_each_lower_dev(dev, lower_dev, iter)
560 br_switchdev_host_mdb_one(dev, lower_dev, mp, type);
// Deliver one MDB object to notifier block @nb.
// Wraps the request in a switchdev_notifier_port_obj_info, calls
// nb->notifier_call() with @action and returns the result converted by
// notifier_to_errno().
// NOTE(review): the return type line and the obj_info initializer (and so the
// use of @dev, @mdb, @ctx and @extack) are not visible in this listing.
564 br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
565 const struct switchdev_obj_port_mdb *mdb,
566 unsigned long action, const void *ctx,
567 struct netlink_ext_ack *extack)
569 struct switchdev_notifier_port_obj_info obj_info = {
579 err = nb->notifier_call(nb, action, &obj_info);
580 return notifier_to_errno(err);
// Queue one MDB object for later replay.
// Allocates a zeroed switchdev_obj_port_mdb with GFP_ATOMIC, sets
// obj.orig_dev to @orig_dev, fills address and vid from @mp through
// br_switchdev_mdb_populate(), and appends the object to @mdb_list.
// NOTE(review): the allocation failure check, the use of @id and the return
// statements are not visible in this listing.
583 static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
584 enum switchdev_obj_id id,
585 const struct net_bridge_mdb_entry *mp,
586 struct net_device *orig_dev)
588 struct switchdev_obj_port_mdb *mdb;
590 mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
595 mdb->obj.orig_dev = orig_dev;
596 br_switchdev_mdb_populate(mdb, mp);
597 list_add_tail(&mdb->obj.list, mdb_list);
// Notify switchdev about a change to MDB entry @mp.
// One visible path hands off to br_switchdev_host_mdb(). Otherwise a
// SWITCHDEV_OBJ_ID_PORT_MDB object flagged SWITCHDEV_F_DEFER is populated,
// its orig_dev is set to the device of the port group port, and the object is
// added to or deleted from that device.
// For the add path a br_switchdev_mdb_complete_info is allocated with
// GFP_ATOMIC, filled with the port and mp->addr, and attached together with
// br_switchdev_mdb_complete() as completion callback. It is freed here when
// switchdev_port_obj_add() returns non-zero.
// NOTE(review): the type parameter line, the condition in front of the host
// MDB hand-off (presumably a missing port group), the statements selecting
// add versus delete and the allocation failure check are not visible in this
// listing.
602 void br_switchdev_mdb_notify(struct net_device *dev,
603 struct net_bridge_mdb_entry *mp,
604 struct net_bridge_port_group *pg,
607 struct br_switchdev_mdb_complete_info *complete_info;
608 struct switchdev_obj_port_mdb mdb = {
610 .id = SWITCHDEV_OBJ_ID_PORT_MDB,
611 .flags = SWITCHDEV_F_DEFER,
616 return br_switchdev_host_mdb(dev, mp, type);
618 br_switchdev_mdb_populate(&mdb, mp);
620 mdb.obj.orig_dev = pg->key.port->dev;
623 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
626 complete_info->port = pg->key.port;
627 complete_info->ip = mp->addr;
628 mdb.obj.complete_priv = complete_info;
629 mdb.obj.complete = br_switchdev_mdb_complete;
// Free the completion context here when the add call reports failure.
630 if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
631 kfree(complete_info);
634 switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
// Replay the MDB state of bridge @br_dev to notifier block @nb on behalf of
// port device @dev.
// Preconditions checked: @br_dev is a bridge master, @dev is a bridge port,
// and BROPT_MULTICAST_ENABLED is set on the bridge.
// Phase one walks br->mdb_list with hlist_for_each_entry_rcu() and queues a
// SWITCHDEV_OBJ_ID_HOST_MDB object for host joined entries and a
// SWITCHDEV_OBJ_ID_PORT_MDB object for port groups.
// Phase two passes every queued object to br_switchdev_mdb_replay_one() with
// SWITCHDEV_PORT_OBJ_ADD or SWITCHDEV_PORT_OBJ_DEL.
// Finally every queued object is unlinked with list_del() and freed.
// NOTE(review): the return type line, the mdb_list and err declarations, the
// outcome of failed precondition checks, the RCU lock and unlock calls, the
// statement run when a port group belongs to another device (presumably
// skipping it), the selection of the action from @adding, all error handling
// and the terminator of the block comment are not visible in this listing.
641 br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
642 const void *ctx, bool adding, struct notifier_block *nb,
643 struct netlink_ext_ack *extack)
645 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
646 const struct net_bridge_mdb_entry *mp;
647 struct switchdev_obj *obj, *tmp;
648 struct net_bridge *br;
649 unsigned long action;
658 if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
661 br = netdev_priv(br_dev);
663 if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
666 /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
667 * because the write-side protection is br->multicast_lock. But we
668 * need to emulate the [ blocking ] calling context of a regular
669 * switchdev event, so since both br->multicast_lock and RCU read side
670 * critical sections are atomic, we have no choice but to pick the RCU
671 * read side lock, queue up all our events, leave the critical section
672 * and notify switchdev from blocking context.
676 hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
677 struct net_bridge_port_group __rcu * const *pp;
678 const struct net_bridge_port_group *p;
680 if (mp->host_joined) {
681 err = br_switchdev_mdb_queue_one(&mdb_list,
682 SWITCHDEV_OBJ_ID_HOST_MDB,
690 for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
692 if (p->key.port->dev != dev)
695 err = br_switchdev_mdb_queue_one(&mdb_list,
696 SWITCHDEV_OBJ_ID_PORT_MDB,
708 action = SWITCHDEV_PORT_OBJ_ADD;
710 action = SWITCHDEV_PORT_OBJ_DEL;
712 list_for_each_entry(obj, &mdb_list, list) {
713 err = br_switchdev_mdb_replay_one(nb, dev,
714 SWITCHDEV_OBJ_PORT_MDB(obj),
715 action, ctx, extack);
721 list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
722 list_del(&obj->list);
723 kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
// Replay existing bridge state for port @p with adding set to true.
// Order: VLANs, then MDB entries, then FDB entries. VLAN and MDB replays go
// to @blocking_nb, the FDB replay goes to @atomic_nb.
// Each result is tested so that zero and -EOPNOTSUPP do not take the error
// branch.
// NOTE(review): the statements run on the error branches and the final return
// are not visible in this listing.
733 static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
734 struct notifier_block *atomic_nb,
735 struct notifier_block *blocking_nb,
736 struct netlink_ext_ack *extack)
738 struct net_device *br_dev = p->br->dev;
739 struct net_device *dev = p->dev;
742 err = br_switchdev_vlan_replay(br_dev, ctx, true, blocking_nb, extack);
743 if (err && err != -EOPNOTSUPP)
746 err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
748 if (err && err != -EOPNOTSUPP)
751 err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
752 if (err && err != -EOPNOTSUPP)
// Replay bridge state for port @p with adding set to false.
// Order: FDB entries, then MDB entries, then VLANs, which is the reverse of
// nbp_switchdev_sync_objs(). The FDB replay goes to @atomic_nb, the others to
// @blocking_nb with a NULL extack. Return values of the replays are not used.
// NOTE(review): the ctx parameter line is not visible in this listing.
758 static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
760 struct notifier_block *atomic_nb,
761 struct notifier_block *blocking_nb)
763 struct net_device *br_dev = p->br->dev;
764 struct net_device *dev = p->dev;
766 br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
768 br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
770 br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
// Visible steps: read the parent id of @dev with dev_get_port_parent_id(),
// register the offload with nbp_switchdev_add(), then replay existing state
// with nbp_switchdev_sync_objs(). The out_switchdev_del path calls
// nbp_switchdev_del() on @p.
// @atomic_nb and @blocking_nb: notifier blocks used for the replay.
// @extack: passed to nbp_switchdev_add() and nbp_switchdev_sync_objs().
// NOTE(review): the tx_fwd_offload parameter line, the error checks after
// each step, the label line itself, the return statements and the terminator
// of the comment below are not visible in this listing.
773 /* Let the bridge know that this port is offloaded, so that it can assign a
774 * switchdev hardware domain to it.
776 int br_switchdev_port_offload(struct net_bridge_port *p,
777 struct net_device *dev, const void *ctx,
778 struct notifier_block *atomic_nb,
779 struct notifier_block *blocking_nb,
781 struct netlink_ext_ack *extack)
783 struct netdev_phys_item_id ppid;
786 err = dev_get_port_parent_id(dev, &ppid, false);
790 err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
794 err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
796 goto out_switchdev_del;
801 nbp_switchdev_del(p);
// Tear down the offload of port @p: first replay deletions through
// nbp_switchdev_unsync_objs(), then drop the offload registration with
// nbp_switchdev_del().
806 void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
807 struct notifier_block *atomic_nb,
808 struct notifier_block *blocking_nb)
810 nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
812 nbp_switchdev_del(p);