Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee84e78..7b41ee8 100644
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
 }
 
 static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+                                     struct net_device *dev,
+                                     unsigned long action,
                                      enum switchdev_obj_id id,
                                      const struct net_bridge_mdb_entry *mp,
                                      struct net_device *orig_dev)
 {
-       struct switchdev_obj_port_mdb *mdb;
+       struct switchdev_obj_port_mdb mdb = {
+               .obj = {
+                       .id = id,
+                       .orig_dev = orig_dev,
+               },
+       };
+       struct switchdev_obj_port_mdb *pmdb;
 
-       mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
-       if (!mdb)
-               return -ENOMEM;
+       br_switchdev_mdb_populate(&mdb, mp);
+
+       if (action == SWITCHDEV_PORT_OBJ_ADD &&
+           switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+               /* This event is already in the deferred queue of
+                * events, so this replay must be elided, lest the
+                * driver receive duplicate events for it. This can
+                * only happen when replaying additions, since
+                * modifications are always immediately visible in
+                * br->mdb_list, whereas actual event delivery may be
+                * delayed.
+                */
+               return 0;
+       }
 
-       mdb->obj.id = id;
-       mdb->obj.orig_dev = orig_dev;
-       br_switchdev_mdb_populate(mdb, mp);
-       list_add_tail(&mdb->obj.list, mdb_list);
+       pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+       if (!pmdb)
+               return -ENOMEM;
 
+       list_add_tail(&pmdb->obj.list, mdb_list);
        return 0;
 }
 
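The rewritten br_switchdev_mdb_queue_one() builds the MDB object on the stack first, so that switchdev_port_obj_act_is_deferred() can compare it against events still sitting in the switchdev deferred queue before anything is allocated; only entries that survive that check are kmemdup()'ed onto mdb_list. The sketch below shows the same dedup-before-queue shape in plain userspace C; struct ev, the pending list and ev_eq() are hypothetical stand-ins for the switchdev deferred queue and its object comparison, not kernel APIs.

/* Illustrative model, not kernel code. */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

enum ev_action { EV_ADD, EV_DEL };

struct ev {
        enum ev_action action;
        unsigned char group[6];         /* stand-in for the MDB group key */
        struct ev *next;
};

static struct ev *pending;              /* events whose delivery is still deferred */

static bool ev_eq(const struct ev *a, const struct ev *b)
{
        return a->action == b->action &&
               !memcmp(a->group, b->group, sizeof(a->group));
}

/* Build the candidate on the stack, elide it if an identical ADD is
 * already pending, and only then duplicate it onto the replay list.
 */
int queue_one(struct ev **replay_list, const struct ev *candidate)
{
        const struct ev *e;
        struct ev *copy;

        if (candidate->action == EV_ADD)
                for (e = pending; e; e = e->next)
                        if (ev_eq(e, candidate))
                                return 0;       /* already queued; skip the replay */

        copy = malloc(sizeof(*copy));
        if (!copy)
                return -1;

        *copy = *candidate;
        copy->next = *replay_list;
        *replay_list = copy;
        return 0;
}

Only additions are checked, matching the comment in the hunk above: an entry is immediately visible in the database once added, while delivery of its event may still be pending.
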
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
        if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
                return 0;
 
-       /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
-        * because the write-side protection is br->multicast_lock. But we
-        * need to emulate the [ blocking ] calling context of a regular
-        * switchdev event, so since both br->multicast_lock and RCU read side
-        * critical sections are atomic, we have no choice but to pick the RCU
-        * read side lock, queue up all our events, leave the critical section
-        * and notify switchdev from blocking context.
+       if (adding)
+               action = SWITCHDEV_PORT_OBJ_ADD;
+       else
+               action = SWITCHDEV_PORT_OBJ_DEL;
+
+       /* br_switchdev_mdb_queue_one() will take care to not queue a
+        * replay of an event that is already pending in the switchdev
+        * deferred queue. In order to safely determine that, there
+        * must be no new deferred MDB notifications enqueued for the
+        * duration of the MDB scan. Therefore, grab the write-side
+        * lock to avoid racing with any concurrent IGMP/MLD snooping.
         */
-       rcu_read_lock();
+       spin_lock_bh(&br->multicast_lock);
 
-       hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+       hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
                struct net_bridge_port_group __rcu * const *pp;
                const struct net_bridge_port_group *p;
 
                if (mp->host_joined) {
-                       err = br_switchdev_mdb_queue_one(&mdb_list,
+                       err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
                                                         SWITCHDEV_OBJ_ID_HOST_MDB,
                                                         mp, br_dev);
                        if (err) {
-                               rcu_read_unlock();
+                               spin_unlock_bh(&br->multicast_lock);
                                goto out_free_mdb;
                        }
                }
 
-               for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+               for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
                     pp = &p->next) {
                        if (p->key.port->dev != dev)
                                continue;
 
-                       err = br_switchdev_mdb_queue_one(&mdb_list,
+                       err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
                                                         SWITCHDEV_OBJ_ID_PORT_MDB,
                                                         mp, dev);
                        if (err) {
-                               rcu_read_unlock();
+                               spin_unlock_bh(&br->multicast_lock);
                                goto out_free_mdb;
                        }
                }
        }
 
-       rcu_read_unlock();
-
-       if (adding)
-               action = SWITCHDEV_PORT_OBJ_ADD;
-       else
-               action = SWITCHDEV_PORT_OBJ_DEL;
+       spin_unlock_bh(&br->multicast_lock);
 
        list_for_each_entry(obj, &mdb_list, list) {
                err = br_switchdev_mdb_replay_one(nb, dev,
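
With the scan now under br->multicast_lock instead of an RCU read-side section, the replay keeps its two-phase shape: snapshot the shared list while holding the write-side lock, which also prevents new deferred MDB events from being enqueued mid-scan and keeps the is_deferred check trustworthy, then deliver the potentially sleeping notifications only once the lock is dropped. A self-contained sketch of that pattern follows; db_lock, db and notify() are illustrative stand-ins for br->multicast_lock, br->mdb_list and the blocking switchdev notifier, not the real interfaces.

/* Illustrative model, not kernel code. */
#include <pthread.h>
#include <stdlib.h>

struct entry {
        int val;
        struct entry *next;
};

static pthread_mutex_t db_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *db;                /* the shared database, e.g. the MDB */

/* Stub for the notifier; the real thing may sleep, so it must be
 * called without db_lock held.
 */
static int notify(int val)
{
        (void)val;
        return 0;
}

int replay(void)
{
        struct entry *snap = NULL, *e, *n;
        int err = 0;

        /* Phase 1: snapshot under the write-side lock, so that no
         * entries (or deferred events) can be added mid-scan.
         */
        pthread_mutex_lock(&db_lock);
        for (e = db; e; e = e->next) {
                n = malloc(sizeof(*n));
                if (!n) {
                        err = -1;
                        break;
                }
                n->val = e->val;
                n->next = snap;         /* prepend; order is irrelevant here */
                snap = n;
        }
        pthread_mutex_unlock(&db_lock);

        /* Phase 2: deliver from blocking context, lock dropped. */
        for (e = snap; !err && e; e = e->next)
                err = notify(e->val);

        while ((e = snap)) {
                snap = e->next;
                free(e);
        }
        return err;
}

The snapshot is what lets the notifier block: neither a spinlock nor an RCU read-side section may sleep, so delivery has to happen outside the critical section, which is the constraint the replaced comment described.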
@@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
        br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
 
        br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+       /* Make sure that the device leaving this bridge has seen all
+        * relevant events before it is disassociated. In the normal
+        * case, when the device is directly attached to the bridge,
+        * this is covered by del_nbp(). If the association was indirect
+        * however, e.g. via a team or bond, and the device is leaving
+        * that intermediate device, then the bridge port remains in
+        * place.
+        */
+       switchdev_deferred_process();
 }
 
 /* Let the bridge know that this port is offloaded, so that it can assign a
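
switchdev_deferred_process() drains switchdev's deferred event queue synchronously (it runs under rtnl), so a port that is only indirectly attached to the bridge, e.g. via a bond or team, still sees its deletion replays delivered before the unsync path returns. A toy model of such a drain follows; struct dfitem, the deferred list and deliver() are illustrative stand-ins for the switchdev internals.

/* Illustrative model, not kernel code. */
#include <stdlib.h>

struct dfitem {
        void (*deliver)(int arg);       /* the deferred notification */
        int arg;
        struct dfitem *next;
};

static struct dfitem *deferred;         /* queue of not-yet-delivered events */

/* Deliver everything still pending, in order, from blocking context. */
void deferred_process(void)
{
        struct dfitem *item;

        while ((item = deferred)) {
                deferred = item->next;
                item->deliver(item->arg);
                free(item);
        }
}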