net/mlx5: Fix devlink objects and devlink device unregister sequence
author Parav Pandit <parav@mellanox.com>
Fri, 15 May 2020 07:44:06 +0000 (02:44 -0500)
committer Saeed Mahameed <saeedm@mellanox.com>
Thu, 11 Jun 2020 22:38:02 +0000 (15:38 -0700)
Currently, the following problems exist.

1. The devlink device is registered by mlx5_load_one(), but it is
not unregistered by mlx5_unload_one(). This is incorrect.

2. As a result of the above issue, when the mlx5 PCI device is removed,
the devlink device is currently unregistered before the devlink ports
are unregistered, as shown in the ladder diagram below.

remove_one()
  mlx5_devlink_unregister()
    [..]
    devlink_unregister() <- ports are still registered!
  mlx5_unload_one()
    mlx5_unregister_device()
      mlx5_remove_device()
        mlx5e_remove()
          mlx5e_devlink_port_unregister()
            devlink_port_unregister()

3. The conditions checked for registering and unregistering the device
are also not symmetric in these routines.

Hence, fix the sequence by making the load and unload routines
symmetric and in the right order.
i.e.
(a) register devlink device followed by registering devlink ports
(b) unregister devlink ports followed by devlink device

Do this based on boot and cleanup flags instead of different
conditions.

Fixes: c6acd629eec7 ("net/mlx5e: Add support for devlink-port in non-representors mode")
Fixes: f60f315d339e ("net/mlx5e: Register devlink ports for physical link, PCI PF, VFs")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/main.c

index 2729afc..e786c5c 100644 (file)
@@ -1199,23 +1199,22 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
        if (err)
                goto err_load;
 
+       set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
        if (boot) {
                err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
                if (err)
                        goto err_devlink_reg;
-       }
-
-       if (mlx5_device_registered(dev))
-               mlx5_attach_device(dev);
-       else
                mlx5_register_device(dev);
-
-       set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+       } else {
+               mlx5_attach_device(dev);
+       }
 
        mutex_unlock(&dev->intf_state_mutex);
        return 0;
 
 err_devlink_reg:
+       clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
        mlx5_unload(dev);
 err_load:
        if (boot)
@@ -1231,10 +1230,15 @@ out:
 
 void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 {
-       if (cleanup)
+       mutex_lock(&dev->intf_state_mutex);
+
+       if (cleanup) {
                mlx5_unregister_device(dev);
+               mlx5_devlink_unregister(priv_to_devlink(dev));
+       } else {
+               mlx5_detach_device(dev);
+       }
 
-       mutex_lock(&dev->intf_state_mutex);
        if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                mlx5_core_warn(dev, "%s: interface is down, NOP\n",
                               __func__);
@@ -1245,9 +1249,6 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 
        clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-       if (mlx5_device_registered(dev))
-               mlx5_detach_device(dev);
-
        mlx5_unload(dev);
 
        if (cleanup)
@@ -1387,8 +1388,6 @@ static void remove_one(struct pci_dev *pdev)
 
        devlink_reload_disable(devlink);
        mlx5_crdump_disable(dev);
-       mlx5_devlink_unregister(devlink);
-
        mlx5_drain_health_wq(dev);
        mlx5_unload_one(dev, true);
        mlx5_pci_close(dev);