net/mlx5: Add support for sync reset using hot reset
author: Moshe Shemesh <moshe@nvidia.com>
Wed, 11 Sep 2024 20:17:52 +0000 (13:17 -0700)
committer: Jakub Kicinski <kuba@kernel.org>
Fri, 13 Sep 2024 03:50:29 +0000 (20:50 -0700)
On a device that supports sync reset for firmware activation using hot
reset, the driver queries the required reset method while handling the
sync reset request. If the required reset method is hot reset, the
driver uses pci_reset_bus() to reset the PCI link instead of toggling
the link.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/20240911201757.1505453-11-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/main.c

index b43ca0b..bda74cb 100644 (file)
@@ -26,6 +26,7 @@ struct mlx5_fw_reset {
        struct work_struct reset_now_work;
        struct work_struct reset_abort_work;
        unsigned long reset_flags;
+       u8 reset_method;
        struct timer_list timer;
        struct completion done;
        int ret;
@@ -95,7 +96,7 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
 }
 
 static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
-                              u8 *reset_type, u8 *reset_state)
+                              u8 *reset_type, u8 *reset_state, u8 *reset_method)
 {
        u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
        u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -111,13 +112,26 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
                *reset_type = MLX5_GET(mfrl_reg, out, reset_type);
        if (reset_state)
                *reset_state = MLX5_GET(mfrl_reg, out, reset_state);
+       if (reset_method)
+               *reset_method = MLX5_GET(mfrl_reg, out, pci_reset_req_method);
 
        return 0;
 }
 
 int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
 {
-       return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
+       return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
+}
+
+static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
+                                         u8 *reset_method)
+{
+       if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) {
+               *reset_method = MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE;
+               return 0;
+       }
+
+       return mlx5_reg_mfrl_query(dev, NULL, NULL, NULL, reset_method);
 }
 
 static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
@@ -125,7 +139,7 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
 {
        u8 reset_state;
 
-       if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+       if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state, NULL))
                goto out;
 
        if (!reset_state)
@@ -427,7 +441,11 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
        struct mlx5_core_dev *dev = fw_reset->dev;
        int err;
 
-       if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
+       err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
+       if (err)
+               mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
+
+       if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
            !mlx5_is_reset_now_capable(dev)) {
                err = mlx5_fw_reset_set_reset_sync_nack(dev);
                mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
@@ -444,21 +462,15 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
                mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
 }
 
-static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id)
 {
        struct pci_bus *bridge_bus = dev->pdev->bus;
        struct pci_dev *bridge = bridge_bus->self;
        unsigned long timeout;
        struct pci_dev *sdev;
-       u16 reg16, dev_id;
        int cap, err;
+       u16 reg16;
 
-       err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
-       if (err)
-               return pcibios_err_to_errno(err);
-       err = mlx5_check_dev_ids(dev, dev_id);
-       if (err)
-               return err;
        cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
        if (!cap)
                return -EOPNOTSUPP;
@@ -528,6 +540,44 @@ restore:
        return err;
 }
 
+static int mlx5_pci_reset_bus(struct mlx5_core_dev *dev)
+{
+       if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method))
+               return -EOPNOTSUPP;
+
+       return pci_reset_bus(dev->pdev);
+}
+
+static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method)
+{
+       u16 dev_id;
+       int err;
+
+       err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+       if (err)
+               return pcibios_err_to_errno(err);
+       err = mlx5_check_dev_ids(dev, dev_id);
+       if (err)
+               return err;
+
+       switch (reset_method) {
+       case MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE:
+               err = mlx5_pci_link_toggle(dev, dev_id);
+               if (err)
+                       mlx5_core_warn(dev, "mlx5_pci_link_toggle failed\n");
+               break;
+       case MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET:
+               err = mlx5_pci_reset_bus(dev);
+               if (err)
+                       mlx5_core_warn(dev, "mlx5_pci_reset_bus failed\n");
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return err;
+}
+
 static void mlx5_sync_reset_now_event(struct work_struct *work)
 {
        struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
@@ -546,9 +596,9 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
                goto done;
        }
 
-       err = mlx5_pci_link_toggle(dev);
+       err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
        if (err) {
-               mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+               mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err);
                set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
        }
 
@@ -610,9 +660,9 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work)
 
        mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state);
        if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) {
-               err = mlx5_pci_link_toggle(dev);
+               err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
                if (err) {
-                       mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err);
+                       mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err);
                        fw_reset->ret = err;
                }
        }
index c6e951b..8b0abd6 100644 (file)
@@ -619,6 +619,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
        if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload))
                MLX5_SET(cmd_hca_cap, set_hca_cap,
                         pci_sync_for_fw_update_with_driver_unload, 1);
+       if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method))
+               MLX5_SET(cmd_hca_cap, set_hca_cap,
+                        pcie_reset_using_hotreset_method, 1);
 
        if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
                MLX5_SET(cmd_hca_cap,