qed: prevent a fw assert during device shutdown
authorVenkata Sudheer Kumar Bhavaraju <vbhavaraju@marvell.com>
Wed, 9 Feb 2022 19:28:14 +0000 (11:28 -0800)
committerDavid S. Miller <davem@davemloft.net>
Thu, 10 Feb 2022 15:27:44 +0000 (15:27 +0000)
Device firmware can assert if the device shutdown path in driver
encounters an async. events from mfw (processed in
qed_mcp_handle_events()) after qed_mcp_unload_req() returns.
A call to qed_mcp_unload_req() currently marks the device as inactive
and thus stops any new events, but there is a windows where in-flight
events might still be received by the driver.

To prevent this race condition, atomically set QED_MCP_BYPASS_PROC_BIT
in qed_mcp_unload_req() to make sure qed_mcp_handle_events() ignores all
events. Wait for any event that might already be in-process to complete
by monitoring QED_MCP_IN_PROCESSING_BIT.

Signed-off-by: Pravin Kumar Ganesh Dhende <pdhende@marvell.com>
Signed-off-by: Venkata Sudheer Kumar Bhavaraju <vbhavaraju@marvell.com>
Signed-off-by: Alok Prasad <palok@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h

index cc4ec2b..672480c 100644 (file)
@@ -3098,6 +3098,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
                        continue;
                }
 
+               /* Some flows may keep variable set */
+               p_hwfn->mcp_info->mcp_handling_status = 0;
+
                rc = qed_calc_hw_mode(p_hwfn);
                if (rc)
                        return rc;
index b3811ad..9fb1fa4 100644 (file)
@@ -140,7 +140,7 @@ static struct qed_mcp_cmd_elem *qed_mcp_cmd_get_elem(struct qed_hwfn *p_hwfn,
 int qed_mcp_free(struct qed_hwfn *p_hwfn)
 {
        if (p_hwfn->mcp_info) {
-               struct qed_mcp_cmd_elem *p_cmd_elem, *p_tmp;
+               struct qed_mcp_cmd_elem *p_cmd_elem = NULL, *p_tmp;
 
                kfree(p_hwfn->mcp_info->mfw_mb_cur);
                kfree(p_hwfn->mcp_info->mfw_mb_shadow);
@@ -249,6 +249,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        /* Initialize the MFW spinlock */
        spin_lock_init(&p_info->cmd_lock);
        spin_lock_init(&p_info->link_lock);
+       spin_lock_init(&p_info->unload_lock);
 
        INIT_LIST_HEAD(&p_info->cmd_list);
 
@@ -1095,10 +1096,15 @@ int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        return 0;
 }
 
+#define MFW_COMPLETION_MAX_ITER 5000
+#define MFW_COMPLETION_INTERVAL_MS 1
+
 int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_mcp_mb_params mb_params;
+       u32 cnt = MFW_COMPLETION_MAX_ITER;
        u32 wol_param;
+       int rc;
 
        switch (p_hwfn->cdev->wol_config) {
        case QED_OV_WOL_DISABLED:
@@ -1121,7 +1127,23 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        mb_params.param = wol_param;
        mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
 
-       return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       spin_lock_bh(&p_hwfn->mcp_info->unload_lock);
+       set_bit(QED_MCP_BYPASS_PROC_BIT,
+               &p_hwfn->mcp_info->mcp_handling_status);
+       spin_unlock_bh(&p_hwfn->mcp_info->unload_lock);
+
+       rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+
+       while (test_bit(QED_MCP_IN_PROCESSING_BIT,
+                       &p_hwfn->mcp_info->mcp_handling_status) && --cnt)
+               msleep(MFW_COMPLETION_INTERVAL_MS);
+
+       if (!cnt)
+               DP_NOTICE(p_hwfn,
+                         "Failed to wait MFW event completion after %d msec\n",
+                         MFW_COMPLETION_MAX_ITER * MFW_COMPLETION_INTERVAL_MS);
+
+       return rc;
 }
 
 int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -2021,6 +2043,19 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                           "Msg [%d] - old CMD 0x%02x, new CMD 0x%02x\n",
                           i, info->mfw_mb_shadow[i], info->mfw_mb_cur[i]);
 
+               spin_lock_bh(&p_hwfn->mcp_info->unload_lock);
+               if (test_bit(QED_MCP_BYPASS_PROC_BIT,
+                            &p_hwfn->mcp_info->mcp_handling_status)) {
+                       spin_unlock_bh(&p_hwfn->mcp_info->unload_lock);
+                       DP_INFO(p_hwfn,
+                               "Msg [%d] is bypassed on unload flow\n", i);
+                       continue;
+               }
+
+               set_bit(QED_MCP_IN_PROCESSING_BIT,
+                       &p_hwfn->mcp_info->mcp_handling_status);
+               spin_unlock_bh(&p_hwfn->mcp_info->unload_lock);
+
                switch (i) {
                case MFW_DRV_MSG_LINK_CHANGE:
                        qed_mcp_handle_link_change(p_hwfn, p_ptt, false);
@@ -2074,6 +2109,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                        DP_INFO(p_hwfn, "Unimplemented MFW message %d\n", i);
                        rc = -EINVAL;
                }
+
+               clear_bit(QED_MCP_IN_PROCESSING_BIT,
+                         &p_hwfn->mcp_info->mcp_handling_status);
        }
 
        /* ACK everything */
index 2f26bee..9bd0565 100644 (file)
@@ -788,6 +788,14 @@ struct qed_mcp_info {
 
        /* S/N for debug data mailbox commands */
        atomic_t dbg_data_seq;
+
+       /* Spinlock used to sync the flag mcp_handling_status with
+        * the mfw events handler
+        */
+       spinlock_t unload_lock;
+       unsigned long mcp_handling_status;
+#define QED_MCP_BYPASS_PROC_BIT 0
+#define QED_MCP_IN_PROCESSING_BIT       1
 };
 
 struct qed_mcp_mb_params {