bnxt_en: Improve wait for firmware commands completion
authorPavan Chebbi <pavan.chebbi@broadcom.com>
Mon, 22 Mar 2021 07:08:40 +0000 (03:08 -0400)
committerDavid S. Miller <davem@davemloft.net>
Mon, 22 Mar 2021 20:07:28 +0000 (13:07 -0700)
In situations where FW has crashed, the bnxt_hwrm_do_send_msg() call
will have to wait until timeout for each firmware message.  This
generally takes about half a second for each firmware message.  If we
try to unload the driver n this state, the unload sequence will take
a long time to complete.

Improve this by checking the health register if it is available and
abort the wait for the firmware response if the register shows that
firmware is not healthy.  The very first message HWRM_VER_GET is
excluded from this check because that message is used to poll for
firmware to come out of reset during error recovery.

Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h

index 16cf18e..deba552 100644 (file)
@@ -4500,12 +4500,15 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
                        if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
                                return -EBUSY;
                        /* on first few passes, just barely sleep */
-                       if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+                       if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
                                usleep_range(HWRM_SHORT_MIN_TIMEOUT,
                                             HWRM_SHORT_MAX_TIMEOUT);
-                       else
+                       } else {
+                               if (HWRM_WAIT_MUST_ABORT(bp, req))
+                                       break;
                                usleep_range(HWRM_MIN_TIMEOUT,
                                             HWRM_MAX_TIMEOUT);
+                       }
                }
 
                if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
@@ -4530,15 +4533,19 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
                        if (len)
                                break;
                        /* on first few passes, just barely sleep */
-                       if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+                       if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
                                usleep_range(HWRM_SHORT_MIN_TIMEOUT,
                                             HWRM_SHORT_MAX_TIMEOUT);
-                       else
+                       } else {
+                               if (HWRM_WAIT_MUST_ABORT(bp, req))
+                                       goto timeout_abort;
                                usleep_range(HWRM_MIN_TIMEOUT,
                                             HWRM_MAX_TIMEOUT);
+                       }
                }
 
                if (i >= tmo_count) {
+timeout_abort:
                        if (!silent)
                                netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
                                           HWRM_TOTAL_TIMEOUT(i),
@@ -7540,6 +7547,19 @@ static void __bnxt_map_fw_health_reg(struct bnxt *bp, u32 reg)
                                         BNXT_FW_HEALTH_WIN_MAP_OFF);
 }
 
+bool bnxt_is_fw_healthy(struct bnxt *bp)
+{
+       if (bp->fw_health && bp->fw_health->status_reliable) {
+               u32 fw_status;
+
+               fw_status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+               if (fw_status && !BNXT_FW_IS_HEALTHY(fw_status))
+                       return false;
+       }
+
+       return true;
+}
+
 static void bnxt_inv_fw_health_reg(struct bnxt *bp)
 {
        struct bnxt_fw_health *fw_health = bp->fw_health;
index 1259e68..e77d607 100644 (file)
@@ -671,6 +671,10 @@ struct nqe_cn {
 #define HWRM_MIN_TIMEOUT               25
 #define HWRM_MAX_TIMEOUT               40
 
+#define HWRM_WAIT_MUST_ABORT(bp, req)                                  \
+       (le16_to_cpu((req)->req_type) != HWRM_VER_GET &&                \
+        !bnxt_is_fw_healthy(bp))
+
 #define HWRM_TOTAL_TIMEOUT(n)  (((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ?  \
        ((n) * HWRM_SHORT_MIN_TIMEOUT) :                                \
        (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +          \
@@ -2228,6 +2232,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
 int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
+bool bnxt_is_fw_healthy(struct bnxt *bp);
 int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);