bnxt_en: Fix the PCI-AER routines
authorVikas Gupta <vikas.gupta@broadcom.com>
Fri, 19 Apr 2024 18:34:48 +0000 (11:34 -0700)
committerDavid S. Miller <davem@davemloft.net>
Mon, 22 Apr 2024 13:13:18 +0000 (14:13 +0100)
We do not support two simultaneous recoveries so check for reset
flag, BNXT_STATE_IN_FW_RESET, and do not proceed with AER further.
When the pci channel state is pci_channel_io_frozen, the PCIe link
can not be trusted so we disable the traffic immediately and stop
BAR access by calling bnxt_fw_fatal_close().  BAR access after
AER fatal error can cause an NMI.

Fixes: f75d9a0aa967 ("bnxt_en: Re-write PCI BARs after PCI fatal error.")
Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/broadcom/bnxt/bnxt.c

index c852f87..86c1c30 100644 (file)
@@ -15378,6 +15378,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct bnxt *bp = netdev_priv(netdev);
+       bool abort = false;
 
        netdev_info(netdev, "PCI I/O error detected\n");
 
@@ -15386,16 +15387,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
 
        bnxt_ulp_stop(bp);
 
-       if (state == pci_channel_io_perm_failure) {
+       if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+               netdev_err(bp->dev, "Firmware reset already in progress\n");
+               abort = true;
+       }
+
+       if (abort || state == pci_channel_io_perm_failure) {
                rtnl_unlock();
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
-       if (state == pci_channel_io_frozen)
+       /* Link is not reliable anymore if state is pci_channel_io_frozen
+        * so we disable bus master to prevent any potential bad DMAs before
+        * freeing kernel memory.
+        */
+       if (state == pci_channel_io_frozen) {
                set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
+               bnxt_fw_fatal_close(bp);
+       }
 
        if (netif_running(netdev))
-               bnxt_close(netdev);
+               __bnxt_close_nic(bp, true, true);
 
        if (pci_is_enabled(pdev))
                pci_disable_device(pdev);
@@ -15479,6 +15491,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
        }
 
 reset_exit:
+       clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
        bnxt_clear_reservations(bp, true);
        rtnl_unlock();