cxgb4: avoid crash on PCI error recovery path
authorGuilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Mon, 29 May 2017 02:07:01 +0000 (23:07 -0300)
committerDavid S. Miller <davem@davemloft.net>
Tue, 30 May 2017 16:15:59 +0000 (12:15 -0400)
During PCI error recovery process, specifically on eeh_err_detected()
we might have a NULL netdev struct, hence a direct dereference will
lead to a kernel oops. This was observed with latest upstream kernel
(v4.12-rc2) on Chelsio adapter T422-CR in PowerPC machines.

This patch checks for NULL pointer and avoids the crash, both in
eeh_err_detected() and eeh_resume(). Also, we avoid to trigger
a fatal error or to try disabling interrupts on FW during PCI
error recovery, because: (a) driver might not be able to accurately
access PCI regions in this case, and (b) trigger a fatal error
_during_ the recovery steps is a mistake that could prevent the
recovery path to complete successfully.

Reported-by: Harsha Thyagaraja <hathyaga@in.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c

index 38a5c67..b512149 100644 (file)
@@ -2771,6 +2771,9 @@ void t4_fatal_err(struct adapter *adap)
 {
        int port;
 
+       if (pci_channel_offline(adap->pdev))
+               return;
+
        /* Disable the SGE since ULDs are going to free resources that
         * could be exposed to the adapter.  RDMA MWs for example...
         */
@@ -3882,9 +3885,10 @@ static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
        spin_lock(&adap->stats_lock);
        for_each_port(adap, i) {
                struct net_device *dev = adap->port[i];
-
-               netif_device_detach(dev);
-               netif_carrier_off(dev);
+               if (dev) {
+                       netif_device_detach(dev);
+                       netif_carrier_off(dev);
+               }
        }
        spin_unlock(&adap->stats_lock);
        disable_interrupts(adap);
@@ -3963,12 +3967,13 @@ static void eeh_resume(struct pci_dev *pdev)
        rtnl_lock();
        for_each_port(adap, i) {
                struct net_device *dev = adap->port[i];
-
-               if (netif_running(dev)) {
-                       link_start(dev);
-                       cxgb_set_rxmode(dev);
+               if (dev) {
+                       if (netif_running(dev)) {
+                               link_start(dev);
+                               cxgb_set_rxmode(dev);
+                       }
+                       netif_device_attach(dev);
                }
-               netif_device_attach(dev);
        }
        rtnl_unlock();
 }
index aded42b..3a34aa6 100644 (file)
@@ -4557,8 +4557,13 @@ void t4_intr_enable(struct adapter *adapter)
  */
 void t4_intr_disable(struct adapter *adapter)
 {
-       u32 whoami = t4_read_reg(adapter, PL_WHOAMI_A);
-       u32 pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+       u32 whoami, pf;
+
+       if (pci_channel_offline(adapter->pdev))
+               return;
+
+       whoami = t4_read_reg(adapter, PL_WHOAMI_A);
+       pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
                        SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 
        t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE_A), 0);