qede: Error recovery process
author Tomer Tayar <tomer.tayar@cavium.com>
Mon, 28 Jan 2019 17:27:56 +0000 (19:27 +0200)
committer David S. Miller <davem@davemloft.net>
Mon, 28 Jan 2019 18:58:41 +0000 (10:58 -0800)
This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.

Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qede/qede_rdma.c
include/linux/qed/qede_rdma.h

index 613249d..8434164 100644 (file)
@@ -162,6 +162,7 @@ struct qede_rdma_dev {
        struct list_head entry;
        struct list_head rdma_event_list;
        struct workqueue_struct *rdma_wq;
+       bool exp_recovery;
 };
 
 struct qede_ptp;
@@ -264,6 +265,7 @@ struct qede_dev {
 enum QEDE_STATE {
        QEDE_STATE_CLOSED,
        QEDE_STATE_OPEN,
+       QEDE_STATE_RECOVERY,
 };
 
 #define HILO_U64(hi, lo)               ((((u64)(hi)) << 32) + (lo))
@@ -462,6 +464,7 @@ struct qede_fastpath {
 #define QEDE_CSUM_UNNECESSARY          BIT(1)
 #define QEDE_TUNN_CSUM_UNNECESSARY     BIT(2)
 
+#define QEDE_SP_RECOVERY               0
 #define QEDE_SP_RX_MODE                        1
 
 #ifdef CONFIG_RFS_ACCEL
index 5a74fcb..6b4d966 100644 (file)
@@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 static void qede_remove(struct pci_dev *pdev);
 static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
+static void qede_schedule_recovery_handler(void *dev);
+static void qede_recovery_handler(struct qede_dev *edev);
 static void qede_get_eth_tlv_data(void *edev, void *data);
 static void qede_get_generic_tlv_data(void *edev,
                                      struct qed_generic_tlvs *data);
 
-/* The qede lock is used to protect driver state change and driver flows that
- * are not reentrant.
- */
-void __qede_lock(struct qede_dev *edev)
-{
-       mutex_lock(&edev->qede_lock);
-}
-
-void __qede_unlock(struct qede_dev *edev)
-{
-       mutex_unlock(&edev->qede_lock);
-}
-
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
                            __be16 vlan_proto)
@@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
                .arfs_filter_op = qede_arfs_filter_op,
 #endif
                .link_update = qede_link_update,
+               .schedule_recovery_handler = qede_schedule_recovery_handler,
                .get_generic_tlv_data = qede_get_generic_tlv_data,
                .get_protocol_tlv_data = qede_get_eth_tlv_data,
        },
@@ -950,11 +940,57 @@ err:
        return -ENOMEM;
 }
 
+/* The qede lock is used to protect driver state change and driver flows that
+ * are not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+       mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+       mutex_unlock(&edev->qede_lock);
+}
+
+/* This version of the lock should be used when acquiring the RTNL lock is also
+ * needed in addition to the internal qede lock.
+ */
+void qede_lock(struct qede_dev *edev)
+{
+       rtnl_lock();
+       __qede_lock(edev);
+}
+
+void qede_unlock(struct qede_dev *edev)
+{
+       __qede_unlock(edev);
+       rtnl_unlock();
+}
+
 static void qede_sp_task(struct work_struct *work)
 {
        struct qede_dev *edev = container_of(work, struct qede_dev,
                                             sp_task.work);
 
+       /* The locking scheme depends on the specific flag:
+        * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
+        * ensure that ongoing flows are ended and new ones are not started.
+        * In other cases - only the internal qede lock should be acquired.
+        */
+
+       if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+               /* SRIOV must be disabled outside the lock to avoid a deadlock.
+                * The recovery of the active VFs is currently not supported.
+                */
+               qede_sriov_configure(edev->pdev, 0);
+#endif
+               qede_lock(edev);
+               qede_recovery_handler(edev);
+               qede_unlock(edev);
+       }
+
        __qede_lock(edev);
 
        if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@@ -1031,6 +1067,7 @@ static void qede_log_probe(struct qede_dev *edev)
 
 enum qede_probe_mode {
        QEDE_PROBE_NORMAL,
+       QEDE_PROBE_RECOVERY,
 };
 
 static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
@@ -1051,6 +1088,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        probe_params.dp_module = dp_module;
        probe_params.dp_level = dp_level;
        probe_params.is_vf = is_vf;
+       probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
        cdev = qed_ops->common->probe(pdev, &probe_params);
        if (!cdev) {
                rc = -ENODEV;
@@ -1078,11 +1116,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        if (rc)
                goto err2;
 
-       edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
-                                  dp_level);
-       if (!edev) {
-               rc = -ENOMEM;
-               goto err2;
+       if (mode != QEDE_PROBE_RECOVERY) {
+               edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
+                                          dp_level);
+               if (!edev) {
+                       rc = -ENOMEM;
+                       goto err2;
+               }
+       } else {
+               struct net_device *ndev = pci_get_drvdata(pdev);
+
+               edev = netdev_priv(ndev);
+               edev->cdev = cdev;
+               memset(&edev->stats, 0, sizeof(edev->stats));
+               memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
        }
 
        if (is_vf)
@@ -1090,28 +1137,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
        qede_init_ndev(edev);
 
-       rc = qede_rdma_dev_add(edev);
+       rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY));
        if (rc)
                goto err3;
 
-       /* Prepare the lock prior to the registration of the netdev,
-        * as once it's registered we might reach flows requiring it
-        * [it's even possible to reach a flow needing it directly
-        * from there, although it's unlikely].
-        */
-       INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
-       mutex_init(&edev->qede_lock);
-       rc = register_netdev(edev->ndev);
-       if (rc) {
-               DP_NOTICE(edev, "Cannot register net-device\n");
-               goto err4;
+       if (mode != QEDE_PROBE_RECOVERY) {
+               /* Prepare the lock prior to the registration of the netdev,
+                * as once it's registered we might reach flows requiring it
+                * [it's even possible to reach a flow needing it directly
+                * from there, although it's unlikely].
+                */
+               INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
+               mutex_init(&edev->qede_lock);
+
+               rc = register_netdev(edev->ndev);
+               if (rc) {
+                       DP_NOTICE(edev, "Cannot register net-device\n");
+                       goto err4;
+               }
        }
 
        edev->ops->common->set_name(cdev, edev->ndev->name);
 
        /* PTP not supported on VFs */
        if (!is_vf)
-               qede_ptp_enable(edev, true);
+               qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
 
        edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
@@ -1126,7 +1176,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        return 0;
 
 err4:
-       qede_rdma_dev_remove(edev);
+       qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY));
 err3:
        free_netdev(edev->ndev);
 err2:
@@ -1162,6 +1212,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 enum qede_remove_mode {
        QEDE_REMOVE_NORMAL,
+       QEDE_REMOVE_RECOVERY,
 };
 
 static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
@@ -1172,15 +1223,19 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
        DP_INFO(edev, "Starting qede_remove\n");
 
-       qede_rdma_dev_remove(edev);
-       unregister_netdev(ndev);
-       cancel_delayed_work_sync(&edev->sp_task);
+       qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
 
-       qede_ptp_disable(edev);
+       if (mode != QEDE_REMOVE_RECOVERY) {
+               unregister_netdev(ndev);
 
-       edev->ops->common->set_power_state(cdev, PCI_D0);
+               cancel_delayed_work_sync(&edev->sp_task);
 
-       pci_set_drvdata(pdev, NULL);
+               edev->ops->common->set_power_state(cdev, PCI_D0);
+
+               pci_set_drvdata(pdev, NULL);
+       }
+
+       qede_ptp_disable(edev);
 
        /* Use global ops since we've freed edev */
        qed_ops->common->slowpath_stop(cdev);
@@ -1194,7 +1249,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
         * [e.g., QED register callbacks] won't break anything when
         * accessing the netdevice.
         */
-        free_netdev(ndev);
+       if (mode != QEDE_REMOVE_RECOVERY)
+               free_netdev(ndev);
 
        dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
@@ -1539,6 +1595,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
        return 0;
 }
 
+static void qede_empty_tx_queue(struct qede_dev *edev,
+                               struct qede_tx_queue *txq)
+{
+       unsigned int pkts_compl = 0, bytes_compl = 0;
+       struct netdev_queue *netdev_txq;
+       int rc, len = 0;
+
+       netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
+
+       while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+              qed_chain_get_prod_idx(&txq->tx_pbl)) {
+               DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+                          "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+                          txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
+                          qed_chain_get_prod_idx(&txq->tx_pbl));
+
+               rc = qede_free_tx_pkt(edev, txq, &len);
+               if (rc) {
+                       DP_NOTICE(edev,
+                                 "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+                                 txq->index,
+                                 qed_chain_get_cons_idx(&txq->tx_pbl),
+                                 qed_chain_get_prod_idx(&txq->tx_pbl));
+                       break;
+               }
+
+               bytes_compl += len;
+               pkts_compl++;
+               txq->sw_tx_cons++;
+       }
+
+       netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
+}
+
+static void qede_empty_tx_queues(struct qede_dev *edev)
+{
+       int i;
+
+       for_each_queue(i)
+               if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+                       int cos;
+
+                       for_each_cos_in_txq(edev, cos) {
+                               struct qede_fastpath *fp;
+
+                               fp = &edev->fp_array[i];
+                               qede_empty_tx_queue(edev,
+                                                   &fp->txq[cos]);
+                       }
+               }
+}
+
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
@@ -2053,6 +2161,7 @@ out:
 
 enum qede_unload_mode {
        QEDE_UNLOAD_NORMAL,
+       QEDE_UNLOAD_RECOVERY,
 };
 
 static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@@ -2068,7 +2177,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
        clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
 
-       edev->state = QEDE_STATE_CLOSED;
+       if (mode != QEDE_UNLOAD_RECOVERY)
+               edev->state = QEDE_STATE_CLOSED;
 
        qede_rdma_dev_event_close(edev);
 
@@ -2076,17 +2186,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
        netif_tx_disable(edev->ndev);
        netif_carrier_off(edev->ndev);
 
-       /* Reset the link */
-       memset(&link_params, 0, sizeof(link_params));
-       link_params.link_up = false;
-       edev->ops->common->set_link(edev->cdev, &link_params);
-       rc = qede_stop_queues(edev);
-       if (rc) {
-               qede_sync_free_irqs(edev);
-               goto out;
-       }
+       if (mode != QEDE_UNLOAD_RECOVERY) {
+               /* Reset the link */
+               memset(&link_params, 0, sizeof(link_params));
+               link_params.link_up = false;
+               edev->ops->common->set_link(edev->cdev, &link_params);
 
-       DP_INFO(edev, "Stopped Queues\n");
+               rc = qede_stop_queues(edev);
+               if (rc) {
+                       qede_sync_free_irqs(edev);
+                       goto out;
+               }
+
+               DP_INFO(edev, "Stopped Queues\n");
+       }
 
        qede_vlan_mark_nonconfigured(edev);
        edev->ops->fastpath_stop(edev->cdev);
@@ -2102,18 +2215,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
        qede_napi_disable_remove(edev);
 
+       if (mode == QEDE_UNLOAD_RECOVERY)
+               qede_empty_tx_queues(edev);
+
        qede_free_mem_load(edev);
        qede_free_fp_array(edev);
 
 out:
        if (!is_locked)
                __qede_unlock(edev);
+
+       if (mode != QEDE_UNLOAD_RECOVERY)
+               DP_NOTICE(edev, "Link is down\n");
+
        DP_INFO(edev, "Ending qede unload\n");
 }
 
 enum qede_load_mode {
        QEDE_LOAD_NORMAL,
        QEDE_LOAD_RELOAD,
+       QEDE_LOAD_RECOVERY,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@@ -2293,6 +2414,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
        }
 }
 
+static void qede_schedule_recovery_handler(void *dev)
+{
+       struct qede_dev *edev = dev;
+
+       if (edev->state == QEDE_STATE_RECOVERY) {
+               DP_NOTICE(edev,
+                         "Avoid scheduling a recovery handling since already in recovery state\n");
+               return;
+       }
+
+       set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
+       schedule_delayed_work(&edev->sp_task, 0);
+
+       DP_INFO(edev, "Scheduled a recovery handler\n");
+}
+
+static void qede_recovery_failed(struct qede_dev *edev)
+{
+       netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
+
+       netif_device_detach(edev->ndev);
+
+       if (edev->cdev)
+               edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
+}
+
+static void qede_recovery_handler(struct qede_dev *edev)
+{
+       u32 curr_state = edev->state;
+       int rc;
+
+       DP_NOTICE(edev, "Starting a recovery process\n");
+
+       /* No need to acquire the qede_lock first, since qede_sp_task already
+        * holds it before calling this function.
+        */
+       edev->state = QEDE_STATE_RECOVERY;
+
+       edev->ops->common->recovery_prolog(edev->cdev);
+
+       if (curr_state == QEDE_STATE_OPEN)
+               qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
+
+       __qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
+
+       rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
+                         IS_VF(edev), QEDE_PROBE_RECOVERY);
+       if (rc) {
+               edev->cdev = NULL;
+               goto err;
+       }
+
+       if (curr_state == QEDE_STATE_OPEN) {
+               rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
+               if (rc)
+                       goto err;
+
+               qede_config_rx_mode(edev->ndev);
+               udp_tunnel_get_rx_info(edev->ndev);
+       }
+
+       edev->state = curr_state;
+
+       DP_NOTICE(edev, "Recovery handling is done\n");
+
+       return;
+
+err:
+       qede_recovery_failed(edev);
+}
+
 static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
        struct netdev_queue *netdev_txq;
index 1900bf7..ffabc2d 100644 (file)
@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
        if (!qedr_drv)
                return;
 
+       /* Leftovers from previous error recovery */
+       edev->rdma_info.exp_recovery = false;
        edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
                                                 edev->ndev);
 }
@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
        destroy_workqueue(edev->rdma_info.rdma_wq);
 }
 
-int qede_rdma_dev_add(struct qede_dev *edev)
+int qede_rdma_dev_add(struct qede_dev *edev, bool recovery)
 {
-       int rc = 0;
+       int rc;
 
-       if (qede_rdma_supported(edev)) {
-               rc = qede_rdma_create_wq(edev);
-               if (rc)
-                       return rc;
+       if (!qede_rdma_supported(edev))
+               return 0;
 
-               INIT_LIST_HEAD(&edev->rdma_info.entry);
-               mutex_lock(&qedr_dev_list_lock);
-               list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-               _qede_rdma_dev_add(edev);
-               mutex_unlock(&qedr_dev_list_lock);
-       }
+       /* Cannot start qedr while recovering since it wasn't fully stopped */
+       if (recovery)
+               return 0;
+
+       rc = qede_rdma_create_wq(edev);
+       if (rc)
+               return rc;
+
+       INIT_LIST_HEAD(&edev->rdma_info.entry);
+       mutex_lock(&qedr_dev_list_lock);
+       list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+       _qede_rdma_dev_add(edev);
+       mutex_unlock(&qedr_dev_list_lock);
 
        return rc;
 }
@@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
 {
        if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
                qedr_drv->remove(edev->rdma_info.qedr_dev);
-       edev->rdma_info.qedr_dev = NULL;
 }
 
-void qede_rdma_dev_remove(struct qede_dev *edev)
+void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery)
 {
        if (!qede_rdma_supported(edev))
                return;
 
-       qede_rdma_destroy_wq(edev);
-       mutex_lock(&qedr_dev_list_lock);
-       _qede_rdma_dev_remove(edev);
-       list_del(&edev->rdma_info.entry);
-       mutex_unlock(&qedr_dev_list_lock);
+       /* Cannot remove qedr while recovering since it wasn't fully stopped */
+       if (!recovery) {
+               qede_rdma_destroy_wq(edev);
+               mutex_lock(&qedr_dev_list_lock);
+               if (!edev->rdma_info.exp_recovery)
+                       _qede_rdma_dev_remove(edev);
+               edev->rdma_info.qedr_dev = NULL;
+               list_del(&edev->rdma_info.entry);
+               mutex_unlock(&qedr_dev_list_lock);
+       } else {
+               if (!edev->rdma_info.exp_recovery) {
+                       mutex_lock(&qedr_dev_list_lock);
+                       _qede_rdma_dev_remove(edev);
+                       mutex_unlock(&qedr_dev_list_lock);
+               }
+               edev->rdma_info.exp_recovery = true;
+       }
 }
 
 static void _qede_rdma_dev_open(struct qede_dev *edev)
@@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
 
        mutex_lock(&qedr_dev_list_lock);
        list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-               if (edev->rdma_info.qedr_dev)
+               /* If device has experienced recovery it was already removed */
+               if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
                        _qede_rdma_dev_remove(edev);
        }
        qedr_drv = NULL;
@@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
 {
        struct qede_rdma_event_work *event_node;
 
+       /* If a recovery was experienced avoid adding the event */
+       if (edev->rdma_info.exp_recovery)
+               return;
+
        if (!edev->rdma_info.qedr_dev)
                return;
 
index 9904617..5a00c7a 100644 (file)
@@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
 bool qede_rdma_supported(struct qede_dev *dev);
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
-int qede_rdma_dev_add(struct qede_dev *dev);
+int qede_rdma_dev_add(struct qede_dev *dev, bool recovery);
 void qede_rdma_dev_event_open(struct qede_dev *dev);
 void qede_rdma_dev_event_close(struct qede_dev *dev);
-void qede_rdma_dev_remove(struct qede_dev *dev);
+void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
 void qede_rdma_event_changeaddr(struct qede_dev *edr);
 
 #else
-static inline int qede_rdma_dev_add(struct qede_dev *dev)
+static inline int qede_rdma_dev_add(struct qede_dev *dev,
+                                   bool recovery)
 {
        return 0;
 }
 
 static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
 static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
-static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
+static inline void qede_rdma_dev_remove(struct qede_dev *dev,
+                                       bool recovery) {}
 static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
 #endif
 #endif