scsi: lpfc: Skip waiting for register ready bits when in unrecoverable state
author Justin Tee <justin.tee@broadcom.com>
Wed, 1 Mar 2023 23:16:23 +0000 (15:16 -0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 10 Mar 2023 02:21:45 +0000 (21:21 -0500)
During tolerance tests that force an HBA to become unresponsive, rmmod
hangs, making it impossible to remove the driver.

The lpfc_pci_remove_one_s4() routine attempts to submit a clean up mailbox
command via the lpfc_sli4_post_sync_mbox() routine, but ends up waiting
forever for a mailbox register to set its ready bit.  Because the HBA is in
an unrecoverable and unresponsive state, the ready bit will never be set.

Create a new routine called lpfc_sli4_unrecoverable_port(), which checks a
port status register's error notification bits.

Use the lpfc_sli4_unrecoverable_port() routine in the ready-bit check
routines to return an error early if the port is deemed unrecoverable.

Also, when the lpfc_handle_eratt_s4() handler detects an unrecoverable
state, call the lpfc_sli4_offline_eratt() routine to kick off flushing
outstanding I/O.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20230301231626.9621-8-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli4.h

index 22f2e04..ddbc54e 100644 (file)
@@ -1644,6 +1644,12 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba)
            !bf_get(lpfc_sliport_status_err, &portstat_reg))
                return -EPERM;
 
+       /* There is no point to wait if the port is in an unrecoverable
+        * state.
+        */
+       if (lpfc_sli4_unrecoverable_port(&portstat_reg))
+               return -EIO;
+
        /* wait for the SLI port firmware ready after firmware reset */
        for (i = 0; i < LPFC_FW_RESET_MAXIMUM_WAIT_10MS_CNT; i++) {
                msleep(10);
index 61958a2..3e1e1d1 100644 (file)
@@ -2148,7 +2148,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
                /* fall through for not able to recover */
                lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
                                "3152 Unrecoverable error\n");
-               phba->link_state = LPFC_HBA_ERROR;
+               lpfc_sli4_offline_eratt(phba);
                break;
        case LPFC_SLI_INTF_IF_TYPE_1:
        default:
@@ -9567,8 +9567,7 @@ lpfc_sli4_post_status_check(struct lpfc_hba *phba)
                        /* Final checks.  The port status should be clean. */
                        if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
                                &reg_data.word0) ||
-                               (bf_get(lpfc_sliport_status_err, &reg_data) &&
-                                !bf_get(lpfc_sliport_status_rn, &reg_data))) {
+                               lpfc_sli4_unrecoverable_port(&reg_data)) {
                                phba->work_status[0] =
                                        readl(phba->sli4_hba.u.if_type2.
                                              ERR1regaddr);
index 152245f..ae3207e 100644 (file)
@@ -2265,6 +2265,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                        }
                        if (!vport->localport ||
                            test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
+                           phba->link_state == LPFC_HBA_ERROR ||
                            vport->load_flag & FC_UNLOADING)
                                return;
 
@@ -2630,7 +2631,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 * return values is ignored.  The upcall is a courtesy to the
                 * transport.
                 */
-               if (vport->load_flag & FC_UNLOADING)
+               if (vport->load_flag & FC_UNLOADING ||
+                   unlikely(vport->phba->link_state == LPFC_HBA_ERROR))
                        (void)nvme_fc_set_remoteport_devloss(remoteport, 0);
 
                ret = nvme_fc_unregister_remoteport(remoteport);
index c8b4632..b4917db 100644 (file)
@@ -9895,7 +9895,8 @@ lpfc_sli4_async_mbox_unblock(struct lpfc_hba *phba)
  * port for twice the regular mailbox command timeout value.
  *
  *      0 - no timeout on waiting for bootstrap mailbox register ready.
- *      MBXERR_ERROR - wait for bootstrap mailbox register timed out.
+ *      MBXERR_ERROR - wait for bootstrap mailbox register timed out or port
+ *                     is in an unrecoverable state.
  **/
 static int
 lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
@@ -9903,6 +9904,23 @@ lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
        uint32_t db_ready;
        unsigned long timeout;
        struct lpfc_register bmbx_reg;
+       struct lpfc_register portstat_reg = {-1};
+
+       /* Sanity check - there is no point to wait if the port is in an
+        * unrecoverable state.
+        */
+       if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
+           LPFC_SLI_INTF_IF_TYPE_2) {
+               if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
+                              &portstat_reg.word0) ||
+                   lpfc_sli4_unrecoverable_port(&portstat_reg)) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3858 Skipping bmbx ready because "
+                                       "Port Status x%x\n",
+                                       portstat_reg.word0);
+                       return MBXERR_ERROR;
+               }
+       }
 
        timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq)
                                   * 1000) + jiffies;
index 3b62c40..2a0864e 100644 (file)
@@ -1180,3 +1180,22 @@ static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
        return q->q_pgs[idx / q->entry_cnt_per_pg] +
                (q->entry_size * (idx % q->entry_cnt_per_pg));
 }
+
+/**
+ * lpfc_sli4_unrecoverable_port - Check ERR and RN bits in portstat_reg
+ * @portstat_reg: portstat_reg pointer containing portstat_reg contents
+ *
+ * Description:
+ * Use only for SLI4 interface type-2 or later.  If ERR is set && RN is 0, then
+ * port is deemed unrecoverable.
+ *
+ * Returns:
+ * true                - ERR && !RN
+ * false       - otherwise
+ */
+static inline bool
+lpfc_sli4_unrecoverable_port(struct lpfc_register *portstat_reg)
+{
+       return bf_get(lpfc_sliport_status_err, portstat_reg) &&
+              !bf_get(lpfc_sliport_status_rn, portstat_reg);
+}