scsi: lpfc: Remove ndlp when a PLOGI/ADISC/PRLI/REG_RPI ultimately fails
authorJames Smart <james.smart@broadcom.com>
Sun, 15 Nov 2020 19:26:35 +0000 (11:26 -0800)
committerMartin K. Petersen <martin.petersen@oracle.com>
Tue, 17 Nov 2020 05:43:55 +0000 (00:43 -0500)
When a PLOGI/ADISC/PRLI/REG_RPI fails, the node remains in the nodelist in
that state.  Although the driver now frees a node when the ref count goes
to zero, in this case the ref cnt doesn't reach zero because there isn't a
mechanism to release the final reference.  Discovery just stops.

Fix by calling the node discovery state machine DEVICE_RM event whenever
one of these commands fail. This will remove the final reference count and
trigger node release.

Link: https://lore.kernel.org/r/20201115192646.12977-7-james.smart@broadcom.com
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c

index 020300b..388728f 100644 (file)
@@ -1988,12 +1988,25 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                 "2753 PLOGI failure DID:%06X Status:x%x/x%x\n",
                                 ndlp->nlp_DID, irsp->ulpStatus,
                                 irsp->un.ulpWord[4]);
-               /* Do not call DSM for lpfc_els_abort'ed ELS cmds. Just execute
-                * the final node put to free it to the pool.
+
+               /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+               if (lpfc_error_lost_link(irsp))
+                       goto check_plogi;
+               else
+                       lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+                                               NLP_EVT_CMPL_PLOGI);
+
+               /* As long as this node is not registered with the scsi or nvme
+                * transport, it is no longer an active node.  Otherwise
+                * devloss handles the final cleanup.
                 */
-               if (!lpfc_error_lost_link(irsp))
+               if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
+                       spin_lock_irq(&ndlp->lock);
+                       ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+                       spin_unlock_irq(&ndlp->lock);
                        lpfc_disc_state_machine(vport, ndlp, cmdiocb,
                                                NLP_EVT_DEVICE_RM);
+               }
        } else {
                /* Good status, call state machine */
                prsp = list_entry(((struct lpfc_dmabuf *)
@@ -2004,6 +2017,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                        NLP_EVT_CMPL_PLOGI);
        }
 
+ check_plogi:
        if (disc && vport->num_disc_nodes) {
                /* Check to see if there are more PLOGIs to be sent */
                lpfc_more_plogi(vport);
@@ -2243,6 +2257,20 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                else
                        lpfc_disc_state_machine(vport, ndlp, cmdiocb,
                                                NLP_EVT_CMPL_PRLI);
+
+               /* As long as this node is not registered with the SCSI
+                * or NVMe transport and no other PRLIs are outstanding,
+                * it is no longer an active node.  Otherwise devloss
+                * handles the final cleanup.
+                */
+               if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD)) &&
+                   !ndlp->fc4_prli_sent) {
+                       spin_lock_irq(&ndlp->lock);
+                       ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+                       spin_unlock_irq(&ndlp->lock);
+                       lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+                                               NLP_EVT_DEVICE_RM);
+               }
        } else {
                /* Good status, call state machine.  However, if another
                 * PRLI is outstanding, don't call the state machine
@@ -2655,14 +2683,29 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                 ndlp->nlp_DID, irsp->ulpStatus,
                                 irsp->un.ulpWord[4]);
                /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
-               if (!lpfc_error_lost_link(irsp))
+               if (lpfc_error_lost_link(irsp))
+                       goto check_adisc;
+               else
                        lpfc_disc_state_machine(vport, ndlp, cmdiocb,
                                                NLP_EVT_CMPL_ADISC);
+
+               /* As long as this node is not registered with the SCSI or NVMe
+                * transport, it is no longer an active node. Otherwise
+                * devloss handles the final cleanup.
+                */
+               if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
+                       spin_lock_irq(&ndlp->lock);
+                       ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+                       spin_unlock_irq(&ndlp->lock);
+                       lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+                                               NLP_EVT_DEVICE_RM);
+               }
        } else
                /* Good status, call state machine */
                lpfc_disc_state_machine(vport, ndlp, cmdiocb,
                                        NLP_EVT_CMPL_ADISC);
 
+ check_adisc:
        /* Check to see if there are more ADISCs to be sent */
        if (disc && vport->num_disc_nodes)
                lpfc_more_adisc(vport);
@@ -2879,8 +2922,21 @@ out:
                                 irsp->un.ulpWord[4], irsp->ulpTimeout,
                                 vport->num_disc_nodes);
                lpfc_disc_start(vport);
+               return;
+       }
+
+       /* Cleanup path for failed REG_RPI handling. If REG_RPI fails, the
+        * driver sends a LOGO to the rport to cleanup.  For fabric and
+        * initiator ports cleanup the node as long as it the node is not
+        * register with the transport.
+        */
+       if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
+               spin_lock_irq(&ndlp->lock);
+               ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+               spin_unlock_irq(&ndlp->lock);
+               lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+                                       NLP_EVT_DEVICE_RM);
        }
-       return;
 }
 
 /**
index 8c99e89..c6d7620 100644 (file)
@@ -204,10 +204,13 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 
        spin_lock_irqsave(&ndlp->lock, iflags);
        ndlp->nlp_flag |= NLP_IN_DEV_LOSS;
+       ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+
        /*
-        * The backend does not expect any more calls assoicated with this
-        * rport, Remove the association between rport and ndlp
+        * The backend does not expect any more calls associated with this
+        * rport. Remove the association between rport and ndlp.
         */
+       ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
        ((struct lpfc_rport_data *)rport->dd_data)->pnode = NULL;
        ndlp->rport = NULL;
        spin_unlock_irqrestore(&ndlp->lock, iflags);
@@ -248,7 +251,6 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
        int warn_on = 0;
        int fcf_inuse = 0;
        unsigned long iflags;
-       u32 fc4_xpt_flags;
 
        vport = ndlp->vport;
        shost = lpfc_shost_from_vport(vport);
@@ -267,10 +269,11 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                              ndlp->nlp_DID, ndlp->nlp_type, ndlp->nlp_sid);
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
-                        "3182 %s x%06x, nflag x%x xflags x%x\n",
+                        "3182 %s x%06x, nflag x%x xflags x%x refcnt %d\n",
                         __func__, ndlp->nlp_DID, ndlp->nlp_flag,
-                        ndlp->fc4_xpt_flags);
+                        ndlp->fc4_xpt_flags, kref_read(&ndlp->kref));
 
+       /* If the driver is recovering the rport, ignore devloss. */
        if (ndlp->nlp_state == NLP_STE_MAPPED_NODE) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "0284 Devloss timeout Ignored on "
@@ -282,8 +285,11 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                return fcf_inuse;
        }
 
-       if (ndlp->nlp_type & NLP_FABRIC)
+       /* Fabric nodes are done. */
+       if (ndlp->nlp_type & NLP_FABRIC) {
+               lpfc_nlp_put(ndlp);
                return fcf_inuse;
+       }
 
        if (ndlp->nlp_sid != NLP_NO_SID) {
                warn_on = 1;
@@ -311,12 +317,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                                 ndlp->nlp_state, ndlp->nlp_rpi);
        }
 
-       /* Should be final reference removal triggering a node free. */
-       spin_lock_irqsave(shost->host_lock, iflags);
-       fc4_xpt_flags = ndlp->fc4_xpt_flags;
-       spin_unlock_irqrestore(shost->host_lock, iflags);
-
-       if (!(fc4_xpt_flags & (NVME_XPT_REGD | SCSI_XPT_REGD)))
+       if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD))
                lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
 
        return fcf_inuse;
@@ -3587,7 +3588,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        pmb->ctx_buf = NULL;
        pmb->ctx_ndlp = NULL;
 
-       lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+       lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI | LOG_NODE | LOG_DISCOVERY,
                         "0002 rpi:%x DID:%x flg:%x %d x%px\n",
                         ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
                         kref_read(&ndlp->kref),
@@ -4079,8 +4080,16 @@ out:
                kfree(mp);
                mempool_free(pmb, phba->mbox_mem_pool);
 
-               /* If no other thread is using the ndlp, free it */
-               lpfc_nlp_not_used(ndlp);
+               /* If the node is not registered with the scsi or nvme
+                * transport, remove the fabric node.  The failed reg_login
+                * is terminal.
+                */
+               if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
+                       spin_lock_irq(&ndlp->lock);
+                       ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+                       spin_unlock_irq(&ndlp->lock);
+                       lpfc_nlp_not_used(ndlp);
+               }
 
                if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
                        /*