IB/hfi1: Add functions for restarting TID RDMA READ request
author Kaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 03:31:46 +0000 (19:31 -0800)
committer Doug Ledford <dledford@redhat.com>
Tue, 5 Feb 2019 22:53:55 +0000 (17:53 -0500)
This patch adds functions to retry a TID RDMA READ request. Since a TID
RDMA READ request can be retried from any segment boundary, it requires
a number of tracking fields in various structures, and those fields
must be reset properly. The qp->s_num_rd_atomic field is reset before a
retry and therefore must be incremented for each new or retried
RDMA READ or atomic request.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h

index 6e74cd3..e478a0b 100644 (file)
@@ -503,16 +503,14 @@ no_flow_control:
                         * Don't allow more operations to be started
                         * than the QP limits allow.
                         */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= RVT_S_WAIT_RDMAR;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
-                                       qp->s_lsn++;
+                       if (qp->s_num_rd_atomic >=
+                           qp->s_max_rd_atomic) {
+                               qp->s_flags |= RVT_S_WAIT_RDMAR;
+                               goto bail;
                        }
+                       qp->s_num_rd_atomic++;
+                       if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+                               qp->s_lsn++;
                        put_ib_reth_vaddr(
                                wqe->rdma_wr.remote_addr,
                                &ohdr->u.rc.reth);
@@ -534,14 +532,12 @@ no_flow_control:
                         * Don't allow more operations to be started
                         * than the QP limits allow.
                         */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= RVT_S_WAIT_RDMAR;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
+                       if (qp->s_num_rd_atomic >=
+                           qp->s_max_rd_atomic) {
+                               qp->s_flags |= RVT_S_WAIT_RDMAR;
+                               goto bail;
                        }
+                       qp->s_num_rd_atomic++;
 
                        /* FALLTHROUGH */
                case IB_WR_OPFN:
@@ -970,6 +966,43 @@ void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
        return;
 }
 
+/**
+ * update_num_rd_atomic - update qp->s_num_rd_atomic (and TID seg count)
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ * @wqe: the wqe
+ *
+ * For each wqe reset_psn() walks: READ/atomic add 1; TID RDMA READ adds
+ * the recomputed req->ack_pending, or req->total_segs if @psn > lpsn.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
+                                struct rvt_swqe *wqe)
+{
+       u32 opcode = wqe->wr.opcode;
+
+       if (opcode == IB_WR_RDMA_READ ||
+           opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+           opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+               qp->s_num_rd_atomic++;
+       } else if (opcode == IB_WR_TID_RDMA_READ) {
+               struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+               struct hfi1_qp_priv *priv = qp->priv;
+
+               if (cmp_psn(psn, wqe->lpsn) <= 0) {
+                       u32 cur_seg;
+
+                       cur_seg = (psn - wqe->psn) / priv->pkts_ps;
+                       req->ack_pending = cur_seg - req->comp_seg;
+                       priv->pending_tid_r_segs += req->ack_pending;
+                       qp->s_num_rd_atomic += req->ack_pending;
+               } else { /* psn is past this wqe's last PSN */
+                       priv->pending_tid_r_segs += req->total_segs;
+                       qp->s_num_rd_atomic += req->total_segs;
+               }
+       }
+}
+
 /**
  * reset_psn - reset the QP state to send starting from PSN
  * @qp: the QP
@@ -984,9 +1017,12 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
        u32 n = qp->s_acked;
        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
        u32 opcode;
+       struct hfi1_qp_priv *priv = qp->priv;
 
        lockdep_assert_held(&qp->s_lock);
        qp->s_cur = n;
+       priv->pending_tid_r_segs = 0;
+       qp->s_num_rd_atomic = 0;
 
        /*
         * If we are starting the request from the beginning,
@@ -996,9 +1032,9 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
                qp->s_state = OP(SEND_LAST);
                goto done;
        }
+       update_num_rd_atomic(qp, psn, wqe);
 
        /* Find the work request opcode corresponding to the given PSN. */
-       opcode = wqe->wr.opcode;
        for (;;) {
                int diff;
 
@@ -1008,8 +1044,11 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
                        break;
                wqe = rvt_get_swqe_ptr(qp, n);
                diff = cmp_psn(psn, wqe->psn);
-               if (diff < 0)
+               if (diff < 0) {
+                       /* Point wqe back to the previous one */
+                       wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
                        break;
+               }
                qp->s_cur = n;
                /*
                 * If we are starting the request from the beginning,
@@ -1019,8 +1058,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
                        qp->s_state = OP(SEND_LAST);
                        goto done;
                }
-               opcode = wqe->wr.opcode;
+
+               update_num_rd_atomic(qp, psn, wqe);
        }
+       opcode = wqe->wr.opcode;
 
        /*
         * Set the state to restart in the middle of a request.
@@ -1042,6 +1083,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
                qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
                break;
 
+       case IB_WR_TID_RDMA_READ:
+               qp->s_state = TID_OP(READ_RESP);
+               break;
+
        default:
                /*
                 * This case shouldn't happen since its only
@@ -1095,6 +1140,14 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
                                wqe = do_rc_completion(qp, wqe, ibp);
                                qp->s_flags &= ~RVT_S_WAIT_ACK;
                        } else {
+                               if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+                                       struct tid_rdma_request *req;
+
+                                       req = wqe_to_tid_req(wqe);
+                                       hfi1_kern_exp_rcv_clear_all(req);
+                                       hfi1_kern_clear_hw_flow(priv->rcd, qp);
+                               }
+
                                rvt_send_complete(qp, wqe,
                                                  IB_WC_RETRY_EXC_ERR);
                                rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
@@ -1108,7 +1161,8 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
        }
 
        ibp = to_iport(qp->ibqp.device, qp->port_num);
-       if (wqe->wr.opcode == IB_WR_RDMA_READ)
+       if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+           wqe->wr.opcode == IB_WR_TID_RDMA_READ)
                ibp->rvp.n_rc_resends++;
        else
                ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
index da8b63e..f767c5c 100644 (file)
@@ -1622,6 +1622,27 @@ u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
        return dd->verbs_dev.n_tidwait;
 }
 
+static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
+                                         u32 psn, u16 *fidx)
+{
+       u16 head, tail;
+       struct tid_rdma_flow *flow;
+
+       head = req->setup_head; /* scan ends when tail reaches this */
+       tail = req->clear_tail; /* scan starts here */
+       for ( ; CIRC_CNT(head, tail, MAX_FLOWS); /* find flow containing psn */
+            tail = CIRC_NEXT(tail, MAX_FLOWS)) {
+               flow = &req->flows[tail];
+               if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
+                   cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
+                       if (fidx) /* fidx is optional */
+                               *fidx = tail;
+                       return flow;
+               }
+       }
+       return NULL; /* no flow's [ib_spsn, ib_lpsn] holds psn */
+}
+
 static struct tid_rdma_flow *
 __find_flow_ranged(struct tid_rdma_request *req, u16 head, u16 tail,
                   u32 psn, u16 *fidx)
@@ -2714,3 +2735,64 @@ rcu_unlock:
 drop:
        return ret;
 }
+
+/*
+ * "Rewind" a TID RDMA READ request so it resumes at qp->s_psn: find the
+ * flow covering that PSN, recompute its sent/pkt/tid_idx/tid_offset up to
+ * the restart point, set req->flow_idx, mark the request ACTIVE; *bth2 gets
+ * the masked PSN. Returns early, request untouched, on other opcodes/no flow.
+ */
+void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                              u32 *bth2)
+{
+       struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+       struct tid_rdma_flow *flow;
+       int diff;
+       u32 tididx = 0;
+       u16 fidx;
+
+       if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+               *bth2 = mask_psn(qp->s_psn);
+               flow = find_flow_ib(req, *bth2, &fidx);
+               if (!flow)
+                       return;
+       } else {
+               return;
+       }
+
+       diff = delta_psn(*bth2, flow->flow_state.ib_spsn);
+
+       flow->sent = 0;
+       flow->pkt = 0;
+       flow->tid_idx = 0;
+       flow->tid_offset = 0;
+       if (diff) { /* replay diff packets across the TID entries */
+               for (tididx = 0; tididx < flow->tidcnt; tididx++) {
+                       u32 tidentry = flow->tid_entry[tididx], tidlen,
+                               tidnpkts, npkts;
+
+                       flow->tid_offset = 0;
+                       tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
+                       tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
+                       npkts = min_t(u32, diff, tidnpkts);
+                       flow->pkt += npkts;
+                       flow->sent += (npkts == tidnpkts ? tidlen :
+                                      npkts * qp->pmtu);
+                       flow->tid_offset += npkts * qp->pmtu;
+                       diff -= npkts;
+                       if (!diff)
+                               break;
+               }
+       }
+
+       if (flow->tid_offset == /* NOTE(review): assumes tididx < tidcnt */
+           EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
+               tididx++;
+               flow->tid_offset = 0;
+       }
+       flow->tid_idx = tididx;
+       /* Move flow_idx to correct index */
+       req->flow_idx = fidx;
+
+       req->state = TID_REQUEST_ACTIVE;
+}
index d428236..beb5982 100644 (file)
@@ -207,5 +207,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
 bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                              struct hfi1_pportdata *ppd,
                              struct hfi1_packet *packet);
+void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                              u32 *bth2);
 
 #endif /* HFI1_TID_RDMA_H */