IB/hfi1: Add interlock between a TID RDMA request and other requests
author     Kaike Wan <kaike.wan@intel.com>
           Thu, 24 Jan 2019 14:36:48 +0000 (06:36 -0800)
committer  Doug Ledford <dledford@redhat.com>
           Tue, 5 Feb 2019 22:53:55 +0000 (17:53 -0500)
This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA READ/WRITE requests are interleaved with TID RDMA READ/WRITE
requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
When memory corruption is likely, a request will be held back until
previous requests have been completed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hfi1/verbs.h
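
[Editor's note] The interlock amounts to a hold-and-release pattern across
the rc.c hunks in the diff below. The following is an illustrative sketch
only (context, locking, and surrounding state elided), not the literal
upstream code:

    /* Send engine (hfi1_make_rc_req): hold the request back if an
     * interlock is pending or newly detected for this WQE.
     */
    if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
        hfi1_tid_rdma_wqe_interlock(qp, wqe))
            goto bail;              /* do not build/send this request yet */

    /* Completion path (do_rc_completion): once the older requests have
     * completed, clear the flag and restart the send engine.
     */
    if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
            priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
            hfi1_schedule_send(qp);
    }

reset_psn() also clears HFI1_S_TID_WAIT_INTERLCK so that a retry or restart
does not remain stalled on a stale interlock.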

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index a5aacf8..349751c 100644
@@ -482,6 +482,15 @@ check_s_state:
                len = wqe->length;
                ss = &qp->s_sge;
                bth2 = mask_psn(qp->s_psn);
+
+               /*
+                * Interlock between various IB requests and TID RDMA
+                * if necessary.
+                */
+               if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
+                   hfi1_tid_rdma_wqe_interlock(qp, wqe))
+                       goto bail;
+
                switch (wqe->wr.opcode) {
                case IB_WR_SEND:
                case IB_WR_SEND_WITH_IMM:
@@ -1321,6 +1330,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
                qp->s_state = OP(SEND_LAST);
        }
 done:
+       priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
        qp->s_psn = psn;
        /*
         * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
@@ -1540,6 +1550,8 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
                                  struct rvt_swqe *wqe,
                                  struct hfi1_ibport *ibp)
 {
+       struct hfi1_qp_priv *priv = qp->priv;
+
        lockdep_assert_held(&qp->s_lock);
        /*
         * Don't decrement refcount and don't generate a
@@ -1608,6 +1620,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
                        qp->s_draining = 0;
                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
        }
+       if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
+               priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
+               hfi1_schedule_send(qp);
+       }
        return wqe;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index f6d9e27..ccf15c9 100644
@@ -2829,3 +2829,40 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
                } while (!ret);
        }
 }
+
+bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+       struct rvt_swqe *prev;
+       struct hfi1_qp_priv *priv = qp->priv;
+       u32 s_prev;
+
+       s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
+       prev = rvt_get_swqe_ptr(qp, s_prev);
+
+       switch (wqe->wr.opcode) {
+       case IB_WR_SEND:
+       case IB_WR_SEND_WITH_IMM:
+       case IB_WR_SEND_WITH_INV:
+       case IB_WR_ATOMIC_CMP_AND_SWP:
+       case IB_WR_ATOMIC_FETCH_AND_ADD:
+       case IB_WR_RDMA_WRITE:
+       case IB_WR_RDMA_READ:
+               break;
+       case IB_WR_TID_RDMA_READ:
+               switch (prev->wr.opcode) {
+               case IB_WR_RDMA_READ:
+                       if (qp->s_acked != qp->s_cur)
+                               goto interlock;
+                       break;
+               default:
+                       break;
+               }
+       default:
+               break;
+       }
+       return false;
+
+interlock:
+       priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
+       return true;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 4f85b7e..689a549 100644
 #define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)
 
+/*
+ * Bit definitions for priv->s_flags.
+ * These bit flags overload the bit flags defined for the QP's s_flags.
+ * Due to the fact that these bit fields are used only for the QP priv
+ * s_flags, there are no collisions.
+ *
+ * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ */
+#define HFI1_S_TID_WAIT_INTERLCK  BIT(5)
+
 struct tid_rdma_params {
        struct rcu_head rcu_head;
        u32 qp;
@@ -210,5 +220,6 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
                               u32 *bth2);
 void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
+bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
 
 #endif /* HFI1_TID_RDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 7642b59..841727a 100644
@@ -171,6 +171,9 @@ struct hfi1_qp_priv {
        u8 hdr_type; /* 9B or 16B */
        unsigned long tid_timer_timeout_jiffies;
 
+       /* variables for the TID RDMA SE state machine */
+       u32 s_flags;
+
        /* For TID RDMA READ */
        u32 tid_r_reqs;         /* Num of tid reads requested */
        u32 tid_r_comp;         /* Num of tid reads completed */