RDMA/hns: Add support for XRC on HIP09
authorWenpeng Liang <liangwenpeng@huawei.com>
Thu, 4 Mar 2021 02:55:58 +0000 (10:55 +0800)
committerJason Gunthorpe <jgg@nvidia.com>
Thu, 11 Mar 2021 23:51:27 +0000 (19:51 -0400)
The HIP09 supports XRC transport service, it greatly saves the number of
QPs required to connect all processes in a large cluster.

Link: https://lore.kernel.org/r/1614826558-35423-1-git-send-email-liweihang@huawei.com
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/hns/hns_roce_alloc.c
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_pd.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/hns/hns_roce_srq.c
include/uapi/rdma/hns-abi.h

index 4bcaaa0..5d389ed 100644 (file)
@@ -304,6 +304,9 @@ done:
 
 void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
 {
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+               hns_roce_cleanup_xrcd_table(hr_dev);
+
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
                hns_roce_cleanup_srq_table(hr_dev);
        hns_roce_cleanup_qp_table(hr_dev);
index 3d6b7a2..1a95fb7 100644 (file)
@@ -137,6 +137,7 @@ enum {
        SERV_TYPE_UC,
        SERV_TYPE_RD,
        SERV_TYPE_UD,
+       SERV_TYPE_XRC = 5,
 };
 
 enum hns_roce_qp_state {
@@ -168,6 +169,8 @@ enum hns_roce_event {
        HNS_ROCE_EVENT_TYPE_DB_OVERFLOW               = 0x12,
        HNS_ROCE_EVENT_TYPE_MB                        = 0x13,
        HNS_ROCE_EVENT_TYPE_FLR                       = 0x15,
+       HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION            = 0x16,
+       HNS_ROCE_EVENT_TYPE_INVALID_XRCETH            = 0x17,
 };
 
 #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
@@ -179,6 +182,7 @@ enum {
        HNS_ROCE_CAP_FLAG_RECORD_DB             = BIT(3),
        HNS_ROCE_CAP_FLAG_SQ_RECORD_DB          = BIT(4),
        HNS_ROCE_CAP_FLAG_SRQ                   = BIT(5),
+       HNS_ROCE_CAP_FLAG_XRC                   = BIT(6),
        HNS_ROCE_CAP_FLAG_MW                    = BIT(7),
        HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
        HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL          = BIT(9),
@@ -244,6 +248,11 @@ struct hns_roce_pd {
        unsigned long           pdn;
 };
 
+struct hns_roce_xrcd {
+       struct ib_xrcd ibxrcd;
+       u32 xrcdn;
+};
+
 struct hns_roce_bitmap {
        /* Bitmap Traversal last a bit which is 1 */
        unsigned long           last;
@@ -467,6 +476,7 @@ struct hns_roce_srq {
        u32                     rsv_sge;
        int                     wqe_shift;
        u32                     cqn;
+       u32                     xrcdn;
        void __iomem            *db_reg_l;
 
        atomic_t                refcount;
@@ -640,6 +650,8 @@ struct hns_roce_qp {
                                         enum hns_roce_event event_type);
        unsigned long           qpn;
 
+       u32                     xrcdn;
+
        atomic_t                refcount;
        struct completion       free;
 
@@ -766,6 +778,8 @@ struct hns_roce_caps {
        int             reserved_uars;
        int             num_pds;
        int             reserved_pds;
+       u32             num_xrcds;
+       u32             reserved_xrcds;
        u32             mtt_entry_sz;
        u32             cqe_sz;
        u32             page_size_cap;
@@ -963,6 +977,7 @@ struct hns_roce_dev {
 
        struct hns_roce_cmdq    cmd;
        struct hns_roce_bitmap    pd_bitmap;
+       struct hns_roce_bitmap xrcd_bitmap;
        struct hns_roce_uar_table uar_table;
        struct hns_roce_mr_table  mr_table;
        struct hns_roce_cq_table  cq_table;
@@ -1004,6 +1019,11 @@ static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
        return container_of(ibpd, struct hns_roce_pd, ibpd);
 }
 
+static inline struct hns_roce_xrcd *to_hr_xrcd(struct ib_xrcd *ibxrcd)
+{
+       return container_of(ibxrcd, struct hns_roce_xrcd, ibxrcd);
+}
+
 static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
 {
        return container_of(ibah, struct hns_roce_ah, ibah);
@@ -1136,6 +1156,7 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
 void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
 
 void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
@@ -1143,6 +1164,7 @@ void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_xrcd_table(struct hns_roce_dev *hr_dev);
 
 int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
 void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
@@ -1207,6 +1229,9 @@ int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
                        struct ib_udata *udata);
 int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+
 struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
                                 struct ib_qp_init_attr *init_attr,
                                 struct ib_udata *udata);
index 18394e5..8f73d00 100644 (file)
@@ -1828,6 +1828,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->num_srqwqe_segs   = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
        caps->num_idx_segs      = HNS_ROCE_V2_MAX_IDX_SEGS;
        caps->num_pds           = HNS_ROCE_V2_MAX_PD_NUM;
+       caps->num_xrcds         = HNS_ROCE_V2_MAX_XRCD_NUM;
        caps->max_qp_init_rdma  = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
        caps->max_qp_dest_rdma  = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
        caps->max_sq_desc_sz    = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
@@ -1845,6 +1846,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->page_size_cap     = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
        caps->reserved_lkey     = 0;
        caps->reserved_pds      = 0;
+       caps->reserved_xrcds    = HNS_ROCE_V2_RSV_XRCD_NUM;
        caps->reserved_mrws     = 1;
        caps->reserved_uars     = 0;
        caps->reserved_cqs      = 0;
@@ -1899,7 +1901,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
 
        caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
                       HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
-                      HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
+                      HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC;
 
        caps->num_qpc_timer       = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
        caps->qpc_timer_entry_sz  = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
@@ -2130,6 +2132,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
        caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
        caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
        caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+       caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM;
+       caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM;
        caps->mtt_ba_pg_sz = 0;
        caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
        caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
@@ -3908,6 +3912,16 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
                       ilog2(hr_qp->rq.wqe_cnt));
 }
 
+static inline int get_cqn(struct ib_cq *ib_cq)
+{
+       return ib_cq ? to_hr_cq(ib_cq)->cqn : 0;
+}
+
+static inline int get_pdn(struct ib_pd *ib_pd)
+{
+       return ib_pd ? to_hr_pd(ib_pd)->pdn : 0;
+}
+
 static void modify_qp_reset_to_init(struct ib_qp *ibqp,
                                    const struct ib_qp_attr *attr,
                                    int attr_mask,
@@ -3924,13 +3938,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
         * 0 at the same time, else set them to 0x1.
         */
        roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
-                      V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
+                      V2_QPC_BYTE_4_TST_S, to_hr_qp_type(ibqp->qp_type));
 
        roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
                       V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
 
        roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
-                      V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
+                      V2_QPC_BYTE_16_PD_S, get_pdn(ibqp->pd));
 
        roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
                       V2_QPC_BYTE_20_RQWS_S, ilog2(hr_qp->rq.max_gs));
@@ -3941,6 +3955,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
        roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
                       V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
 
+       if (ibqp->qp_type == IB_QPT_XRC_TGT)
+               context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+
        if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
                roce_set_bit(context->byte_68_rq_db,
                             V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1);
@@ -3955,19 +3972,20 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
                    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
 
        roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
-                      V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
+                      V2_QPC_BYTE_80_RX_CQN_S, get_cqn(ibqp->recv_cq));
+
        if (ibqp->srq) {
+               roce_set_bit(context->byte_76_srqn_op_en,
+                            V2_QPC_BYTE_76_SRQ_EN_S, 1);
                roce_set_field(context->byte_76_srqn_op_en,
                               V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S,
                               to_hr_srq(ibqp->srq)->srqn);
-               roce_set_bit(context->byte_76_srqn_op_en,
-                            V2_QPC_BYTE_76_SRQ_EN_S, 1);
        }
 
        roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
 
        roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
-                      V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+                      V2_QPC_BYTE_252_TX_CQN_S, get_cqn(ibqp->send_cq));
 
        if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
                return;
@@ -3990,22 +4008,28 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
         * 0 at the same time, else set them to 0x1.
         */
        roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
-                      V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
+                      V2_QPC_BYTE_4_TST_S, to_hr_qp_type(ibqp->qp_type));
        roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
                       V2_QPC_BYTE_4_TST_S, 0);
 
+       if (ibqp->qp_type == IB_QPT_XRC_TGT) {
+               context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+               qpc_mask->qkey_xrcd = 0;
+       }
+
        roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
-                      V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
+                      V2_QPC_BYTE_16_PD_S, get_pdn(ibqp->pd));
+
        roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
                       V2_QPC_BYTE_16_PD_S, 0);
 
        roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
-                      V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
+                      V2_QPC_BYTE_80_RX_CQN_S, get_cqn(ibqp->recv_cq));
        roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
                       V2_QPC_BYTE_80_RX_CQN_S, 0);
 
        roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
-                      V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+                      V2_QPC_BYTE_252_TX_CQN_S, get_cqn(ibqp->send_cq));
        roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
                       V2_QPC_BYTE_252_TX_CQN_S, 0);
 
@@ -4684,7 +4708,6 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
                               V2_QPC_BYTE_244_RNR_CNT_S, 0);
        }
 
-       /* RC&UC&UD required attr */
        if (attr_mask & IB_QP_SQ_PSN) {
                roce_set_field(context->byte_172_sq_psn,
                               V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -4762,7 +4785,6 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
                               V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0);
        }
 
-       /* RC&UC required attr */
        if (attr_mask & IB_QP_RQ_PSN) {
                roce_set_field(context->byte_108_rx_reqepsn,
                               V2_QPC_BYTE_108_RX_REQ_EPSN_M,
@@ -4805,6 +4827,29 @@ static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
        }
 }
 
+static void clear_qp(struct hns_roce_qp *hr_qp)
+{
+       struct ib_qp *ibqp = &hr_qp->ibqp;
+
+       if (ibqp->send_cq)
+               hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
+                                    hr_qp->qpn, NULL);
+
+       if (ibqp->recv_cq  && ibqp->recv_cq != ibqp->send_cq)
+               hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq),
+                                    hr_qp->qpn, ibqp->srq ?
+                                    to_hr_srq(ibqp->srq) : NULL);
+
+       if (hr_qp->rq.wqe_cnt)
+               *hr_qp->rdb.db_record = 0;
+
+       hr_qp->rq.head = 0;
+       hr_qp->rq.tail = 0;
+       hr_qp->sq.head = 0;
+       hr_qp->sq.tail = 0;
+       hr_qp->next_sge = 0;
+}
+
 static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                                 const struct ib_qp_attr *attr,
                                 int attr_mask, enum ib_qp_state cur_state,
@@ -4839,19 +4884,23 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
        /* When QP state is err, SQ and RQ WQE should be flushed */
        if (new_state == IB_QPS_ERR) {
-               spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
-               hr_qp->state = IB_QPS_ERR;
-               roce_set_field(context->byte_160_sq_ci_pi,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
-                              hr_qp->sq.head);
-               roce_set_field(qpc_mask->byte_160_sq_ci_pi,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
-               spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
-
-               if (!ibqp->srq) {
+               if (ibqp->qp_type != IB_QPT_XRC_TGT) {
+                       spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+                       hr_qp->state = IB_QPS_ERR;
+                       roce_set_field(context->byte_160_sq_ci_pi,
+                                      V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+                                      V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+                                      hr_qp->sq.head);
+                       roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+                                      V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+                                      V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+                       spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
+               }
+
+               if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC_INI &&
+                   ibqp->qp_type != IB_QPT_XRC_TGT) {
                        spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+                       hr_qp->state = IB_QPS_ERR;
                        roce_set_field(context->byte_84_rq_ci_pi,
                               V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
                               V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
@@ -4870,7 +4919,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                goto out;
 
        roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
-                    ibqp->srq ? 1 : 0);
+                    ((to_hr_qp_type(hr_qp->ibqp.qp_type) == SERV_TYPE_XRC) ||
+                    ibqp->srq) ? 1 : 0);
        roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
                     V2_QPC_BYTE_108_INV_CREDIT_S, 0);
 
@@ -4891,21 +4941,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
        hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
 
-       if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-               hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
-                                    ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
-               if (ibqp->send_cq != ibqp->recv_cq)
-                       hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
-                                            hr_qp->qpn, NULL);
-
-               hr_qp->rq.head = 0;
-               hr_qp->rq.tail = 0;
-               hr_qp->sq.head = 0;
-               hr_qp->sq.tail = 0;
-               hr_qp->next_sge = 0;
-               if (hr_qp->rq.wqe_cnt)
-                       *hr_qp->rdb.db_record = 0;
-       }
+       if (new_state == IB_QPS_RESET && !ibqp->uobject)
+               clear_qp(hr_qp);
 
 out:
        return ret;
@@ -5016,7 +5053,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                                    V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S);
 
        if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
-           hr_qp->ibqp.qp_type == IB_QPT_UC) {
+           hr_qp->ibqp.qp_type == IB_QPT_UC ||
+           hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+           hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
                struct ib_global_route *grh =
                                rdma_ah_retrieve_grh(&qp_attr->ah_attr);
 
@@ -5048,6 +5087,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context.byte_140_raq,
                                                     V2_QPC_BYTE_140_RR_MAX_M,
                                                     V2_QPC_BYTE_140_RR_MAX_S);
+
        qp_attr->min_rnr_timer = (u8)roce_get_field(context.byte_80_rnr_rx_cqn,
                                                 V2_QPC_BYTE_80_MIN_RNR_TIME_M,
                                                 V2_QPC_BYTE_80_MIN_RNR_TIME_S);
@@ -5082,6 +5122,15 @@ out:
        return ret;
 }
 
+static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp)
+{
+       return ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+                hr_qp->ibqp.qp_type == IB_QPT_UD ||
+                hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+                hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+               hr_qp->state != IB_QPS_RESET);
+}
+
 static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
                                         struct hns_roce_qp *hr_qp,
                                         struct ib_udata *udata)
@@ -5091,9 +5140,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
        unsigned long flags;
        int ret = 0;
 
-       if ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
-            hr_qp->ibqp.qp_type == IB_QPT_UD) &&
-          hr_qp->state != IB_QPS_RESET) {
+       if (modify_qp_is_ok(hr_qp)) {
                /* Modify qp to reset before destroying qp */
                ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
                                            hr_qp->state, IB_QPS_RESET);
@@ -5274,7 +5321,7 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
        hr_reg_write(ctx, SRQC_SRQ_ST, 1);
        hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
        hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
-       hr_reg_write(ctx, SRQC_XRCD, 0);
+       hr_reg_write(ctx, SRQC_XRCD, srq->xrcdn);
        hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
        hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
        hr_reg_write(ctx, SRQC_RQWS,
@@ -5478,6 +5525,12 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
        case HNS_ROCE_EVENT_TYPE_FLR:
                ibdev_warn(ibdev, "Function level reset.\n");
                break;
+       case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+               ibdev_err(ibdev, "xrc domain violation error.\n");
+               break;
+       case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+               ibdev_err(ibdev, "invalid xrceth error.\n");
+               break;
        default:
                break;
        }
@@ -5578,6 +5631,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
                case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
                case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
                case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+               case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+               case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
                        hns_roce_qp_event(hr_dev, queue_num, event_type);
                        break;
                case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
index 39621fb..ffdae15 100644 (file)
@@ -74,6 +74,8 @@
 #define HNS_ROCE_V2_MAX_SRQWQE_SEGS            0x1000000
 #define HNS_ROCE_V2_MAX_IDX_SEGS               0x1000000
 #define HNS_ROCE_V2_MAX_PD_NUM                 0x1000000
+#define HNS_ROCE_V2_MAX_XRCD_NUM               0x1000000
+#define HNS_ROCE_V2_RSV_XRCD_NUM               0
 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA           128
 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA           128
 #define HNS_ROCE_V2_MAX_SQ_DESC_SZ             64
index c9c0836..1a747f7 100644 (file)
@@ -207,6 +207,9 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
                props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
        }
 
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+               props->device_cap_flags |= IB_DEVICE_XRC;
+
        return 0;
 }
 
@@ -300,6 +303,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
                return -EAGAIN;
 
        resp.qp_tab_size = hr_dev->caps.num_qps;
+       resp.srq_tab_size = hr_dev->caps.num_srqs;
 
        ret = hns_roce_uar_alloc(hr_dev, &context->uar);
        if (ret)
@@ -461,6 +465,13 @@ static const struct ib_device_ops hns_roce_dev_srq_ops = {
        INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
 };
 
+static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
+       .alloc_xrcd = hns_roce_alloc_xrcd,
+       .dealloc_xrcd = hns_roce_dealloc_xrcd,
+
+       INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
+};
+
 static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 {
        int ret;
@@ -484,20 +495,20 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
                ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
 
-       /* MW */
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW)
                ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
 
-       /* FRMR */
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
                ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
 
-       /* SRQ */
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
                ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
                ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
        }
 
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+               ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
+
        ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
        ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
        for (i = 0; i < hr_dev->caps.num_ports; i++) {
@@ -727,10 +738,19 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
                goto err_uar_alloc_free;
        }
 
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) {
+               ret = hns_roce_init_xrcd_table(hr_dev);
+               if (ret) {
+                       dev_err(dev, "failed to init xrcd table, ret = %d.\n",
+                               ret);
+                       goto err_pd_table_free;
+               }
+       }
+
        ret = hns_roce_init_mr_table(hr_dev);
        if (ret) {
                dev_err(dev, "Failed to init memory region table.\n");
-               goto err_pd_table_free;
+               goto err_xrcd_table_free;
        }
 
        hns_roce_init_cq_table(hr_dev);
@@ -759,6 +779,10 @@ err_cq_table_free:
        hns_roce_cleanup_cq_table(hr_dev);
        hns_roce_cleanup_mr_table(hr_dev);
 
+err_xrcd_table_free:
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+               hns_roce_cleanup_xrcd_table(hr_dev);
+
 err_pd_table_free:
        hns_roce_cleanup_pd_table(hr_dev);
 
index cca818d..3ca51ce 100644 (file)
@@ -137,3 +137,54 @@ void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev)
 {
        hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap);
 }
+
+static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
+{
+       return hns_roce_bitmap_alloc(&hr_dev->xrcd_bitmap,
+                                    (unsigned long *)xrcdn);
+}
+
+static void hns_roce_xrcd_free(struct hns_roce_dev *hr_dev,
+                              u32 xrcdn)
+{
+       hns_roce_bitmap_free(&hr_dev->xrcd_bitmap, xrcdn, BITMAP_NO_RR);
+}
+
+int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev)
+{
+       return hns_roce_bitmap_init(&hr_dev->xrcd_bitmap,
+                                   hr_dev->caps.num_xrcds,
+                                   hr_dev->caps.num_xrcds - 1,
+                                   hr_dev->caps.reserved_xrcds, 0);
+}
+
+void hns_roce_cleanup_xrcd_table(struct hns_roce_dev *hr_dev)
+{
+       hns_roce_bitmap_cleanup(&hr_dev->xrcd_bitmap);
+}
+
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+       struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
+       int ret;
+
+       if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+               return -EINVAL;
+
+       ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+       if (ret) {
+               dev_err(hr_dev->dev, "failed to alloc xrcdn, ret = %d.\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
+{
+       hns_roce_xrcd_free(to_hr_dev(ib_xrcd->device),
+                          to_hr_xrcd(ib_xrcd)->xrcdn);
+
+       return 0;
+}
index 0d514fa..6203c17 100644 (file)
@@ -98,7 +98,9 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
        if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
            (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
             event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
-            event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR)) {
+            event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
+            event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
+            event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH)) {
                qp->state = IB_QPS_ERR;
                if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
                        init_flush_work(hr_dev, qp);
@@ -142,6 +144,8 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
                        event.event = IB_EVENT_QP_REQ_ERR;
                        break;
                case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+               case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+               case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
                        event.event = IB_EVENT_QP_ACCESS_ERR;
                        break;
                default:
@@ -366,8 +370,13 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
        unsigned long flags;
 
        list_del(&hr_qp->node);
-       list_del(&hr_qp->sq_node);
-       list_del(&hr_qp->rq_node);
+
+       if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+               list_del(&hr_qp->sq_node);
+
+       if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
+           hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+               list_del(&hr_qp->rq_node);
 
        xa_lock_irqsave(xa, flags);
        __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
@@ -1105,11 +1114,16 @@ static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
                         bool is_user)
 {
        switch (type) {
+       case IB_QPT_XRC_INI:
+       case IB_QPT_XRC_TGT:
+               if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+                       goto out;
+               break;
        case IB_QPT_UD:
                if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 &&
                    is_user)
                        goto out;
-               fallthrough;
+               break;
        case IB_QPT_RC:
        case IB_QPT_GSI:
                break;
@@ -1129,8 +1143,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                                 struct ib_qp_init_attr *init_attr,
                                 struct ib_udata *udata)
 {
-       struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
-       struct ib_device *ibdev = &hr_dev->ib_dev;
+       struct ib_device *ibdev = pd ? pd->device : init_attr->xrcd->device;
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
        struct hns_roce_qp *hr_qp;
        int ret;
 
@@ -1142,6 +1156,15 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
        if (!hr_qp)
                return ERR_PTR(-ENOMEM);
 
+       if (init_attr->qp_type == IB_QPT_XRC_INI)
+               init_attr->recv_cq = NULL;
+
+       if (init_attr->qp_type == IB_QPT_XRC_TGT) {
+               hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
+               init_attr->recv_cq = NULL;
+               init_attr->send_cq = NULL;
+       }
+
        if (init_attr->qp_type == IB_QPT_GSI) {
                hr_qp->port = init_attr->port_num - 1;
                hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
@@ -1161,20 +1184,20 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 
 int to_hr_qp_type(int qp_type)
 {
-       int transport_type;
-
-       if (qp_type == IB_QPT_RC)
-               transport_type = SERV_TYPE_RC;
-       else if (qp_type == IB_QPT_UC)
-               transport_type = SERV_TYPE_UC;
-       else if (qp_type == IB_QPT_UD)
-               transport_type = SERV_TYPE_UD;
-       else if (qp_type == IB_QPT_GSI)
-               transport_type = SERV_TYPE_UD;
-       else
-               transport_type = -1;
-
-       return transport_type;
+       switch (qp_type) {
+       case IB_QPT_RC:
+               return SERV_TYPE_RC;
+       case IB_QPT_UC:
+               return SERV_TYPE_UC;
+       case IB_QPT_UD:
+       case IB_QPT_GSI:
+               return SERV_TYPE_UD;
+       case IB_QPT_XRC_INI:
+       case IB_QPT_XRC_TGT:
+               return SERV_TYPE_XRC;
+       default:
+               return -1;
+       }
 }
 
 static int check_mtu_validate(struct hns_roce_dev *hr_dev,
index d5a6de0..f71459a 100644 (file)
@@ -314,6 +314,9 @@ static void set_srq_ext_param(struct hns_roce_srq *srq,
 {
        srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
                   to_hr_cq(init_attr->ext.cq)->cqn : 0;
+
+       srq->xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+                    to_hr_xrcd(init_attr->ext.xrc.xrcd)->xrcdn : 0;
 }
 
 static int set_srq_param(struct hns_roce_srq *srq,
index 90b739d..42b1776 100644 (file)
@@ -86,6 +86,8 @@ struct hns_roce_ib_create_qp_resp {
 struct hns_roce_ib_alloc_ucontext_resp {
        __u32   qp_tab_size;
        __u32   cqe_size;
+       __u32   srq_tab_size;
+       __u32   reserved;
 };
 
 struct hns_roce_ib_alloc_pd_resp {