Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
author    Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 2 Apr 2020 01:18:18 +0000 (18:18 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 2 Apr 2020 01:18:18 +0000 (18:18 -0700)
Pull rdma updates from Jason Gunthorpe:
 "The majority of the patches are cleanups, refactorings and clarity
  improvements.

  This cycle saw some more activity from syzkaller; I think we are now
  clean on all but one of those bugs, including the long-standing and
  obnoxious rdma_cm locking design defect. We continue to see many
  drivers getting cleanups, with a few new user-visible features.

  Summary:

   - Various driver updates for siw, bnxt_re, rxe, efa, mlx5, hfi1

   - Lots of cleanup patches for hns

   - Convert more places to use refcount

   - Aggressively lock the RDMA CM code that syzkaller says isn't
     working

   - Work to clarify ib_cm

   - Use the new ib_device lifecycle model in bnxt_re

   - Fix mlx5's MR cache which seems to be failing more often with the
     new ODP code

   - mlx5 'dynamic uar' and 'tx steering' user interfaces"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (144 commits)
  RDMA/bnxt_re: make bnxt_re_ib_init static
  IB/qib: Delete struct qib_ivdev.qp_rnd
  RDMA/hns: Fix uninitialized variable bug
  RDMA/hns: Modify the mask of QP number for CQE of hip08
  RDMA/hns: Reduce the maximum number of extend SGE per WQE
  RDMA/hns: Reduce PFC frames in congestion scenarios
  RDMA/mlx5: Add support for RDMA TX flow table
  net/mlx5: Add support for RDMA TX steering
  IB/hfi1: Call kobject_put() when kobject_init_and_add() fails
  IB/hfi1: Fix memory leaks in sysfs registration and unregistration
  IB/mlx5: Move to fully dynamic UAR mode once user space supports it
  IB/mlx5: Limit the scope of struct mlx5_bfreg_info to mlx5_ib
  IB/mlx5: Extend QP creation to get uar page index from user space
  IB/mlx5: Extend CQ creation to get uar page index from user space
  IB/mlx5: Expose UAR object and its alloc/destroy commands
  IB/hfi1: Get rid of a warning
  RDMA/hns: Remove redundant judgment of qp_type
  RDMA/hns: Remove redundant assignment of wc->smac when polling cq
  RDMA/hns: Remove redundant qpc setup operations
  RDMA/hns: Remove meaningless prints
  ...

120 files changed:
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_configfs.c
drivers/infiniband/core/cma_priv.h
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/multicast.c
drivers/infiniband/core/rw.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/bnxt_re/bnxt_re.h
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/bnxt_re/qplib_res.h
drivers/infiniband/hw/bnxt_re/qplib_sp.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
drivers/infiniband/hw/efa/efa_admin_defs.h
drivers/infiniband/hw/efa/efa_com.c
drivers/infiniband/hw/efa/efa_com_cmd.c
drivers/infiniband/hw/efa/efa_common_defs.h
drivers/infiniband/hw/efa/efa_regs_defs.h
drivers/infiniband/hw/efa/efa_verbs.c
drivers/infiniband/hw/hfi1/fault.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/mad.c
drivers/infiniband/hw/hfi1/mad.h
drivers/infiniband/hw/hfi1/pio.h
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/sdma.h
drivers/infiniband/hw/hfi1/sysfs.c
drivers/infiniband/hw/hfi1/user_exp_rcv.h
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hem.c
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_mr.c
drivers/infiniband/hw/hns/hns_roce_pd.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/hns/hns_roce_srq.c
drivers/infiniband/hw/i40iw/i40iw.h
drivers/infiniband/hw/i40iw/i40iw_cm.h
drivers/infiniband/hw/i40iw/i40iw_ctrl.c
drivers/infiniband/hw/i40iw/i40iw_d.h
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_p.h
drivers/infiniband/hw/i40iw/i40iw_status.h
drivers/infiniband/hw/i40iw/i40iw_type.h
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/cong.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/flow.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qos.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/infiniband/hw/mthca/mthca_memfree.h
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_uiom.h
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rxe/rxe.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_queue.h
drivers/infiniband/sw/siw/siw_cm.c
drivers/infiniband/sw/siw/siw_qp_rx.c
drivers/infiniband/sw/siw/siw_verbs.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/rdma/ib_cache.h
include/rdma/ib_cm.h
include/rdma/ib_fmr_pool.h
include/rdma/ib_verbs.h
include/rdma/opa_vnic.h
include/rdma/rdmavt_mr.h
include/rdma/rdmavt_qp.h
include/rdma/uverbs_ioctl.h
include/uapi/rdma/mlx5-abi.h
include/uapi/rdma/mlx5_user_ioctl_cmds.h
include/uapi/rdma/mlx5_user_ioctl_verbs.h

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 17bfedd..717b798 100644
@@ -46,7 +46,7 @@
 
 struct ib_pkey_cache {
        int             table_len;
-       u16             table[0];
+       u16             table[];
 };
 
 struct ib_update_work {
@@ -972,6 +972,23 @@ done:
 }
 EXPORT_SYMBOL(rdma_query_gid);
 
+/**
+ * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
+ * @attr:              Pointer to the GID attribute
+ *
+ * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding
+ * to the SGID attr. Callers are required to already be holding the reference
+ * to an existing GID entry.
+ *
+ * Returns the HW GID context
+ *
+ */
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
+{
+       return container_of(attr, struct ib_gid_table_entry, attr)->context;
+}
+EXPORT_SYMBOL(rdma_read_gid_hw_context);
+
 /**
  * rdma_find_gid - Returns SGID attributes if the matching GID is found.
  * @device: The device to query.
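
The new rdma_read_gid_hw_context() export gives drivers access to the per-GID
context their add_gid() callback returned through its context pointer, as long
as the caller already holds a reference on the GID entry. A minimal,
hypothetical sketch of a consumer (my_gid_ctx and my_drv_build_av are made-up
names; only rdma_read_gid_hw_context() itself, presumably declared alongside
rdma_query_gid() in include/rdma/ib_cache.h which this series also touches,
comes from the hunk above):

#include <rdma/ib_cache.h>
#include <rdma/ib_verbs.h>

/* Illustrative: whatever this driver's add_gid() callback allocated. */
struct my_gid_ctx {
	u32 hw_gid_index;
};

static int my_drv_build_av(const struct ib_gid_attr *sgid_attr, u32 *hw_index)
{
	struct my_gid_ctx *ctx;

	/* Only valid while the caller holds its reference on sgid_attr. */
	ctx = rdma_read_gid_hw_context(sgid_attr);
	if (!ctx)
		return -EINVAL;

	*hw_index = ctx->hw_gid_index;
	return 0;
}
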
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 15e99a8..4794113 100644
@@ -80,8 +80,19 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
 }
 EXPORT_SYMBOL(ibcm_reject_msg);
 
+struct cm_id_private;
 static void cm_add_one(struct ib_device *device);
 static void cm_remove_one(struct ib_device *device, void *client_data);
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+                                  struct ib_cm_sidr_rep_param *param);
+static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
+                              const void *private_data, u8 private_data_len);
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+                              void *private_data, u8 private_data_len);
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+                             enum ib_cm_rej_reason reason, void *ari,
+                             u8 ari_length, const void *private_data,
+                             u8 private_data_len);
 
 static struct ib_client cm_client = {
        .name   = "cm",
@@ -197,7 +208,7 @@ struct cm_device {
        struct ib_device *ib_device;
        u8 ack_delay;
        int going_down;
-       struct cm_port *port[0];
+       struct cm_port *port[];
 };
 
 struct cm_av {
@@ -216,7 +227,7 @@ struct cm_work {
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
-       struct sa_path_rec path[0];
+       struct sa_path_rec path[];
 };
 
 struct cm_timewait_info {
@@ -261,7 +272,6 @@ struct cm_id_private {
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
-       u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
@@ -572,18 +582,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
        return 0;
 }
 
-static int cm_alloc_id(struct cm_id_private *cm_id_priv)
-{
-       int err;
-       u32 id;
-
-       err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
-                       xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
-
-       cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
-       return err;
-}
-
 static u32 cm_local_id(__be32 local_id)
 {
        return (__force u32) (local_id ^ cm.random_id_operand);
@@ -633,22 +631,44 @@ static int be64_gt(__be64 a, __be64 b)
        return (__force u64) a > (__force u64) b;
 }
 
-static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+/*
+ * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv
+ * if the new ID was inserted, NULL if it could not be inserted due to a
+ * collision, or the existing cm_id_priv ready for shared usage.
+ */
+static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
+                                             ib_cm_handler shared_handler)
 {
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;
+       unsigned long flags;
 
+       spin_lock_irqsave(&cm.lock, flags);
        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
-                   (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+                   (cm_id_priv->id.device == cur_cm_id_priv->id.device)) {
+                       /*
+                        * Sharing an ib_cm_id with different handlers is not
+                        * supported
+                        */
+                       if (cur_cm_id_priv->id.cm_handler != shared_handler ||
+                           cur_cm_id_priv->id.context ||
+                           WARN_ON(!cur_cm_id_priv->id.cm_handler)) {
+                               spin_unlock_irqrestore(&cm.lock, flags);
+                               return NULL;
+                       }
+                       refcount_inc(&cur_cm_id_priv->refcount);
+                       cur_cm_id_priv->listen_sharecount++;
+                       spin_unlock_irqrestore(&cm.lock, flags);
                        return cur_cm_id_priv;
+               }
 
                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
@@ -661,9 +681,11 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
                else
                        link = &(*link)->rb_right;
        }
+       cm_id_priv->listen_sharecount++;
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
-       return NULL;
+       spin_unlock_irqrestore(&cm.lock, flags);
+       return cm_id_priv;
 }
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
@@ -810,21 +832,12 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
        return NULL;
 }
 
-static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
-                              enum ib_cm_sidr_status status)
-{
-       struct ib_cm_sidr_rep_param param;
-
-       memset(&param, 0, sizeof param);
-       param.status = status;
-       ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
-}
-
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
-                                ib_cm_handler cm_handler,
-                                void *context)
+static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
+                                             ib_cm_handler cm_handler,
+                                             void *context)
 {
        struct cm_id_private *cm_id_priv;
+       u32 id;
        int ret;
 
        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
@@ -836,10 +849,9 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
-       ret = cm_alloc_id(cm_id_priv);
-       if (ret)
-               goto error;
 
+       RB_CLEAR_NODE(&cm_id_priv->service_node);
+       RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
@@ -847,11 +859,42 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
        INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        refcount_set(&cm_id_priv->refcount, 1);
-       return &cm_id_priv->id;
+
+       ret = xa_alloc_cyclic_irq(&cm.local_id_table, &id, NULL, xa_limit_32b,
+                                 &cm.local_id_next, GFP_KERNEL);
+       if (ret)
+               goto error;
+       cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+
+       return cm_id_priv;
 
 error:
        kfree(cm_id_priv);
-       return ERR_PTR(-ENOMEM);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Make the ID visible to the MAD handlers and other threads that use the
+ * xarray.
+ */
+static void cm_finalize_id(struct cm_id_private *cm_id_priv)
+{
+       xa_store_irq(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+                    cm_id_priv, GFP_KERNEL);
+}
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+                                ib_cm_handler cm_handler,
+                                void *context)
+{
+       struct cm_id_private *cm_id_priv;
+
+       cm_id_priv = cm_alloc_id_priv(device, cm_handler, context);
+       if (IS_ERR(cm_id_priv))
+               return ERR_CAST(cm_id_priv);
+
+       cm_finalize_id(cm_id_priv);
+       return &cm_id_priv->id;
 }
 EXPORT_SYMBOL(ib_create_cm_id);
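
The split of ib_create_cm_id() into cm_alloc_id_priv() plus cm_finalize_id()
is the common xarray "reserve now, publish later" idiom: the ID is allocated
with a NULL entry so concurrent lookups cannot observe a half-initialized
cm_id_priv, and the pointer is only stored once setup is complete. A generic,
hypothetical sketch of the same idiom (my_table, my_obj and friends are
illustrative names, not part of the patch):

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(my_table);
static u32 my_next;

struct my_obj {
	u32 id;
	/* ... fields that must be set before lookups may see the object ... */
};

static struct my_obj *my_obj_create(void)
{
	struct my_obj *obj;
	int ret;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return ERR_PTR(-ENOMEM);

	/* Reserve a cyclic ID but leave the slot NULL for now. */
	ret = xa_alloc_cyclic_irq(&my_table, &obj->id, NULL, xa_limit_32b,
				  &my_next, GFP_KERNEL);
	if (ret < 0) {
		kfree(obj);
		return ERR_PTR(ret);
	}
	return obj;
}

static void my_obj_publish(struct my_obj *obj)
{
	/* From here on xa_load(&my_table, obj->id) returns the object. */
	xa_store_irq(&my_table, obj->id, obj, GFP_KERNEL);
}

cm_req_handler() below relies on exactly this: it fills in the fields from the
incoming REQ first and only then calls cm_finalize_id(), while SIDR and listen
IDs never enter the xarray at all, as the comments in those paths note.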
 
@@ -932,6 +975,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
        unsigned long flags;
        struct cm_device *cm_dev;
 
+       lockdep_assert_held(&cm_id_priv->lock);
+
        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;
@@ -963,6 +1008,8 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
 {
        unsigned long flags;
 
+       lockdep_assert_held(&cm_id_priv->lock);
+
        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
@@ -979,54 +1026,51 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
        struct cm_work *work;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
        spin_lock_irq(&cm_id_priv->lock);
+retest:
        switch (cm_id->state) {
        case IB_CM_LISTEN:
-               spin_unlock_irq(&cm_id_priv->lock);
-
-               spin_lock_irq(&cm.lock);
+               spin_lock(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
+                       WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+                       spin_unlock(&cm.lock);
+                       spin_unlock_irq(&cm_id_priv->lock);
                        cm_deref_id(cm_id_priv);
-                       spin_unlock_irq(&cm.lock);
                        return;
                }
+               cm_id->state = IB_CM_IDLE;
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
-               spin_unlock_irq(&cm.lock);
+               RB_CLEAR_NODE(&cm_id_priv->service_node);
+               spin_unlock(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-               spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
-               spin_unlock_irq(&cm_id_priv->lock);
-               cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
-               spin_lock_irq(&cm.lock);
-               if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
-                       rb_erase(&cm_id_priv->sidr_id_node,
-                                &cm.remote_sidr_table);
-               spin_unlock_irq(&cm.lock);
+               cm_send_sidr_rep_locked(cm_id_priv,
+                                       &(struct ib_cm_sidr_rep_param){
+                                               .status = IB_SIDR_REJECT });
+               /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+               cm_id->state = IB_CM_IDLE;
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-               spin_unlock_irq(&cm_id_priv->lock);
-               ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
-                              &cm_id_priv->id.device->node_guid,
-                              sizeof cm_id_priv->id.device->node_guid,
-                              NULL, 0);
+               cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
+                                  &cm_id_priv->id.device->node_guid,
+                                  sizeof(cm_id_priv->id.device->node_guid),
+                                  NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
-                       spin_unlock_irq(&cm_id_priv->lock);
                } else {
-                       spin_unlock_irq(&cm_id_priv->lock);
-                       ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
-                                      NULL, 0, NULL, 0);
+                       cm_send_rej_locked(cm_id_priv,
+                                          IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+                                          NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
@@ -1036,38 +1080,56 @@ retest:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
-               spin_unlock_irq(&cm_id_priv->lock);
-               ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
-                              NULL, 0, NULL, 0);
+               cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+                                  0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
-               spin_unlock_irq(&cm_id_priv->lock);
-               if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+               if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
+                       cm_id->state = IB_CM_IDLE;
                        break;
-               ib_send_cm_dreq(cm_id, NULL, 0);
+               }
+               cm_send_dreq_locked(cm_id_priv, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
-               spin_unlock_irq(&cm_id_priv->lock);
-               break;
+               goto retest;
        case IB_CM_DREQ_RCVD:
-               spin_unlock_irq(&cm_id_priv->lock);
-               ib_send_cm_drep(cm_id, NULL, 0);
+               cm_send_drep_locked(cm_id_priv, NULL, 0);
+               WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+               goto retest;
+       case IB_CM_TIMEWAIT:
+               /*
+                * The cm_acquire_id in cm_timewait_handler will stop working
+                * once we do cm_free_id() below, so just move to idle here for
+                * consistency.
+                */
+               cm_id->state = IB_CM_IDLE;
                break;
-       default:
-               spin_unlock_irq(&cm_id_priv->lock);
+       case IB_CM_IDLE:
                break;
        }
+       WARN_ON(cm_id->state != IB_CM_IDLE);
 
-       spin_lock_irq(&cm.lock);
+       spin_lock(&cm.lock);
+       /* Required for cleanup paths related cm_req_handler() */
+       if (cm_id_priv->timewait_info) {
+               cm_cleanup_timewait(cm_id_priv->timewait_info);
+               kfree(cm_id_priv->timewait_info);
+               cm_id_priv->timewait_info = NULL;
+       }
        if (!list_empty(&cm_id_priv->altr_list) &&
            (!cm_id_priv->altr_send_port_not_ready))
                list_del(&cm_id_priv->altr_list);
        if (!list_empty(&cm_id_priv->prim_list) &&
            (!cm_id_priv->prim_send_port_not_ready))
                list_del(&cm_id_priv->prim_list);
-       spin_unlock_irq(&cm.lock);
+       WARN_ON(cm_id_priv->listen_sharecount);
+       WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
+       if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
+               rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+       spin_unlock(&cm.lock);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
@@ -1087,8 +1149,27 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
 }
 EXPORT_SYMBOL(ib_destroy_cm_id);
 
+static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id,
+                         __be64 service_mask)
+{
+       service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
+       service_id &= service_mask;
+       if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+           (service_id != IB_CM_ASSIGN_SERVICE_ID))
+               return -EINVAL;
+
+       if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
+               cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
+               cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+       } else {
+               cm_id_priv->id.service_id = service_id;
+               cm_id_priv->id.service_mask = service_mask;
+       }
+       return 0;
+}
+
 /**
- * __ib_cm_listen - Initiates listening on the specified service ID for
+ * ib_cm_listen - Initiates listening on the specified service ID for
  *   connection and service ID resolution requests.
  * @cm_id: Connection identifier associated with the listen request.
  * @service_id: Service identifier matched against incoming connection
@@ -1100,51 +1181,33 @@ EXPORT_SYMBOL(ib_destroy_cm_id);
  *   exactly.  This parameter is ignored if %service_id is set to
  *   IB_CM_ASSIGN_SERVICE_ID.
  */
-static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
-                         __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
 {
-       struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
-       int ret = 0;
-
-       service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
-       service_id &= service_mask;
-       if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
-           (service_id != IB_CM_ASSIGN_SERVICE_ID))
-               return -EINVAL;
-
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       if (cm_id->state != IB_CM_IDLE)
-               return -EINVAL;
-
-       cm_id->state = IB_CM_LISTEN;
-       ++cm_id_priv->listen_sharecount;
+       struct cm_id_private *cm_id_priv =
+               container_of(cm_id, struct cm_id_private, id);
+       unsigned long flags;
+       int ret;
 
-       if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
-               cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
-               cm_id->service_mask = ~cpu_to_be64(0);
-       } else {
-               cm_id->service_id = service_id;
-               cm_id->service_mask = service_mask;
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->id.state != IB_CM_IDLE) {
+               ret = -EINVAL;
+               goto out;
        }
-       cur_cm_id_priv = cm_insert_listen(cm_id_priv);
 
-       if (cur_cm_id_priv) {
-               cm_id->state = IB_CM_IDLE;
-               --cm_id_priv->listen_sharecount;
+       ret = cm_init_listen(cm_id_priv, service_id, service_mask);
+       if (ret)
+               goto out;
+
+       if (!cm_insert_listen(cm_id_priv, NULL)) {
                ret = -EBUSY;
+               goto out;
        }
-       return ret;
-}
 
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
-{
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&cm.lock, flags);
-       ret = __ib_cm_listen(cm_id, service_id, service_mask);
-       spin_unlock_irqrestore(&cm.lock, flags);
+       cm_id_priv->id.state = IB_CM_LISTEN;
+       ret = 0;
 
+out:
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
 }
 EXPORT_SYMBOL(ib_cm_listen);
@@ -1169,51 +1232,38 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
 {
+       struct cm_id_private *listen_id_priv;
        struct cm_id_private *cm_id_priv;
-       struct ib_cm_id *cm_id;
-       unsigned long flags;
        int err = 0;
 
        /* Create an ID in advance, since the creation may sleep */
-       cm_id = ib_create_cm_id(device, cm_handler, NULL);
-       if (IS_ERR(cm_id))
-               return cm_id;
+       cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL);
+       if (IS_ERR(cm_id_priv))
+               return ERR_CAST(cm_id_priv);
 
-       spin_lock_irqsave(&cm.lock, flags);
-
-       if (service_id == IB_CM_ASSIGN_SERVICE_ID)
-               goto new_id;
+       err = cm_init_listen(cm_id_priv, service_id, 0);
+       if (err)
+               return ERR_PTR(err);
 
-       /* Find an existing ID */
-       cm_id_priv = cm_find_listen(device, service_id);
-       if (cm_id_priv) {
-               if (cm_id->cm_handler != cm_handler || cm_id->context) {
-                       /* Sharing an ib_cm_id with different handlers is not
-                        * supported */
-                       spin_unlock_irqrestore(&cm.lock, flags);
-                       ib_destroy_cm_id(cm_id);
+       spin_lock_irq(&cm_id_priv->lock);
+       listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
+       if (listen_id_priv != cm_id_priv) {
+               spin_unlock_irq(&cm_id_priv->lock);
+               ib_destroy_cm_id(&cm_id_priv->id);
+               if (!listen_id_priv)
                        return ERR_PTR(-EINVAL);
-               }
-               refcount_inc(&cm_id_priv->refcount);
-               ++cm_id_priv->listen_sharecount;
-               spin_unlock_irqrestore(&cm.lock, flags);
-
-               ib_destroy_cm_id(cm_id);
-               cm_id = &cm_id_priv->id;
-               return cm_id;
+               return &listen_id_priv->id;
        }
+       cm_id_priv->id.state = IB_CM_LISTEN;
+       spin_unlock_irq(&cm_id_priv->lock);
 
-new_id:
-       /* Use newly created ID */
-       err = __ib_cm_listen(cm_id, service_id, 0);
-
-       spin_unlock_irqrestore(&cm.lock, flags);
+       /*
+        * A listen ID does not need to be in the xarray since it does not
+        * receive mads, is not placed in the remote_id or remote_qpn rbtree,
+        * and does not enter timewait.
+        */
 
-       if (err) {
-               ib_destroy_cm_id(cm_id);
-               return ERR_PTR(err);
-       }
-       return cm_id;
+       return &cm_id_priv->id;
 }
 EXPORT_SYMBOL(ib_cm_insert_listen);
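
With cm_insert_listen() now taking the shared handler and doing its own
locking, the sharing contract is: a second listen on the same device and
service ID succeeds only if it passes the same handler and the existing ID has
a NULL context; otherwise the caller gets -EINVAL. A hypothetical ULP-side
sketch (my_cm_handler and my_listen are invented; the handler prototype is the
one declared in include/rdma/ib_cm.h):

#include <rdma/ib_cm.h>

static int my_cm_handler(struct ib_cm_id *cm_id,
			 const struct ib_cm_event *event)
{
	/* Runs for ids spawned from this listen; non-zero destroys that id. */
	return 0;
}

static struct ib_cm_id *my_listen(struct ib_device *device, __be64 service_id)
{
	struct ib_cm_id *id;

	/*
	 * Returns a fresh listen id, or the existing shared one with its
	 * refcount and listen_sharecount raised; ERR_PTR(-EINVAL) if an
	 * existing listener uses a different handler or a private context.
	 */
	id = ib_cm_insert_listen(device, my_cm_handler, service_id);
	if (IS_ERR(id))
		return id;

	/* Every successful call must later be balanced by ib_destroy_cm_id(). */
	return id;
}

rdma_cm is the in-tree user of this interface; each successful call is paired
with ib_destroy_cm_id(), which drops listen_sharecount before tearing the ID
down (see cm_destroy_id() above).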
 
@@ -1381,10 +1431,6 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 
 static int cm_validate_req_param(struct ib_cm_req_param *param)
 {
-       /* peer-to-peer not supported */
-       if (param->peer_to_peer)
-               return -EINVAL;
-
        if (!param->primary_path)
                return -EINVAL;
 
@@ -1419,7 +1465,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
        /* Verify that we're not in timewait. */
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id->state != IB_CM_IDLE) {
+       if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = -EINVAL;
                goto out;
@@ -1437,12 +1483,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
                                 param->ppath_sgid_attr, &cm_id_priv->av,
                                 cm_id_priv);
        if (ret)
-               goto error1;
+               goto out;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path, NULL,
                                         &cm_id_priv->alt_av, cm_id_priv);
                if (ret)
-                       goto error1;
+                       goto out;
        }
        cm_id->service_id = param->service_id;
        cm_id->service_mask = ~cpu_to_be64(0);
@@ -1460,7 +1506,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 
        ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
        if (ret)
-               goto error1;
+               goto out;
 
        req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
        cm_format_req(req_msg, cm_id_priv, param);
@@ -1483,7 +1529,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
        return 0;
 
 error2:        cm_free_msg(cm_id_priv->msg);
-error1:        kfree(cm_id_priv->timewait_info);
 out:   return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_req);
@@ -1789,6 +1834,8 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg,
                          const void *private_data,
                          u8 private_data_len)
 {
+       lockdep_assert_held(&cm_id_priv->lock);
+
        cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
        IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
                be32_to_cpu(cm_id_priv->id.remote_id));
@@ -1838,8 +1885,12 @@ static void cm_dup_req_handler(struct cm_work *work,
                        counter[CM_REQ_COUNTER]);
 
        /* Quick state check to discard duplicate REQs. */
-       if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+       spin_lock_irq(&cm_id_priv->lock);
+       if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
+               spin_unlock_irq(&cm_id_priv->lock);
                return;
+       }
+       spin_unlock_irq(&cm_id_priv->lock);
 
        ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
        if (ret)
@@ -1924,14 +1975,10 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
                cm_issue_rej(work->port, work->mad_recv_wc,
                             IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
                             NULL, 0);
-               goto out;
+               return NULL;
        }
        refcount_inc(&listen_cm_id_priv->refcount);
-       refcount_inc(&cm_id_priv->refcount);
-       cm_id_priv->id.state = IB_CM_REQ_RCVD;
-       atomic_inc(&cm_id_priv->work_count);
        spin_unlock_irq(&cm.lock);
-out:
        return listen_cm_id_priv;
 }
 
@@ -1973,7 +2020,6 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
 
 static int cm_req_handler(struct cm_work *work)
 {
-       struct ib_cm_id *cm_id;
        struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
        struct cm_req_msg *req_msg;
        const struct ib_global_route *grh;
@@ -1982,13 +2028,33 @@ static int cm_req_handler(struct cm_work *work)
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
-       cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
-       if (IS_ERR(cm_id))
-               return PTR_ERR(cm_id);
+       cm_id_priv =
+               cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+       if (IS_ERR(cm_id_priv))
+               return PTR_ERR(cm_id_priv);
 
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        cm_id_priv->id.remote_id =
                cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+       cm_id_priv->id.service_id =
+               cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+       cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+       cm_id_priv->tid = req_msg->hdr.tid;
+       cm_id_priv->timeout_ms = cm_convert_to_ms(
+               IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+       cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+       cm_id_priv->remote_qpn =
+               cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+       cm_id_priv->initiator_depth =
+               IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+       cm_id_priv->responder_resources =
+               IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+       cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+       cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+       cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+       cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+       cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+       cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+
        ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
                                      work->mad_recv_wc->recv_buf.grh,
                                      &cm_id_priv->av);
@@ -2000,27 +2066,26 @@ static int cm_req_handler(struct cm_work *work)
                ret = PTR_ERR(cm_id_priv->timewait_info);
                goto destroy;
        }
-       cm_id_priv->timewait_info->work.remote_id =
-               cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+       cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
        cm_id_priv->timewait_info->remote_ca_guid =
                cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
-       cm_id_priv->timewait_info->remote_qpn =
-               cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+       cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;
+
+       /*
+        * Note that the ID pointer is not in the xarray at this point,
+        * so this set is only visible to the local thread.
+        */
+       cm_id_priv->id.state = IB_CM_REQ_RCVD;
 
        listen_cm_id_priv = cm_match_req(work, cm_id_priv);
        if (!listen_cm_id_priv) {
                pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
-                        be32_to_cpu(cm_id->local_id));
+                        be32_to_cpu(cm_id_priv->id.local_id));
+               cm_id_priv->id.state = IB_CM_IDLE;
                ret = -EINVAL;
-               goto free_timeinfo;
+               goto destroy;
        }
 
-       cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
-       cm_id_priv->id.context = listen_cm_id_priv->id.context;
-       cm_id_priv->id.service_id =
-               cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
-       cm_id_priv->id.service_mask = ~cpu_to_be64(0);
-
        cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
 
        memset(&work->path[0], 0, sizeof(work->path[0]));
@@ -2058,10 +2123,10 @@ static int cm_req_handler(struct cm_work *work)
                                     work->port->port_num, 0,
                                     &work->path[0].sgid);
                if (err)
-                       ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+                       ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
                                       NULL, 0, NULL, 0);
                else
-                       ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+                       ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
                                       &work->path[0].sgid,
                                       sizeof(work->path[0].sgid),
                                       NULL, 0);
@@ -2071,41 +2136,40 @@ static int cm_req_handler(struct cm_work *work)
                ret = cm_init_av_by_path(&work->path[1], NULL,
                                         &cm_id_priv->alt_av, cm_id_priv);
                if (ret) {
-                       ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+                       ib_send_cm_rej(&cm_id_priv->id,
+                                      IB_CM_REJ_INVALID_ALT_GID,
                                       &work->path[0].sgid,
                                       sizeof(work->path[0].sgid), NULL, 0);
                        goto rejected;
                }
        }
-       cm_id_priv->tid = req_msg->hdr.tid;
-       cm_id_priv->timeout_ms = cm_convert_to_ms(
-               IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
-       cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
-       cm_id_priv->remote_qpn =
-               cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
-       cm_id_priv->initiator_depth =
-               IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
-       cm_id_priv->responder_resources =
-               IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
-       cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
-       cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
-       cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
-       cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
-       cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
-       cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
 
+       cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+       cm_id_priv->id.context = listen_cm_id_priv->id.context;
        cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+
+       /* Now MAD handlers can see the new ID */
+       spin_lock_irq(&cm_id_priv->lock);
+       cm_finalize_id(cm_id_priv);
+
+       /* Refcount belongs to the event, pairs with cm_process_work() */
+       refcount_inc(&cm_id_priv->refcount);
+       atomic_inc(&cm_id_priv->work_count);
+       spin_unlock_irq(&cm_id_priv->lock);
        cm_process_work(cm_id_priv, work);
+       /*
+        * Since this ID was just created and was not made visible to other MAD
+        * handlers until the cm_finalize_id() above we know that the
+        * cm_process_work() will deliver the event and the listen_cm_id
+        * embedded in the event can be derefed here.
+        */
        cm_deref_id(listen_cm_id_priv);
        return 0;
 
 rejected:
-       refcount_dec(&cm_id_priv->refcount);
        cm_deref_id(listen_cm_id_priv);
-free_timeinfo:
-       kfree(cm_id_priv->timewait_info);
 destroy:
-       ib_destroy_cm_id(cm_id);
+       ib_destroy_cm_id(&cm_id_priv->id);
        return ret;
 }
 
@@ -2189,6 +2253,9 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
        cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+       WARN_ONCE(param->qp_num & 0xFF000000,
+                 "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
+                 param->qp_num);
        cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
 
 out:   spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2359,13 +2426,13 @@ static int cm_rep_handler(struct cm_work *work)
        case IB_CM_MRA_REQ_RCVD:
                break;
        default:
-               spin_unlock_irq(&cm_id_priv->lock);
                ret = -EINVAL;
                pr_debug(
                        "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
                        __func__, cm_id_priv->id.state,
                        IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
                        IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+               spin_unlock_irq(&cm_id_priv->lock);
                goto error;
        }
 
@@ -2434,8 +2501,6 @@ static int cm_rep_handler(struct cm_work *work)
                        cm_ack_timeout(cm_id_priv->target_ack_delay,
                                       cm_id_priv->alt_av.timeout - 1);
 
-       /* todo: handle peer_to_peer */
-
        ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
@@ -2546,35 +2611,32 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
                            private_data_len);
 }
 
-int ib_send_cm_dreq(struct ib_cm_id *cm_id,
-                   const void *private_data,
-                   u8 private_data_len)
+static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
+                              const void *private_data, u8 private_data_len)
 {
-       struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
-       unsigned long flags;
        int ret;
 
+       lockdep_assert_held(&cm_id_priv->lock);
+
        if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
                return -EINVAL;
 
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id->state != IB_CM_ESTABLISHED) {
+       if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
                pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-                        be32_to_cpu(cm_id->local_id), cm_id->state);
-               ret = -EINVAL;
-               goto out;
+                        be32_to_cpu(cm_id_priv->id.local_id),
+                        cm_id_priv->id.state);
+               return -EINVAL;
        }
 
-       if (cm_id->lap_state == IB_CM_LAP_SENT ||
-           cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
+       if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+           cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 
        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret) {
                cm_enter_timewait(cm_id_priv);
-               goto out;
+               return ret;
        }
 
        cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
@@ -2585,14 +2647,26 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_enter_timewait(cm_id_priv);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                return ret;
        }
 
-       cm_id->state = IB_CM_DREQ_SENT;
+       cm_id_priv->id.state = IB_CM_DREQ_SENT;
        cm_id_priv->msg = msg;
-out:   spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return 0;
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
+                   u8 private_data_len)
+{
+       struct cm_id_private *cm_id_priv =
+               container_of(cm_id, struct cm_id_private, id);
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len);
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_dreq);
@@ -2613,51 +2687,60 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
                            private_data_len);
 }
 
-int ib_send_cm_drep(struct ib_cm_id *cm_id,
-                   const void *private_data,
-                   u8 private_data_len)
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+                              void *private_data, u8 private_data_len)
 {
-       struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
-       unsigned long flags;
-       void *data;
        int ret;
 
+       lockdep_assert_held(&cm_id_priv->lock);
+
        if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
                return -EINVAL;
 
-       data = cm_copy_private_data(private_data, private_data_len);
-       if (IS_ERR(data))
-               return PTR_ERR(data);
-
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id->state != IB_CM_DREQ_RCVD) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-               kfree(data);
-               pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
-                        __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
+       if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+               pr_debug(
+                       "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
+                       __func__, be32_to_cpu(cm_id_priv->id.local_id),
+                       cm_id_priv->id.state);
+               kfree(private_data);
                return -EINVAL;
        }
 
-       cm_set_private_data(cm_id_priv, data, private_data_len);
+       cm_set_private_data(cm_id_priv, private_data, private_data_len);
        cm_enter_timewait(cm_id_priv);
 
        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
-               goto out;
+               return ret;
 
        cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
                       private_data, private_data_len);
 
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                return ret;
        }
+       return 0;
+}
 
-out:   spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
+                   u8 private_data_len)
+{
+       struct cm_id_private *cm_id_priv =
+               container_of(cm_id, struct cm_id_private, id);
+       unsigned long flags;
+       void *data;
+       int ret;
+
+       data = cm_copy_private_data(private_data, private_data_len);
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       ret = cm_send_drep_locked(cm_id_priv, data, private_data_len);
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_drep);
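
Taken together, ib_send_cm_dreq() and ib_send_cm_drep() implement the two
halves of the disconnect handshake; the locked helpers above only move the
state checks under cm_id_priv->lock, the calling convention is unchanged. A
hedged sketch of how a consumer drives it (my_disconnect and my_event_handler
are invented; the entry points and event constant are the existing ib_cm ones):

#include <rdma/ib_cm.h>

/* Active side: request disconnect; the id moves to IB_CM_DREQ_SENT. */
static int my_disconnect(struct ib_cm_id *cm_id)
{
	return ib_send_cm_dreq(cm_id, NULL, 0);
}

/* Passive side: answer the DREQ from the event handler; the id then
 * enters timewait (cm_send_drep_locked() calls cm_enter_timewait()). */
static int my_event_handler(struct ib_cm_id *cm_id,
			    const struct ib_cm_event *event)
{
	switch (event->event) {
	case IB_CM_DREQ_RECEIVED:
		/* 0 keeps the id; a non-zero return asks the core to destroy it. */
		return ib_send_cm_drep(cm_id, NULL, 0);
	default:
		return 0;
	}
}
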
@@ -2816,65 +2899,72 @@ out:
        return -EINVAL;
 }
 
-int ib_send_cm_rej(struct ib_cm_id *cm_id,
-                  enum ib_cm_rej_reason reason,
-                  void *ari,
-                  u8 ari_length,
-                  const void *private_data,
-                  u8 private_data_len)
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+                             enum ib_cm_rej_reason reason, void *ari,
+                             u8 ari_length, const void *private_data,
+                             u8 private_data_len)
 {
-       struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
-       unsigned long flags;
        int ret;
 
+       lockdep_assert_held(&cm_id_priv->lock);
+
        if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
            (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
                return -EINVAL;
 
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
-       switch (cm_id->state) {
+       switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
-               ret = cm_alloc_msg(cm_id_priv, &msg);
-               if (!ret)
-                       cm_format_rej((struct cm_rej_msg *) msg->mad,
-                                     cm_id_priv, reason, ari, ari_length,
-                                     private_data, private_data_len);
-
                cm_reset_to_idle(cm_id_priv);
+               ret = cm_alloc_msg(cm_id_priv, &msg);
+               if (ret)
+                       return ret;
+               cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+                             ari, ari_length, private_data, private_data_len);
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
-               ret = cm_alloc_msg(cm_id_priv, &msg);
-               if (!ret)
-                       cm_format_rej((struct cm_rej_msg *) msg->mad,
-                                     cm_id_priv, reason, ari, ari_length,
-                                     private_data, private_data_len);
-
                cm_enter_timewait(cm_id_priv);
+               ret = cm_alloc_msg(cm_id_priv, &msg);
+               if (ret)
+                       return ret;
+               cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+                             ari, ari_length, private_data, private_data_len);
                break;
        default:
                pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-                        be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
-               ret = -EINVAL;
-               goto out;
+                        be32_to_cpu(cm_id_priv->id.local_id),
+                        cm_id_priv->id.state);
+               return -EINVAL;
        }
 
-       if (ret)
-               goto out;
-
        ret = ib_post_send_mad(msg, NULL);
-       if (ret)
+       if (ret) {
                cm_free_msg(msg);
+               return ret;
+       }
 
-out:   spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return 0;
+}
+
+int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
+                  void *ari, u8 ari_length, const void *private_data,
+                  u8 private_data_len)
+{
+       struct cm_id_private *cm_id_priv =
+               container_of(cm_id, struct cm_id_private, id);
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length,
+                                private_data, private_data_len);
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_rej);
@@ -2972,10 +3062,10 @@ static int cm_rej_handler(struct cm_work *work)
                }
                /* fall through */
        default:
-               spin_unlock_irq(&cm_id_priv->lock);
                pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
+               spin_unlock_irq(&cm_id_priv->lock);
                ret = -EINVAL;
                goto out;
        }
@@ -3502,20 +3592,27 @@ static void cm_format_sidr_req_event(struct cm_work *work,
 
 static int cm_sidr_req_handler(struct cm_work *work)
 {
-       struct ib_cm_id *cm_id;
-       struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+       struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
        struct cm_sidr_req_msg *sidr_req_msg;
        struct ib_wc *wc;
        int ret;
 
-       cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
-       if (IS_ERR(cm_id))
-               return PTR_ERR(cm_id);
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+       cm_id_priv =
+               cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+       if (IS_ERR(cm_id_priv))
+               return PTR_ERR(cm_id_priv);
 
        /* Record SGID/SLID and request ID for lookup. */
        sidr_req_msg = (struct cm_sidr_req_msg *)
                                work->mad_recv_wc->recv_buf.mad;
+
+       cm_id_priv->id.remote_id =
+               cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
+       cm_id_priv->id.service_id =
+               cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
+       cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+       cm_id_priv->tid = sidr_req_msg->hdr.tid;
+
        wc = work->mad_recv_wc->wc;
        cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
        cm_id_priv->av.dgid.global.interface_id = 0;
@@ -3525,41 +3622,46 @@ static int cm_sidr_req_handler(struct cm_work *work)
        if (ret)
                goto out;
 
-       cm_id_priv->id.remote_id =
-               cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
-       cm_id_priv->tid = sidr_req_msg->hdr.tid;
-       atomic_inc(&cm_id_priv->work_count);
-
        spin_lock_irq(&cm.lock);
-       cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
-       if (cur_cm_id_priv) {
+       listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+       if (listen_cm_id_priv) {
                spin_unlock_irq(&cm.lock);
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_SIDR_REQ_COUNTER]);
                goto out; /* Duplicate message. */
        }
        cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
-       cur_cm_id_priv = cm_find_listen(
-               cm_id->device,
-               cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
-       if (!cur_cm_id_priv) {
+       listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+                                          cm_id_priv->id.service_id);
+       if (!listen_cm_id_priv) {
                spin_unlock_irq(&cm.lock);
-               cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
+               ib_send_cm_sidr_rep(&cm_id_priv->id,
+                                   &(struct ib_cm_sidr_rep_param){
+                                           .status = IB_SIDR_UNSUPPORTED });
                goto out; /* No match. */
        }
-       refcount_inc(&cur_cm_id_priv->refcount);
-       refcount_inc(&cm_id_priv->refcount);
+       refcount_inc(&listen_cm_id_priv->refcount);
        spin_unlock_irq(&cm.lock);
 
-       cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
-       cm_id_priv->id.context = cur_cm_id_priv->id.context;
-       cm_id_priv->id.service_id =
-               cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
-       cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+       cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+       cm_id_priv->id.context = listen_cm_id_priv->id.context;
 
-       cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
-       cm_process_work(cm_id_priv, work);
-       cm_deref_id(cur_cm_id_priv);
+       /*
+        * A SIDR ID does not need to be in the xarray since it does not receive
+        * mads, is not placed in the remote_id or remote_qpn rbtree, and does
+        * not enter timewait.
+        */
+
+       cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+       ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+       cm_free_work(work);
+       /*
+        * A pointer to the listen_cm_id is held in the event, so this deref
+        * must be after the event is delivered above.
+        */
+       cm_deref_id(listen_cm_id_priv);
+       if (ret)
+               cm_destroy_id(&cm_id_priv->id, ret);
        return 0;
 out:
        ib_destroy_cm_id(&cm_id_priv->id);
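
The unsupported-service rejection above passes its reply parameters as a C99 compound literal instead of a named local. A minimal sketch of the same idiom using the existing ib_send_cm_sidr_rep() API (the reject_unsupported() wrapper name is illustrative, not part of the patch):

static int reject_unsupported(struct ib_cm_id *id)
{
        /* Unnamed members of the compound literal are zero-initialized,
         * so only .status needs to be set for a plain rejection.
         */
        return ib_send_cm_sidr_rep(id, &(struct ib_cm_sidr_rep_param){
                                           .status = IB_SIDR_UNSUPPORTED });
}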
@@ -3589,50 +3691,52 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
                            param->private_data, param->private_data_len);
 }
 
-int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
-                       struct ib_cm_sidr_rep_param *param)
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+                                  struct ib_cm_sidr_rep_param *param)
 {
-       struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
-       unsigned long flags;
        int ret;
 
+       lockdep_assert_held(&cm_id_priv->lock);
+
        if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
            (param->private_data &&
             param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
                return -EINVAL;
 
-       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
-               ret = -EINVAL;
-               goto error;
-       }
+       if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
+               return -EINVAL;
 
        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
-               goto error;
+               return ret;
 
        cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
                           param);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                return ret;
        }
-       cm_id->state = IB_CM_IDLE;
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
-       spin_lock_irqsave(&cm.lock, flags);
+       cm_id_priv->id.state = IB_CM_IDLE;
        if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
                rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
                RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
        }
-       spin_unlock_irqrestore(&cm.lock, flags);
        return 0;
+}
 
-error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+                       struct ib_cm_sidr_rep_param *param)
+{
+       struct cm_id_private *cm_id_priv =
+               container_of(cm_id, struct cm_id_private, id);
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       ret = cm_send_sidr_rep_locked(cm_id_priv, param);
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
 }
 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
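
The split above follows a common kernel shape: a *_locked() helper that asserts the lock with lockdep and does the work, plus a thin exported wrapper that only takes and drops the lock. A generic sketch of that pattern (struct foo and the function names are made up, not from the patch):

struct foo {
        spinlock_t lock;
        int state;
};

static int foo_do_thing_locked(struct foo *f)
{
        lockdep_assert_held(&f->lock);  /* catch callers that forget the lock */

        if (f->state != 1)
                return -EINVAL;
        f->state = 2;
        return 0;
}

int foo_do_thing(struct foo *f)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&f->lock, flags);
        ret = foo_do_thing_locked(f);
        spin_unlock_irqrestore(&f->lock, flags);
        return ret;
}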
index 2dec3a0..26e6f7d 100644 (file)
@@ -199,7 +199,7 @@ struct cma_device {
        struct list_head        list;
        struct ib_device        *device;
        struct completion       comp;
-       atomic_t                refcount;
+       refcount_t refcount;
        struct list_head        id_list;
        enum ib_gid_type        *default_gid_type;
        u8                      *default_roce_tos;
@@ -247,9 +247,15 @@ enum {
        CMA_OPTION_AFONLY,
 };
 
-void cma_ref_dev(struct cma_device *cma_dev)
+void cma_dev_get(struct cma_device *cma_dev)
 {
-       atomic_inc(&cma_dev->refcount);
+       refcount_inc(&cma_dev->refcount);
+}
+
+void cma_dev_put(struct cma_device *cma_dev)
+{
+       if (refcount_dec_and_test(&cma_dev->refcount))
+               complete(&cma_dev->comp);
 }
 
 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
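
cma_dev_get()/cma_dev_put() pair a refcount_t with a completion so the final put wakes whoever is tearing the device down. A self-contained sketch of that lifetime pattern, assuming the usual <linux/refcount.h> and <linux/completion.h> (struct obj is an illustrative stand-in for cma_device):

struct obj {
        refcount_t refcount;
        struct completion comp;
};

static void obj_init(struct obj *o)
{
        refcount_set(&o->refcount, 1);          /* initial reference */
        init_completion(&o->comp);
}

static void obj_get(struct obj *o)
{
        refcount_inc(&o->refcount);
}

static void obj_put(struct obj *o)
{
        if (refcount_dec_and_test(&o->refcount))
                complete(&o->comp);             /* wake the remover */
}

static void obj_remove(struct obj *o)
{
        obj_put(o);                             /* drop the initial reference */
        wait_for_completion(&o->comp);          /* wait until all users are gone */
}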
@@ -267,7 +273,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter      filter,
                }
 
        if (found_cma_dev)
-               cma_ref_dev(found_cma_dev);
+               cma_dev_get(found_cma_dev);
        mutex_unlock(&lock);
        return found_cma_dev;
 }
@@ -463,7 +469,7 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
                               struct cma_device *cma_dev)
 {
-       cma_ref_dev(cma_dev);
+       cma_dev_get(cma_dev);
        id_priv->cma_dev = cma_dev;
        id_priv->id.device = cma_dev->device;
        id_priv->id.route.addr.dev_addr.transport =
@@ -484,12 +490,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
                                          rdma_start_port(cma_dev->device)];
 }
 
-void cma_deref_dev(struct cma_device *cma_dev)
-{
-       if (atomic_dec_and_test(&cma_dev->refcount))
-               complete(&cma_dev->comp);
-}
-
 static inline void release_mc(struct kref *kref)
 {
        struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
@@ -502,7 +502,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
 {
        mutex_lock(&lock);
        list_del(&id_priv->list);
-       cma_deref_dev(id_priv->cma_dev);
+       cma_dev_put(id_priv->cma_dev);
        id_priv->cma_dev = NULL;
        mutex_unlock(&lock);
 }
@@ -728,8 +728,8 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
        struct cma_device *cma_dev;
        enum ib_gid_type gid_type;
        int ret = -ENODEV;
+       unsigned int port;
        union ib_gid gid;
-       u8 port;
 
        if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
            id_priv->id.ps == RDMA_PS_IPOIB)
@@ -753,7 +753,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
        }
 
        list_for_each_entry(cma_dev, &dev_list, list) {
-               for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+               rdma_for_each_port (cma_dev->device, port) {
                        if (listen_id_priv->cma_dev == cma_dev &&
                            listen_id_priv->id.port_num == port)
                                continue;
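
Here and in the hunks below, the open-coded 1..phys_port_cnt walk becomes the rdma_for_each_port() iterator, which also expects an unsigned int counter (hence the u8 to unsigned int changes). A small sketch of its use (show_ports() is a hypothetical helper):

static void show_ports(struct ib_device *device)
{
        unsigned int port;

        /* Walks every valid port number of the device,
         * rdma_start_port() .. rdma_end_port().
         */
        rdma_for_each_port(device, port)
                pr_info("%s: port %u\n", dev_name(&device->dev), port);
}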
@@ -786,8 +786,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
        struct cma_device *cma_dev, *cur_dev;
        struct sockaddr_ib *addr;
        union ib_gid gid, sgid, *dgid;
+       unsigned int p;
        u16 pkey, index;
-       u8 p;
        enum ib_port_state port_state;
        int i;
 
@@ -798,7 +798,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 
        mutex_lock(&lock);
        list_for_each_entry(cur_dev, &dev_list, list) {
-               for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+               rdma_for_each_port (cur_dev->device, p) {
                        if (!rdma_cap_af_ib(cur_dev->device, p))
                                continue;
 
@@ -840,9 +840,14 @@ found:
        return 0;
 }
 
-static void cma_deref_id(struct rdma_id_private *id_priv)
+static void cma_id_get(struct rdma_id_private *id_priv)
+{
+       refcount_inc(&id_priv->refcount);
+}
+
+static void cma_id_put(struct rdma_id_private *id_priv)
 {
-       if (atomic_dec_and_test(&id_priv->refcount))
+       if (refcount_dec_and_test(&id_priv->refcount))
                complete(&id_priv->comp);
 }
 
@@ -870,7 +875,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
        spin_lock_init(&id_priv->lock);
        mutex_init(&id_priv->qp_mutex);
        init_completion(&id_priv->comp);
-       atomic_set(&id_priv->refcount, 1);
+       refcount_set(&id_priv->refcount, 1);
        mutex_init(&id_priv->handler_mutex);
        INIT_LIST_HEAD(&id_priv->listen_list);
        INIT_LIST_HEAD(&id_priv->mc_list);
@@ -1846,11 +1851,11 @@ void rdma_destroy_id(struct rdma_cm_id *id)
        }
 
        cma_release_port(id_priv);
-       cma_deref_id(id_priv);
+       cma_id_put(id_priv);
        wait_for_completion(&id_priv->comp);
 
        if (id_priv->internal_id)
-               cma_deref_id(id_priv->id.context);
+               cma_id_put(id_priv->id.context);
 
        kfree(id_priv->id.route.path_rec);
 
@@ -2187,7 +2192,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
         * Protect against the user destroying conn_id from another thread
         * until we're done accessing it.
         */
-       atomic_inc(&conn_id->refcount);
+       cma_id_get(conn_id);
        ret = cma_cm_event_handler(conn_id, &event);
        if (ret)
                goto err3;
@@ -2204,13 +2209,13 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
        mutex_unlock(&lock);
        mutex_unlock(&conn_id->handler_mutex);
        mutex_unlock(&listen_id->handler_mutex);
-       cma_deref_id(conn_id);
+       cma_id_put(conn_id);
        if (net_dev)
                dev_put(net_dev);
        return 0;
 
 err3:
-       cma_deref_id(conn_id);
+       cma_id_put(conn_id);
        /* Destroy the CM ID by returning a non-zero value. */
        conn_id->cm_id.ib = NULL;
 err2:
@@ -2391,7 +2396,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
         * Protect against the user destroying conn_id from another thread
         * until we're done accessing it.
         */
-       atomic_inc(&conn_id->refcount);
+       cma_id_get(conn_id);
        ret = cma_cm_event_handler(conn_id, &event);
        if (ret) {
                /* User wants to destroy the CM ID */
@@ -2399,13 +2404,13 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                cma_exch(conn_id, RDMA_CM_DESTROYING);
                mutex_unlock(&conn_id->handler_mutex);
                mutex_unlock(&listen_id->handler_mutex);
-               cma_deref_id(conn_id);
+               cma_id_put(conn_id);
                rdma_destroy_id(&conn_id->id);
                return ret;
        }
 
        mutex_unlock(&conn_id->handler_mutex);
-       cma_deref_id(conn_id);
+       cma_id_put(conn_id);
 
 out:
        mutex_unlock(&listen_id->handler_mutex);
@@ -2492,7 +2497,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
        _cma_attach_to_dev(dev_id_priv, cma_dev);
        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
-       atomic_inc(&id_priv->refcount);
+       cma_id_get(id_priv);
        dev_id_priv->internal_id = 1;
        dev_id_priv->afonly = id_priv->afonly;
        dev_id_priv->tos_set = id_priv->tos_set;
@@ -2647,7 +2652,7 @@ static void cma_work_handler(struct work_struct *_work)
        }
 out:
        mutex_unlock(&id_priv->handler_mutex);
-       cma_deref_id(id_priv);
+       cma_id_put(id_priv);
        if (destroy)
                rdma_destroy_id(&id_priv->id);
        kfree(work);
@@ -2671,7 +2676,7 @@ static void cma_ndev_work_handler(struct work_struct *_work)
 
 out:
        mutex_unlock(&id_priv->handler_mutex);
-       cma_deref_id(id_priv);
+       cma_id_put(id_priv);
        if (destroy)
                rdma_destroy_id(&id_priv->id);
        kfree(work);
@@ -2687,14 +2692,19 @@ static void cma_init_resolve_route_work(struct cma_work *work,
        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 }
 
-static void cma_init_resolve_addr_work(struct cma_work *work,
-                                      struct rdma_id_private *id_priv)
+static void enqueue_resolve_addr_work(struct cma_work *work,
+                                     struct rdma_id_private *id_priv)
 {
+       /* Balances with cma_id_put() in cma_work_handler */
+       cma_id_get(id_priv);
+
        work->id = id_priv;
        INIT_WORK(&work->work, cma_work_handler);
        work->old_state = RDMA_CM_ADDR_QUERY;
        work->new_state = RDMA_CM_ADDR_RESOLVED;
        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+
+       queue_work(cma_wq, &work->work);
 }
 
 static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
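
enqueue_resolve_addr_work() now takes the reference itself, right before queuing, so the get is visibly balanced by the put in cma_work_handler(). The same rule, sketched with a kref-counted object instead of the cma structures (all names here are illustrative):

struct my_obj {
        struct kref kref;
};

struct my_work {
        struct work_struct work;
        struct my_obj *obj;
};

static void my_obj_release(struct kref *kref)
{
        kfree(container_of(kref, struct my_obj, kref));
}

static void my_work_handler(struct work_struct *work)
{
        struct my_work *w = container_of(work, struct my_work, work);

        /* ... act on w->obj ... */
        kref_put(&w->obj->kref, my_obj_release);  /* balances the get below */
        kfree(w);
}

static void my_obj_queue(struct my_obj *obj, struct my_work *w)
{
        kref_get(&obj->kref);           /* keep obj alive until the handler runs */
        w->obj = obj;
        INIT_WORK(&w->work, my_work_handler);
        queue_work(system_wq, &w->work);
}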
@@ -2968,6 +2978,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 err2:
        kfree(route->path_rec);
        route->path_rec = NULL;
+       route->num_paths = 0;
 err1:
        kfree(work);
        return ret;
@@ -2982,7 +2993,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
                return -EINVAL;
 
-       atomic_inc(&id_priv->refcount);
+       cma_id_get(id_priv);
        if (rdma_cap_ib_sa(id->device, id->port_num))
                ret = cma_resolve_ib_route(id_priv, timeout_ms);
        else if (rdma_protocol_roce(id->device, id->port_num))
@@ -2998,7 +3009,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
        return 0;
 err:
        cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
-       cma_deref_id(id_priv);
+       cma_id_put(id_priv);
        return ret;
 }
 EXPORT_SYMBOL(rdma_resolve_route);
@@ -3025,9 +3036,9 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
        struct cma_device *cma_dev, *cur_dev;
        union ib_gid gid;
        enum ib_port_state port_state;
+       unsigned int p;
        u16 pkey;
        int ret;
-       u8 p;
 
        cma_dev = NULL;
        mutex_lock(&lock);
@@ -3039,7 +3050,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
                if (!cma_dev)
                        cma_dev = cur_dev;
 
-               for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+               rdma_for_each_port (cur_dev->device, p) {
                        if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
                            port_state == IB_PORT_ACTIVE) {
                                cma_dev = cur_dev;
@@ -3148,9 +3159,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
        rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
 
-       atomic_inc(&id_priv->refcount);
-       cma_init_resolve_addr_work(work, id_priv);
-       queue_work(cma_wq, &work->work);
+       enqueue_resolve_addr_work(work, id_priv);
        return 0;
 err:
        kfree(work);
@@ -3175,9 +3184,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
                &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
 
-       atomic_inc(&id_priv->refcount);
-       cma_init_resolve_addr_work(work, id_priv);
-       queue_work(cma_wq, &work->work);
+       enqueue_resolve_addr_work(work, id_priv);
        return 0;
 err:
        kfree(work);
@@ -4588,7 +4595,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
                INIT_WORK(&work->work, cma_ndev_work_handler);
                work->id = id_priv;
                work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
-               atomic_inc(&id_priv->refcount);
+               cma_id_get(id_priv);
                queue_work(cma_wq, &work->work);
        }
 
@@ -4663,7 +4670,7 @@ static void cma_add_one(struct ib_device *device)
        }
 
        init_completion(&cma_dev->comp);
-       atomic_set(&cma_dev->refcount, 1);
+       refcount_set(&cma_dev->refcount, 1);
        INIT_LIST_HEAD(&cma_dev->id_list);
        ib_set_client_data(device, &cma_client, cma_dev);
 
@@ -4722,11 +4729,11 @@ static void cma_process_remove(struct cma_device *cma_dev)
 
                list_del(&id_priv->listen_list);
                list_del_init(&id_priv->list);
-               atomic_inc(&id_priv->refcount);
+               cma_id_get(id_priv);
                mutex_unlock(&lock);
 
                ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
-               cma_deref_id(id_priv);
+               cma_id_put(id_priv);
                if (ret)
                        rdma_destroy_id(&id_priv->id);
 
@@ -4734,7 +4741,7 @@ static void cma_process_remove(struct cma_device *cma_dev)
        }
        mutex_unlock(&lock);
 
-       cma_deref_dev(cma_dev);
+       cma_dev_put(cma_dev);
        wait_for_completion(&cma_dev->comp);
 }
 
@@ -4790,6 +4797,19 @@ static int __init cma_init(void)
 {
        int ret;
 
+       /*
+        * There is a rare lock ordering dependency in cma_netdev_callback()
+        * that only happens when bonding is enabled. Teach lockdep that rtnl
+        * must never be nested under lock so it can find these without having
+        * to test with bonding.
+        */
+       if (IS_ENABLED(CONFIG_LOCKDEP)) {
+               rtnl_lock();
+               mutex_lock(&lock);
+               mutex_unlock(&lock);
+               rtnl_unlock();
+       }
+
        cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
        if (!cma_wq)
                return -ENOMEM;
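
The lockdep priming block above records the only legal ordering (rtnl outside the cma lock) once at module init. The trick generalizes to any pair of locks with a rarely exercised ordering constraint; a sketch with two stand-in mutexes:

static DEFINE_MUTEX(outer_lock);
static DEFINE_MUTEX(inner_lock);

static void teach_lockdep_ordering(void)
{
        if (!IS_ENABLED(CONFIG_LOCKDEP))
                return;

        /* Acquire the pair once in the only allowed order so that any
         * later inversion is flagged even if the rare path never runs
         * during testing.
         */
        mutex_lock(&outer_lock);
        mutex_lock(&inner_lock);
        mutex_unlock(&inner_lock);
        mutex_unlock(&outer_lock);
}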
index 8b0b5ae..c672a49 100644 (file)
@@ -94,7 +94,7 @@ static int cma_configfs_params_get(struct config_item *item,
 
 static void cma_configfs_params_put(struct cma_device *cma_dev)
 {
-       cma_deref_dev(cma_dev);
+       cma_dev_put(cma_dev);
 }
 
 static ssize_t default_roce_mode_show(struct config_item *item,
@@ -312,12 +312,12 @@ static struct config_group *make_cma_dev(struct config_group *group,
        configfs_add_default_group(&cma_dev_group->ports_group,
                        &cma_dev_group->device_group);
 
-       cma_deref_dev(cma_dev);
+       cma_dev_put(cma_dev);
        return &cma_dev_group->device_group;
 
 fail:
        if (cma_dev)
-               cma_deref_dev(cma_dev);
+               cma_dev_put(cma_dev);
        kfree(cma_dev_group);
        return ERR_PTR(err);
 }
index ca73072..5edcf44 100644 (file)
@@ -66,7 +66,7 @@ struct rdma_id_private {
        struct mutex            qp_mutex;
 
        struct completion       comp;
-       atomic_t                refcount;
+       refcount_t refcount;
        struct mutex            handler_mutex;
 
        int                     backlog;
@@ -111,8 +111,8 @@ static inline void cma_configfs_exit(void)
 }
 #endif
 
-void cma_ref_dev(struct cma_device *dev);
-void cma_deref_dev(struct cma_device *dev);
+void cma_dev_get(struct cma_device *dev);
+void cma_dev_put(struct cma_device *dev);
 typedef bool (*cma_device_filter)(struct ib_device *, void *);
 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
                                             void *cookie);
index 956b3a7..403d867 100644 (file)
@@ -79,13 +79,13 @@ struct ib_mad_private {
        struct ib_mad_private_header header;
        size_t mad_size;
        struct ib_grh grh;
-       u8 mad[0];
+       u8 mad[];
 } __packed;
 
 struct ib_rmpp_segment {
        struct list_head list;
        u32 num;
-       u8 data[0];
+       u8 data[];
 };
 
 struct ib_mad_agent_private {
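
The [0] arrays here (and in the multicast and SA headers below) become C99 flexible array members. Such structs are sized at allocation time, typically via struct_size(); a sketch with a made-up struct blob:

struct blob {
        size_t len;
        u8 data[];                      /* flexible array member */
};

static struct blob *blob_alloc(size_t len)
{
        /* struct_size() computes sizeof(*b) + len * sizeof(b->data[0])
         * with overflow checking.
         */
        struct blob *b = kzalloc(struct_size(b, data, len), GFP_KERNEL);

        if (b)
                b->len = len;
        return b;
}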
index cd338dd..9c2d8b7 100644 (file)
@@ -71,7 +71,7 @@ struct mcast_device {
        struct ib_event_handler event_handler;
        int                     start_port;
        int                     end_port;
-       struct mcast_port       port[0];
+       struct mcast_port       port[];
 };
 
 enum mcast_state {
index 06e5b67..557efbf 100644 (file)
@@ -391,13 +391,13 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                return -EINVAL;
        }
 
-       ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+       ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
        if (!ret)
                return -ENOMEM;
        sg_cnt = ret;
 
        if (prot_sg_cnt) {
-               ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
+               ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir);
                if (!ret) {
                        ret = -ENOMEM;
                        goto out_unmap_sg;
@@ -466,9 +466,9 @@ out_free_ctx:
        kfree(ctx->reg);
 out_unmap_prot_sg:
        if (prot_sg_cnt)
-               ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+               rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
 out_unmap_sg:
-       ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+       rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
        return ret;
 }
 EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
@@ -628,9 +628,9 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
        kfree(ctx->reg);
 
-       ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
        if (prot_sg_cnt)
-               ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
+               rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
+       rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 }
 EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
 
index 30d4c12..74e0058 100644 (file)
@@ -101,7 +101,7 @@ struct ib_sa_port {
 struct ib_sa_device {
        int                     start_port, end_port;
        struct ib_event_handler event_handler;
-       struct ib_sa_port port[0];
+       struct ib_sa_port port[];
 };
 
 struct ib_sa_query {
index 0274e9b..16b6cf5 100644 (file)
@@ -85,12 +85,13 @@ struct ucma_file {
 struct ucma_context {
        u32                     id;
        struct completion       comp;
-       atomic_t                ref;
+       refcount_t              ref;
        int                     events_reported;
        int                     backlog;
 
        struct ucma_file        *file;
        struct rdma_cm_id       *cm_id;
+       struct mutex            mutex;
        u64                     uid;
 
        struct list_head        list;
@@ -152,7 +153,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
                if (ctx->closing)
                        ctx = ERR_PTR(-EIO);
                else
-                       atomic_inc(&ctx->ref);
+                       refcount_inc(&ctx->ref);
        }
        xa_unlock(&ctx_table);
        return ctx;
@@ -160,7 +161,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 
 static void ucma_put_ctx(struct ucma_context *ctx)
 {
-       if (atomic_dec_and_test(&ctx->ref))
+       if (refcount_dec_and_test(&ctx->ref))
                complete(&ctx->comp);
 }
 
@@ -212,10 +213,11 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
                return NULL;
 
        INIT_WORK(&ctx->close_work, ucma_close_id);
-       atomic_set(&ctx->ref, 1);
+       refcount_set(&ctx->ref, 1);
        init_completion(&ctx->comp);
        INIT_LIST_HEAD(&ctx->mc_list);
        ctx->file = file;
+       mutex_init(&ctx->mutex);
 
        if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
                goto error;
@@ -589,6 +591,7 @@ static int ucma_free_ctx(struct ucma_context *ctx)
        }
 
        events_reported = ctx->events_reported;
+       mutex_destroy(&ctx->mutex);
        kfree(ctx);
        return events_reported;
 }
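
Each ucma context now carries its own mutex: initialized in ucma_alloc_ctx(), held around the rdma_cm calls in the handlers below, and destroyed here in ucma_free_ctx(). A condensed sketch of one handler-style wrapper (struct uctx and do_bind() are illustrative stand-ins):

struct uctx {
        struct mutex mutex;             /* serializes rdma_cm calls on cm_id */
        struct rdma_cm_id *cm_id;
};

static int do_bind(struct uctx *ctx, struct sockaddr *addr)
{
        int ret;

        mutex_lock(&ctx->mutex);
        ret = rdma_bind_addr(ctx->cm_id, addr);
        mutex_unlock(&ctx->mutex);
        return ret;
}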
@@ -658,7 +661,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+       mutex_unlock(&ctx->mutex);
+
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -681,7 +687,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -705,8 +713,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -731,8 +741,10 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -752,7 +764,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -841,6 +855,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        memset(&resp, 0, sizeof resp);
        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
        memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
@@ -864,6 +879,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
                ucma_copy_iw_route(&resp, &ctx->cm_id->route);
 
 out:
+       mutex_unlock(&ctx->mutex);
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
@@ -1014,6 +1030,7 @@ static ssize_t ucma_query(struct ucma_file *file,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        switch (cmd.option) {
        case RDMA_USER_CM_QUERY_ADDR:
                ret = ucma_query_addr(ctx, response, out_len);
@@ -1028,6 +1045,7 @@ static ssize_t ucma_query(struct ucma_file *file,
                ret = -ENOSYS;
                break;
        }
+       mutex_unlock(&ctx->mutex);
 
        ucma_put_ctx(ctx);
        return ret;
@@ -1045,7 +1063,7 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id,
        dst->retry_count = src->retry_count;
        dst->rnr_retry_count = src->rnr_retry_count;
        dst->srq = src->srq;
-       dst->qp_num = src->qp_num;
+       dst->qp_num = src->qp_num & 0xFFFFFF;
        dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
 }
 
@@ -1068,7 +1086,9 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
                return PTR_ERR(ctx);
 
        ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
+       mutex_lock(&ctx->mutex);
        ret = rdma_connect(ctx->cm_id, &conn_param);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1089,7 +1109,9 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
 
        ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
                       cmd.backlog : max_backlog;
+       mutex_lock(&ctx->mutex);
        ret = rdma_listen(ctx->cm_id, ctx->backlog);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1112,13 +1134,17 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
        if (cmd.conn_param.valid) {
                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
                mutex_lock(&file->mut);
+               mutex_lock(&ctx->mutex);
                ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
+               mutex_unlock(&ctx->mutex);
                if (!ret)
                        ctx->uid = cmd.uid;
                mutex_unlock(&file->mut);
-       } else
+       } else {
+               mutex_lock(&ctx->mutex);
                ret = __rdma_accept(ctx->cm_id, NULL, NULL);
-
+               mutex_unlock(&ctx->mutex);
+       }
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1137,7 +1163,9 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1156,7 +1184,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        ret = rdma_disconnect(ctx->cm_id);
+       mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1187,7 +1217,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
        resp.qp_attr_mask = 0;
        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.qp_state = cmd.qp_state;
+       mutex_lock(&ctx->mutex);
        ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+       mutex_unlock(&ctx->mutex);
        if (ret)
                goto out;
 
@@ -1273,9 +1305,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
                struct sa_path_rec opa;
 
                sa_convert_path_ib_to_opa(&opa, &sa_path);
+               mutex_lock(&ctx->mutex);
                ret = rdma_set_ib_path(ctx->cm_id, &opa);
+               mutex_unlock(&ctx->mutex);
        } else {
+               mutex_lock(&ctx->mutex);
                ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
+               mutex_unlock(&ctx->mutex);
        }
        if (ret)
                return ret;
@@ -1308,7 +1344,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
 
        switch (level) {
        case RDMA_OPTION_ID:
+               mutex_lock(&ctx->mutex);
                ret = ucma_set_option_id(ctx, optname, optval, optlen);
+               mutex_unlock(&ctx->mutex);
                break;
        case RDMA_OPTION_IB:
                ret = ucma_set_option_ib(ctx, optname, optval, optlen);
@@ -1368,8 +1406,10 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       mutex_lock(&ctx->mutex);
        if (ctx->cm_id->device)
                ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
+       mutex_unlock(&ctx->mutex);
 
        ucma_put_ctx(ctx);
        return ret;
@@ -1412,8 +1452,10 @@ static ssize_t ucma_process_join(struct ucma_file *file,
        mc->join_state = join_state;
        mc->uid = cmd->uid;
        memcpy(&mc->addr, addr, cmd->addr_size);
+       mutex_lock(&ctx->mutex);
        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
                                  join_state, mc);
+       mutex_unlock(&ctx->mutex);
        if (ret)
                goto err2;
 
@@ -1502,7 +1544,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
                mc = ERR_PTR(-ENOENT);
        else if (mc->ctx->file != file)
                mc = ERR_PTR(-EINVAL);
-       else if (!atomic_inc_not_zero(&mc->ctx->ref))
+       else if (!refcount_inc_not_zero(&mc->ctx->ref))
                mc = ERR_PTR(-ENXIO);
        else
                __xa_erase(&multicast_table, mc->id);
@@ -1513,7 +1555,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
                goto out;
        }
 
+       mutex_lock(&mc->ctx->mutex);
        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
+       mutex_unlock(&mc->ctx->mutex);
+
        mutex_lock(&mc->ctx->file->mut);
        ucma_cleanup_mc_events(mc);
        list_del(&mc->list);
index 06b6125..82455a1 100644 (file)
@@ -197,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
        unsigned long lock_limit;
        unsigned long new_pinned;
        unsigned long cur_base;
+       unsigned long dma_attr = 0;
        struct mm_struct *mm;
        unsigned long npages;
        int ret;
@@ -278,10 +279,12 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
        sg_mark_end(sg);
 
-       umem->nmap = ib_dma_map_sg(device,
-                                  umem->sg_head.sgl,
-                                  umem->sg_nents,
-                                  DMA_BIDIRECTIONAL);
+       if (access & IB_ACCESS_RELAXED_ORDERING)
+               dma_attr |= DMA_ATTR_WEAK_ORDERING;
+
+       umem->nmap =
+               ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents,
+                                   DMA_BIDIRECTIONAL, dma_attr);
 
        if (!umem->nmap) {
                ret = -ENOMEM;
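
ib_umem_get() now maps through ib_dma_map_sg_attrs() so it can pass DMA_ATTR_WEAK_ORDERING whenever the caller requested IB_ACCESS_RELAXED_ORDERING. A sketch of that conditional mapping (map_umem_sg() is a hypothetical helper):

static int map_umem_sg(struct ib_device *dev, struct ib_umem *umem,
                       int access_flags)
{
        unsigned long dma_attr = 0;

        if (access_flags & IB_ACCESS_RELAXED_ORDERING)
                dma_attr |= DMA_ATTR_WEAK_ORDERING;

        /* Returns the number of mapped entries, or 0 on failure. */
        return ib_dma_map_sg_attrs(dev, umem->sg_head.sgl, umem->sg_nents,
                                   DMA_BIDIRECTIONAL, dma_attr);
}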
index e62c9df..56a7133 100644 (file)
@@ -54,8 +54,6 @@
 #include "core_priv.h"
 #include <trace/events/rdma_core.h>
 
-#include <trace/events/rdma_core.h>
-
 static int ib_resolve_eth_dmac(struct ib_device *device,
                               struct rdma_ah_attr *ah_attr);
 
@@ -1127,8 +1125,7 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
 EXPORT_SYMBOL(ib_open_qp);
 
 static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
-                                       struct ib_qp_init_attr *qp_init_attr,
-                                       struct ib_udata *udata)
+                                       struct ib_qp_init_attr *qp_init_attr)
 {
        struct ib_qp *real_qp = qp;
 
@@ -1150,9 +1147,18 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
        return qp;
 }
 
-struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
-                               struct ib_qp_init_attr *qp_init_attr,
-                               struct ib_udata *udata)
+/**
+ * ib_create_qp - Creates a kernel QP associated with the specified protection
+ *   domain.
+ * @pd: The protection domain associated with the QP.
+ * @qp_init_attr: A list of initial attributes required to create the
+ *   QP.  If QP creation succeeds, then the attributes are updated to
+ *   the actual capabilities of the created QP.
+ *
+ * NOTE: for user qp use ib_create_qp_user with valid udata!
+ */
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+                          struct ib_qp_init_attr *qp_init_attr)
 {
        struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
        struct ib_qp *qp;
@@ -1187,7 +1193,7 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
 
        if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
                struct ib_qp *xrc_qp =
-                       create_xrc_qp_user(qp, qp_init_attr, udata);
+                       create_xrc_qp_user(qp, qp_init_attr);
 
                if (IS_ERR(xrc_qp)) {
                        ret = PTR_ERR(xrc_qp);
@@ -1243,7 +1249,7 @@ err:
        return ERR_PTR(ret);
 
 }
-EXPORT_SYMBOL(ib_create_qp_user);
+EXPORT_SYMBOL(ib_create_qp);
 
 static const struct {
        int                     valid;
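
With the udata parameter gone, ib_create_qp() is again the kernel-only entry point, as the new kernel-doc spells out. A minimal sketch of a kernel consumer creating an RC QP with it (the surrounding pd/cq are assumed to exist already):

static struct ib_qp *make_rc_qp(struct ib_pd *pd, struct ib_cq *cq)
{
        struct ib_qp_init_attr attr = {
                .send_cq     = cq,
                .recv_cq     = cq,
                .qp_type     = IB_QPT_RC,
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .cap = {
                        .max_send_wr  = 16,
                        .max_recv_wr  = 16,
                        .max_send_sge = 1,
                        .max_recv_sge = 1,
                },
        };

        /* Returns a valid QP pointer or an ERR_PTR() on failure. */
        return ib_create_qp(pd, &attr);
}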
index 725b235..a300588 100644 (file)
 
 #define BNXT_RE_DEFAULT_ACK_DELAY      16
 
+struct bnxt_re_ring_attr {
+       dma_addr_t      *dma_arr;
+       int             pages;
+       int             type;
+       u32             depth;
+       u32             lrid; /* Logical ring id */
+       u8              mode;
+};
+
 struct bnxt_re_work {
        struct work_struct      work;
        unsigned long           event;
@@ -104,6 +113,14 @@ struct bnxt_re_sqp_entries {
        struct bnxt_re_qp *qp1_qp;
 };
 
+#define BNXT_RE_MAX_GSI_SQP_ENTRIES    1024
+struct bnxt_re_gsi_context {
+       struct  bnxt_re_qp *gsi_qp;
+       struct  bnxt_re_qp *gsi_sqp;
+       struct  bnxt_re_ah *gsi_sah;
+       struct  bnxt_re_sqp_entries *sqp_tbl;
+};
+
 #define BNXT_RE_MIN_MSIX               2
 #define BNXT_RE_MAX_MSIX               9
 #define BNXT_RE_AEQ_IDX                        0
@@ -115,7 +132,6 @@ struct bnxt_re_dev {
        struct list_head                list;
        unsigned long                   flags;
 #define BNXT_RE_FLAG_NETDEV_REGISTERED         0
-#define BNXT_RE_FLAG_IBDEV_REGISTERED          1
 #define BNXT_RE_FLAG_GOT_MSIX                  2
 #define BNXT_RE_FLAG_HAVE_L2_REF               3
 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN           4
@@ -125,7 +141,7 @@ struct bnxt_re_dev {
 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS          29
        struct net_device               *netdev;
        unsigned int                    version, major, minor;
-       struct bnxt_qplib_chip_ctx      chip_ctx;
+       struct bnxt_qplib_chip_ctx      *chip_ctx;
        struct bnxt_en_dev              *en_dev;
        struct bnxt_msix_entry          msix_entries[BNXT_RE_MAX_MSIX];
        int                             num_msix;
@@ -160,15 +176,11 @@ struct bnxt_re_dev {
        atomic_t                        srq_count;
        atomic_t                        mr_count;
        atomic_t                        mw_count;
-       atomic_t                        sched_count;
        /* Max of 2 lossless traffic class supported per port */
        u16                             cosq[2];
 
        /* QP for handling QP1 packets */
-       u32                             sqp_id;
-       struct bnxt_re_qp               *qp1_sqp;
-       struct bnxt_re_ah               *sqp_ah;
-       struct bnxt_re_sqp_entries sqp_tbl[1024];
+       struct bnxt_re_gsi_context      gsi_ctx;
        atomic_t nq_alloc_cnt;
        u32 is_virtfn;
        u32 num_vfs;
index 52b6a4d..95f6d49 100644 (file)
@@ -312,9 +312,9 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
                 */
                if (ctx->idx == 0 &&
                    rdma_link_local_addr((struct in6_addr *)gid_to_del) &&
-                   ctx->refcnt == 1 && rdev->qp1_sqp) {
-                       dev_dbg(rdev_to_dev(rdev),
-                               "Trying to delete GID0 while QP1 is alive\n");
+                   ctx->refcnt == 1 && rdev->gsi_ctx.gsi_sqp) {
+                       ibdev_dbg(&rdev->ibdev,
+                                 "Trying to delete GID0 while QP1 is alive\n");
                        return -EFAULT;
                }
                ctx->refcnt--;
@@ -322,8 +322,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
                        rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del,
                                                 vlan_id,  true);
                        if (rc) {
-                               dev_err(rdev_to_dev(rdev),
-                                       "Failed to remove GID: %#x", rc);
+                               ibdev_err(&rdev->ibdev,
+                                         "Failed to remove GID: %#x", rc);
                        } else {
                                ctx_tbl = sgid_tbl->ctx;
                                ctx_tbl[ctx->idx] = NULL;
@@ -360,7 +360,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
        }
 
        if (rc < 0) {
-               dev_err(rdev_to_dev(rdev), "Failed to add GID: %#x", rc);
+               ibdev_err(&rdev->ibdev, "Failed to add GID: %#x", rc);
                return rc;
        }
 
@@ -423,12 +423,12 @@ static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
        wqe.bind.r_key = fence->bind_rkey;
        fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
 
-       dev_dbg(rdev_to_dev(qp->rdev),
-               "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+       ibdev_dbg(&qp->rdev->ibdev,
+                 "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
                wqe.bind.r_key, qp->qplib_qp.id, pd);
        rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
        if (rc) {
-               dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+               ibdev_err(&qp->rdev->ibdev, "Failed to bind fence-WQE\n");
                return rc;
        }
        bnxt_qplib_post_send_db(&qp->qplib_qp);
@@ -479,7 +479,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
                                  DMA_BIDIRECTIONAL);
        rc = dma_mapping_error(dev, dma_addr);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+               ibdev_err(&rdev->ibdev, "Failed to dma-map fence-MR-mem\n");
                rc = -EIO;
                fence->dma_addr = 0;
                goto fail;
@@ -499,7 +499,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
        mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
        rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+               ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n");
                goto fail;
        }
 
@@ -511,7 +511,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
        rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
                               BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+               ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n");
                goto fail;
        }
        mr->ib_mr.rkey = mr->qplib_mr.rkey;
@@ -519,8 +519,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
        /* Create a fence MW only for kernel consumers */
        mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
        if (IS_ERR(mw)) {
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to create fence-MW for PD: %p\n", pd);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to create fence-MW for PD: %p\n", pd);
                rc = PTR_ERR(mw);
                goto fail;
        }
@@ -558,7 +558,7 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 
        pd->rdev = rdev;
        if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
-               dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
+               ibdev_err(&rdev->ibdev, "Failed to allocate HW PD");
                rc = -ENOMEM;
                goto fail;
        }
@@ -585,16 +585,16 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 
                rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
                if (rc) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to copy user response\n");
+                       ibdev_err(&rdev->ibdev,
+                                 "Failed to copy user response\n");
                        goto dbfail;
                }
        }
 
        if (!udata)
                if (bnxt_re_create_fence_mr(pd))
-                       dev_warn(rdev_to_dev(rdev),
-                                "Failed to create Fence-MR\n");
+                       ibdev_warn(&rdev->ibdev,
+                                  "Failed to create Fence-MR\n");
        return 0;
 dbfail:
        bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -639,12 +639,13 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
        const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
        struct bnxt_re_dev *rdev = pd->rdev;
        const struct ib_gid_attr *sgid_attr;
+       struct bnxt_re_gid_ctx *ctx;
        struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
        u8 nw_type;
        int rc;
 
        if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
-               dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
+               ibdev_err(&rdev->ibdev, "Failed to alloc AH: GRH not set");
                return -EINVAL;
        }
 
@@ -654,19 +655,18 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
        /* Supply the configuration for the HW */
        memcpy(ah->qplib_ah.dgid.data, grh->dgid.raw,
               sizeof(union ib_gid));
-       /*
-        * If RoCE V2 is enabled, stack will have two entries for
-        * each GID entry. Avoiding this duplicte entry in HW. Dividing
-        * the GID index by 2 for RoCE V2
+       sgid_attr = grh->sgid_attr;
+       /* Get the HW context of the GID. The reference
+        * of GID table entry is already taken by the caller.
         */
-       ah->qplib_ah.sgid_index = grh->sgid_index / 2;
+       ctx = rdma_read_gid_hw_context(sgid_attr);
+       ah->qplib_ah.sgid_index = ctx->idx;
        ah->qplib_ah.host_sgid_index = grh->sgid_index;
        ah->qplib_ah.traffic_class = grh->traffic_class;
        ah->qplib_ah.flow_label = grh->flow_label;
        ah->qplib_ah.hop_limit = grh->hop_limit;
        ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
 
-       sgid_attr = grh->sgid_attr;
        /* Get network header type for this GID */
        nw_type = rdma_gid_attr_network_type(sgid_attr);
        ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type);
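
Instead of halving the stack's GID index, the AH path now asks the core for the HW context the driver stored at add_gid time and uses the index recorded there. A sketch of that lookup (gid_to_hw_index() is an illustrative helper around rdma_read_gid_hw_context(); the bnxt_re_gid_ctx layout is assumed from the hunk above):

static u32 gid_to_hw_index(const struct ib_gid_attr *sgid_attr)
{
        /* The driver stashed a bnxt_re_gid_ctx pointer for this entry in
         * its add_gid() callback; the core hands it back here.
         */
        struct bnxt_re_gid_ctx *ctx = rdma_read_gid_hw_context(sgid_attr);

        return ctx->idx;
}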
@@ -675,7 +675,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
        rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
                                  !(flags & RDMA_CREATE_AH_SLEEPABLE));
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
+               ibdev_err(&rdev->ibdev, "Failed to allocate HW AH");
                return rc;
        }
 
@@ -742,6 +742,49 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
        spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
 }
 
+static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
+{
+       struct bnxt_re_qp *gsi_sqp;
+       struct bnxt_re_ah *gsi_sah;
+       struct bnxt_re_dev *rdev;
+       int rc = 0;
+
+       rdev = qp->rdev;
+       gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+       gsi_sah = rdev->gsi_ctx.gsi_sah;
+
+       /* remove from active qp list */
+       mutex_lock(&rdev->qp_lock);
+       list_del(&gsi_sqp->list);
+       mutex_unlock(&rdev->qp_lock);
+       atomic_dec(&rdev->qp_count);
+
+       ibdev_dbg(&rdev->ibdev, "Destroy the shadow AH\n");
+       bnxt_qplib_destroy_ah(&rdev->qplib_res,
+                             &gsi_sah->qplib_ah,
+                             true);
+       bnxt_qplib_clean_qp(&qp->qplib_qp);
+
+       ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n");
+       rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp);
+       if (rc) {
+               ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed");
+               goto fail;
+       }
+       bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp);
+
+       kfree(rdev->gsi_ctx.sqp_tbl);
+       kfree(gsi_sah);
+       kfree(gsi_sqp);
+       rdev->gsi_ctx.gsi_sqp = NULL;
+       rdev->gsi_ctx.gsi_sah = NULL;
+       rdev->gsi_ctx.sqp_tbl = NULL;
+
+       return 0;
+fail:
+       return rc;
+}
+
 /* Queue Pairs */
 int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 {
@@ -750,10 +793,16 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
        unsigned int flags;
        int rc;
 
+       mutex_lock(&rdev->qp_lock);
+       list_del(&qp->list);
+       mutex_unlock(&rdev->qp_lock);
+       atomic_dec(&rdev->qp_count);
+
        bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
+
        rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
+               ibdev_err(&rdev->ibdev, "Failed to destroy HW QP");
                return rc;
        }
 
@@ -765,40 +814,19 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 
        bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
 
-       if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
-               bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah,
-                                     false);
-
-               bnxt_qplib_clean_qp(&qp->qplib_qp);
-               rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
-                                          &rdev->qp1_sqp->qplib_qp);
-               if (rc) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to destroy Shadow QP");
-                       return rc;
-               }
-               bnxt_qplib_free_qp_res(&rdev->qplib_res,
-                                      &rdev->qp1_sqp->qplib_qp);
-               mutex_lock(&rdev->qp_lock);
-               list_del(&rdev->qp1_sqp->list);
-               atomic_dec(&rdev->qp_count);
-               mutex_unlock(&rdev->qp_lock);
-
-               kfree(rdev->sqp_ah);
-               kfree(rdev->qp1_sqp);
-               rdev->qp1_sqp = NULL;
-               rdev->sqp_ah = NULL;
+       if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp) {
+               rc = bnxt_re_destroy_gsi_sqp(qp);
+               if (rc)
+                       goto sh_fail;
        }
 
        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);
 
-       mutex_lock(&rdev->qp_lock);
-       list_del(&qp->list);
-       atomic_dec(&rdev->qp_count);
-       mutex_unlock(&rdev->qp_lock);
        kfree(qp);
        return 0;
+sh_fail:
+       return rc;
 }
 
 static u8 __from_ib_qp_type(enum ib_qp_type type)
@@ -831,7 +859,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
        bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
        /* Consider mapping PSN search memory only for RC QPs. */
        if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
-               psn_sz = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
+               psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
                                        sizeof(struct sq_psn_search_ext) :
                                        sizeof(struct sq_psn_search);
                bytes += (qplib_qp->sq.max_wqe * psn_sz);
@@ -843,9 +871,11 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
                return PTR_ERR(umem);
 
        qp->sumem = umem;
-       qplib_qp->sq.sg_info.sglist = umem->sg_head.sgl;
+       qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl;
        qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem);
        qplib_qp->sq.sg_info.nmap = umem->nmap;
+       qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
+       qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
        qplib_qp->qp_handle = ureq.qp_handle;
 
        if (!qp->qplib_qp.srq) {
@@ -856,9 +886,11 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
                if (IS_ERR(umem))
                        goto rqfail;
                qp->rumem = umem;
-               qplib_qp->rq.sg_info.sglist = umem->sg_head.sgl;
+               qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl;
                qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem);
                qplib_qp->rq.sg_info.nmap = umem->nmap;
+               qplib_qp->rq.sg_info.pgsize = PAGE_SIZE;
+               qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT;
        }
 
        qplib_qp->dpi = &cntx->dpi;
@@ -906,8 +938,8 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
 
        rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false);
        if (rc) {
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to allocate HW AH for Shadow QP");
+               ibdev_err(&rdev->ibdev,
+                         "Failed to allocate HW AH for Shadow QP");
                goto fail;
        }
 
@@ -948,6 +980,8 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
        qp->qplib_qp.sq.max_sge = 2;
        /* Q full delta can be 1 since it is internal QP */
        qp->qplib_qp.sq.q_full_delta = 1;
+       qp->qplib_qp.sq.sg_info.pgsize = PAGE_SIZE;
+       qp->qplib_qp.sq.sg_info.pgshft = PAGE_SHIFT;
 
        qp->qplib_qp.scq = qp1_qp->scq;
        qp->qplib_qp.rcq = qp1_qp->rcq;
@@ -956,6 +990,8 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
        qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
        /* Q full delta can be 1 since it is internal QP */
        qp->qplib_qp.rq.q_full_delta = 1;
+       qp->qplib_qp.rq.sg_info.pgsize = PAGE_SIZE;
+       qp->qplib_qp.rq.sg_info.pgshft = PAGE_SHIFT;
 
        qp->qplib_qp.mtu = qp1_qp->mtu;
 
@@ -967,8 +1003,6 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
        if (rc)
                goto fail;
 
-       rdev->sqp_id = qp->qplib_qp.id;
-
        spin_lock_init(&qp->sq_lock);
        INIT_LIST_HEAD(&qp->list);
        mutex_lock(&rdev->qp_lock);
@@ -981,205 +1015,378 @@ fail:
        return NULL;
 }
 
-struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
-                               struct ib_qp_init_attr *qp_init_attr,
-                               struct ib_udata *udata)
+static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
+                               struct ib_qp_init_attr *init_attr)
 {
-       struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
-       struct bnxt_re_dev *rdev = pd->rdev;
-       struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
-       struct bnxt_re_qp *qp;
-       struct bnxt_re_cq *cq;
-       struct bnxt_re_srq *srq;
-       int rc, entries;
+       struct bnxt_qplib_dev_attr *dev_attr;
+       struct bnxt_qplib_qp *qplqp;
+       struct bnxt_re_dev *rdev;
+       int entries;
 
-       if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
-           (qp_init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) ||
-           (qp_init_attr->cap.max_send_sge > dev_attr->max_qp_sges) ||
-           (qp_init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) ||
-           (qp_init_attr->cap.max_inline_data > dev_attr->max_inline_data))
-               return ERR_PTR(-EINVAL);
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       dev_attr = &rdev->dev_attr;
 
-       qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-       if (!qp)
-               return ERR_PTR(-ENOMEM);
+       if (init_attr->srq) {
+               struct bnxt_re_srq *srq;
 
-       qp->rdev = rdev;
-       ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
-       qp->qplib_qp.pd = &pd->qplib_pd;
-       qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
-       qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
+               srq = container_of(init_attr->srq, struct bnxt_re_srq, ib_srq);
+               if (!srq) {
+                       ibdev_err(&rdev->ibdev, "SRQ not found");
+                       return -EINVAL;
+               }
+               qplqp->srq = &srq->qplib_srq;
+               qplqp->rq.max_wqe = 0;
+       } else {
+               /* Allocate 1 more than what's provided so posting max doesn't
+                * mean empty.
+                */
+               entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1);
+               qplqp->rq.max_wqe = min_t(u32, entries,
+                                         dev_attr->max_qp_wqes + 1);
 
-       if (qp_init_attr->qp_type == IB_QPT_GSI &&
-           bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
-               qp->qplib_qp.type = CMDQ_CREATE_QP_TYPE_GSI;
-       if (qp->qplib_qp.type == IB_QPT_MAX) {
-               dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
-                       qp->qplib_qp.type);
-               rc = -EINVAL;
-               goto fail;
+               qplqp->rq.q_full_delta = qplqp->rq.max_wqe -
+                                        init_attr->cap.max_recv_wr;
+               qplqp->rq.max_sge = init_attr->cap.max_recv_sge;
+               if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+                       qplqp->rq.max_sge = dev_attr->max_qp_sges;
+       }
+       qplqp->rq.sg_info.pgsize = PAGE_SIZE;
+       qplqp->rq.sg_info.pgshft = PAGE_SHIFT;
+
+       return 0;
+}
+
+static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
+{
+       struct bnxt_qplib_dev_attr *dev_attr;
+       struct bnxt_qplib_qp *qplqp;
+       struct bnxt_re_dev *rdev;
+
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       dev_attr = &rdev->dev_attr;
+
+       qplqp->rq.max_sge = dev_attr->max_qp_sges;
+       if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+               qplqp->rq.max_sge = dev_attr->max_qp_sges;
+       qplqp->rq.max_sge = 6;
+}
+
+static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
+                                struct ib_qp_init_attr *init_attr,
+                                struct ib_udata *udata)
+{
+       struct bnxt_qplib_dev_attr *dev_attr;
+       struct bnxt_qplib_qp *qplqp;
+       struct bnxt_re_dev *rdev;
+       int entries;
+
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       dev_attr = &rdev->dev_attr;
+
+       qplqp->sq.max_sge = init_attr->cap.max_send_sge;
+       if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
+               qplqp->sq.max_sge = dev_attr->max_qp_sges;
+       /*
+        * Change the SQ depth if user has requested minimum using
+        * configfs. Only supported for kernel consumers
+        */
+       entries = init_attr->cap.max_send_wr;
+       /* Allocate 128 + 1 more than what's provided */
+       entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1);
+       qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes +
+                       BNXT_QPLIB_RESERVED_QP_WRS + 1);
+       qplqp->sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+       /*
+        * Reserve one slot for the phantom WQE. The application can then
+        * post one extra entry, but this is allowed to avoid an unexpected
+        * queue-full condition.
+        */
+       qplqp->sq.q_full_delta -= 1;
+       qplqp->sq.sg_info.pgsize = PAGE_SIZE;
+       qplqp->sq.sg_info.pgshft = PAGE_SHIFT;
+}
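The send queue gets BNXT_QPLIB_RESERVED_QP_WRS plus one slot of headroom, and one of those reserved slots is handed back for the phantom WQE. A standalone sketch of that math; RESERVED_WRS mirrors the driver constant referenced by the "128 + 1" comment above, the other values are assumed:

    #include <stdio.h>

    #define RESERVED_WRS 128

    int main(void)
    {
            unsigned int max_send_wr = 256;   /* requested (assumed) */
            unsigned int max_qp_wqes = 65535; /* device limit (assumed) */
            unsigned int entries = max_send_wr + RESERVED_WRS + 1;
            unsigned int p2 = 1;
            unsigned int max_wqe, q_full_delta;

            while (p2 < entries)              /* roundup_pow_of_two() equivalent */
                    p2 <<= 1;
            max_wqe = p2 < max_qp_wqes + RESERVED_WRS + 1 ?
                      p2 : max_qp_wqes + RESERVED_WRS + 1;
            /* reserved headroom, minus one slot given back for the phantom WQE */
            q_full_delta = RESERVED_WRS + 1 - 1;

            printf("max_wqe=%u q_full_delta=%u\n", max_wqe, q_full_delta);
            return 0;
    }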
+
+static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
+                                      struct ib_qp_init_attr *init_attr)
+{
+       struct bnxt_qplib_dev_attr *dev_attr;
+       struct bnxt_qplib_qp *qplqp;
+       struct bnxt_re_dev *rdev;
+       int entries;
+
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       dev_attr = &rdev->dev_attr;
+
+       entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1);
+       qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+       qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
+                                init_attr->cap.max_send_wr;
+       qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
+       if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
+               qplqp->sq.max_sge = dev_attr->max_qp_sges;
+}
+
+static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev,
+                               struct ib_qp_init_attr *init_attr)
+{
+       struct bnxt_qplib_chip_ctx *chip_ctx;
+       int qptype;
+
+       chip_ctx = rdev->chip_ctx;
+
+       qptype = __from_ib_qp_type(init_attr->qp_type);
+       if (qptype == IB_QPT_MAX) {
+               ibdev_err(&rdev->ibdev, "QP type 0x%x not supported", qptype);
+               qptype = -EOPNOTSUPP;
+               goto out;
        }
 
-       qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
-       qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
-                                 IB_SIGNAL_ALL_WR) ? true : false);
+       if (bnxt_qplib_is_chip_gen_p5(chip_ctx) &&
+           init_attr->qp_type == IB_QPT_GSI)
+               qptype = CMDQ_CREATE_QP_TYPE_GSI;
+out:
+       return qptype;
+}
 
-       qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
-       if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
-               qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+                               struct ib_qp_init_attr *init_attr,
+                               struct ib_udata *udata)
+{
+       struct bnxt_qplib_dev_attr *dev_attr;
+       struct bnxt_qplib_qp *qplqp;
+       struct bnxt_re_dev *rdev;
+       struct bnxt_re_cq *cq;
+       int rc = 0, qptype;
+
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       dev_attr = &rdev->dev_attr;
+
+       /* Setup misc params */
+       ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr);
+       qplqp->pd = &pd->qplib_pd;
+       qplqp->qp_handle = (u64)qplqp;
+       qplqp->max_inline_data = init_attr->cap.max_inline_data;
+       qplqp->sig_type = ((init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ?
+                           true : false);
+       qptype = bnxt_re_init_qp_type(rdev, init_attr);
+       if (qptype < 0) {
+               rc = qptype;
+               goto out;
+       }
+       qplqp->type = (u8)qptype;
+
+       if (init_attr->qp_type == IB_QPT_RC) {
+               qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
+               qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
+       }
+       qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+       qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */
+       if (init_attr->create_flags)
+               ibdev_dbg(&rdev->ibdev,
+                         "QP create flags 0x%x not supported",
+                         init_attr->create_flags);
 
-       if (qp_init_attr->send_cq) {
-               cq = container_of(qp_init_attr->send_cq, struct bnxt_re_cq,
-                                 ib_cq);
+       /* Setup CQs */
+       if (init_attr->send_cq) {
+               cq = container_of(init_attr->send_cq, struct bnxt_re_cq, ib_cq);
                if (!cq) {
-                       dev_err(rdev_to_dev(rdev), "Send CQ not found");
+                       ibdev_err(&rdev->ibdev, "Send CQ not found");
                        rc = -EINVAL;
-                       goto fail;
+                       goto out;
                }
-               qp->qplib_qp.scq = &cq->qplib_cq;
+               qplqp->scq = &cq->qplib_cq;
                qp->scq = cq;
        }
 
-       if (qp_init_attr->recv_cq) {
-               cq = container_of(qp_init_attr->recv_cq, struct bnxt_re_cq,
-                                 ib_cq);
+       if (init_attr->recv_cq) {
+               cq = container_of(init_attr->recv_cq, struct bnxt_re_cq, ib_cq);
                if (!cq) {
-                       dev_err(rdev_to_dev(rdev), "Receive CQ not found");
+                       ibdev_err(&rdev->ibdev, "Receive CQ not found");
                        rc = -EINVAL;
-                       goto fail;
+                       goto out;
                }
-               qp->qplib_qp.rcq = &cq->qplib_cq;
+               qplqp->rcq = &cq->qplib_cq;
                qp->rcq = cq;
        }
 
-       if (qp_init_attr->srq) {
-               srq = container_of(qp_init_attr->srq, struct bnxt_re_srq,
-                                  ib_srq);
-               if (!srq) {
-                       dev_err(rdev_to_dev(rdev), "SRQ not found");
-                       rc = -EINVAL;
-                       goto fail;
-               }
-               qp->qplib_qp.srq = &srq->qplib_srq;
-               qp->qplib_qp.rq.max_wqe = 0;
-       } else {
-               /* Allocate 1 more than what's provided so posting max doesn't
-                * mean empty
-                */
-               entries = roundup_pow_of_two(qp_init_attr->cap.max_recv_wr + 1);
-               qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
-                                               dev_attr->max_qp_wqes + 1);
+       /* Setup RQ/SRQ */
+       rc = bnxt_re_init_rq_attr(qp, init_attr);
+       if (rc)
+               goto out;
+       if (init_attr->qp_type == IB_QPT_GSI)
+               bnxt_re_adjust_gsi_rq_attr(qp);
 
-               qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
-                                               qp_init_attr->cap.max_recv_wr;
+       /* Setup SQ */
+       bnxt_re_init_sq_attr(qp, init_attr, udata);
+       if (init_attr->qp_type == IB_QPT_GSI)
+               bnxt_re_adjust_gsi_sq_attr(qp, init_attr);
 
-               qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
-               if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
-                       qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+       if (udata) /* This will update DPI and qp_handle */
+               rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
+out:
+       return rc;
+}
+
+static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
+                                    struct bnxt_re_pd *pd)
+{
+       struct bnxt_re_sqp_entries *sqp_tbl = NULL;
+       struct bnxt_re_dev *rdev;
+       struct bnxt_re_qp *sqp;
+       struct bnxt_re_ah *sah;
+       int rc = 0;
+
+       rdev = qp->rdev;
+       /* Create a shadow QP to handle the QP1 traffic */
+       sqp_tbl = kzalloc(sizeof(*sqp_tbl) * BNXT_RE_MAX_GSI_SQP_ENTRIES,
+                         GFP_KERNEL);
+       if (!sqp_tbl)
+               return -ENOMEM;
+       rdev->gsi_ctx.sqp_tbl = sqp_tbl;
+
+       sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res, &qp->qplib_qp);
+       if (!sqp) {
+               rc = -ENODEV;
+               ibdev_err(&rdev->ibdev, "Failed to create Shadow QP for QP1");
+               goto out;
+       }
+       rdev->gsi_ctx.gsi_sqp = sqp;
+
+       sqp->rcq = qp->rcq;
+       sqp->scq = qp->scq;
+       sah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
+                                         &qp->qplib_qp);
+       if (!sah) {
+               bnxt_qplib_destroy_qp(&rdev->qplib_res,
+                                     &sqp->qplib_qp);
+               rc = -ENODEV;
+               ibdev_err(&rdev->ibdev,
+                         "Failed to create AH entry for ShadowQP");
+               goto out;
        }
+       rdev->gsi_ctx.gsi_sah = sah;
 
-       qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+       return 0;
+out:
+       kfree(sqp_tbl);
+       return rc;
+}
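The shadow-QP table is sized as BNXT_RE_MAX_GSI_SQP_ENTRIES elements. An equivalent allocation could use kcalloc(), which checks the n * size multiplication for overflow; this is only an editorial sketch in the context of bnxt_re_create_shadow_gsi() above, not part of the patch:

    sqp_tbl = kcalloc(BNXT_RE_MAX_GSI_SQP_ENTRIES, sizeof(*sqp_tbl),
                      GFP_KERNEL);
    if (!sqp_tbl)
            return -ENOMEM;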
 
-       if (qp_init_attr->qp_type == IB_QPT_GSI &&
-           !(bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))) {
-               /* Allocate 1 more than what's provided */
-               entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
-               qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
-                                               dev_attr->max_qp_wqes + 1);
-               qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
-                                               qp_init_attr->cap.max_send_wr;
-               qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
-               if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
-                       qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
-               qp->qplib_qp.sq.max_sge++;
-               if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
-                       qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
-
-               qp->qplib_qp.rq_hdr_buf_size =
-                                       BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
-
-               qp->qplib_qp.sq_hdr_buf_size =
-                                       BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
-               qp->qplib_qp.dpi = &rdev->dpi_privileged;
-               rc = bnxt_qplib_create_qp1(&rdev->qplib_res, &qp->qplib_qp);
-               if (rc) {
-                       dev_err(rdev_to_dev(rdev), "Failed to create HW QP1");
-                       goto fail;
-               }
-               /* Create a shadow QP to handle the QP1 traffic */
-               rdev->qp1_sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res,
-                                                        &qp->qplib_qp);
-               if (!rdev->qp1_sqp) {
-                       rc = -EINVAL;
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to create Shadow QP for QP1");
-                       goto qp_destroy;
-               }
-               rdev->sqp_ah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
-                                                          &qp->qplib_qp);
-               if (!rdev->sqp_ah) {
-                       bnxt_qplib_destroy_qp(&rdev->qplib_res,
-                                             &rdev->qp1_sqp->qplib_qp);
-                       rc = -EINVAL;
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to create AH entry for ShadowQP");
-                       goto qp_destroy;
-               }
+static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+                                struct ib_qp_init_attr *init_attr)
+{
+       struct bnxt_re_dev *rdev;
+       struct bnxt_qplib_qp *qplqp;
+       int rc = 0;
 
-       } else {
-               /* Allocate 128 + 1 more than what's provided */
-               entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
-                                            BNXT_QPLIB_RESERVED_QP_WRS + 1);
-               qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
-                                               dev_attr->max_qp_wqes +
-                                               BNXT_QPLIB_RESERVED_QP_WRS + 1);
-               qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
 
-               /*
-                * Reserving one slot for Phantom WQE. Application can
-                * post one extra entry in this case. But allowing this to avoid
-                * unexpected Queue full condition
-                */
+       qplqp->rq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+       qplqp->sq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
 
-               qp->qplib_qp.sq.q_full_delta -= 1;
+       rc = bnxt_qplib_create_qp1(&rdev->qplib_res, qplqp);
+       if (rc) {
+               ibdev_err(&rdev->ibdev, "create HW QP1 failed!");
+               goto out;
+       }
 
-               qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
-               qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
-               if (udata) {
-                       rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
-                       if (rc)
-                               goto fail;
-               } else {
-                       qp->qplib_qp.dpi = &rdev->dpi_privileged;
-               }
+       rc = bnxt_re_create_shadow_gsi(qp, pd);
+out:
+       return rc;
+}
+
+static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
+                                  struct ib_qp_init_attr *init_attr,
+                                  struct bnxt_qplib_dev_attr *dev_attr)
+{
+       bool rc = true;
+
+       if (init_attr->cap.max_send_wr > dev_attr->max_qp_wqes ||
+           init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes ||
+           init_attr->cap.max_send_sge > dev_attr->max_qp_sges ||
+           init_attr->cap.max_recv_sge > dev_attr->max_qp_sges ||
+           init_attr->cap.max_inline_data > dev_attr->max_inline_data) {
+               ibdev_err(&rdev->ibdev,
+                         "Create QP failed - max exceeded! 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x",
+                         init_attr->cap.max_send_wr, dev_attr->max_qp_wqes,
+                         init_attr->cap.max_recv_wr, dev_attr->max_qp_wqes,
+                         init_attr->cap.max_send_sge, dev_attr->max_qp_sges,
+                         init_attr->cap.max_recv_sge, dev_attr->max_qp_sges,
+                         init_attr->cap.max_inline_data,
+                         dev_attr->max_inline_data);
+               rc = false;
+       }
+       return rc;
+}
 
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
+                               struct ib_qp_init_attr *qp_init_attr,
+                               struct ib_udata *udata)
+{
+       struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+       struct bnxt_re_dev *rdev = pd->rdev;
+       struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+       struct bnxt_re_qp *qp;
+       int rc;
+
+       rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr);
+       if (!rc) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+       if (!qp) {
+               rc = -ENOMEM;
+               goto exit;
+       }
+       qp->rdev = rdev;
+       rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, udata);
+       if (rc)
+               goto fail;
+
+       if (qp_init_attr->qp_type == IB_QPT_GSI &&
+           !(bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))) {
+               rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr);
+               if (rc == -ENODEV)
+                       goto qp_destroy;
+               if (rc)
+                       goto fail;
+       } else {
                rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
                if (rc) {
-                       dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
+                       ibdev_err(&rdev->ibdev, "Failed to create HW QP");
                        goto free_umem;
                }
+               if (udata) {
+                       struct bnxt_re_qp_resp resp;
+
+                       resp.qpid = qp->qplib_qp.id;
+                       resp.rsvd = 0;
+                       rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+                       if (rc) {
+                               ibdev_err(&rdev->ibdev, "Failed to copy QP udata");
+                               goto qp_destroy;
+                       }
+               }
        }
 
        qp->ib_qp.qp_num = qp->qplib_qp.id;
+       if (qp_init_attr->qp_type == IB_QPT_GSI)
+               rdev->gsi_ctx.gsi_qp = qp;
        spin_lock_init(&qp->sq_lock);
        spin_lock_init(&qp->rq_lock);
-
-       if (udata) {
-               struct bnxt_re_qp_resp resp;
-
-               resp.qpid = qp->ib_qp.qp_num;
-               resp.rsvd = 0;
-               rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
-               if (rc) {
-                       dev_err(rdev_to_dev(rdev), "Failed to copy QP udata");
-                       goto qp_destroy;
-               }
-       }
        INIT_LIST_HEAD(&qp->list);
        mutex_lock(&rdev->qp_lock);
        list_add_tail(&qp->list, &rdev->qp_list);
-       atomic_inc(&rdev->qp_count);
        mutex_unlock(&rdev->qp_lock);
+       atomic_inc(&rdev->qp_count);
 
        return &qp->ib_qp;
 qp_destroy:
@@ -1189,6 +1396,7 @@ free_umem:
        ib_umem_release(qp->sumem);
 fail:
        kfree(qp);
+exit:
        return ERR_PTR(rc);
 }
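Taken together, the QP-creation path visible in this hunk now decomposes roughly as follows:

    /*
     * bnxt_re_create_qp()
     *   bnxt_re_test_qp_limits()             - reject caps beyond device limits
     *   bnxt_re_init_qp_attr()
     *     bnxt_re_init_qp_type()             - IB -> firmware QP type; GSI gets a
     *                                          dedicated type on Gen-P5 chips
     *     bnxt_re_init_rq_attr()  [+ bnxt_re_adjust_gsi_rq_attr() for QP1]
     *     bnxt_re_init_sq_attr()  [+ bnxt_re_adjust_gsi_sq_attr() for QP1]
     *     bnxt_re_init_user_qp()             - udata path: user DPI and qp_handle
     *   bnxt_re_create_gsi_qp()              - QP1 on pre-Gen-P5 chips:
     *     bnxt_qplib_create_qp1() + bnxt_re_create_shadow_gsi()
     *   bnxt_qplib_create_qp()               - all other QPs
     */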
 
@@ -1311,9 +1519,11 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
                return PTR_ERR(umem);
 
        srq->umem = umem;
-       qplib_srq->sg_info.sglist = umem->sg_head.sgl;
+       qplib_srq->sg_info.sghead = umem->sg_head.sgl;
        qplib_srq->sg_info.npages = ib_umem_num_pages(umem);
        qplib_srq->sg_info.nmap = umem->nmap;
+       qplib_srq->sg_info.pgsize = PAGE_SIZE;
+       qplib_srq->sg_info.pgshft = PAGE_SHIFT;
        qplib_srq->srq_handle = ureq.srq_handle;
        qplib_srq->dpi = &cntx->dpi;
 
@@ -1334,7 +1544,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
        int rc, entries;
 
        if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
-               dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded");
+               ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded");
                rc = -EINVAL;
                goto exit;
        }
@@ -1369,7 +1579,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
 
        rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Create HW SRQ failed!");
+               ibdev_err(&rdev->ibdev, "Create HW SRQ failed!");
                goto fail;
        }
 
@@ -1379,7 +1589,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
                resp.srqid = srq->qplib_srq.id;
                rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
                if (rc) {
-                       dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
+                       ibdev_err(&rdev->ibdev, "SRQ copy to udata failed!");
                        bnxt_qplib_destroy_srq(&rdev->qplib_res,
                                               &srq->qplib_srq);
                        goto fail;
@@ -1418,7 +1628,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
                srq->qplib_srq.threshold = srq_attr->srq_limit;
                rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq);
                if (rc) {
-                       dev_err(rdev_to_dev(rdev), "Modify HW SRQ failed!");
+                       ibdev_err(&rdev->ibdev, "Modify HW SRQ failed!");
                        return rc;
                }
                /* On success, update the shadow */
@@ -1426,8 +1636,8 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
                /* No need to Build and send response back to udata */
                break;
        default:
-               dev_err(rdev_to_dev(rdev),
-                       "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
+               ibdev_err(&rdev->ibdev,
+                         "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
                return -EINVAL;
        }
        return 0;
@@ -1445,7 +1655,7 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
        tsrq.qplib_srq.id = srq->qplib_srq.id;
        rc = bnxt_qplib_query_srq(&rdev->qplib_res, &tsrq.qplib_srq);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Query HW SRQ failed!");
+               ibdev_err(&rdev->ibdev, "Query HW SRQ failed!");
                return rc;
        }
        srq_attr->max_wr = srq->qplib_srq.max_wqe;
@@ -1487,7 +1697,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
                                    struct bnxt_re_qp *qp1_qp,
                                    int qp_attr_mask)
 {
-       struct bnxt_re_qp *qp = rdev->qp1_sqp;
+       struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp;
        int rc = 0;
 
        if (qp_attr_mask & IB_QP_STATE) {
@@ -1511,8 +1721,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
 
        rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
        if (rc)
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to modify Shadow QP for QP1");
+               ibdev_err(&rdev->ibdev, "Failed to modify Shadow QP for QP1");
        return rc;
 }
 
@@ -1533,15 +1742,15 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                new_qp_state = qp_attr->qp_state;
                if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
                                        ib_qp->qp_type, qp_attr_mask)) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Invalid attribute mask: %#x specified ",
-                               qp_attr_mask);
-                       dev_err(rdev_to_dev(rdev),
-                               "for qpn: %#x type: %#x",
-                               ib_qp->qp_num, ib_qp->qp_type);
-                       dev_err(rdev_to_dev(rdev),
-                               "curr_qp_state=0x%x, new_qp_state=0x%x\n",
-                               curr_qp_state, new_qp_state);
+                       ibdev_err(&rdev->ibdev,
+                                 "Invalid attribute mask: %#x specified ",
+                                 qp_attr_mask);
+                       ibdev_err(&rdev->ibdev,
+                                 "for qpn: %#x type: %#x",
+                                 ib_qp->qp_num, ib_qp->qp_type);
+                       ibdev_err(&rdev->ibdev,
+                                 "curr_qp_state=0x%x, new_qp_state=0x%x\n",
+                                 curr_qp_state, new_qp_state);
                        return -EINVAL;
                }
                qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
@@ -1549,18 +1758,16 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 
                if (!qp->sumem &&
                    qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
-                       dev_dbg(rdev_to_dev(rdev),
-                               "Move QP = %p to flush list\n",
-                               qp);
+                       ibdev_dbg(&rdev->ibdev,
+                                 "Move QP = %p to flush list\n", qp);
                        flags = bnxt_re_lock_cqs(qp);
                        bnxt_qplib_add_flush_qp(&qp->qplib_qp);
                        bnxt_re_unlock_cqs(qp, flags);
                }
                if (!qp->sumem &&
                    qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
-                       dev_dbg(rdev_to_dev(rdev),
-                               "Move QP = %p out of flush list\n",
-                               qp);
+                       ibdev_dbg(&rdev->ibdev,
+                                 "Move QP = %p out of flush list\n", qp);
                        flags = bnxt_re_lock_cqs(qp);
                        bnxt_qplib_clean_qp(&qp->qplib_qp);
                        bnxt_re_unlock_cqs(qp, flags);
@@ -1593,6 +1800,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                const struct ib_global_route *grh =
                        rdma_ah_read_grh(&qp_attr->ah_attr);
                const struct ib_gid_attr *sgid_attr;
+               struct bnxt_re_gid_ctx *ctx;
 
                qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
                                     CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
@@ -1604,11 +1812,12 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                memcpy(qp->qplib_qp.ah.dgid.data, grh->dgid.raw,
                       sizeof(qp->qplib_qp.ah.dgid.data));
                qp->qplib_qp.ah.flow_label = grh->flow_label;
-               /* If RoCE V2 is enabled, stack will have two entries for
-                * each GID entry. Avoiding this duplicte entry in HW. Dividing
-                * the GID index by 2 for RoCE V2
+               sgid_attr = grh->sgid_attr;
+               /* Get the HW context of the GID. The reference
+                * on the GID table entry is already held by the caller.
                 */
-               qp->qplib_qp.ah.sgid_index = grh->sgid_index / 2;
+               ctx = rdma_read_gid_hw_context(sgid_attr);
+               qp->qplib_qp.ah.sgid_index = ctx->idx;
                qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
                qp->qplib_qp.ah.hop_limit = grh->hop_limit;
                qp->qplib_qp.ah.traffic_class = grh->traffic_class;
@@ -1616,7 +1825,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                ether_addr_copy(qp->qplib_qp.ah.dmac,
                                qp_attr->ah_attr.roce.dmac);
 
-               sgid_attr = qp_attr->ah_attr.grh.sgid_attr;
                rc = rdma_read_gid_l2_fields(sgid_attr, NULL,
                                             &qp->qplib_qp.smac[0]);
                if (rc)
@@ -1690,10 +1898,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
        if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
                if (qp_attr->max_dest_rd_atomic >
                    dev_attr->max_qp_init_rd_atom) {
-                       dev_err(rdev_to_dev(rdev),
-                               "max_dest_rd_atomic requested%d is > dev_max%d",
-                               qp_attr->max_dest_rd_atomic,
-                               dev_attr->max_qp_init_rd_atom);
+                       ibdev_err(&rdev->ibdev,
+                                 "max_dest_rd_atomic requested%d is > dev_max%d",
+                                 qp_attr->max_dest_rd_atomic,
+                                 dev_attr->max_qp_init_rd_atom);
                        return -EINVAL;
                }
 
@@ -1714,8 +1922,8 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                    (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) ||
                    (qp_attr->cap.max_inline_data >=
                                                dev_attr->max_inline_data)) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Create QP failed - max exceeded");
+                       ibdev_err(&rdev->ibdev,
+                                 "Create QP failed - max exceeded");
                        return -EINVAL;
                }
                entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
@@ -1748,10 +1956,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
        }
        rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to modify HW QP");
+               ibdev_err(&rdev->ibdev, "Failed to modify HW QP");
                return rc;
        }
-       if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp)
+       if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp)
                rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask);
        return rc;
 }
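The sgid_index is now taken from the per-GID hardware context instead of halving the table index. For orientation only: in the rdma core, that context is whatever the driver's add_gid() callback stored for the entry, which rdma_read_gid_hw_context() later returns for the same attribute. A hedged sketch of such a callback (illustrative, not part of this patch):

    static int example_add_gid(const struct ib_gid_attr *attr, void **context)
    {
            struct bnxt_re_gid_ctx *ctx;    /* driver-private; holds the HW table index */

            ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
            if (!ctx)
                    return -ENOMEM;
            ctx->idx = 0;                   /* would be the index returned by firmware */
            *context = ctx;                 /* core keeps it with the GID entry */
            return 0;
    }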
@@ -1773,7 +1981,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 
        rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
+               ibdev_err(&rdev->ibdev, "Failed to query HW QP");
                goto out;
        }
        qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
@@ -1978,7 +2186,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
                wqe->num_sge++;
 
        } else {
-               dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!");
+               ibdev_err(&qp->rdev->ibdev, "QP1 buffer is empty!");
                rc = -ENOMEM;
        }
        return rc;
@@ -1995,9 +2203,12 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
                                            struct bnxt_qplib_swqe *wqe,
                                            int payload_size)
 {
+       struct bnxt_re_sqp_entries *sqp_entry;
        struct bnxt_qplib_sge ref, sge;
+       struct bnxt_re_dev *rdev;
        u32 rq_prod_index;
-       struct bnxt_re_sqp_entries *sqp_entry;
+
+       rdev = qp->rdev;
 
        rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp);
 
@@ -2012,7 +2223,7 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
        ref.lkey = wqe->sg_list[0].lkey;
        ref.size = wqe->sg_list[0].size;
 
-       sqp_entry = &qp->rdev->sqp_tbl[rq_prod_index];
+       sqp_entry = &rdev->gsi_ctx.sqp_tbl[rq_prod_index];
 
        /* SGE 1 */
        wqe->sg_list[0].addr = sge.addr;
@@ -2164,7 +2375,7 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
        wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0];
        wqe->frmr.page_list = mr->pages;
        wqe->frmr.page_list_len = mr->npages;
-       wqe->frmr.levels = qplib_frpl->hwq.level + 1;
+       wqe->frmr.levels = qplib_frpl->hwq.level;
        wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
 
        /* Need unconditional fence for reg_mr
@@ -2211,8 +2422,8 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
 
                if ((sge_len + wqe->inline_len) >
                    BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Inline data size requested > supported value");
+                       ibdev_err(&rdev->ibdev,
+                                 "Inline data size requested > supported value");
                        return -EINVAL;
                }
                sge_len = wr->sg_list[i].length;
@@ -2259,21 +2470,18 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
                                       struct bnxt_re_qp *qp,
                                       const struct ib_send_wr *wr)
 {
-       struct bnxt_qplib_swqe wqe;
        int rc = 0, payload_sz = 0;
        unsigned long flags;
 
        spin_lock_irqsave(&qp->sq_lock, flags);
-       memset(&wqe, 0, sizeof(wqe));
        while (wr) {
-               /* House keeping */
-               memset(&wqe, 0, sizeof(wqe));
+               struct bnxt_qplib_swqe wqe = {};
 
                /* Common */
                wqe.num_sge = wr->num_sge;
                if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Limit exceeded for Send SGEs");
+                       ibdev_err(&rdev->ibdev,
+                                 "Limit exceeded for Send SGEs");
                        rc = -EINVAL;
                        goto bad;
                }
@@ -2292,9 +2500,9 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
                        rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
 bad:
                if (rc) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Post send failed opcode = %#x rc = %d",
-                               wr->opcode, rc);
+                       ibdev_err(&rdev->ibdev,
+                                 "Post send failed opcode = %#x rc = %d",
+                                 wr->opcode, rc);
                        break;
                }
                wr = wr->next;
@@ -2321,8 +2529,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
                /* Common */
                wqe.num_sge = wr->num_sge;
                if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
-                       dev_err(rdev_to_dev(qp->rdev),
-                               "Limit exceeded for Send SGEs");
+                       ibdev_err(&qp->rdev->ibdev,
+                                 "Limit exceeded for Send SGEs");
                        rc = -EINVAL;
                        goto bad;
                }
@@ -2367,8 +2575,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
                        rc = bnxt_re_build_atomic_wqe(wr, &wqe);
                        break;
                case IB_WR_RDMA_READ_WITH_INV:
-                       dev_err(rdev_to_dev(qp->rdev),
-                               "RDMA Read with Invalidate is not supported");
+                       ibdev_err(&qp->rdev->ibdev,
+                                 "RDMA Read with Invalidate is not supported");
                        rc = -EINVAL;
                        goto bad;
                case IB_WR_LOCAL_INV:
@@ -2379,8 +2587,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
                        break;
                default:
                        /* Unsupported WRs */
-                       dev_err(rdev_to_dev(qp->rdev),
-                               "WR (%#x) is not supported", wr->opcode);
+                       ibdev_err(&qp->rdev->ibdev,
+                                 "WR (%#x) is not supported", wr->opcode);
                        rc = -EINVAL;
                        goto bad;
                }
@@ -2388,9 +2596,9 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
                        rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
 bad:
                if (rc) {
-                       dev_err(rdev_to_dev(qp->rdev),
-                               "post_send failed op:%#x qps = %#x rc = %d\n",
-                               wr->opcode, qp->qplib_qp.state, rc);
+                       ibdev_err(&qp->rdev->ibdev,
+                                 "post_send failed op:%#x qps = %#x rc = %d\n",
+                                 wr->opcode, qp->qplib_qp.state, rc);
                        *bad_wr = wr;
                        break;
                }
@@ -2418,8 +2626,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
                /* Common */
                wqe.num_sge = wr->num_sge;
                if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Limit exceeded for Receive SGEs");
+                       ibdev_err(&rdev->ibdev,
+                                 "Limit exceeded for Receive SGEs");
                        rc = -EINVAL;
                        break;
                }
@@ -2455,8 +2663,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
                /* Common */
                wqe.num_sge = wr->num_sge;
                if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
-                       dev_err(rdev_to_dev(qp->rdev),
-                               "Limit exceeded for Receive SGEs");
+                       ibdev_err(&qp->rdev->ibdev,
+                                 "Limit exceeded for Receive SGEs");
                        rc = -EINVAL;
                        *bad_wr = wr;
                        break;
@@ -2527,7 +2735,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
        /* Validate CQ fields */
        if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
-               dev_err(rdev_to_dev(rdev), "Failed to create CQ -max exceeded");
+               ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded");
                return -EINVAL;
        }
 
@@ -2538,6 +2746,8 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
        if (entries > dev_attr->max_cq_wqes + 1)
                entries = dev_attr->max_cq_wqes + 1;
 
+       cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+       cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
        if (udata) {
                struct bnxt_re_cq_req req;
                struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
@@ -2554,7 +2764,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                        rc = PTR_ERR(cq->umem);
                        goto fail;
                }
-               cq->qplib_cq.sg_info.sglist = cq->umem->sg_head.sgl;
+               cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl;
                cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem);
                cq->qplib_cq.sg_info.nmap = cq->umem->nmap;
                cq->qplib_cq.dpi = &uctx->dpi;
@@ -2581,7 +2791,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
        rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to create HW CQ");
+               ibdev_err(&rdev->ibdev, "Failed to create HW CQ");
                goto fail;
        }
 
@@ -2601,7 +2811,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                resp.rsvd = 0;
                rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
                if (rc) {
-                       dev_err(rdev_to_dev(rdev), "Failed to copy CQ udata");
+                       ibdev_err(&rdev->ibdev, "Failed to copy CQ udata");
                        bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
                        goto c2fail;
                }
@@ -2832,12 +3042,13 @@ static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev,
        return rc;
 }
 
-static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
+static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
                                         struct bnxt_qplib_cqe *cqe)
 {
-       struct bnxt_re_dev *rdev = qp1_qp->rdev;
+       struct bnxt_re_dev *rdev = gsi_qp->rdev;
        struct bnxt_re_sqp_entries *sqp_entry = NULL;
-       struct bnxt_re_qp *qp = rdev->qp1_sqp;
+       struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+       struct bnxt_re_ah *gsi_sah;
        struct ib_send_wr *swr;
        struct ib_ud_wr udwr;
        struct ib_recv_wr rwr;
@@ -2860,26 +3071,26 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
        swr = &udwr.wr;
        tbl_idx = cqe->wr_id;
 
-       rq_hdr_buf = qp1_qp->qplib_qp.rq_hdr_buf +
-                       (tbl_idx * qp1_qp->qplib_qp.rq_hdr_buf_size);
-       rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp1_qp->qplib_qp,
+       rq_hdr_buf = gsi_qp->qplib_qp.rq_hdr_buf +
+                       (tbl_idx * gsi_qp->qplib_qp.rq_hdr_buf_size);
+       rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp,
                                                          tbl_idx);
 
        /* Shadow QP header buffer */
-       shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp->qplib_qp,
+       shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_sqp->qplib_qp,
                                                            tbl_idx);
-       sqp_entry = &rdev->sqp_tbl[tbl_idx];
+       sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx];
 
        /* Store this cqe */
        memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe));
-       sqp_entry->qp1_qp = qp1_qp;
+       sqp_entry->qp1_qp = gsi_qp;
 
        /* Find packet type from the cqe */
 
        pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags,
                                             cqe->raweth_qp1_flags2);
        if (pkt_type < 0) {
-               dev_err(rdev_to_dev(rdev), "Invalid packet\n");
+               ibdev_err(&rdev->ibdev, "Invalid packet\n");
                return -EINVAL;
        }
 
@@ -2926,10 +3137,10 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
        rwr.wr_id = tbl_idx;
        rwr.next = NULL;
 
-       rc = bnxt_re_post_recv_shadow_qp(rdev, qp, &rwr);
+       rc = bnxt_re_post_recv_shadow_qp(rdev, gsi_sqp, &rwr);
        if (rc) {
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to post Rx buffers to shadow QP");
+               ibdev_err(&rdev->ibdev,
+                         "Failed to post Rx buffers to shadow QP");
                return -ENOMEM;
        }
 
@@ -2938,13 +3149,13 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
        swr->wr_id = tbl_idx;
        swr->opcode = IB_WR_SEND;
        swr->next = NULL;
-
-       udwr.ah = &rdev->sqp_ah->ib_ah;
-       udwr.remote_qpn = rdev->qp1_sqp->qplib_qp.id;
-       udwr.remote_qkey = rdev->qp1_sqp->qplib_qp.qkey;
+       gsi_sah = rdev->gsi_ctx.gsi_sah;
+       udwr.ah = &gsi_sah->ib_ah;
+       udwr.remote_qpn = gsi_sqp->qplib_qp.id;
+       udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
 
        /* post data received  in the send queue */
-       rc = bnxt_re_post_send_shadow_qp(rdev, qp, swr);
+       rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
 
        return 0;
 }
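In summary, the raw QP1 receive path in this hunk relays each packet through the shadow GSI QP:

    /*
     * bnxt_re_process_raw_qp_pkt_rx() on gsi_qp (QP1):
     *   1. stash the CQE and originating QP in gsi_ctx.sqp_tbl[wr_id]
     *   2. classify the packet (bnxt_re_check_packet_type)
     *   3. post a receive buffer to the shadow QP (bnxt_re_post_recv_shadow_qp)
     *   4. re-post the payload on the shadow QP's send queue, addressed via
     *      gsi_ctx.gsi_sah to the shadow QP itself (bnxt_re_post_send_shadow_qp)
     */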
@@ -2998,12 +3209,12 @@ static void bnxt_re_process_res_rc_wc(struct ib_wc *wc,
                wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 }
 
-static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
+static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp,
                                             struct ib_wc *wc,
                                             struct bnxt_qplib_cqe *cqe)
 {
-       struct bnxt_re_dev *rdev = qp->rdev;
-       struct bnxt_re_qp *qp1_qp = NULL;
+       struct bnxt_re_dev *rdev = gsi_sqp->rdev;
+       struct bnxt_re_qp *gsi_qp = NULL;
        struct bnxt_qplib_cqe *orig_cqe = NULL;
        struct bnxt_re_sqp_entries *sqp_entry = NULL;
        int nw_type;
@@ -3013,13 +3224,13 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
 
        tbl_idx = cqe->wr_id;
 
-       sqp_entry = &rdev->sqp_tbl[tbl_idx];
-       qp1_qp = sqp_entry->qp1_qp;
+       sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx];
+       gsi_qp = sqp_entry->qp1_qp;
        orig_cqe = &sqp_entry->cqe;
 
        wc->wr_id = sqp_entry->wrid;
        wc->byte_len = orig_cqe->length;
-       wc->qp = &qp1_qp->ib_qp;
+       wc->qp = &gsi_qp->ib_qp;
 
        wc->ex.imm_data = orig_cqe->immdata;
        wc->src_qp = orig_cqe->src_qp;
@@ -3084,11 +3295,11 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp)
        rc = bnxt_re_bind_fence_mw(lib_qp);
        if (!rc) {
                lib_qp->sq.phantom_wqe_cnt++;
-               dev_dbg(&lib_qp->sq.hwq.pdev->dev,
-                       "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
-                       lib_qp->id, lib_qp->sq.hwq.prod,
-                       HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
-                       lib_qp->sq.phantom_wqe_cnt);
+               ibdev_dbg(&qp->rdev->ibdev,
+                         "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+                         lib_qp->id, lib_qp->sq.hwq.prod,
+                         HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+                         lib_qp->sq.phantom_wqe_cnt);
        }
 
        spin_unlock_irqrestore(&qp->sq_lock, flags);
@@ -3098,7 +3309,7 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp)
 int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 {
        struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
-       struct bnxt_re_qp *qp;
+       struct bnxt_re_qp *qp, *sh_qp;
        struct bnxt_qplib_cqe *cqe;
        int i, ncqe, budget;
        struct bnxt_qplib_q *sq;
@@ -3111,7 +3322,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
        budget = min_t(u32, num_entries, cq->max_cql);
        num_entries = budget;
        if (!cq->cql) {
-               dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use");
+               ibdev_err(&cq->rdev->ibdev, "POLL CQ : no CQL to use");
                goto exit;
        }
        cqe = &cq->cql[0];
@@ -3124,8 +3335,8 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
                                qp = container_of(lib_qp,
                                                  struct bnxt_re_qp, qplib_qp);
                                if (send_phantom_wqe(qp) == -ENOMEM)
-                                       dev_err(rdev_to_dev(cq->rdev),
-                                               "Phantom failed! Scheduled to send again\n");
+                                       ibdev_err(&cq->rdev->ibdev,
+                                                 "Phantom failed! Scheduled to send again\n");
                                else
                                        sq->send_phantom = false;
                        }
@@ -3149,8 +3360,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
                                 (unsigned long)(cqe->qp_handle),
                                 struct bnxt_re_qp, qplib_qp);
                        if (!qp) {
-                               dev_err(rdev_to_dev(cq->rdev),
-                                       "POLL CQ : bad QP handle");
+                               ibdev_err(&cq->rdev->ibdev, "POLL CQ : bad QP handle");
                                continue;
                        }
                        wc->qp = &qp->ib_qp;
@@ -3162,8 +3372,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 
                        switch (cqe->opcode) {
                        case CQ_BASE_CQE_TYPE_REQ:
-                               if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
-                                   qp->rdev->qp1_sqp->qplib_qp.id) {
+                               sh_qp = qp->rdev->gsi_ctx.gsi_sqp;
+                               if (sh_qp &&
+                                   qp->qplib_qp.id == sh_qp->qplib_qp.id) {
                                        /* Handle this completion with
                                         * the stored completion
                                         */
@@ -3189,7 +3400,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
                                 * stored in the table
                                 */
                                tbl_idx = cqe->wr_id;
-                               sqp_entry = &cq->rdev->sqp_tbl[tbl_idx];
+                               sqp_entry = &cq->rdev->gsi_ctx.sqp_tbl[tbl_idx];
                                wc->wr_id = sqp_entry->wrid;
                                bnxt_re_process_res_rawqp1_wc(wc, cqe);
                                break;
@@ -3197,8 +3408,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
                                bnxt_re_process_res_rc_wc(wc, cqe);
                                break;
                        case CQ_BASE_CQE_TYPE_RES_UD:
-                               if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
-                                   qp->rdev->qp1_sqp->qplib_qp.id) {
+                               sh_qp = qp->rdev->gsi_ctx.gsi_sqp;
+                               if (sh_qp &&
+                                   qp->qplib_qp.id == sh_qp->qplib_qp.id) {
                                        /* Handle this completion with
                                         * the stored completion
                                         */
@@ -3213,9 +3425,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
                                bnxt_re_process_res_ud_wc(qp, wc, cqe);
                                break;
                        default:
-                               dev_err(rdev_to_dev(cq->rdev),
-                                       "POLL CQ : type 0x%x not handled",
-                                       cqe->opcode);
+                               ibdev_err(&cq->rdev->ibdev,
+                                         "POLL CQ : type 0x%x not handled",
+                                         cqe->opcode);
                                continue;
                        }
                        wc++;
@@ -3308,7 +3520,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
 
        rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+               ibdev_err(&rdev->ibdev, "Dereg MR failed: %#x\n", rc);
                return rc;
        }
 
@@ -3355,7 +3567,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
        int rc;
 
        if (type != IB_MR_TYPE_MEM_REG) {
-               dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type);
+               ibdev_dbg(&rdev->ibdev, "MR type 0x%x not supported", type);
                return ERR_PTR(-EINVAL);
        }
        if (max_num_sg > MAX_PBL_LVL_1_PGS)
@@ -3385,8 +3597,8 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
        rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
                                                 &mr->qplib_frpl, max_num_sg);
        if (rc) {
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to allocate HW FR page list");
+               ibdev_err(&rdev->ibdev,
+                         "Failed to allocate HW FR page list");
                goto fail_mr;
        }
 
@@ -3421,7 +3633,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
                               CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
        rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
+               ibdev_err(&rdev->ibdev, "Allocate MW failed!");
                goto fail;
        }
        mw->ib_mw.rkey = mw->qplib_mw.rkey;
@@ -3442,7 +3654,7 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
 
        rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+               ibdev_err(&rdev->ibdev, "Free MW failed: %#x\n", rc);
                return rc;
        }
 
@@ -3494,8 +3706,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
        int umem_pgs, page_shift, rc;
 
        if (length > BNXT_RE_MAX_MR_SIZE) {
-               dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%lld\n",
-                       length, BNXT_RE_MAX_MR_SIZE);
+               ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
+                         length, BNXT_RE_MAX_MR_SIZE);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -3510,7 +3722,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 
        rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
+               ibdev_err(&rdev->ibdev, "Failed to allocate MR");
                goto free_mr;
        }
        /* The fixed portion of the rkey is the same as the lkey */
@@ -3518,7 +3730,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 
        umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
        if (IS_ERR(umem)) {
-               dev_err(rdev_to_dev(rdev), "Failed to get umem");
+               ibdev_err(&rdev->ibdev, "Failed to get umem");
                rc = -EFAULT;
                goto free_mrw;
        }
@@ -3527,7 +3739,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
        mr->qplib_mr.va = virt_addr;
        umem_pgs = ib_umem_page_count(umem);
        if (!umem_pgs) {
-               dev_err(rdev_to_dev(rdev), "umem is invalid!");
+               ibdev_err(&rdev->ibdev, "umem is invalid!");
                rc = -EINVAL;
                goto free_umem;
        }
@@ -3544,15 +3756,15 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
                                virt_addr));
 
        if (!bnxt_re_page_size_ok(page_shift)) {
-               dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
+               ibdev_err(&rdev->ibdev, "umem page size unsupported!");
                rc = -EFAULT;
                goto fail;
        }
 
        if (page_shift == BNXT_RE_PAGE_SHIFT_4K &&
            length > BNXT_RE_MAX_MR_SIZE_LOW) {
-               dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu",
-                       length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
+               ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu",
+                         length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
                rc = -EINVAL;
                goto fail;
        }
@@ -3562,7 +3774,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
        rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
                               umem_pgs, false, 1 << page_shift);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to register user MR");
+               ibdev_err(&rdev->ibdev, "Failed to register user MR");
                goto fail;
        }
 
@@ -3595,12 +3807,11 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
        u32 chip_met_rev_num = 0;
        int rc;
 
-       dev_dbg(rdev_to_dev(rdev), "ABI version requested %u",
-               ibdev->ops.uverbs_abi_ver);
+       ibdev_dbg(ibdev, "ABI version requested %u", ibdev->ops.uverbs_abi_ver);
 
        if (ibdev->ops.uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
-               dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
-                       BNXT_RE_ABI_VERSION);
+               ibdev_dbg(ibdev, " is different from the device %d ",
+                         BNXT_RE_ABI_VERSION);
                return -EPERM;
        }
 
@@ -3614,10 +3825,10 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
        spin_lock_init(&uctx->sh_lock);
 
        resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX;
-       chip_met_rev_num = rdev->chip_ctx.chip_num;
-       chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) <<
+       chip_met_rev_num = rdev->chip_ctx->chip_num;
+       chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_rev & 0xFF) <<
                             BNXT_RE_CHIP_ID0_CHIP_REV_SFT;
-       chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) <<
+       chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_metal & 0xFF) <<
                             BNXT_RE_CHIP_ID0_CHIP_MET_SFT;
        resp.chip_id0 = chip_met_rev_num;
        /* Future extension of chip info */
@@ -3632,7 +3843,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
 
        rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to copy user context");
+               ibdev_err(ibdev, "Failed to copy user context");
                rc = -EFAULT;
                goto cfail;
        }
@@ -3682,15 +3893,14 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
                if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                       PAGE_SIZE, vma->vm_page_prot)) {
-                       dev_err(rdev_to_dev(rdev), "Failed to map DPI");
+                       ibdev_err(&rdev->ibdev, "Failed to map DPI");
                        return -EAGAIN;
                }
        } else {
                pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT;
                if (remap_pfn_range(vma, vma->vm_start,
                                    pfn, PAGE_SIZE, vma->vm_page_prot)) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to map shared page");
+                       ibdev_err(&rdev->ibdev, "Failed to map shared page");
                        return -EAGAIN;
                }
        }
index 793c972..b12fbc8 100644
@@ -78,26 +78,43 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
 /* Mutex to protect the list of bnxt_re devices added */
 static DEFINE_MUTEX(bnxt_re_dev_lock);
 static struct workqueue_struct *bnxt_re_wq;
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev);
+static void bnxt_re_dealloc_driver(struct ib_device *ib_dev);
+static void bnxt_re_stop_irq(void *handle);
 
 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
 {
+       struct bnxt_qplib_chip_ctx *chip_ctx;
+
+       if (!rdev->chip_ctx)
+               return;
+       chip_ctx = rdev->chip_ctx;
+       rdev->chip_ctx = NULL;
        rdev->rcfw.res = NULL;
        rdev->qplib_res.cctx = NULL;
+       rdev->qplib_res.pdev = NULL;
+       rdev->qplib_res.netdev = NULL;
+       kfree(chip_ctx);
 }
 
 static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
 {
+       struct bnxt_qplib_chip_ctx *chip_ctx;
        struct bnxt_en_dev *en_dev;
        struct bnxt *bp;
 
        en_dev = rdev->en_dev;
        bp = netdev_priv(en_dev->net);
 
-       rdev->chip_ctx.chip_num = bp->chip_num;
+       chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
+       if (!chip_ctx)
+               return -ENOMEM;
+       chip_ctx->chip_num = bp->chip_num;
+
+       rdev->chip_ctx = chip_ctx;
        /* rest members to follow eventually */
 
-       rdev->qplib_res.cctx = &rdev->chip_ctx;
+       rdev->qplib_res.cctx = rdev->chip_ctx;
        rdev->rcfw.res = &rdev->qplib_res;
 
        return 0;
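
The two hunks above convert rdev->chip_ctx from an embedded member into a kzalloc()'d pointer, which is why every later "&rdev->chip_ctx" in this series becomes "rdev->chip_ctx". Below is a minimal, self-contained sketch of the same allocate/detach/free pattern; demo_rdev and demo_chip_ctx are simplified stand-ins for the driver structures, trimmed to the one field these hunks show.

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_chip_ctx {			/* stand-in for bnxt_qplib_chip_ctx */
	u16 chip_num;
};

struct demo_rdev {			/* stand-in for bnxt_re_dev */
	struct demo_chip_ctx *chip_ctx;
};

static int demo_setup_chip_ctx(struct demo_rdev *rdev, u16 chip_num)
{
	struct demo_chip_ctx *chip_ctx;

	chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
	if (!chip_ctx)
		return -ENOMEM;

	chip_ctx->chip_num = chip_num;
	rdev->chip_ctx = chip_ctx;
	return 0;
}

static void demo_destroy_chip_ctx(struct demo_rdev *rdev)
{
	struct demo_chip_ctx *chip_ctx;

	if (!rdev->chip_ctx)
		return;
	/* Detach the pointer before freeing, mirroring the hunk above. */
	chip_ctx = rdev->chip_ctx;
	rdev->chip_ctx = NULL;
	kfree(chip_ctx);
}
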
@@ -136,9 +153,9 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
        ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
                                attr->max_srq);
        ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
-       if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
+       if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
                for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
-                       rdev->qplib_ctx.tqm_count[i] =
+                       rdev->qplib_ctx.tqm_ctx.qcount[i] =
                        rdev->dev_attr.tqm_alloc_reqs[i];
 }
 
@@ -185,7 +202,7 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
        memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
        bnxt_re_limit_pf_res(rdev);
 
-       num_vfs =  bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
+       num_vfs =  bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
                        BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
        if (num_vfs)
                bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
@@ -208,7 +225,7 @@ static void bnxt_re_sriov_config(void *p, int num_vfs)
                return;
 
        rdev->num_vfs = num_vfs;
-       if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) {
+       if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
                bnxt_re_set_resource_limits(rdev);
                bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
                                              &rdev->qplib_ctx);
@@ -221,8 +238,10 @@ static void bnxt_re_shutdown(void *p)
 
        if (!rdev)
                return;
-
-       bnxt_re_ib_unreg(rdev);
+       ASSERT_RTNL();
+       /* Release the MSIx vectors before queuing unregister */
+       bnxt_re_stop_irq(rdev);
+       ib_unregister_device_queued(&rdev->ibdev);
 }
 
 static void bnxt_re_stop_irq(void *handle)
@@ -254,7 +273,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
                 * to f/w will timeout and that will set the
                 * timeout bit.
                 */
-               dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n");
+               ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
                return;
        }
 
@@ -271,8 +290,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
                rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
                                             msix_ent[indx].vector, false);
                if (rc)
-                       dev_warn(rdev_to_dev(rdev),
-                                "Failed to reinit NQ index %d\n", indx - 1);
+                       ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
+                                  indx - 1);
        }
 }
 
@@ -358,9 +377,9 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
                goto done;
        }
        if (num_msix_got != num_msix_want) {
-               dev_warn(rdev_to_dev(rdev),
-                        "Requested %d MSI-X vectors, got %d\n",
-                        num_msix_want, num_msix_got);
+               ibdev_warn(&rdev->ibdev,
+                          "Requested %d MSI-X vectors, got %d\n",
+                          num_msix_want, num_msix_got);
        }
        rdev->num_msix = num_msix_got;
 done:
@@ -407,14 +426,14 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc)
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to free HW ring:%d :%#x", req.ring_id, rc);
+               ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
+                         req.ring_id, rc);
        return rc;
 }
 
-static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
-                                 int pages, int type, u32 ring_mask,
-                                 u32 map_index, u16 *fw_ring_id)
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
+                                 struct bnxt_re_ring_attr *ring_attr,
+                                 u16 *fw_ring_id)
 {
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct hwrm_ring_alloc_input req = {0};
@@ -428,18 +447,18 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
        memset(&fw_msg, 0, sizeof(fw_msg));
        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
        req.enables = 0;
-       req.page_tbl_addr =  cpu_to_le64(dma_arr[0]);
-       if (pages > 1) {
+       req.page_tbl_addr =  cpu_to_le64(ring_attr->dma_arr[0]);
+       if (ring_attr->pages > 1) {
                /* Page size is in log2 units */
                req.page_size = BNXT_PAGE_SHIFT;
                req.page_tbl_depth = 1;
        }
        req.fbo = 0;
        /* Association of ring index with doorbell index and MSIX number */
-       req.logical_id = cpu_to_le16(map_index);
-       req.length = cpu_to_le32(ring_mask + 1);
-       req.ring_type = type;
-       req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+       req.logical_id = cpu_to_le16(ring_attr->lrid);
+       req.length = cpu_to_le32(ring_attr->depth + 1);
+       req.ring_type = ring_attr->type;
+       req.int_mode = ring_attr->mode;
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
@@ -468,8 +487,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
                            sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc)
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to free HW stats context %#x", rc);
+               ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
+                         rc);
 
        return rc;
 }
@@ -524,17 +543,12 @@ static bool is_bnxt_re_dev(struct net_device *netdev)
 
 static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
 {
-       struct bnxt_re_dev *rdev;
+       struct ib_device *ibdev =
+               ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
+       if (!ibdev)
+               return NULL;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
-               if (rdev->netdev == netdev) {
-                       rcu_read_unlock();
-                       return rdev;
-               }
-       }
-       rcu_read_unlock();
-       return NULL;
+       return container_of(ibdev, struct bnxt_re_dev, ibdev);
 }
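
The rewritten bnxt_re_from_netdev() above drops the driver-private RCU list walk in favour of the core's netdev to ib_device map. The sketch below is illustrative only (it assumes the driver's bnxt_re.h for struct bnxt_re_dev); the point to notice is that ib_device_get_by_netdev() returns the device with a reference already held, which this series drops later through ib_device_put() in the netdev notifier.

#include <rdma/ib_verbs.h>
#include "bnxt_re.h"		/* struct bnxt_re_dev (driver header) */

/* Illustrative only: resolve a netdev to the driver-private structure.
 * The returned bnxt_re_dev is pinned by the reference that
 * ib_device_get_by_netdev() took; the caller must eventually drop it
 * with ib_device_put(&rdev->ibdev).
 */
static struct bnxt_re_dev *demo_rdev_from_netdev(struct net_device *netdev)
{
	struct ib_device *ibdev;

	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
	if (!ibdev)
		return NULL;

	return container_of(ibdev, struct bnxt_re_dev, ibdev);
}
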
 
 static void bnxt_re_dev_unprobe(struct net_device *netdev,
@@ -608,11 +622,6 @@ static const struct attribute_group bnxt_re_dev_attr_group = {
        .attrs = bnxt_re_attributes,
 };
 
-static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
-{
-       ib_unregister_device(&rdev->ibdev);
-}
-
 static const struct ib_device_ops bnxt_re_dev_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_BNXT_RE,
@@ -627,6 +636,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
        .create_cq = bnxt_re_create_cq,
        .create_qp = bnxt_re_create_qp,
        .create_srq = bnxt_re_create_srq,
+       .dealloc_driver = bnxt_re_dealloc_driver,
        .dealloc_pd = bnxt_re_dealloc_pd,
        .dealloc_ucontext = bnxt_re_dealloc_ucontext,
        .del_gid = bnxt_re_del_gid,
@@ -723,15 +733,11 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
 {
        dev_put(rdev->netdev);
        rdev->netdev = NULL;
-
        mutex_lock(&bnxt_re_dev_lock);
        list_del_rcu(&rdev->list);
        mutex_unlock(&bnxt_re_dev_lock);
 
        synchronize_rcu();
-
-       ib_dealloc_device(&rdev->ibdev);
-       /* rdev is gone */
 }
 
 static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
@@ -742,8 +748,8 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
        /* Allocate bnxt_re_dev instance here */
        rdev = ib_alloc_device(bnxt_re_dev, ibdev);
        if (!rdev) {
-               dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
-                       ROCE_DRV_MODULE_NAME);
+               ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+                         ROCE_DRV_MODULE_NAME);
                return NULL;
        }
        /* Default values */
@@ -872,8 +878,8 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
        int rc = 0;
 
        if (!srq) {
-               dev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
-                       ROCE_DRV_MODULE_NAME);
+               ibdev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
+                         ROCE_DRV_MODULE_NAME);
                rc = -EINVAL;
                goto done;
        }
@@ -900,8 +906,8 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
                                             qplib_cq);
 
        if (!cq) {
-               dev_err(NULL, "%s: CQ is NULL, CQN not handled",
-                       ROCE_DRV_MODULE_NAME);
+               ibdev_err(NULL, "%s: CQ is NULL, CQN not handled",
+                         ROCE_DRV_MODULE_NAME);
                return -EINVAL;
        }
        if (cq->ib_cq.comp_handler) {
@@ -916,7 +922,7 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
 #define BNXT_RE_GEN_P5_VF_NQ_DB                0x4000
 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
 {
-       return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
+       return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
                (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB :
                                   BNXT_RE_GEN_P5_PF_NQ_DB) :
                                   rdev->msix_entries[indx].db_offset;
@@ -948,8 +954,8 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
                                          db_offt, &bnxt_re_cqn_handler,
                                          &bnxt_re_srqn_handler);
                if (rc) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to enable NQ with rc = 0x%x", rc);
+                       ibdev_err(&rdev->ibdev,
+                                 "Failed to enable NQ with rc = 0x%x", rc);
                        goto fail;
                }
                num_vec_enabled++;
@@ -967,10 +973,10 @@ static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
        int i;
 
        for (i = 0; i < rdev->num_msix - 1; i++) {
-               type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
+               type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
                bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
-               rdev->nq[i].res = NULL;
                bnxt_qplib_free_nq(&rdev->nq[i]);
+               rdev->nq[i].res = NULL;
        }
 }
 
@@ -991,10 +997,10 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
 
 static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 {
+       struct bnxt_re_ring_attr rattr = {};
+       struct bnxt_qplib_ctx *qplib_ctx;
        int num_vec_created = 0;
-       dma_addr_t *pg_map;
        int rc = 0, i;
-       int pages;
        u8 type;
 
        /* Configure and allocate resources for qplib */
@@ -1015,27 +1021,31 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
        if (rc)
                goto dealloc_res;
 
+       qplib_ctx = &rdev->qplib_ctx;
        for (i = 0; i < rdev->num_msix - 1; i++) {
-               rdev->nq[i].res = &rdev->qplib_res;
-               rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
-                       BNXT_RE_MAX_SRQC_COUNT + 2;
-               rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]);
+               struct bnxt_qplib_nq *nq;
+
+               nq = &rdev->nq[i];
+               nq->hwq.max_elements = (qplib_ctx->cq_count +
+                                       qplib_ctx->srqc_count + 2);
+               rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]);
                if (rc) {
-                       dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
-                               i, rc);
+                       ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
+                                 i, rc);
                        goto free_nq;
                }
-               type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
-               pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr;
-               pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count;
-               rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
-                                           BNXT_QPLIB_NQE_MAX_CNT - 1,
-                                           rdev->msix_entries[i + 1].ring_idx,
-                                           &rdev->nq[i].ring_id);
+               type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+               rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
+               rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count;
+               rattr.type = type;
+               rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+               rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
+               rattr.lrid = rdev->msix_entries[i + 1].ring_idx;
+               rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
                if (rc) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to allocate NQ fw id with rc = 0x%x",
-                               rc);
+                       ibdev_err(&rdev->ibdev,
+                                 "Failed to allocate NQ fw id with rc = 0x%x",
+                                 rc);
                        bnxt_qplib_free_nq(&rdev->nq[i]);
                        goto free_nq;
                }
@@ -1043,8 +1053,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
        }
        return 0;
 free_nq:
-       for (i = num_vec_created; i >= 0; i--) {
-               type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
+       for (i = num_vec_created - 1; i >= 0; i--) {
+               type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
                bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
                bnxt_qplib_free_nq(&rdev->nq[i]);
        }
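
bnxt_re_net_ring_alloc() now takes one struct bnxt_re_ring_attr instead of six loose arguments. The helper below is not part of the patch; it is an illustrative condensation of how the NQ path above fills the attribute block, using only field names visible in these hunks and assuming the driver headers for the types.

static int demo_alloc_nq_ring(struct bnxt_re_dev *rdev,
			      struct bnxt_qplib_nq *nq, u32 ring_idx)
{
	struct bnxt_re_ring_attr rattr = {};

	rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;	/* ring page table */
	rattr.pages = nq->hwq.pbl[nq->hwq.level].pg_count;
	rattr.type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
	rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
	rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
	rattr.lrid = ring_idx;		/* logical ring id: doorbell/MSI-X index */

	return bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
}
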
@@ -1109,10 +1119,10 @@ static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
                return rc;
 
        if (resp.queue_cfg_info) {
-               dev_warn(rdev_to_dev(rdev),
-                        "Asymmetric cos queue configuration detected");
-               dev_warn(rdev_to_dev(rdev),
-                        " on device, QoS may not be fully functional\n");
+               ibdev_warn(&rdev->ibdev,
+                          "Asymmetric cos queue configuration detected");
+               ibdev_warn(&rdev->ibdev,
+                          " on device, QoS may not be fully functional\n");
        }
        qcfgmap = &resp.pri0_cos_queue_id;
        tmp_map = (u8 *)cid_map;
@@ -1125,7 +1135,8 @@ static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
 static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
                                        struct bnxt_re_qp *qp)
 {
-       return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp);
+       return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
+              (qp == rdev->gsi_ctx.gsi_sqp);
 }
 
 static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
@@ -1160,12 +1171,13 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
        u16 gid_idx, index;
        int rc = 0;
 
-       if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+       if (!ib_device_try_get(&rdev->ibdev))
                return 0;
 
        if (!sgid_tbl) {
-               dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated");
-               return -EINVAL;
+               ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated");
+               rc = -EINVAL;
+               goto out;
        }
 
        for (index = 0; index < sgid_tbl->active; index++) {
@@ -1185,7 +1197,8 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
                rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
                                            rdev->qplib_res.netdev->dev_addr);
        }
-
+out:
+       ib_device_put(&rdev->ibdev);
        return rc;
 }
 
@@ -1241,7 +1254,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
        /* Get cosq id for this priority */
        rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
        if (rc) {
-               dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
+               ibdev_warn(&rdev->ibdev, "no cos for p_mask %x\n", prio_map);
                return rc;
        }
        /* Parse CoS IDs for app priority */
@@ -1250,8 +1263,8 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
        /* Config BONO. */
        rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
        if (rc) {
-               dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
-                        rdev->cosq[0], rdev->cosq[1]);
+               ibdev_warn(&rdev->ibdev, "no tc for cos{%x, %x}\n",
+                          rdev->cosq[0], rdev->cosq[1]);
                return rc;
        }
 
@@ -1286,8 +1299,8 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc) {
-               dev_err(rdev_to_dev(rdev),
-                       "Failed to query HW version, rc = 0x%x", rc);
+               ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
+                         rc);
                return;
        }
        rdev->qplib_ctx.hwrm_intf_ver =
@@ -1297,15 +1310,35 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
                le16_to_cpu(resp.hwrm_intf_patch);
 }
 
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
+static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
+{
+       int rc = 0;
+       u32 event;
+
+       /* Register ib dev */
+       rc = bnxt_re_register_ib(rdev);
+       if (rc) {
+               pr_err("Failed to register with IB: %#x\n", rc);
+               return rc;
+       }
+       dev_info(rdev_to_dev(rdev), "Device registered successfully");
+       ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+                        &rdev->active_width);
+       set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
+
+       event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
+               IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+       bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
+
+       return rc;
+}
+
+static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
 {
        u8 type;
        int rc;
 
-       if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
-               /* Cleanup ib dev */
-               bnxt_re_unregister_ib(rdev);
-       }
        if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
                cancel_delayed_work_sync(&rdev->worker);
 
@@ -1318,28 +1351,28 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
        if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
                rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
                if (rc)
-                       dev_warn(rdev_to_dev(rdev),
-                                "Failed to deinitialize RCFW: %#x", rc);
+                       ibdev_warn(&rdev->ibdev,
+                                  "Failed to deinitialize RCFW: %#x", rc);
                bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
-               bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+               bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
                bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
-               type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
-               bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
+               type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+               bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
                bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
        }
        if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
                rc = bnxt_re_free_msix(rdev);
                if (rc)
-                       dev_warn(rdev_to_dev(rdev),
-                                "Failed to free MSI-X vectors: %#x", rc);
+                       ibdev_warn(&rdev->ibdev,
+                                  "Failed to free MSI-X vectors: %#x", rc);
        }
 
        bnxt_re_destroy_chip_ctx(rdev);
        if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
                rc = bnxt_re_unregister_netdev(rdev);
                if (rc)
-                       dev_warn(rdev_to_dev(rdev),
-                                "Failed to unregister with netdev: %#x", rc);
+                       ibdev_warn(&rdev->ibdev,
+                                  "Failed to unregister with netdev: %#x", rc);
        }
 }
 
@@ -1353,31 +1386,29 @@ static void bnxt_re_worker(struct work_struct *work)
        schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
 }
 
-static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
 {
-       dma_addr_t *pg_map;
-       u32 db_offt, ridx;
-       int pages, vid;
-       bool locked;
+       struct bnxt_qplib_creq_ctx *creq;
+       struct bnxt_re_ring_attr rattr;
+       u32 db_offt;
+       int vid;
        u8 type;
        int rc;
 
-       /* Acquire rtnl lock through out this function */
-       rtnl_lock();
-       locked = true;
-
        /* Registered a new RoCE device instance to netdev */
+       memset(&rattr, 0, sizeof(rattr));
        rc = bnxt_re_register_netdev(rdev);
        if (rc) {
                rtnl_unlock();
-               pr_err("Failed to register with netedev: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to register with netdev: %#x\n", rc);
                return -EINVAL;
        }
        set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
 
        rc = bnxt_re_setup_chip_ctx(rdev);
        if (rc) {
-               dev_err(rdev_to_dev(rdev), "Failed to get chip context\n");
+               ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
                return -EINVAL;
        }
 
@@ -1386,7 +1417,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 
        rc = bnxt_re_request_msix(rdev);
        if (rc) {
-               pr_err("Failed to get MSI-X vectors: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to get MSI-X vectors: %#x\n", rc);
                rc = -EINVAL;
                goto fail;
        }
@@ -1397,31 +1429,36 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
        /* Establish RCFW Communication Channel to initialize the context
         * memory for the function and all child VFs
         */
-       rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
+       rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
                                           &rdev->qplib_ctx,
                                           BNXT_RE_MAX_QPC_COUNT);
        if (rc) {
-               pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to allocate RCFW Channel: %#x\n", rc);
                goto fail;
        }
-       type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
-       pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr;
-       pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count;
-       ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
-       rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
-                                   BNXT_QPLIB_CREQE_MAX_CNT - 1,
-                                   ridx, &rdev->rcfw.creq_ring_id);
+
+       type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+       creq = &rdev->rcfw.creq;
+       rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
+       rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
+       rattr.type = type;
+       rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+       rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
+       rattr.lrid = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
+       rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
        if (rc) {
-               pr_err("Failed to allocate CREQ: %#x\n", rc);
+               ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
                goto free_rcfw;
        }
        db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
        vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector;
-       rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
+       rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
                                            vid, db_offt, rdev->is_virtfn,
                                            &bnxt_re_aeq_handler);
        if (rc) {
-               pr_err("Failed to enable RCFW channel: %#x\n", rc);
+               ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
+                         rc);
                goto free_ring;
        }
 
@@ -1432,24 +1469,27 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 
        bnxt_re_set_resource_limits(rdev);
 
-       rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0,
-                                 bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx));
+       rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
+                                 bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx));
        if (rc) {
-               pr_err("Failed to allocate QPLIB context: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to allocate QPLIB context: %#x\n", rc);
                goto disable_rcfw;
        }
        rc = bnxt_re_net_stats_ctx_alloc(rdev,
                                         rdev->qplib_ctx.stats.dma_map,
                                         &rdev->qplib_ctx.stats.fw_id);
        if (rc) {
-               pr_err("Failed to allocate stats context: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to allocate stats context: %#x\n", rc);
                goto free_ctx;
        }
 
        rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
                                  rdev->is_virtfn);
        if (rc) {
-               pr_err("Failed to initialize RCFW: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to initialize RCFW: %#x\n", rc);
                goto free_sctx;
        }
        set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
@@ -1457,13 +1497,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
        /* Resources based on the 'new' device caps */
        rc = bnxt_re_alloc_res(rdev);
        if (rc) {
-               pr_err("Failed to allocate resources: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to allocate resources: %#x\n", rc);
                goto fail;
        }
        set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
        rc = bnxt_re_init_res(rdev);
        if (rc) {
-               pr_err("Failed to initialize resources: %#x\n", rc);
+               ibdev_err(&rdev->ibdev,
+                         "Failed to initialize resources: %#x\n", rc);
                goto fail;
        }
 
@@ -1472,46 +1514,28 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
        if (!rdev->is_virtfn) {
                rc = bnxt_re_setup_qos(rdev);
                if (rc)
-                       pr_info("RoCE priority not yet configured\n");
+                       ibdev_info(&rdev->ibdev,
+                                  "RoCE priority not yet configured\n");
 
                INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
                set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
                schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
        }
 
-       rtnl_unlock();
-       locked = false;
-
-       /* Register ib dev */
-       rc = bnxt_re_register_ib(rdev);
-       if (rc) {
-               pr_err("Failed to register with IB: %#x\n", rc);
-               goto fail;
-       }
-       set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
-       dev_info(rdev_to_dev(rdev), "Device registered successfully");
-       ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
-                        &rdev->active_width);
-       set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
-       bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
-
        return 0;
 free_sctx:
        bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
 free_ctx:
-       bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+       bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
 disable_rcfw:
        bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
 free_ring:
-       type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
-       bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
+       type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+       bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
 free_rcfw:
        bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
 fail:
-       if (!locked)
-               rtnl_lock();
-       bnxt_re_ib_unreg(rdev);
-       rtnl_unlock();
+       bnxt_re_dev_uninit(rdev);
 
        return rc;
 }
@@ -1538,7 +1562,8 @@ static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
        en_dev = bnxt_re_dev_probe(netdev);
        if (IS_ERR(en_dev)) {
                if (en_dev != ERR_PTR(-ENODEV))
-                       pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
+                       ibdev_err(&(*rdev)->ibdev, "%s: Failed to probe\n",
+                                 ROCE_DRV_MODULE_NAME);
                rc = PTR_ERR(en_dev);
                goto exit;
        }
@@ -1552,9 +1577,47 @@ exit:
        return rc;
 }
 
-static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev)
 {
+       bnxt_re_dev_uninit(rdev);
        pci_dev_put(rdev->en_dev->pdev);
+       bnxt_re_dev_unreg(rdev);
+}
+
+static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
+                             struct net_device *netdev)
+{
+       int rc;
+
+       rc = bnxt_re_dev_reg(rdev, netdev);
+       if (rc == -ENODEV)
+               return rc;
+       if (rc) {
+               pr_err("Failed to register with the device %s: %#x\n",
+                      netdev->name, rc);
+               return rc;
+       }
+
+       pci_dev_get((*rdev)->en_dev->pdev);
+       rc = bnxt_re_dev_init(*rdev);
+       if (rc) {
+               pci_dev_put((*rdev)->en_dev->pdev);
+               bnxt_re_dev_unreg(*rdev);
+       }
+
+       return rc;
+}
+
+static void bnxt_re_dealloc_driver(struct ib_device *ib_dev)
+{
+       struct bnxt_re_dev *rdev =
+               container_of(ib_dev, struct bnxt_re_dev, ibdev);
+
+       dev_info(rdev_to_dev(rdev), "Unregistering Device");
+
+       rtnl_lock();
+       bnxt_re_remove_device(rdev);
+       rtnl_unlock();
 }
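
With .dealloc_driver wired into the ops table, the ib core owns the final teardown: callers that must not wait for removal (shutdown, the netdev notifier, VF cleanup at module exit) only queue the unregister, and the core later invokes bnxt_re_dealloc_driver() to release the hardware resources. A minimal sketch of that split, reusing names from the hunks above; demo_ops and demo_request_removal are illustrative, not part of the patch.

/* Sketch only: the two halves of the queued-unregister model. */
static const struct ib_device_ops demo_ops = {
	.owner		= THIS_MODULE,
	.driver_id	= RDMA_DRIVER_BNXT_RE,
	.uverbs_abi_ver	= BNXT_RE_ABI_VERSION,
	.dealloc_driver	= bnxt_re_dealloc_driver,	/* core calls this last */
};

static void demo_request_removal(struct bnxt_re_dev *rdev)
{
	/* Returns immediately; the teardown runs from the core's workqueue. */
	ib_unregister_device_queued(&rdev->ibdev);
}
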
 
 /* Handle all deferred netevents tasks */
@@ -1567,21 +1630,23 @@ static void bnxt_re_task(struct work_struct *work)
        re_work = container_of(work, struct bnxt_re_work, work);
        rdev = re_work->rdev;
 
-       if (re_work->event != NETDEV_REGISTER &&
-           !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
-               return;
-
-       switch (re_work->event) {
-       case NETDEV_REGISTER:
-               rc = bnxt_re_ib_reg(rdev);
+       if (re_work->event == NETDEV_REGISTER) {
+               rc = bnxt_re_ib_init(rdev);
                if (rc) {
-                       dev_err(rdev_to_dev(rdev),
-                               "Failed to register with IB: %#x", rc);
-                       bnxt_re_remove_one(rdev);
-                       bnxt_re_dev_unreg(rdev);
+                       ibdev_err(&rdev->ibdev,
+                                 "Failed to register with IB: %#x", rc);
+                       rtnl_lock();
+                       bnxt_re_remove_device(rdev);
+                       rtnl_unlock();
                        goto exit;
                }
-               break;
+               goto exit;
+       }
+
+       if (!ib_device_try_get(&rdev->ibdev))
+               goto exit;
+
+       switch (re_work->event) {
        case NETDEV_UP:
                bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
                                       IB_EVENT_PORT_ACTIVE);
@@ -1601,17 +1666,12 @@ static void bnxt_re_task(struct work_struct *work)
        default:
                break;
        }
-       smp_mb__before_atomic();
-       atomic_dec(&rdev->sched_count);
+       ib_device_put(&rdev->ibdev);
 exit:
+       put_device(&rdev->ibdev.dev);
        kfree(re_work);
 }
 
-static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
-{
-       pci_dev_get(rdev->en_dev->pdev);
-}
-
 /*
  * "Notifier chain callback can be invoked for the same chain from
  * different CPUs at the same time".
@@ -1634,6 +1694,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
        struct bnxt_re_dev *rdev;
        int rc = 0;
        bool sch_work = false;
+       bool release = true;
 
        real_dev = rdma_vlan_dev_real_dev(netdev);
        if (!real_dev)
@@ -1641,7 +1702,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
 
        rdev = bnxt_re_from_netdev(real_dev);
        if (!rdev && event != NETDEV_REGISTER)
-               goto exit;
+               return NOTIFY_OK;
+
        if (real_dev != netdev)
                goto exit;
 
@@ -1649,27 +1711,14 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
        case NETDEV_REGISTER:
                if (rdev)
                        break;
-               rc = bnxt_re_dev_reg(&rdev, real_dev);
-               if (rc == -ENODEV)
-                       break;
-               if (rc) {
-                       pr_err("Failed to register with the device %s: %#x\n",
-                              real_dev->name, rc);
-                       break;
-               }
-               bnxt_re_init_one(rdev);
-               sch_work = true;
+               rc = bnxt_re_add_device(&rdev, real_dev);
+               if (!rc)
+                       sch_work = true;
+               release = false;
                break;
 
        case NETDEV_UNREGISTER:
-               /* netdev notifier will call NETDEV_UNREGISTER again later since
-                * we are still holding the reference to the netdev
-                */
-               if (atomic_read(&rdev->sched_count) > 0)
-                       goto exit;
-               bnxt_re_ib_unreg(rdev);
-               bnxt_re_remove_one(rdev);
-               bnxt_re_dev_unreg(rdev);
+               ib_unregister_device_queued(&rdev->ibdev);
                break;
 
        default:
@@ -1680,17 +1729,19 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
                /* Allocate for the deferred task */
                re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
                if (re_work) {
+                       get_device(&rdev->ibdev.dev);
                        re_work->rdev = rdev;
                        re_work->event = event;
                        re_work->vlan_dev = (real_dev == netdev ?
                                             NULL : netdev);
                        INIT_WORK(&re_work->work, bnxt_re_task);
-                       atomic_inc(&rdev->sched_count);
                        queue_work(bnxt_re_wq, &re_work->work);
                }
        }
 
 exit:
+       if (rdev && release)
+               ib_device_put(&rdev->ibdev);
        return NOTIFY_DONE;
 }
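
The notifier and worker now cooperate through two references instead of the old sched_count counter: get_device() on the ib_device's struct device keeps the memory alive while the work item is queued (released by put_device() at the end of bnxt_re_task()), and ib_device_try_get() lets the worker skip events once unregistration has started. A compressed, illustrative sketch of the worker-side guard, assuming the driver's headers for struct bnxt_re_work and with the NETDEV_REGISTER path and the event dispatch elided:

static void demo_task(struct work_struct *work)
{
	struct bnxt_re_work *re_work =
		container_of(work, struct bnxt_re_work, work);
	struct bnxt_re_dev *rdev = re_work->rdev;

	/* Skip the event if the device has already started unregistering. */
	if (ib_device_try_get(&rdev->ibdev)) {
		/* ... dispatch re_work->event as bnxt_re_task() does ... */
		ib_device_put(&rdev->ibdev);
	}

	put_device(&rdev->ibdev.dev);	/* pairs with get_device() at queue time */
	kfree(re_work);
}
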
 
@@ -1726,36 +1777,21 @@ err_netdev:
 
 static void __exit bnxt_re_mod_exit(void)
 {
-       struct bnxt_re_dev *rdev, *next;
-       LIST_HEAD(to_be_deleted);
+       struct bnxt_re_dev *rdev;
 
-       mutex_lock(&bnxt_re_dev_lock);
-       /* Free all adapter allocated resources */
-       if (!list_empty(&bnxt_re_dev_list))
-               list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
-       mutex_unlock(&bnxt_re_dev_lock);
-       /*
-       * Cleanup the devices in reverse order so that the VF device
-       * cleanup is done before PF cleanup
-       */
-       list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
-               dev_info(rdev_to_dev(rdev), "Unregistering Device");
-               /*
-                * Flush out any scheduled tasks before destroying the
-                * resources
-                */
-               flush_workqueue(bnxt_re_wq);
-               bnxt_re_dev_stop(rdev);
-               /* Acquire the rtnl_lock as the L2 resources are freed here */
-               rtnl_lock();
-               bnxt_re_ib_unreg(rdev);
-               rtnl_unlock();
-               bnxt_re_remove_one(rdev);
-               bnxt_re_dev_unreg(rdev);
-       }
        unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
        if (bnxt_re_wq)
                destroy_workqueue(bnxt_re_wq);
+       list_for_each_entry(rdev, &bnxt_re_dev_list, list) {
+               /* VF device removal should be called before the removal
+                * of PF device. Queue VFs unregister first, so that VFs
+                * shall be removed before the PF during the call of
+                * ib_unregister_driver.
+                */
+               if (rdev->is_virtfn)
+                       ib_unregister_device(&rdev->ibdev);
+       }
+       ib_unregister_driver(RDMA_DRIVER_BNXT_RE);
 }
 
 module_init(bnxt_re_mod_init);
index 020f70e..899a5d2 100644
@@ -43,6 +43,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 #include <linux/prefetch.h>
 #include <linux/if_ether.h>
 
@@ -53,9 +54,7 @@
 #include "qplib_sp.h"
 #include "qplib_fp.h"
 
-static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
-static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type);
 
 static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp)
 {
@@ -233,6 +232,70 @@ fail:
        return rc;
 }
 
+static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
+{
+       struct bnxt_qplib_hwq *hwq = &nq->hwq;
+       struct nq_base *nqe, **nq_ptr;
+       int budget = nq->budget;
+       u32 sw_cons, raw_cons;
+       uintptr_t q_handle;
+       u16 type;
+
+       spin_lock_bh(&hwq->lock);
+       /* Service the NQ until empty */
+       raw_cons = hwq->cons;
+       while (budget--) {
+               sw_cons = HWQ_CMP(raw_cons, hwq);
+               nq_ptr = (struct nq_base **)hwq->pbl_ptr;
+               nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
+               if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+                       break;
+
+               /*
+                * The valid test of the entry must be done first before
+                * reading any further.
+                */
+               dma_rmb();
+
+               type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+               switch (type) {
+               case NQ_BASE_TYPE_CQ_NOTIFICATION:
+               {
+                       struct nq_cn *nqcne = (struct nq_cn *)nqe;
+
+                       q_handle = le32_to_cpu(nqcne->cq_handle_low);
+                       q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
+                                                    << 32;
+                       if ((unsigned long)cq == q_handle) {
+                               nqcne->cq_handle_low = 0;
+                               nqcne->cq_handle_high = 0;
+                               cq->cnq_events++;
+                       }
+                       break;
+               }
+               default:
+                       break;
+               }
+               raw_cons++;
+       }
+       spin_unlock_bh(&hwq->lock);
+}
+
+/* Wait for receiving all NQEs for this CQ and clean the NQEs associated with
+ * this CQ.
+ */
+static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events)
+{
+       u32 retry_cnt = 100;
+
+       while (retry_cnt--) {
+               if (cnq_events == cq->cnq_events)
+                       return;
+               usleep_range(50, 100);
+               clean_nq(cq->nq, cq);
+       }
+}
+
 static void bnxt_qplib_service_nq(unsigned long data)
 {
        struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
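
Both the new clean_nq() above and the reworked bnxt_qplib_service_nq() below consume notification-queue entries the same way: test the valid bit at the current consumer index first, then issue dma_rmb() before reading the rest of the entry, so a partially DMA-written NQE is never interpreted. A stripped-down, illustrative sketch of that loop; locking, the doorbell update and the per-type dispatch are omitted, and the macros and fields are the ones used in these hunks.

static void demo_drain_nq(struct bnxt_qplib_hwq *hwq, int budget)
{
	struct nq_base *nqe, **nq_ptr;
	u32 raw_cons = hwq->cons;
	u32 sw_cons;

	while (budget--) {
		sw_cons = HWQ_CMP(raw_cons, hwq);
		nq_ptr = (struct nq_base **)hwq->pbl_ptr;
		nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
		if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
			break;		/* entry still owned by hardware */

		dma_rmb();		/* valid bit before the rest of the NQE */

		/* ... switch on le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK ... */
		raw_cons++;
	}
	hwq->cons = raw_cons;
}
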
@@ -241,12 +304,12 @@ static void bnxt_qplib_service_nq(unsigned long data)
        struct bnxt_qplib_cq *cq;
        int num_cqne_processed = 0;
        int num_srqne_processed = 0;
-       u32 sw_cons, raw_cons;
-       u16 type;
        int budget = nq->budget;
+       u32 sw_cons, raw_cons;
        uintptr_t q_handle;
-       bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
+       u16 type;
 
+       spin_lock_bh(&hwq->lock);
        /* Service the NQ until empty */
        raw_cons = hwq->cons;
        while (budget--) {
@@ -272,7 +335,10 @@ static void bnxt_qplib_service_nq(unsigned long data)
                        q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
                                                     << 32;
                        cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle;
-                       bnxt_qplib_arm_cq_enable(cq);
+                       if (!cq)
+                               break;
+                       bnxt_qplib_armen_db(&cq->dbinfo,
+                                           DBC_DBC_TYPE_CQ_ARMENA);
                        spin_lock_bh(&cq->compl_lock);
                        atomic_set(&cq->arm_state, 0);
                        if (!nq->cqn_handler(nq, (cq)))
@@ -280,19 +346,22 @@ static void bnxt_qplib_service_nq(unsigned long data)
                        else
                                dev_warn(&nq->pdev->dev,
                                         "cqn - type 0x%x not handled\n", type);
+                       cq->cnq_events++;
                        spin_unlock_bh(&cq->compl_lock);
                        break;
                }
                case NQ_BASE_TYPE_SRQ_EVENT:
                {
+                       struct bnxt_qplib_srq *srq;
                        struct nq_srq_event *nqsrqe =
                                                (struct nq_srq_event *)nqe;
 
                        q_handle = le32_to_cpu(nqsrqe->srq_handle_low);
                        q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
                                     << 32;
-                       bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle,
-                                          DBC_DBC_TYPE_SRQ_ARMENA);
+                       srq = (struct bnxt_qplib_srq *)q_handle;
+                       bnxt_qplib_armen_db(&srq->dbinfo,
+                                           DBC_DBC_TYPE_SRQ_ARMENA);
                        if (!nq->srqn_handler(nq,
                                              (struct bnxt_qplib_srq *)q_handle,
                                              nqsrqe->event))
@@ -314,10 +383,9 @@ static void bnxt_qplib_service_nq(unsigned long data)
        }
        if (hwq->cons != raw_cons) {
                hwq->cons = raw_cons;
-               bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons,
-                                           hwq->max_elements, nq->ring_id,
-                                           gen_p5);
+               bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
        }
+       spin_unlock_bh(&hwq->lock);
 }
 
 static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
@@ -333,25 +401,23 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
        prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]);
 
        /* Fan out to CPU affinitized kthreads? */
-       tasklet_schedule(&nq->worker);
+       tasklet_schedule(&nq->nq_tasklet);
 
        return IRQ_HANDLED;
 }
 
 void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
 {
-       bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
-       tasklet_disable(&nq->worker);
+       tasklet_disable(&nq->nq_tasklet);
        /* Mask h/w interrupt */
-       bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons,
-                             nq->hwq.max_elements, nq->ring_id, gen_p5);
+       bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
        /* Sync with last running IRQ handler */
-       synchronize_irq(nq->vector);
+       synchronize_irq(nq->msix_vec);
        if (kill)
-               tasklet_kill(&nq->worker);
+               tasklet_kill(&nq->nq_tasklet);
        if (nq->requested) {
-               irq_set_affinity_hint(nq->vector, NULL);
-               free_irq(nq->vector, nq);
+               irq_set_affinity_hint(nq->msix_vec, NULL);
+               free_irq(nq->msix_vec, nq);
                nq->requested = false;
        }
 }
@@ -364,89 +430,108 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
        }
 
        /* Make sure the HW is stopped! */
-       if (nq->requested)
-               bnxt_qplib_nq_stop_irq(nq, true);
+       bnxt_qplib_nq_stop_irq(nq, true);
 
-       if (nq->bar_reg_iomem)
-               iounmap(nq->bar_reg_iomem);
-       nq->bar_reg_iomem = NULL;
+       if (nq->nq_db.reg.bar_reg) {
+               iounmap(nq->nq_db.reg.bar_reg);
+               nq->nq_db.reg.bar_reg = NULL;
+       }
 
        nq->cqn_handler = NULL;
        nq->srqn_handler = NULL;
-       nq->vector = 0;
+       nq->msix_vec = 0;
 }
 
 int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
                            int msix_vector, bool need_init)
 {
-       bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
        int rc;
 
        if (nq->requested)
                return -EFAULT;
 
-       nq->vector = msix_vector;
+       nq->msix_vec = msix_vector;
        if (need_init)
-               tasklet_init(&nq->worker, bnxt_qplib_service_nq,
+               tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq,
                             (unsigned long)nq);
        else
-               tasklet_enable(&nq->worker);
+               tasklet_enable(&nq->nq_tasklet);
 
        snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx);
-       rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq);
+       rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq);
        if (rc)
                return rc;
 
        cpumask_clear(&nq->mask);
        cpumask_set_cpu(nq_indx, &nq->mask);
-       rc = irq_set_affinity_hint(nq->vector, &nq->mask);
+       rc = irq_set_affinity_hint(nq->msix_vec, &nq->mask);
        if (rc) {
                dev_warn(&nq->pdev->dev,
                         "set affinity failed; vector: %d nq_idx: %d\n",
-                        nq->vector, nq_indx);
+                        nq->msix_vec, nq_indx);
        }
        nq->requested = true;
-       bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons,
-                                   nq->hwq.max_elements, nq->ring_id, gen_p5);
+       bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
+
+       return rc;
+}
+
+static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq,  u32 reg_offt)
+{
+       resource_size_t reg_base;
+       struct bnxt_qplib_nq_db *nq_db;
+       struct pci_dev *pdev;
+       int rc = 0;
+
+       pdev = nq->pdev;
+       nq_db = &nq->nq_db;
+
+       nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
+       nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
+       if (!nq_db->reg.bar_base) {
+               dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!",
+                       nq_db->reg.bar_id);
+               rc = -ENOMEM;
+               goto fail;
+       }
 
+       reg_base = nq_db->reg.bar_base + reg_offt;
+       /* Unconditionally map 8 bytes to support 57500 series */
+       nq_db->reg.len = 8;
+       nq_db->reg.bar_reg = ioremap(reg_base, nq_db->reg.len);
+       if (!nq_db->reg.bar_reg) {
+               dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed",
+                       nq_db->reg.bar_id);
+               rc = -ENOMEM;
+               goto fail;
+       }
+
+       nq_db->dbinfo.db = nq_db->reg.bar_reg;
+       nq_db->dbinfo.hwq = &nq->hwq;
+       nq_db->dbinfo.xid = nq->ring_id;
+fail:
        return rc;
 }
 
 int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
                         int nq_idx, int msix_vector, int bar_reg_offset,
-                        int (*cqn_handler)(struct bnxt_qplib_nq *nq,
-                                           struct bnxt_qplib_cq *),
-                        int (*srqn_handler)(struct bnxt_qplib_nq *nq,
-                                            struct bnxt_qplib_srq *,
-                                            u8 event))
+                        cqn_handler_t cqn_handler,
+                        srqn_handler_t srqn_handler)
 {
-       resource_size_t nq_base;
        int rc = -1;
 
-       if (cqn_handler)
-               nq->cqn_handler = cqn_handler;
-
-       if (srqn_handler)
-               nq->srqn_handler = srqn_handler;
+       nq->pdev = pdev;
+       nq->cqn_handler = cqn_handler;
+       nq->srqn_handler = srqn_handler;
 
        /* Have a task to schedule CQ notifiers in post send case */
        nq->cqn_wq  = create_singlethread_workqueue("bnxt_qplib_nq");
        if (!nq->cqn_wq)
                return -ENOMEM;
 
-       nq->bar_reg = NQ_CONS_PCI_BAR_REGION;
-       nq->bar_reg_off = bar_reg_offset;
-       nq_base = pci_resource_start(pdev, nq->bar_reg);
-       if (!nq_base) {
-               rc = -ENOMEM;
-               goto fail;
-       }
-       /* Unconditionally map 8 bytes to support 57500 series */
-       nq->bar_reg_iomem = ioremap(nq_base + nq->bar_reg_off, 8);
-       if (!nq->bar_reg_iomem) {
-               rc = -ENOMEM;
+       rc = bnxt_qplib_map_nq_db(nq, bar_reg_offset);
+       if (rc)
                goto fail;
-       }
 
        rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
        if (rc) {
@@ -464,49 +549,38 @@ fail:
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
 {
        if (nq->hwq.max_elements) {
-               bnxt_qplib_free_hwq(nq->pdev, &nq->hwq);
+               bnxt_qplib_free_hwq(nq->res, &nq->hwq);
                nq->hwq.max_elements = 0;
        }
 }
 
-int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
+int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq)
 {
-       u8 hwq_type;
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
+       struct bnxt_qplib_sg_info sginfo = {};
 
-       nq->pdev = pdev;
+       nq->pdev = res->pdev;
+       nq->res = res;
        if (!nq->hwq.max_elements ||
            nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
                nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
-       hwq_type = bnxt_qplib_get_hwq_type(nq->res);
-       if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL,
-                                     &nq->hwq.max_elements,
-                                     BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
-                                     PAGE_SIZE, hwq_type))
-               return -ENOMEM;
 
+       sginfo.pgsize = PAGE_SIZE;
+       sginfo.pgshft = PAGE_SHIFT;
+       hwq_attr.res = res;
+       hwq_attr.sginfo = &sginfo;
+       hwq_attr.depth = nq->hwq.max_elements;
+       hwq_attr.stride = sizeof(struct nq_base);
+       hwq_attr.type = bnxt_qplib_get_hwq_type(nq->res);
+       if (bnxt_qplib_alloc_init_hwq(&nq->hwq, &hwq_attr)) {
+               dev_err(&nq->pdev->dev, "FP NQ allocation failed");
+               return -ENOMEM;
+       }
        nq->budget = 8;
        return 0;
 }
 
 /* SRQ */
-static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type)
-{
-       struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
-       void __iomem *db;
-       u32 sw_prod;
-       u64 val = 0;
-
-       /* Ring DB */
-       sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ?
-                  srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq);
-       db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base :
-                                                    srq->dpi->dbr;
-       val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
-       val <<= 32;
-       val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
-       writeq(val, db);
-}
-
 void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
                           struct bnxt_qplib_srq *srq)
 {
@@ -526,24 +600,26 @@ void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
        kfree(srq->swq);
        if (rc)
                return;
-       bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
+       bnxt_qplib_free_hwq(res, &srq->hwq);
 }
 
 int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
                          struct bnxt_qplib_srq *srq)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-       struct cmdq_create_srq req;
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
        struct creq_create_srq_resp resp;
+       struct cmdq_create_srq req;
        struct bnxt_qplib_pbl *pbl;
        u16 cmd_flags = 0;
        int rc, idx;
 
-       srq->hwq.max_elements = srq->max_wqe;
-       rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, &srq->sg_info,
-                                      &srq->hwq.max_elements,
-                                      BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_QUEUE);
+       hwq_attr.res = res;
+       hwq_attr.sginfo = &srq->sg_info;
+       hwq_attr.depth = srq->max_wqe;
+       hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
+       hwq_attr.type = HWQ_TYPE_QUEUE;
+       rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr);
        if (rc)
                goto exit;
 
@@ -595,14 +671,17 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
        srq->swq[srq->last_idx].next_idx = -1;
 
        srq->id = le32_to_cpu(resp.xid);
-       srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
+       srq->dbinfo.hwq = &srq->hwq;
+       srq->dbinfo.xid = srq->id;
+       srq->dbinfo.db = srq->dpi->dbr;
+       srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem;
        if (srq->threshold)
-               bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA);
+               bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA);
        srq->arm_req = false;
 
        return 0;
 fail:
-       bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
+       bnxt_qplib_free_hwq(res, &srq->hwq);
        kfree(srq->swq);
 exit:
        return rc;
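
Queue allocation throughout this file now goes through a struct bnxt_qplib_hwq_attr instead of a pdev plus loose size/stride/type arguments. The SRQ case above reduces to the sketch below; demo_alloc_srq_hwq is an illustrative wrapper that assumes the qplib headers for the types and constants.

static int demo_alloc_srq_hwq(struct bnxt_qplib_res *res,
			      struct bnxt_qplib_srq *srq)
{
	struct bnxt_qplib_hwq_attr hwq_attr = {};

	hwq_attr.res = res;			/* carries pdev and chip context */
	hwq_attr.sginfo = &srq->sg_info;	/* page size/shift description */
	hwq_attr.depth = srq->max_wqe;
	hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
	hwq_attr.type = HWQ_TYPE_QUEUE;

	return bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr);
}
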
@@ -621,7 +700,7 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
                                    srq_hwq->max_elements - sw_cons + sw_prod;
        if (count > srq->threshold) {
                srq->arm_req = false;
-               bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
+               bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
        } else {
                /* Deferred arming */
                srq->arm_req = true;
@@ -709,10 +788,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
                                    srq_hwq->max_elements - sw_cons + sw_prod;
        spin_unlock(&srq_hwq->lock);
        /* Ring DB */
-       bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ);
+       bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
        if (srq->arm_req == true && count > srq->threshold) {
                srq->arm_req = false;
-               bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
+               bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
        }
 done:
        return rc;
@@ -721,15 +800,16 @@ done:
 /* QP */
 int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-       struct cmdq_create_qp1 req;
-       struct creq_create_qp1_resp resp;
-       struct bnxt_qplib_pbl *pbl;
        struct bnxt_qplib_q *sq = &qp->sq;
        struct bnxt_qplib_q *rq = &qp->rq;
-       int rc;
+       struct creq_create_qp1_resp resp;
+       struct cmdq_create_qp1 req;
+       struct bnxt_qplib_pbl *pbl;
        u16 cmd_flags = 0;
        u32 qp_flags = 0;
+       int rc;
 
        RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags);
 
@@ -739,11 +819,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
        req.qp_handle = cpu_to_le64(qp->qp_handle);
 
        /* SQ */
-       sq->hwq.max_elements = sq->max_wqe;
-       rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL,
-                                      &sq->hwq.max_elements,
-                                      BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_QUEUE);
+       hwq_attr.res = res;
+       hwq_attr.sginfo = &sq->sg_info;
+       hwq_attr.depth = sq->max_wqe;
+       hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE;
+       hwq_attr.type = HWQ_TYPE_QUEUE;
+       rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
        if (rc)
                goto exit;
 
@@ -778,11 +859,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        /* RQ */
        if (rq->max_wqe) {
-               rq->hwq.max_elements = qp->rq.max_wqe;
-               rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL,
-                                              &rq->hwq.max_elements,
-                                              BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
-                                              PAGE_SIZE, HWQ_TYPE_QUEUE);
+               hwq_attr.res = res;
+               hwq_attr.sginfo = &rq->sg_info;
+               hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
+               hwq_attr.depth = qp->rq.max_wqe;
+               hwq_attr.type = HWQ_TYPE_QUEUE;
+               rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
                if (rc)
                        goto fail_sq;
 
@@ -840,6 +922,15 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        qp->id = le32_to_cpu(resp.xid);
        qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+       qp->cctx = res->cctx;
+       sq->dbinfo.hwq = &sq->hwq;
+       sq->dbinfo.xid = qp->id;
+       sq->dbinfo.db = qp->dpi->dbr;
+       if (rq->max_wqe) {
+               rq->dbinfo.hwq = &rq->hwq;
+               rq->dbinfo.xid = qp->id;
+               rq->dbinfo.db = qp->dpi->dbr;
+       }
        rcfw->qp_tbl[qp->id].qp_id = qp->id;
        rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
 
@@ -848,10 +939,10 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 fail:
        bnxt_qplib_free_qp_hdr_buf(res, qp);
 fail_rq:
-       bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+       bnxt_qplib_free_hwq(res, &rq->hwq);
        kfree(rq->swq);
 fail_sq:
-       bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+       bnxt_qplib_free_hwq(res, &sq->hwq);
        kfree(sq->swq);
 exit:
        return rc;
@@ -860,7 +951,9 @@ exit:
 int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
        unsigned long int psn_search, poff = 0;
+       struct bnxt_qplib_sg_info sginfo = {};
        struct sq_psn_search **psn_search_ptr;
        struct bnxt_qplib_q *sq = &qp->sq;
        struct bnxt_qplib_q *rq = &qp->rq;
@@ -887,12 +980,15 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
                         sizeof(struct sq_psn_search_ext) :
                         sizeof(struct sq_psn_search);
        }
-       sq->hwq.max_elements = sq->max_wqe;
-       rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, &sq->sg_info,
-                                      &sq->hwq.max_elements,
-                                      BNXT_QPLIB_MAX_SQE_ENTRY_SIZE,
-                                      psn_sz,
-                                      PAGE_SIZE, HWQ_TYPE_QUEUE);
+
+       hwq_attr.res = res;
+       hwq_attr.sginfo = &sq->sg_info;
+       hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE;
+       hwq_attr.depth = sq->max_wqe;
+       hwq_attr.aux_stride = psn_sz;
+       hwq_attr.aux_depth = hwq_attr.depth;
+       hwq_attr.type = HWQ_TYPE_QUEUE;
+       rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
        if (rc)
                goto exit;
 
@@ -956,12 +1052,14 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        /* RQ */
        if (rq->max_wqe) {
-               rq->hwq.max_elements = rq->max_wqe;
-               rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq,
-                                              &rq->sg_info,
-                                              &rq->hwq.max_elements,
-                                              BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
-                                              PAGE_SIZE, HWQ_TYPE_QUEUE);
+               hwq_attr.res = res;
+               hwq_attr.sginfo = &rq->sg_info;
+               hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
+               hwq_attr.depth = rq->max_wqe;
+               hwq_attr.aux_stride = 0;
+               hwq_attr.aux_depth = 0;
+               hwq_attr.type = HWQ_TYPE_QUEUE;
+               rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
                if (rc)
                        goto fail_sq;
 
@@ -1029,10 +1127,17 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
                req_size = xrrq->max_elements *
                           BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1;
                req_size &= ~(PAGE_SIZE - 1);
-               rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL,
-                                              &xrrq->max_elements,
-                                              BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE,
-                                              0, req_size, HWQ_TYPE_CTX);
+               sginfo.pgsize = req_size;
+               sginfo.pgshft = PAGE_SHIFT;
+
+               hwq_attr.res = res;
+               hwq_attr.sginfo = &sginfo;
+               hwq_attr.depth = xrrq->max_elements;
+               hwq_attr.stride = BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE;
+               hwq_attr.aux_stride = 0;
+               hwq_attr.aux_depth = 0;
+               hwq_attr.type = HWQ_TYPE_CTX;
+               rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
                if (rc)
                        goto fail_buf_free;
                pbl = &xrrq->pbl[PBL_LVL_0];
@@ -1044,11 +1149,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
                req_size = xrrq->max_elements *
                           BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1;
                req_size &= ~(PAGE_SIZE - 1);
-
-               rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL,
-                                              &xrrq->max_elements,
-                                              BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE,
-                                              0, req_size, HWQ_TYPE_CTX);
+               sginfo.pgsize = req_size;
+               hwq_attr.depth =  xrrq->max_elements;
+               hwq_attr.stride = BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE;
+               rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
                if (rc)
                        goto fail_orrq;
 
@@ -1064,9 +1168,17 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        qp->id = le32_to_cpu(resp.xid);
        qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
-       qp->cctx = res->cctx;
        INIT_LIST_HEAD(&qp->sq_flush);
        INIT_LIST_HEAD(&qp->rq_flush);
+       qp->cctx = res->cctx;
+       sq->dbinfo.hwq = &sq->hwq;
+       sq->dbinfo.xid = qp->id;
+       sq->dbinfo.db = qp->dpi->dbr;
+       if (rq->max_wqe) {
+               rq->dbinfo.hwq = &rq->hwq;
+               rq->dbinfo.xid = qp->id;
+               rq->dbinfo.db = qp->dpi->dbr;
+       }
        rcfw->qp_tbl[qp->id].qp_id = qp->id;
        rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
 
@@ -1074,17 +1186,17 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 fail:
        if (qp->irrq.max_elements)
-               bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+               bnxt_qplib_free_hwq(res, &qp->irrq);
 fail_orrq:
        if (qp->orrq.max_elements)
-               bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+               bnxt_qplib_free_hwq(res, &qp->orrq);
 fail_buf_free:
        bnxt_qplib_free_qp_hdr_buf(res, qp);
 fail_rq:
-       bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+       bnxt_qplib_free_hwq(res, &rq->hwq);
        kfree(rq->swq);
 fail_sq:
-       bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+       bnxt_qplib_free_hwq(res, &sq->hwq);
        kfree(sq->swq);
 exit:
        return rc;
@@ -1440,16 +1552,16 @@ void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
                            struct bnxt_qplib_qp *qp)
 {
        bnxt_qplib_free_qp_hdr_buf(res, qp);
-       bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
+       bnxt_qplib_free_hwq(res, &qp->sq.hwq);
        kfree(qp->sq.swq);
 
-       bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq);
+       bnxt_qplib_free_hwq(res, &qp->rq.hwq);
        kfree(qp->rq.swq);
 
        if (qp->irrq.max_elements)
-               bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+               bnxt_qplib_free_hwq(res, &qp->irrq);
        if (qp->orrq.max_elements)
-               bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+               bnxt_qplib_free_hwq(res, &qp->orrq);
 
 }
 
@@ -1506,16 +1618,8 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
 void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
 {
        struct bnxt_qplib_q *sq = &qp->sq;
-       u32 sw_prod;
-       u64 val = 0;
 
-       val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
-              DBC_DBC_TYPE_SQ);
-       val <<= 32;
-       sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
-       val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
-       /* Flush all the WQE writes to HW */
-       writeq(val, qp->dpi->dbr);
+       bnxt_qplib_ring_prod_db(&sq->dbinfo, DBC_DBC_TYPE_SQ);
 }
 
 int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
@@ -1807,16 +1911,8 @@ done:
 void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
 {
        struct bnxt_qplib_q *rq = &qp->rq;
-       u32 sw_prod;
-       u64 val = 0;
 
-       val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
-              DBC_DBC_TYPE_RQ);
-       val <<= 32;
-       sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
-       val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
-       /* Flush the writes to HW Rx WQE before the ringing Rx DB */
-       writeq(val, qp->dpi->dbr);
+       bnxt_qplib_ring_prod_db(&rq->dbinfo, DBC_DBC_TYPE_RQ);
 }
 
 int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
@@ -1896,48 +1992,22 @@ done:
 }
 
 /* CQ */
-
-/* Spinlock must be held */
-static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq)
-{
-       u64 val = 0;
-
-       val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
-              DBC_DBC_TYPE_CQ_ARMENA;
-       val <<= 32;
-       /* Flush memory writes before enabling the CQ */
-       writeq(val, cq->dbr_base);
-}
-
-static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
-{
-       struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
-       u32 sw_cons;
-       u64 val = 0;
-
-       /* Ring DB */
-       val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
-       val <<= 32;
-       sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq);
-       val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
-       /* flush memory writes before arming the CQ */
-       writeq(val, cq->dpi->dbr);
-}
-
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-       struct cmdq_create_cq req;
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
        struct creq_create_cq_resp resp;
+       struct cmdq_create_cq req;
        struct bnxt_qplib_pbl *pbl;
        u16 cmd_flags = 0;
        int rc;
 
-       cq->hwq.max_elements = cq->max_wqe;
-       rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, &cq->sg_info,
-                                      &cq->hwq.max_elements,
-                                      BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_QUEUE);
+       hwq_attr.res = res;
+       hwq_attr.depth = cq->max_wqe;
+       hwq_attr.stride = sizeof(struct cq_base);
+       hwq_attr.type = HWQ_TYPE_QUEUE;
+       hwq_attr.sginfo = &cq->sg_info;
+       rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr);
        if (rc)
                goto exit;
 
@@ -1976,7 +2046,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
                goto fail;
 
        cq->id = le32_to_cpu(resp.xid);
-       cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
        cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
        init_waitqueue_head(&cq->waitq);
        INIT_LIST_HEAD(&cq->sqf_head);
@@ -1984,11 +2053,17 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
        spin_lock_init(&cq->compl_lock);
        spin_lock_init(&cq->flush_lock);
 
-       bnxt_qplib_arm_cq_enable(cq);
+       cq->dbinfo.hwq = &cq->hwq;
+       cq->dbinfo.xid = cq->id;
+       cq->dbinfo.db = cq->dpi->dbr;
+       cq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem;
+
+       bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA);
+
        return 0;
 
 fail:
-       bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+       bnxt_qplib_free_hwq(res, &cq->hwq);
 exit:
        return rc;
 }
@@ -1998,6 +2073,7 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_destroy_cq req;
        struct creq_destroy_cq_resp resp;
+       u16 total_cnq_events;
        u16 cmd_flags = 0;
        int rc;
 
@@ -2008,7 +2084,9 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
                                          (void *)&resp, NULL, 0);
        if (rc)
                return rc;
-       bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+       total_cnq_events = le16_to_cpu(resp.total_cnq_events);
+       __wait_for_all_nqes(cq, total_cnq_events);
+       bnxt_qplib_free_hwq(res, &cq->hwq);
        return 0;
 }
 
@@ -2141,8 +2219,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
                sq->send_phantom = true;
 
                /* TODO: Only ARM if the previous SQE is ARMALL */
-               bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL);
-
+               bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMALL);
                rc = -EAGAIN;
                goto out;
        }
@@ -2426,7 +2503,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
        }
        cqe = *pcqe;
        cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
-       cqe->length = (u32)le16_to_cpu(hwcqe->length);
+       cqe->length = le16_to_cpu(hwcqe->length) & CQ_RES_UD_LENGTH_MASK;
        cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata);
        cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
        cqe->flags = le16_to_cpu(hwcqe->flags);
@@ -2812,7 +2889,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
        }
        if (cq->hwq.cons != raw_cons) {
                cq->hwq.cons = raw_cons;
-               bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ);
+               bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
        }
 exit:
        return num_cqes - budget;
@@ -2821,7 +2898,7 @@ exit:
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
 {
        if (arm_type)
-               bnxt_qplib_arm_cq(cq, arm_type);
+               bnxt_qplib_ring_db(&cq->dbinfo, arm_type);
        /* Using cq->arm_state variable to track whether to issue cq handler */
        atomic_set(&cq->arm_state, 1);
 }
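
A note on the doorbell conversion, as a sketch under assumptions rather than patch content: the deleted bnxt_qplib_arm_srq() and bnxt_qplib_arm_cq() helpers above each hand-built the same 64-bit doorbell word. Judging from that removed code and from the bnxt_qplib_db_info fields this patch populates (hwq, xid, db, priv_db), the shared producer-doorbell helper they collapse into presumably looks something like the following; the real implementation lives in the qplib_res header and may differ:

static inline void sketch_ring_prod_db(struct bnxt_qplib_db_info *info, u32 type)
{
        u64 val;

        /* high 32 bits: queue id (xid) and doorbell type */
        val = ((info->xid << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | type;
        val <<= 32;
        /* low 32 bits: current producer index of the backing hwq
         * (the CQ and ARM variants use the consumer index or srq->threshold instead)
         */
        val |= (HWQ_CMP(info->hwq->prod, info->hwq) << DBC_DBC_INDEX_SFT) &
                DBC_DBC_INDEX_MASK;
        writeq(val, info->db);
}
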
index 99e0a13..7edb70b 100644
@@ -42,7 +42,7 @@
 struct bnxt_qplib_srq {
        struct bnxt_qplib_pd            *pd;
        struct bnxt_qplib_dpi           *dpi;
-       void __iomem                    *dbr_base;
+       struct bnxt_qplib_db_info       dbinfo;
        u64                             srq_handle;
        u32                             id;
        u32                             max_wqe;
@@ -236,6 +236,7 @@ struct bnxt_qplib_swqe {
 struct bnxt_qplib_q {
        struct bnxt_qplib_hwq           hwq;
        struct bnxt_qplib_swq           *swq;
+       struct bnxt_qplib_db_info       dbinfo;
        struct bnxt_qplib_sg_info       sg_info;
        u32                             max_wqe;
        u16                             q_full_delta;
@@ -370,7 +371,7 @@ struct bnxt_qplib_cqe {
 #define BNXT_QPLIB_QUEUE_START_PERIOD          0x01
 struct bnxt_qplib_cq {
        struct bnxt_qplib_dpi           *dpi;
-       void __iomem                    *dbr_base;
+       struct bnxt_qplib_db_info       dbinfo;
        u32                             max_wqe;
        u32                             id;
        u16                             count;
@@ -401,6 +402,7 @@ struct bnxt_qplib_cq {
  * of the same QP while manipulating the flush list.
  */
        spinlock_t                      flush_lock; /* QP flush management */
+       u16                             cnq_events;
 };
 
 #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE        sizeof(struct xrrq_irrq)
@@ -433,66 +435,32 @@ struct bnxt_qplib_cq {
                                         NQ_DB_IDX_VALID |      \
                                         NQ_DB_IRQ_DIS)
 
-static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index,
-                                          u32 xid, bool arm)
-{
-       u64 val;
-
-       val = xid & DBC_DBC_XID_MASK;
-       val |= DBC_DBC_PATH_ROCE;
-       val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
-       val <<= 32;
-       val |= index & DBC_DBC_INDEX_MASK;
-       writeq(val, db);
-}
-
-static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons,
-                                              u32 max_elements, u32 xid,
-                                              bool gen_p5)
-{
-       u32 index = raw_cons & (max_elements - 1);
-
-       if (gen_p5)
-               bnxt_qplib_ring_nq_db64(db, index, xid, true);
-       else
-               writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db);
-}
+struct bnxt_qplib_nq_db {
+       struct bnxt_qplib_reg_desc      reg;
+       struct bnxt_qplib_db_info       dbinfo;
+};
 
-static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons,
-                                        u32 max_elements, u32 xid,
-                                        bool gen_p5)
-{
-       u32 index = raw_cons & (max_elements - 1);
-
-       if (gen_p5)
-               bnxt_qplib_ring_nq_db64(db, index, xid, false);
-       else
-               writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db);
-}
+typedef int (*cqn_handler_t)(struct bnxt_qplib_nq *nq,
+               struct bnxt_qplib_cq *cq);
+typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq,
+               struct bnxt_qplib_srq *srq, u8 event);
 
 struct bnxt_qplib_nq {
-       struct pci_dev          *pdev;
-       struct bnxt_qplib_res   *res;
-
-       int                     vector;
-       cpumask_t               mask;
-       int                     budget;
-       bool                    requested;
-       struct tasklet_struct   worker;
-       struct bnxt_qplib_hwq   hwq;
-
-       u16                     bar_reg;
-       u32                     bar_reg_off;
-       u16                     ring_id;
-       void __iomem            *bar_reg_iomem;
-
-       int                     (*cqn_handler)(struct bnxt_qplib_nq *nq,
-                                              struct bnxt_qplib_cq *cq);
-       int                     (*srqn_handler)(struct bnxt_qplib_nq *nq,
-                                               struct bnxt_qplib_srq *srq,
-                                               u8 event);
-       struct workqueue_struct *cqn_wq;
-       char                    name[32];
+       struct pci_dev                  *pdev;
+       struct bnxt_qplib_res           *res;
+       char                            name[32];
+       struct bnxt_qplib_hwq           hwq;
+       struct bnxt_qplib_nq_db         nq_db;
+       u16                             ring_id;
+       int                             msix_vec;
+       cpumask_t                       mask;
+       struct tasklet_struct           nq_tasklet;
+       bool                            requested;
+       int                             budget;
+
+       cqn_handler_t                   cqn_handler;
+       srqn_handler_t                  srqn_handler;
+       struct workqueue_struct         *cqn_wq;
 };
 
 struct bnxt_qplib_nq_work {
@@ -507,11 +475,8 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
                            int msix_vector, bool need_init);
 int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
                         int nq_idx, int msix_vector, int bar_reg_offset,
-                        int (*cqn_handler)(struct bnxt_qplib_nq *nq,
-                                           struct bnxt_qplib_cq *cq),
-                        int (*srqn_handler)(struct bnxt_qplib_nq *nq,
-                                            struct bnxt_qplib_srq *srq,
-                                            u8 event));
+                        cqn_handler_t cqn_handler,
+                        srqn_handler_t srq_handler);
 int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
                          struct bnxt_qplib_srq *srq);
 int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
@@ -550,7 +515,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq);
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
-int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
+int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq);
 void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
 void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
                                 unsigned long *flags);
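
Illustrative sketch, not taken from the patch: the cqn_handler_t/srqn_handler_t typedefs above shorten the bnxt_qplib_enable_nq() prototype, so a caller registers its notification callbacks as plainly typed function pointers. The handler names below are placeholders:

static int example_cqn_handler(struct bnxt_qplib_nq *nq,
                               struct bnxt_qplib_cq *cq)
{
        /* e.g. hand the completion off to the ULP's CQ handler */
        return 0;
}

static int example_srqn_handler(struct bnxt_qplib_nq *nq,
                                struct bnxt_qplib_srq *srq, u8 event)
{
        /* e.g. report SRQ limit-reached events to the consumer */
        return 0;
}

static int example_enable(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
                          int nq_idx, int msix_vector, int bar_reg_offset)
{
        return bnxt_qplib_enable_nq(pdev, nq, nq_idx, msix_vector,
                                    bar_reg_offset,
                                    example_cqn_handler,
                                    example_srqn_handler);
}
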
index 1291b12..f01e864 100644
@@ -55,12 +55,14 @@ static void bnxt_qplib_service_creq(unsigned long data);
 /* Hardware communication channel */
 static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
+       struct bnxt_qplib_cmdq_ctx *cmdq;
        u16 cbit;
        int rc;
 
+       cmdq = &rcfw->cmdq;
        cbit = cookie % rcfw->cmdq_depth;
-       rc = wait_event_timeout(rcfw->waitq,
-                               !test_bit(cbit, rcfw->cmdq_bitmap),
+       rc = wait_event_timeout(cmdq->waitq,
+                               !test_bit(cbit, cmdq->cmdq_bitmap),
                                msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
        return rc ? 0 : -ETIMEDOUT;
 };
@@ -68,15 +70,17 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
        u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
+       struct bnxt_qplib_cmdq_ctx *cmdq;
        u16 cbit;
 
+       cmdq = &rcfw->cmdq;
        cbit = cookie % rcfw->cmdq_depth;
-       if (!test_bit(cbit, rcfw->cmdq_bitmap))
+       if (!test_bit(cbit, cmdq->cmdq_bitmap))
                goto done;
        do {
                mdelay(1); /* 1 msec */
                bnxt_qplib_service_creq((unsigned long)rcfw);
-       } while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
+       } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count);
 done:
        return count ? 0 : -ETIMEDOUT;
 };
@@ -84,56 +88,60 @@ done:
 static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
                          struct creq_base *resp, void *sb, u8 is_block)
 {
-       struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
-       struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+       struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+       struct bnxt_qplib_cmdqe *cmdqe, **hwq_ptr;
+       struct bnxt_qplib_hwq *hwq = &cmdq->hwq;
+       struct bnxt_qplib_crsqe *crsqe;
        u32 cmdq_depth = rcfw->cmdq_depth;
-       struct bnxt_qplib_crsq *crsqe;
        u32 sw_prod, cmdq_prod;
+       struct pci_dev *pdev;
        unsigned long flags;
        u32 size, opcode;
        u16 cookie, cbit;
        u8 *preq;
 
+       pdev = rcfw->pdev;
+
        opcode = req->opcode;
-       if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+       if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
            (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
             opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
             opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
-               dev_err(&rcfw->pdev->dev,
+               dev_err(&pdev->dev,
                        "RCFW not initialized, reject opcode 0x%x\n", opcode);
                return -EINVAL;
        }
 
-       if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+       if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
            opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
-               dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
+               dev_err(&pdev->dev, "RCFW already initialized!\n");
                return -EINVAL;
        }
 
-       if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags))
+       if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags))
                return -ETIMEDOUT;
 
        /* Cmdq are in 16-byte units, each request can consume 1 or more
         * cmdqe
         */
-       spin_lock_irqsave(&cmdq->lock, flags);
-       if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
-               dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
-               spin_unlock_irqrestore(&cmdq->lock, flags);
+       spin_lock_irqsave(&hwq->lock, flags);
+       if (req->cmd_size >= HWQ_FREE_SLOTS(hwq)) {
+               dev_err(&pdev->dev, "RCFW: CMDQ is full!\n");
+               spin_unlock_irqrestore(&hwq->lock, flags);
                return -EAGAIN;
        }
 
 
-       cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
+       cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
        cbit = cookie % rcfw->cmdq_depth;
        if (is_block)
                cookie |= RCFW_CMD_IS_BLOCKING;
 
-       set_bit(cbit, rcfw->cmdq_bitmap);
+       set_bit(cbit, cmdq->cmdq_bitmap);
        req->cookie = cpu_to_le16(cookie);
        crsqe = &rcfw->crsqe_tbl[cbit];
        if (crsqe->resp) {
-               spin_unlock_irqrestore(&cmdq->lock, flags);
+               spin_unlock_irqrestore(&hwq->lock, flags);
                return -EBUSY;
        }
 
@@ -155,15 +163,15 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
                                  BNXT_QPLIB_CMDQE_UNITS;
        }
 
-       cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
+       hwq_ptr = (struct bnxt_qplib_cmdqe **)hwq->pbl_ptr;
        preq = (u8 *)req;
        do {
                /* Locate the next cmdq slot */
-               sw_prod = HWQ_CMP(cmdq->prod, cmdq);
-               cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
+               sw_prod = HWQ_CMP(hwq->prod, hwq);
+               cmdqe = &hwq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
                                [get_cmdq_idx(sw_prod, cmdq_depth)];
                if (!cmdqe) {
-                       dev_err(&rcfw->pdev->dev,
+                       dev_err(&pdev->dev,
                                "RCFW request failed with no cmdqe!\n");
                        goto done;
                }
@@ -172,31 +180,27 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
                memcpy(cmdqe, preq, min_t(u32, size, sizeof(*cmdqe)));
                preq += min_t(u32, size, sizeof(*cmdqe));
                size -= min_t(u32, size, sizeof(*cmdqe));
-               cmdq->prod++;
-               rcfw->seq_num++;
+               hwq->prod++;
        } while (size > 0);
+       cmdq->seq_num++;
 
-       rcfw->seq_num++;
-
-       cmdq_prod = cmdq->prod;
-       if (test_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags)) {
+       cmdq_prod = hwq->prod;
+       if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
                /* The very first doorbell write
                 * is required to set this flag
                 * which prompts the FW to reset
                 * its internal pointers
                 */
                cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG);
-               clear_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
+               clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
        }
 
        /* ring CMDQ DB */
        wmb();
-       writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
-              rcfw->cmdq_bar_reg_prod_off);
-       writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
-              rcfw->cmdq_bar_reg_trig_off);
+       writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+       writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
 done:
-       spin_unlock_irqrestore(&cmdq->lock, flags);
+       spin_unlock_irqrestore(&hwq->lock, flags);
        /* Return the CREQ response pointer */
        return 0;
 }
@@ -236,7 +240,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
                /* timed out */
                dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
                        cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
-               set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
+               set_bit(FIRMWARE_TIMED_OUT, &rcfw->cmdq.flags);
                return rc;
        }
 
@@ -253,6 +257,8 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
                                         struct creq_func_event *func_event)
 {
+       int rc;
+
        switch (func_event->event) {
        case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
                break;
@@ -286,37 +292,41 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
        default:
                return -EINVAL;
        }
-       return 0;
+
+       rc = rcfw->creq.aeq_handler(rcfw, (void *)func_event, NULL);
+       return rc;
 }
 
 static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                                       struct creq_qp_event *qp_event)
 {
-       struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
        struct creq_qp_error_notification *err_event;
-       struct bnxt_qplib_crsq *crsqe;
-       unsigned long flags;
+       struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
+       struct bnxt_qplib_crsqe *crsqe;
        struct bnxt_qplib_qp *qp;
        u16 cbit, blocked = 0;
-       u16 cookie;
+       struct pci_dev *pdev;
+       unsigned long flags;
        __le16  mcookie;
+       u16 cookie;
+       int rc = 0;
        u32 qp_id;
 
+       pdev = rcfw->pdev;
        switch (qp_event->event) {
        case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
                err_event = (struct creq_qp_error_notification *)qp_event;
                qp_id = le32_to_cpu(err_event->xid);
                qp = rcfw->qp_tbl[qp_id].qp_handle;
-               dev_dbg(&rcfw->pdev->dev,
-                       "Received QP error notification\n");
-               dev_dbg(&rcfw->pdev->dev,
+               dev_dbg(&pdev->dev, "Received QP error notification\n");
+               dev_dbg(&pdev->dev,
                        "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
                        qp_id, err_event->req_err_state_reason,
                        err_event->res_err_state_reason);
                if (!qp)
                        break;
                bnxt_qplib_mark_qp_error(qp);
-               rcfw->aeq_handler(rcfw, qp_event, qp);
+               rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp);
                break;
        default:
                /*
@@ -328,7 +338,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                 *
                 */
 
-               spin_lock_irqsave_nested(&cmdq->lock, flags,
+               spin_lock_irqsave_nested(&hwq->lock, flags,
                                         SINGLE_DEPTH_NESTING);
                cookie = le16_to_cpu(qp_event->cookie);
                mcookie = qp_event->cookie;
@@ -342,44 +352,44 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                        crsqe->resp = NULL;
                } else {
                        if (crsqe->resp && crsqe->resp->cookie)
-                               dev_err(&rcfw->pdev->dev,
+                               dev_err(&pdev->dev,
                                        "CMD %s cookie sent=%#x, recd=%#x\n",
                                        crsqe->resp ? "mismatch" : "collision",
                                        crsqe->resp ? crsqe->resp->cookie : 0,
                                        mcookie);
                }
-               if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
-                       dev_warn(&rcfw->pdev->dev,
+               if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
+                       dev_warn(&pdev->dev,
                                 "CMD bit %d was not requested\n", cbit);
-               cmdq->cons += crsqe->req_size;
+               hwq->cons += crsqe->req_size;
                crsqe->req_size = 0;
 
                if (!blocked)
-                       wake_up(&rcfw->waitq);
-               spin_unlock_irqrestore(&cmdq->lock, flags);
+                       wake_up(&rcfw->cmdq.waitq);
+               spin_unlock_irqrestore(&hwq->lock, flags);
        }
-       return 0;
+       return rc;
 }
 
 /* SP - CREQ Completion handlers */
 static void bnxt_qplib_service_creq(unsigned long data)
 {
        struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
-       bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
-       struct bnxt_qplib_hwq *creq = &rcfw->creq;
+       struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
        u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
-       struct creq_base *creqe, **creq_ptr;
+       struct bnxt_qplib_hwq *hwq = &creq->hwq;
+       struct creq_base *creqe, **hwq_ptr;
        u32 sw_cons, raw_cons;
        unsigned long flags;
 
        /* Service the CREQ until budget is over */
-       spin_lock_irqsave(&creq->lock, flags);
-       raw_cons = creq->cons;
+       spin_lock_irqsave(&hwq->lock, flags);
+       raw_cons = hwq->cons;
        while (budget > 0) {
-               sw_cons = HWQ_CMP(raw_cons, creq);
-               creq_ptr = (struct creq_base **)creq->pbl_ptr;
-               creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
-               if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements))
+               sw_cons = HWQ_CMP(raw_cons, hwq);
+               hwq_ptr = (struct creq_base **)hwq->pbl_ptr;
+               creqe = &hwq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
+               if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements))
                        break;
                /* The valid test of the entry must be done first before
                 * reading any further.
@@ -391,12 +401,12 @@ static void bnxt_qplib_service_creq(unsigned long data)
                case CREQ_BASE_TYPE_QP_EVENT:
                        bnxt_qplib_process_qp_event
                                (rcfw, (struct creq_qp_event *)creqe);
-                       rcfw->creq_qp_event_processed++;
+                       creq->stats.creq_qp_event_processed++;
                        break;
                case CREQ_BASE_TYPE_FUNC_EVENT:
                        if (!bnxt_qplib_process_func_event
                            (rcfw, (struct creq_func_event *)creqe))
-                               rcfw->creq_func_event_processed++;
+                               creq->stats.creq_func_event_processed++;
                        else
                                dev_warn(&rcfw->pdev->dev,
                                         "aeqe:%#x Not handled\n", type);
@@ -412,28 +422,30 @@ static void bnxt_qplib_service_creq(unsigned long data)
                budget--;
        }
 
-       if (creq->cons != raw_cons) {
-               creq->cons = raw_cons;
-               bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
-                                             raw_cons, creq->max_elements,
-                                             rcfw->creq_ring_id, gen_p5);
+       if (hwq->cons != raw_cons) {
+               hwq->cons = raw_cons;
+               bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
+                                     rcfw->res->cctx, true);
        }
-       spin_unlock_irqrestore(&creq->lock, flags);
+       spin_unlock_irqrestore(&hwq->lock, flags);
 }
 
 static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
 {
        struct bnxt_qplib_rcfw *rcfw = dev_instance;
-       struct bnxt_qplib_hwq *creq = &rcfw->creq;
+       struct bnxt_qplib_creq_ctx *creq;
        struct creq_base **creq_ptr;
+       struct bnxt_qplib_hwq *hwq;
        u32 sw_cons;
 
+       creq = &rcfw->creq;
+       hwq = &creq->hwq;
        /* Prefetch the CREQ element */
-       sw_cons = HWQ_CMP(creq->cons, creq);
-       creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr;
+       sw_cons = HWQ_CMP(hwq->cons, hwq);
+       creq_ptr = (struct creq_base **)creq->hwq.pbl_ptr;
        prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);
 
-       tasklet_schedule(&rcfw->worker);
+       tasklet_schedule(&creq->creq_tasklet);
 
        return IRQ_HANDLED;
 }
@@ -452,7 +464,7 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
        if (rc)
                return rc;
 
-       clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+       clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
        return 0;
 }
 
@@ -520,9 +532,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
        level = ctx->tim_tbl.level;
        req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) |
                                  __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]);
-       level = ctx->tqm_pde_level;
-       req.tqm_pg_size_tqm_lvl = (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
-                                 __get_pbl_pg_idx(&ctx->tqm_pde.pbl[level]);
+       level = ctx->tqm_ctx.pde.level;
+       req.tqm_pg_size_tqm_lvl =
+               (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
+                __get_pbl_pg_idx(&ctx->tqm_ctx.pde.pbl[level]);
 
        req.qpc_page_dir =
                cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
@@ -535,7 +548,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
        req.tim_page_dir =
                cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
        req.tqm_page_dir =
-               cpu_to_le64(ctx->tqm_pde.pbl[PBL_LVL_0].pg_map_arr[0]);
+               cpu_to_le64(ctx->tqm_ctx.pde.pbl[PBL_LVL_0].pg_map_arr[0]);
 
        req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
        req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
@@ -555,33 +568,46 @@ skip_ctx_setup:
                                          NULL, 0);
        if (rc)
                return rc;
-       set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+       set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
        return 0;
 }
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 {
+       kfree(rcfw->cmdq.cmdq_bitmap);
        kfree(rcfw->qp_tbl);
        kfree(rcfw->crsqe_tbl);
-       bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
-       bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
+       bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
+       bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq);
        rcfw->pdev = NULL;
 }
 
-int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
                                  struct bnxt_qplib_rcfw *rcfw,
                                  struct bnxt_qplib_ctx *ctx,
                                  int qp_tbl_sz)
 {
-       u8 hwq_type;
-
-       rcfw->pdev = pdev;
-       rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
-       hwq_type = bnxt_qplib_get_hwq_type(rcfw->res);
-       if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL,
-                                     &rcfw->creq.max_elements,
-                                     BNXT_QPLIB_CREQE_UNITS,
-                                     0, PAGE_SIZE, hwq_type)) {
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
+       struct bnxt_qplib_sg_info sginfo = {};
+       struct bnxt_qplib_cmdq_ctx *cmdq;
+       struct bnxt_qplib_creq_ctx *creq;
+       u32 bmap_size = 0;
+
+       rcfw->pdev = res->pdev;
+       cmdq = &rcfw->cmdq;
+       creq = &rcfw->creq;
+       rcfw->res = res;
+
+       sginfo.pgsize = PAGE_SIZE;
+       sginfo.pgshft = PAGE_SHIFT;
+
+       hwq_attr.sginfo = &sginfo;
+       hwq_attr.res = rcfw->res;
+       hwq_attr.depth = BNXT_QPLIB_CREQE_MAX_CNT;
+       hwq_attr.stride = BNXT_QPLIB_CREQE_UNITS;
+       hwq_attr.type = bnxt_qplib_get_hwq_type(res);
+
+       if (bnxt_qplib_alloc_init_hwq(&creq->hwq, &hwq_attr)) {
                dev_err(&rcfw->pdev->dev,
                        "HW channel CREQ allocation failed\n");
                goto fail;
@@ -591,23 +617,28 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
        else
                rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
 
-       rcfw->cmdq.max_elements = rcfw->cmdq_depth;
-       if (bnxt_qplib_alloc_init_hwq
-                       (rcfw->pdev, &rcfw->cmdq, NULL,
-                        &rcfw->cmdq.max_elements,
-                        BNXT_QPLIB_CMDQE_UNITS, 0,
-                        bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth),
-                        HWQ_TYPE_CTX)) {
+       sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth);
+       hwq_attr.depth = rcfw->cmdq_depth;
+       hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS;
+       hwq_attr.type = HWQ_TYPE_CTX;
+       if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
                dev_err(&rcfw->pdev->dev,
                        "HW channel CMDQ allocation failed\n");
                goto fail;
        }
 
-       rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+       rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements,
                                  sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
        if (!rcfw->crsqe_tbl)
                goto fail;
 
+       bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long);
+       cmdq->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
+       if (!cmdq->cmdq_bitmap)
+               goto fail;
+
+       cmdq->bmap_size = bmap_size;
+
        rcfw->qp_tbl_size = qp_tbl_sz;
        rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node),
                               GFP_KERNEL);
@@ -623,137 +654,199 @@ fail:
 
 void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
 {
-       bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
+       struct bnxt_qplib_creq_ctx *creq;
 
-       tasklet_disable(&rcfw->worker);
+       creq = &rcfw->creq;
+       tasklet_disable(&creq->creq_tasklet);
        /* Mask h/w interrupts */
-       bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
-                               rcfw->creq.max_elements, rcfw->creq_ring_id,
-                               gen_p5);
+       bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
        /* Sync with last running IRQ-handler */
-       synchronize_irq(rcfw->vector);
+       synchronize_irq(creq->msix_vec);
        if (kill)
-               tasklet_kill(&rcfw->worker);
+               tasklet_kill(&creq->creq_tasklet);
 
-       if (rcfw->requested) {
-               free_irq(rcfw->vector, rcfw);
-               rcfw->requested = false;
+       if (creq->requested) {
+               free_irq(creq->msix_vec, rcfw);
+               creq->requested = false;
        }
 }
 
 void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 {
+       struct bnxt_qplib_creq_ctx *creq;
+       struct bnxt_qplib_cmdq_ctx *cmdq;
        unsigned long indx;
 
+       creq = &rcfw->creq;
+       cmdq = &rcfw->cmdq;
+       /* Make sure the HW channel is stopped! */
        bnxt_qplib_rcfw_stop_irq(rcfw, true);
 
-       iounmap(rcfw->cmdq_bar_reg_iomem);
-       iounmap(rcfw->creq_bar_reg_iomem);
+       iounmap(cmdq->cmdq_mbox.reg.bar_reg);
+       iounmap(creq->creq_db.reg.bar_reg);
 
-       indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
-       if (indx != rcfw->bmap_size)
+       indx = find_first_bit(cmdq->cmdq_bitmap, cmdq->bmap_size);
+       if (indx != cmdq->bmap_size)
                dev_err(&rcfw->pdev->dev,
                        "disabling RCFW with pending cmd-bit %lx\n", indx);
-       kfree(rcfw->cmdq_bitmap);
-       rcfw->bmap_size = 0;
 
-       rcfw->cmdq_bar_reg_iomem = NULL;
-       rcfw->creq_bar_reg_iomem = NULL;
-       rcfw->aeq_handler = NULL;
-       rcfw->vector = 0;
+       cmdq->cmdq_mbox.reg.bar_reg = NULL;
+       creq->creq_db.reg.bar_reg = NULL;
+       creq->aeq_handler = NULL;
+       creq->msix_vec = 0;
 }
 
 int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
                              bool need_init)
 {
-       bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
+       struct bnxt_qplib_creq_ctx *creq;
        int rc;
 
-       if (rcfw->requested)
+       creq = &rcfw->creq;
+
+       if (creq->requested)
                return -EFAULT;
 
-       rcfw->vector = msix_vector;
+       creq->msix_vec = msix_vector;
        if (need_init)
-               tasklet_init(&rcfw->worker,
+               tasklet_init(&creq->creq_tasklet,
                             bnxt_qplib_service_creq, (unsigned long)rcfw);
        else
-               tasklet_enable(&rcfw->worker);
-       rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
+               tasklet_enable(&creq->creq_tasklet);
+       rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
                         "bnxt_qplib_creq", rcfw);
        if (rc)
                return rc;
-       rcfw->requested = true;
-       bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
-                                     rcfw->creq.cons, rcfw->creq.max_elements,
-                                     rcfw->creq_ring_id, gen_p5);
+       creq->requested = true;
+
+       bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true);
 
        return 0;
 }
 
-int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
-                                  struct bnxt_qplib_rcfw *rcfw,
-                                  int msix_vector,
-                                  int cp_bar_reg_off, int virt_fn,
-                                  int (*aeq_handler)(struct bnxt_qplib_rcfw *,
-                                                     void *, void *))
+static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw, bool is_vf)
 {
-       resource_size_t res_base;
-       struct cmdq_init init;
-       u16 bmap_size;
-       int rc;
+       struct bnxt_qplib_cmdq_mbox *mbox;
+       resource_size_t bar_reg;
+       struct pci_dev *pdev;
+       u16 prod_offt;
+       int rc = 0;
 
-       /* General */
-       rcfw->seq_num = 0;
-       set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
-       bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long);
-       rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
-       if (!rcfw->cmdq_bitmap)
-               return -ENOMEM;
-       rcfw->bmap_size = bmap_size;
+       pdev = rcfw->pdev;
+       mbox = &rcfw->cmdq.cmdq_mbox;
 
-       /* CMDQ */
-       rcfw->cmdq_bar_reg = RCFW_COMM_PCI_BAR_REGION;
-       res_base = pci_resource_start(pdev, rcfw->cmdq_bar_reg);
-       if (!res_base)
+       mbox->reg.bar_id = RCFW_COMM_PCI_BAR_REGION;
+       mbox->reg.len = RCFW_COMM_SIZE;
+       mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id);
+       if (!mbox->reg.bar_base) {
+               dev_err(&pdev->dev,
+                       "QPLIB: CMDQ BAR region %d resc start is 0!\n",
+                       mbox->reg.bar_id);
                return -ENOMEM;
+       }
 
-       rcfw->cmdq_bar_reg_iomem = ioremap(res_base +
-                                             RCFW_COMM_BASE_OFFSET,
-                                             RCFW_COMM_SIZE);
-       if (!rcfw->cmdq_bar_reg_iomem) {
-               dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
-                       rcfw->cmdq_bar_reg);
+       bar_reg = mbox->reg.bar_base + RCFW_COMM_BASE_OFFSET;
+       mbox->reg.len = RCFW_COMM_SIZE;
+       mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len);
+       if (!mbox->reg.bar_reg) {
+               dev_err(&pdev->dev,
+                       "QPLIB: CMDQ BAR region %d mapping failed\n",
+                       mbox->reg.bar_id);
                return -ENOMEM;
        }
 
-       rcfw->cmdq_bar_reg_prod_off = virt_fn ? RCFW_VF_COMM_PROD_OFFSET :
-                                       RCFW_PF_COMM_PROD_OFFSET;
+       prod_offt = is_vf ? RCFW_VF_COMM_PROD_OFFSET :
+                           RCFW_PF_COMM_PROD_OFFSET;
+       mbox->prod = (void  __iomem *)(mbox->reg.bar_reg + prod_offt);
+       mbox->db = (void __iomem *)(mbox->reg.bar_reg + RCFW_COMM_TRIG_OFFSET);
+       return rc;
+}
 
-       rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
+static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
+{
+       struct bnxt_qplib_creq_db *creq_db;
+       resource_size_t bar_reg;
+       struct pci_dev *pdev;
 
-       /* CREQ */
-       rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
-       res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
-       if (!res_base)
-               dev_err(&rcfw->pdev->dev,
-                       "CREQ BAR region %d resc start is 0!\n",
-                       rcfw->creq_bar_reg);
+       pdev = rcfw->pdev;
+       creq_db = &rcfw->creq.creq_db;
+
+       creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
+       creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
+       if (!creq_db->reg.bar_base)
+               dev_err(&pdev->dev,
+                       "QPLIB: CREQ BAR region %d resc start is 0!",
+                       creq_db->reg.bar_id);
+
+       bar_reg = creq_db->reg.bar_base + reg_offt;
        /* Unconditionally map 8 bytes to support 57500 series */
-       rcfw->creq_bar_reg_iomem = ioremap(res_base + cp_bar_reg_off,
-                                                  8);
-       if (!rcfw->creq_bar_reg_iomem) {
-               dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
-                       rcfw->creq_bar_reg);
-               iounmap(rcfw->cmdq_bar_reg_iomem);
-               rcfw->cmdq_bar_reg_iomem = NULL;
+       creq_db->reg.len = 8;
+       creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len);
+       if (!creq_db->reg.bar_reg) {
+               dev_err(&pdev->dev,
+                       "QPLIB: CREQ BAR region %d mapping failed",
+                       creq_db->reg.bar_id);
                return -ENOMEM;
        }
-       rcfw->creq_qp_event_processed = 0;
-       rcfw->creq_func_event_processed = 0;
+       creq_db->dbinfo.db = creq_db->reg.bar_reg;
+       creq_db->dbinfo.hwq = &rcfw->creq.hwq;
+       creq_db->dbinfo.xid = rcfw->creq.ring_id;
+       return 0;
+}
 
-       if (aeq_handler)
-               rcfw->aeq_handler = aeq_handler;
-       init_waitqueue_head(&rcfw->waitq);
+static void bnxt_qplib_start_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+       struct bnxt_qplib_cmdq_ctx *cmdq;
+       struct bnxt_qplib_creq_ctx *creq;
+       struct bnxt_qplib_cmdq_mbox *mbox;
+       struct cmdq_init init = {0};
+
+       cmdq = &rcfw->cmdq;
+       creq = &rcfw->creq;
+       mbox = &cmdq->cmdq_mbox;
+
+       init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+       init.cmdq_size_cmdq_lvl =
+                       cpu_to_le16(((rcfw->cmdq_depth <<
+                                     CMDQ_INIT_CMDQ_SIZE_SFT) &
+                                   CMDQ_INIT_CMDQ_SIZE_MASK) |
+                                   ((cmdq->hwq.level <<
+                                     CMDQ_INIT_CMDQ_LVL_SFT) &
+                                   CMDQ_INIT_CMDQ_LVL_MASK));
+       init.creq_ring_id = cpu_to_le16(creq->ring_id);
+       /* Write to the Bono mailbox register */
+       __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4);
+}
+
+int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
+                                  int msix_vector,
+                                  int cp_bar_reg_off, int virt_fn,
+                                  aeq_handler_t aeq_handler)
+{
+       struct bnxt_qplib_cmdq_ctx *cmdq;
+       struct bnxt_qplib_creq_ctx *creq;
+       int rc;
+
+       cmdq = &rcfw->cmdq;
+       creq = &rcfw->creq;
+
+       /* Clear to defaults */
+
+       cmdq->seq_num = 0;
+       set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+       init_waitqueue_head(&cmdq->waitq);
+
+       creq->stats.creq_qp_event_processed = 0;
+       creq->stats.creq_func_event_processed = 0;
+       creq->aeq_handler = aeq_handler;
+
+       rc = bnxt_qplib_map_cmdq_mbox(rcfw, virt_fn);
+       if (rc)
+               return rc;
+
+       rc = bnxt_qplib_map_creq_db(rcfw, cp_bar_reg_off);
+       if (rc)
+               return rc;
 
        rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
        if (rc) {
@@ -763,16 +856,8 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
                return rc;
        }
 
-       init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
-       init.cmdq_size_cmdq_lvl = cpu_to_le16(
-               ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) &
-                CMDQ_INIT_CMDQ_SIZE_MASK) |
-               ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
-                CMDQ_INIT_CMDQ_LVL_MASK));
-       init.creq_ring_id = cpu_to_le16(rcfw->creq_ring_id);
+       bnxt_qplib_start_rcfw(rcfw);
 
-       /* Write to the Bono mailbox register */
-       __iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
        return 0;
 }
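
Hypothetical caller sketch, not from the patch: after this rework bnxt_qplib_enable_rcfw_channel() drops the pdev argument and the open-coded function-pointer type; BAR mapping, IRQ setup and the CMDQ_INIT mailbox write all happen inside it. Assuming a placeholder async-event handler name, a caller would look roughly like:

static int example_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
                               void *aeqe, void *obj)
{
        /* dispatch firmware async events (e.g. QP error notifications) */
        return 0;
}

static int example_start_fw_channel(struct bnxt_qplib_rcfw *rcfw,
                                    int msix_vector, int cp_bar_reg_off,
                                    int is_virtfn)
{
        /* maps the CMDQ mailbox and CREQ doorbell, requests the CREQ IRQ,
         * then writes the CMDQ_INIT mailbox to start the firmware channel
         */
        return bnxt_qplib_enable_rcfw_channel(rcfw, msix_vector,
                                              cp_bar_reg_off, is_virtfn,
                                              example_aeq_handler);
}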
 
index dfeadc1..411fce3 100644
@@ -206,8 +206,9 @@ static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons,
 #define CREQ_ENTRY_POLL_BUDGET         0x100
 
 /* HWQ */
+typedef int (*aeq_handler_t)(struct bnxt_qplib_rcfw *, void *, void *);
 
-struct bnxt_qplib_crsq {
+struct bnxt_qplib_crsqe {
        struct creq_qp_event    *resp;
        u32                     req_size;
 };
@@ -225,41 +226,53 @@ struct bnxt_qplib_qp_node {
 
 #define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
 
+#define FIRMWARE_INITIALIZED_FLAG      (0)
+#define FIRMWARE_FIRST_FLAG            (31)
+#define FIRMWARE_TIMED_OUT             (3)
+struct bnxt_qplib_cmdq_mbox {
+       struct bnxt_qplib_reg_desc      reg;
+       void __iomem                    *prod;
+       void __iomem                    *db;
+};
+
+struct bnxt_qplib_cmdq_ctx {
+       struct bnxt_qplib_hwq           hwq;
+       struct bnxt_qplib_cmdq_mbox     cmdq_mbox;
+       wait_queue_head_t               waitq;
+       unsigned long                   flags;
+       unsigned long                   *cmdq_bitmap;
+       u32                             bmap_size;
+       u32                             seq_num;
+};
+
+struct bnxt_qplib_creq_db {
+       struct bnxt_qplib_reg_desc      reg;
+       struct bnxt_qplib_db_info       dbinfo;
+};
+
+struct bnxt_qplib_creq_stat {
+       u64     creq_qp_event_processed;
+       u64     creq_func_event_processed;
+};
+
+struct bnxt_qplib_creq_ctx {
+       struct bnxt_qplib_hwq           hwq;
+       struct bnxt_qplib_creq_db       creq_db;
+       struct bnxt_qplib_creq_stat     stats;
+       struct tasklet_struct           creq_tasklet;
+       aeq_handler_t                   aeq_handler;
+       u16                             ring_id;
+       int                             msix_vec;
+       bool                            requested; /* irq handler installed */
+};
+
 /* RCFW Communication Channels */
 struct bnxt_qplib_rcfw {
        struct pci_dev          *pdev;
        struct bnxt_qplib_res   *res;
-       int                     vector;
-       struct tasklet_struct   worker;
-       bool                    requested;
-       unsigned long           *cmdq_bitmap;
-       u32                     bmap_size;
-       unsigned long           flags;
-#define FIRMWARE_INITIALIZED_FLAG      0
-#define FIRMWARE_FIRST_FLAG            31
-#define FIRMWARE_TIMED_OUT             3
-       wait_queue_head_t       waitq;
-       int                     (*aeq_handler)(struct bnxt_qplib_rcfw *,
-                                              void *, void *);
-       u32                     seq_num;
-
-       /* Bar region info */
-       void __iomem            *cmdq_bar_reg_iomem;
-       u16                     cmdq_bar_reg;
-       u16                     cmdq_bar_reg_prod_off;
-       u16                     cmdq_bar_reg_trig_off;
-       u16                     creq_ring_id;
-       u16                     creq_bar_reg;
-       void __iomem            *creq_bar_reg_iomem;
-
-       /* Cmd-Resp and Async Event notification queue */
-       struct bnxt_qplib_hwq   creq;
-       u64                     creq_qp_event_processed;
-       u64                     creq_func_event_processed;
-
-       /* Actual Cmd and Resp Queues */
-       struct bnxt_qplib_hwq   cmdq;
-       struct bnxt_qplib_crsq  *crsqe_tbl;
+       struct bnxt_qplib_cmdq_ctx      cmdq;
+       struct bnxt_qplib_creq_ctx      creq;
+       struct bnxt_qplib_crsqe         *crsqe_tbl;
        int qp_tbl_size;
        struct bnxt_qplib_qp_node *qp_tbl;
        u64 oos_prev;
@@ -268,7 +281,7 @@ struct bnxt_qplib_rcfw {
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
-int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
                                  struct bnxt_qplib_rcfw *rcfw,
                                  struct bnxt_qplib_ctx *ctx,
                                  int qp_tbl_sz);
@@ -276,12 +289,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
 void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
                              bool need_init);
-int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
-                                  struct bnxt_qplib_rcfw *rcfw,
+int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
                                   int msix_vector,
                                   int cp_bar_reg_off, int virt_fn,
-                                  int (*aeq_handler)(struct bnxt_qplib_rcfw *,
-                                                     void *aeqe, void *obj));
+                                  aeq_handler_t aeq_handler);
 
 struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
                                struct bnxt_qplib_rcfw *rcfw,
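
With the aeq_handler_t typedef above, the async-event callback has a single named type instead of the repeated inline function-pointer declaration. A minimal sketch of a handler matching that signature and of the reworked bnxt_qplib_enable_rcfw_channel() call; the handler name and the vector/BAR arguments are placeholders, not values from the patch:

/* Placeholder handler matching the new aeq_handler_t signature */
static int example_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
                               void *aeqe, void *obj)
{
        /* decode the async event element here */
        return 0;
}

static int example_enable(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
                          int cp_bar_reg_off, int virt_fn)
{
        /* the rcfw context replaces the old pci_dev argument */
        return bnxt_qplib_enable_rcfw_channel(rcfw, msix_vector,
                                              cp_bar_reg_off, virt_fn,
                                              example_aeq_handler);
}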
index 60ea1b9..cab1adf 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/inetdevice.h>
 #include <linux/dma-mapping.h>
 #include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_sp.h"
@@ -55,9 +56,10 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
                                      struct bnxt_qplib_stats *stats);
 
 /* PBL */
-static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
                       bool is_umem)
 {
+       struct pci_dev *pdev = res->pdev;
        int i;
 
        if (!is_umem) {
@@ -74,35 +76,56 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
                        pbl->pg_arr[i] = NULL;
                }
        }
-       kfree(pbl->pg_arr);
+       vfree(pbl->pg_arr);
        pbl->pg_arr = NULL;
-       kfree(pbl->pg_map_arr);
+       vfree(pbl->pg_map_arr);
        pbl->pg_map_arr = NULL;
        pbl->pg_count = 0;
        pbl->pg_size = 0;
 }
 
-static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
-                      struct scatterlist *sghead, u32 pages,
-                      u32 nmaps, u32 pg_size)
+static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl,
+                                          struct bnxt_qplib_sg_info *sginfo)
 {
+       struct scatterlist *sghead = sginfo->sghead;
        struct sg_dma_page_iter sg_iter;
+       int i = 0;
+
+       for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) {
+               pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
+               pbl->pg_arr[i] = NULL;
+               pbl->pg_count++;
+               i++;
+       }
+}
+
+static int __alloc_pbl(struct bnxt_qplib_res *res,
+                      struct bnxt_qplib_pbl *pbl,
+                      struct bnxt_qplib_sg_info *sginfo)
+{
+       struct pci_dev *pdev = res->pdev;
+       struct scatterlist *sghead;
        bool is_umem = false;
+       u32 pages;
        int i;
 
+       if (sginfo->nopte)
+               return 0;
+       pages = sginfo->npages;
+       sghead = sginfo->sghead;
        /* page ptr arrays */
-       pbl->pg_arr = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+       pbl->pg_arr = vmalloc(pages * sizeof(void *));
        if (!pbl->pg_arr)
                return -ENOMEM;
 
-       pbl->pg_map_arr = kcalloc(pages, sizeof(dma_addr_t), GFP_KERNEL);
+       pbl->pg_map_arr = vmalloc(pages * sizeof(dma_addr_t));
        if (!pbl->pg_map_arr) {
-               kfree(pbl->pg_arr);
+               vfree(pbl->pg_arr);
                pbl->pg_arr = NULL;
                return -ENOMEM;
        }
        pbl->pg_count = 0;
-       pbl->pg_size = pg_size;
+       pbl->pg_size = sginfo->pgsize;
 
        if (!sghead) {
                for (i = 0; i < pages; i++) {
@@ -115,25 +138,19 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
                        pbl->pg_count++;
                }
        } else {
-               i = 0;
                is_umem = true;
-               for_each_sg_dma_page(sghead, &sg_iter, nmaps, 0) {
-                       pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
-                       pbl->pg_arr[i] = NULL;
-                       pbl->pg_count++;
-                       i++;
-               }
+               bnxt_qplib_fill_user_dma_pages(pbl, sginfo);
        }
 
        return 0;
-
 fail:
-       __free_pbl(pdev, pbl, is_umem);
+       __free_pbl(res, pbl, is_umem);
        return -ENOMEM;
 }
 
 /* HWQ */
-void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
+void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
+                        struct bnxt_qplib_hwq *hwq)
 {
        int i;
 
@@ -144,9 +161,9 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
 
        for (i = 0; i < hwq->level + 1; i++) {
                if (i == hwq->level)
-                       __free_pbl(pdev, &hwq->pbl[i], hwq->is_user);
+                       __free_pbl(res, &hwq->pbl[i], hwq->is_user);
                else
-                       __free_pbl(pdev, &hwq->pbl[i], false);
+                       __free_pbl(res, &hwq->pbl[i], false);
        }
 
        hwq->level = PBL_LVL_MAX;
@@ -158,79 +175,113 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
 }
 
 /* All HWQs are power of 2 in size */
-int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
-                             struct bnxt_qplib_sg_info *sg_info,
-                             u32 *elements, u32 element_size, u32 aux,
-                             u32 pg_size, enum bnxt_qplib_hwq_type hwq_type)
+
+int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+                             struct bnxt_qplib_hwq_attr *hwq_attr)
 {
-       u32 pages, maps, slots, size, aux_pages = 0, aux_size = 0;
+       u32 npages, aux_slots, pg_size, aux_pages = 0, aux_size = 0;
+       struct bnxt_qplib_sg_info sginfo = {};
+       u32 depth, stride, npbl, npde;
        dma_addr_t *src_phys_ptr, **dst_virt_ptr;
        struct scatterlist *sghead = NULL;
-       int i, rc;
-
+       struct bnxt_qplib_res *res;
+       struct pci_dev *pdev;
+       int i, rc, lvl;
+
+       res = hwq_attr->res;
+       pdev = res->pdev;
+       sghead = hwq_attr->sginfo->sghead;
+       pg_size = hwq_attr->sginfo->pgsize;
        hwq->level = PBL_LVL_MAX;
 
-       slots = roundup_pow_of_two(*elements);
-       if (aux) {
-               aux_size = roundup_pow_of_two(aux);
-               aux_pages = (slots * aux_size) / pg_size;
-               if ((slots * aux_size) % pg_size)
+       depth = roundup_pow_of_two(hwq_attr->depth);
+       stride = roundup_pow_of_two(hwq_attr->stride);
+       if (hwq_attr->aux_depth) {
+               aux_slots = hwq_attr->aux_depth;
+               aux_size = roundup_pow_of_two(hwq_attr->aux_stride);
+               aux_pages = (aux_slots * aux_size) / pg_size;
+               if ((aux_slots * aux_size) % pg_size)
                        aux_pages++;
        }
-       size = roundup_pow_of_two(element_size);
-
-       if (sg_info)
-               sghead = sg_info->sglist;
 
        if (!sghead) {
                hwq->is_user = false;
-               pages = (slots * size) / pg_size + aux_pages;
-               if ((slots * size) % pg_size)
-                       pages++;
-               if (!pages)
+               npages = (depth * stride) / pg_size + aux_pages;
+               if ((depth * stride) % pg_size)
+                       npages++;
+               if (!npages)
                        return -EINVAL;
-               maps = 0;
+               hwq_attr->sginfo->npages = npages;
        } else {
                hwq->is_user = true;
-               pages = sg_info->npages;
-               maps = sg_info->nmap;
+               npages = hwq_attr->sginfo->npages;
+               npages = (npages * PAGE_SIZE) /
+                         BIT_ULL(hwq_attr->sginfo->pgshft);
+               if ((hwq_attr->sginfo->npages * PAGE_SIZE) %
+                    BIT_ULL(hwq_attr->sginfo->pgshft))
+                       if (!npages)
+                               npages++;
        }
 
-       /* Alloc the 1st memory block; can be a PDL/PTL/PBL */
-       if (sghead && (pages == MAX_PBL_LVL_0_PGS))
-               rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead,
-                                pages, maps, pg_size);
-       else
-               rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL,
-                                1, 0, pg_size);
-       if (rc)
-               goto fail;
-
-       hwq->level = PBL_LVL_0;
+       if (npages == MAX_PBL_LVL_0_PGS) {
+               /* This request is Level 0, map PTE */
+               rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo);
+               if (rc)
+                       goto fail;
+               hwq->level = PBL_LVL_0;
+       }
 
-       if (pages > MAX_PBL_LVL_0_PGS) {
-               if (pages > MAX_PBL_LVL_1_PGS) {
+       if (npages > MAX_PBL_LVL_0_PGS) {
+               if (npages > MAX_PBL_LVL_1_PGS) {
+                       u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
+                                   0 : PTU_PTE_VALID;
                        /* 2 levels of indirection */
-                       rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL,
-                                        MAX_PBL_LVL_1_PGS_FOR_LVL_2,
-                                        0, pg_size);
+                       npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+                       if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+                               npbl++;
+                       npde = npbl >> MAX_PDL_LVL_SHIFT;
+                       if (npbl % BIT(MAX_PDL_LVL_SHIFT))
+                               npde++;
+                       /* Alloc PDE pages */
+                       sginfo.pgsize = npde * pg_size;
+                       sginfo.npages = 1;
+                       rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
+
+                       /* Alloc PBL pages */
+                       sginfo.npages = npbl;
+                       sginfo.pgsize = PAGE_SIZE;
+                       rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1], &sginfo);
                        if (rc)
                                goto fail;
-                       /* Fill in lvl0 PBL */
+                       /* Fill PDL with PBL page pointers */
                        dst_virt_ptr =
                                (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
                        src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
-                       for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
-                               dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
-                                       src_phys_ptr[i] | PTU_PDE_VALID;
-                       hwq->level = PBL_LVL_1;
-
-                       rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead,
-                                        pages, maps, pg_size);
+                       if (hwq_attr->type == HWQ_TYPE_MR) {
+                       /* For MR it is expected that we supply only 1 contiguous
+                        * page, i.e. only 1 entry in the PDL that will contain
+                        * all the PBLs for the user-supplied memory region
+                        */
+                               for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count;
+                                    i++)
+                                       dst_virt_ptr[0][i] = src_phys_ptr[i] |
+                                               flag;
+                       } else {
+                               for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count;
+                                    i++)
+                                       dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+                                               src_phys_ptr[i] |
+                                               PTU_PDE_VALID;
+                       }
+                       /* Alloc or init PTEs */
+                       rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2],
+                                        hwq_attr->sginfo);
                        if (rc)
                                goto fail;
-
-                       /* Fill in lvl1 PBL */
+                       hwq->level = PBL_LVL_2;
+                       if (hwq_attr->sginfo->nopte)
+                               goto done;
+                       /* Fill PBLs with PTE pointers */
                        dst_virt_ptr =
                                (dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr;
                        src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr;
@@ -238,7 +289,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
                                dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
                                        src_phys_ptr[i] | PTU_PTE_VALID;
                        }
-                       if (hwq_type == HWQ_TYPE_QUEUE) {
+                       if (hwq_attr->type == HWQ_TYPE_QUEUE) {
                                /* Find the last pg of the size */
                                i = hwq->pbl[PBL_LVL_2].pg_count;
                                dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
@@ -248,25 +299,36 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
                                                    [PTR_IDX(i - 2)] |=
                                                    PTU_PTE_NEXT_TO_LAST;
                        }
-                       hwq->level = PBL_LVL_2;
-               } else {
-                       u32 flag = hwq_type == HWQ_TYPE_L2_CMPL ? 0 :
-                                               PTU_PTE_VALID;
+               } else { /* pages < 512: npbl = 1, npde = 0 */
+                       u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
+                                   0 : PTU_PTE_VALID;
 
                        /* 1 level of indirection */
-                       rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead,
-                                        pages, maps, pg_size);
+                       npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+                       if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+                               npbl++;
+                       sginfo.npages = npbl;
+                       sginfo.pgsize = PAGE_SIZE;
+                       /* Alloc PBL page */
+                       rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
                        if (rc)
                                goto fail;
-                       /* Fill in lvl0 PBL */
+                       /* Alloc or init PTEs */
+                       rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1],
+                                        hwq_attr->sginfo);
+                       if (rc)
+                               goto fail;
+                       hwq->level = PBL_LVL_1;
+                       if (hwq_attr->sginfo->nopte)
+                               goto done;
+                       /* Fill PBL with PTE pointers */
                        dst_virt_ptr =
                                (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
                        src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
-                       for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) {
+                       for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
                                dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
                                        src_phys_ptr[i] | flag;
-                       }
-                       if (hwq_type == HWQ_TYPE_QUEUE) {
+                       if (hwq_attr->type == HWQ_TYPE_QUEUE) {
                                /* Find the last pg of the size */
                                i = hwq->pbl[PBL_LVL_1].pg_count;
                                dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
@@ -276,42 +338,141 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
                                                    [PTR_IDX(i - 2)] |=
                                                    PTU_PTE_NEXT_TO_LAST;
                        }
-                       hwq->level = PBL_LVL_1;
                }
        }
-       hwq->pdev = pdev;
-       spin_lock_init(&hwq->lock);
+done:
        hwq->prod = 0;
        hwq->cons = 0;
-       *elements = hwq->max_elements = slots;
-       hwq->element_size = size;
-
+       hwq->pdev = pdev;
+       hwq->depth = hwq_attr->depth;
+       hwq->max_elements = depth;
+       hwq->element_size = stride;
        /* For direct access to the elements */
-       hwq->pbl_ptr = hwq->pbl[hwq->level].pg_arr;
-       hwq->pbl_dma_ptr = hwq->pbl[hwq->level].pg_map_arr;
+       lvl = hwq->level;
+       if (hwq_attr->sginfo->nopte && hwq->level)
+               lvl = hwq->level - 1;
+       hwq->pbl_ptr = hwq->pbl[lvl].pg_arr;
+       hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr;
+       spin_lock_init(&hwq->lock);
 
        return 0;
-
 fail:
-       bnxt_qplib_free_hwq(pdev, hwq);
+       bnxt_qplib_free_hwq(res, hwq);
        return -ENOMEM;
 }
 
 /* Context Tables */
-void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
                         struct bnxt_qplib_ctx *ctx)
 {
        int i;
 
-       bnxt_qplib_free_hwq(pdev, &ctx->qpc_tbl);
-       bnxt_qplib_free_hwq(pdev, &ctx->mrw_tbl);
-       bnxt_qplib_free_hwq(pdev, &ctx->srqc_tbl);
-       bnxt_qplib_free_hwq(pdev, &ctx->cq_tbl);
-       bnxt_qplib_free_hwq(pdev, &ctx->tim_tbl);
+       bnxt_qplib_free_hwq(res, &ctx->qpc_tbl);
+       bnxt_qplib_free_hwq(res, &ctx->mrw_tbl);
+       bnxt_qplib_free_hwq(res, &ctx->srqc_tbl);
+       bnxt_qplib_free_hwq(res, &ctx->cq_tbl);
+       bnxt_qplib_free_hwq(res, &ctx->tim_tbl);
        for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
-               bnxt_qplib_free_hwq(pdev, &ctx->tqm_tbl[i]);
-       bnxt_qplib_free_hwq(pdev, &ctx->tqm_pde);
-       bnxt_qplib_free_stats_ctx(pdev, &ctx->stats);
+               bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.qtbl[i]);
+       /* restore original pde level before destroy */
+       ctx->tqm_ctx.pde.level = ctx->tqm_ctx.pde_level;
+       bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.pde);
+       bnxt_qplib_free_stats_ctx(res->pdev, &ctx->stats);
+}
+
+static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res,
+                                     struct bnxt_qplib_ctx *ctx)
+{
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
+       struct bnxt_qplib_sg_info sginfo = {};
+       struct bnxt_qplib_tqm_ctx *tqmctx;
+       int rc = 0;
+       int i;
+
+       tqmctx = &ctx->tqm_ctx;
+
+       sginfo.pgsize = PAGE_SIZE;
+       sginfo.pgshft = PAGE_SHIFT;
+       hwq_attr.sginfo = &sginfo;
+       hwq_attr.res = res;
+       hwq_attr.type = HWQ_TYPE_CTX;
+       hwq_attr.depth = 512;
+       hwq_attr.stride = sizeof(u64);
+       /* Alloc pdl buffer */
+       rc = bnxt_qplib_alloc_init_hwq(&tqmctx->pde, &hwq_attr);
+       if (rc)
+               goto out;
+       /* Save original pdl level */
+       tqmctx->pde_level = tqmctx->pde.level;
+
+       hwq_attr.stride = 1;
+       for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
+               if (!tqmctx->qcount[i])
+                       continue;
+               hwq_attr.depth = ctx->qpc_count * tqmctx->qcount[i];
+               rc = bnxt_qplib_alloc_init_hwq(&tqmctx->qtbl[i], &hwq_attr);
+               if (rc)
+                       goto out;
+       }
+out:
+       return rc;
+}
+
+static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx)
+{
+       struct bnxt_qplib_hwq *tbl;
+       dma_addr_t *dma_ptr;
+       __le64 **pbl_ptr, *ptr;
+       int i, j, k;
+       int fnz_idx = -1;
+       int pg_count;
+
+       pbl_ptr = (__le64 **)ctx->pde.pbl_ptr;
+
+       for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
+            i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
+               tbl = &ctx->qtbl[i];
+               if (!tbl->max_elements)
+                       continue;
+               if (fnz_idx == -1)
+                       fnz_idx = i; /* first non-zero index */
+               switch (tbl->level) {
+               case PBL_LVL_2:
+                       pg_count = tbl->pbl[PBL_LVL_1].pg_count;
+                       for (k = 0; k < pg_count; k++) {
+                               ptr = &pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)];
+                               dma_ptr = &tbl->pbl[PBL_LVL_1].pg_map_arr[k];
+                               *ptr = cpu_to_le64(*dma_ptr | PTU_PTE_VALID);
+                       }
+                       break;
+               case PBL_LVL_1:
+               case PBL_LVL_0:
+               default:
+                       ptr = &pbl_ptr[PTR_PG(j)][PTR_IDX(j)];
+                       *ptr = cpu_to_le64(tbl->pbl[PBL_LVL_0].pg_map_arr[0] |
+                                          PTU_PTE_VALID);
+                       break;
+               }
+       }
+       if (fnz_idx == -1)
+               fnz_idx = 0;
+       /* update pde level as per page table programming */
+       ctx->pde.level = (ctx->qtbl[fnz_idx].level == PBL_LVL_2) ? PBL_LVL_2 :
+                         ctx->qtbl[fnz_idx].level + 1;
+}
+
+static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res,
+                                     struct bnxt_qplib_ctx *ctx)
+{
+       int rc = 0;
+
+       rc = bnxt_qplib_alloc_tqm_rings(res, ctx);
+       if (rc)
+               goto fail;
+
+       bnxt_qplib_map_tqm_pgtbl(&ctx->tqm_ctx);
+fail:
+       return rc;
 }
 
 /*
@@ -335,120 +496,72 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev,
  * Returns:
  *     0 if success, else -ERRORS
  */
-int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
                         struct bnxt_qplib_ctx *ctx,
                         bool virt_fn, bool is_p5)
 {
-       int i, j, k, rc = 0;
-       int fnz_idx = -1;
-       __le64 **pbl_ptr;
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
+       struct bnxt_qplib_sg_info sginfo = {};
+       int rc = 0;
 
        if (virt_fn || is_p5)
                goto stats_alloc;
 
        /* QPC Tables */
-       ctx->qpc_tbl.max_elements = ctx->qpc_count;
-       rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL,
-                                      &ctx->qpc_tbl.max_elements,
-                                      BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_CTX);
+       sginfo.pgsize = PAGE_SIZE;
+       sginfo.pgshft = PAGE_SHIFT;
+       hwq_attr.sginfo = &sginfo;
+
+       hwq_attr.res = res;
+       hwq_attr.depth = ctx->qpc_count;
+       hwq_attr.stride = BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE;
+       hwq_attr.type = HWQ_TYPE_CTX;
+       rc = bnxt_qplib_alloc_init_hwq(&ctx->qpc_tbl, &hwq_attr);
        if (rc)
                goto fail;
 
        /* MRW Tables */
-       ctx->mrw_tbl.max_elements = ctx->mrw_count;
-       rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL,
-                                      &ctx->mrw_tbl.max_elements,
-                                      BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_CTX);
+       hwq_attr.depth = ctx->mrw_count;
+       hwq_attr.stride = BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE;
+       rc = bnxt_qplib_alloc_init_hwq(&ctx->mrw_tbl, &hwq_attr);
        if (rc)
                goto fail;
 
        /* SRQ Tables */
-       ctx->srqc_tbl.max_elements = ctx->srqc_count;
-       rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL,
-                                      &ctx->srqc_tbl.max_elements,
-                                      BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_CTX);
+       hwq_attr.depth = ctx->srqc_count;
+       hwq_attr.stride = BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE;
+       rc = bnxt_qplib_alloc_init_hwq(&ctx->srqc_tbl, &hwq_attr);
        if (rc)
                goto fail;
 
        /* CQ Tables */
-       ctx->cq_tbl.max_elements = ctx->cq_count;
-       rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL,
-                                      &ctx->cq_tbl.max_elements,
-                                      BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_CTX);
+       hwq_attr.depth = ctx->cq_count;
+       hwq_attr.stride = BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE;
+       rc = bnxt_qplib_alloc_init_hwq(&ctx->cq_tbl, &hwq_attr);
        if (rc)
                goto fail;
 
        /* TQM Buffer */
-       ctx->tqm_pde.max_elements = 512;
-       rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL,
-                                      &ctx->tqm_pde.max_elements, sizeof(u64),
-                                      0, PAGE_SIZE, HWQ_TYPE_CTX);
+       rc = bnxt_qplib_setup_tqm_rings(res, ctx);
        if (rc)
                goto fail;
-
-       for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
-               if (!ctx->tqm_count[i])
-                       continue;
-               ctx->tqm_tbl[i].max_elements = ctx->qpc_count *
-                                              ctx->tqm_count[i];
-               rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL,
-                                              &ctx->tqm_tbl[i].max_elements, 1,
-                                              0, PAGE_SIZE, HWQ_TYPE_CTX);
-               if (rc)
-                       goto fail;
-       }
-       pbl_ptr = (__le64 **)ctx->tqm_pde.pbl_ptr;
-       for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
-            i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
-               if (!ctx->tqm_tbl[i].max_elements)
-                       continue;
-               if (fnz_idx == -1)
-                       fnz_idx = i;
-               switch (ctx->tqm_tbl[i].level) {
-               case PBL_LVL_2:
-                       for (k = 0; k < ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_count;
-                            k++)
-                               pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)] =
-                                 cpu_to_le64(
-                                   ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_map_arr[k]
-                                   | PTU_PTE_VALID);
-                       break;
-               case PBL_LVL_1:
-               case PBL_LVL_0:
-               default:
-                       pbl_ptr[PTR_PG(j)][PTR_IDX(j)] = cpu_to_le64(
-                               ctx->tqm_tbl[i].pbl[PBL_LVL_0].pg_map_arr[0] |
-                               PTU_PTE_VALID);
-                       break;
-               }
-       }
-       if (fnz_idx == -1)
-               fnz_idx = 0;
-       ctx->tqm_pde_level = ctx->tqm_tbl[fnz_idx].level == PBL_LVL_2 ?
-                            PBL_LVL_2 : ctx->tqm_tbl[fnz_idx].level + 1;
-
        /* TIM Buffer */
        ctx->tim_tbl.max_elements = ctx->qpc_count * 16;
-       rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL,
-                                      &ctx->tim_tbl.max_elements, 1,
-                                      0, PAGE_SIZE, HWQ_TYPE_CTX);
+       hwq_attr.depth = ctx->qpc_count * 16;
+       hwq_attr.stride = 1;
+       rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr);
        if (rc)
                goto fail;
-
 stats_alloc:
        /* Stats */
-       rc = bnxt_qplib_alloc_stats_ctx(pdev, &ctx->stats);
+       rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats);
        if (rc)
                goto fail;
 
        return 0;
 
 fail:
-       bnxt_qplib_free_ctx(pdev, ctx);
+       bnxt_qplib_free_ctx(res, ctx);
        return rc;
 }
 
@@ -808,9 +921,6 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
        bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
        bnxt_qplib_free_pd_tbl(&res->pd_tbl);
        bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
-
-       res->netdev = NULL;
-       res->pdev = NULL;
 }
 
 int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
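
bnxt_qplib_alloc_init_hwq() now takes a single bnxt_qplib_hwq_attr (with an embedded bnxt_qplib_sg_info) instead of the old six-argument list, and the shift arithmetic above builds the page tables 512 entries per page (MAX_PBL_LVL_1_PGS_SHIFT and MAX_PDL_LVL_SHIFT are both 9 in the header that follows), i.e. npbl = DIV_ROUND_UP(npages, 512) and npde = DIV_ROUND_UP(npbl, 512). A minimal sketch of a kernel-owned queue allocation under the new attribute model; the depth and stride values are arbitrary examples, not taken from the patch:

static int example_alloc_ctx_queue(struct bnxt_qplib_res *res,
                                   struct bnxt_qplib_hwq *hwq)
{
        struct bnxt_qplib_hwq_attr hwq_attr = {};
        struct bnxt_qplib_sg_info sginfo = {};

        /* kernel-owned memory: no sghead, pages sized by PAGE_SIZE */
        sginfo.pgsize = PAGE_SIZE;
        sginfo.pgshft = PAGE_SHIFT;

        hwq_attr.res = res;
        hwq_attr.sginfo = &sginfo;
        hwq_attr.type = HWQ_TYPE_CTX;
        hwq_attr.depth = 1024;  /* example element count */
        hwq_attr.stride = 64;   /* example element size in bytes */

        return bnxt_qplib_alloc_init_hwq(hwq, &hwq_attr);
}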
index aaa76d7..95b645d 100644 (file)
@@ -55,7 +55,8 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 enum bnxt_qplib_hwq_type {
        HWQ_TYPE_CTX,
        HWQ_TYPE_QUEUE,
-       HWQ_TYPE_L2_CMPL
+       HWQ_TYPE_L2_CMPL,
+       HWQ_TYPE_MR
 };
 
 #define MAX_PBL_LVL_0_PGS              1
@@ -63,6 +64,7 @@ enum bnxt_qplib_hwq_type {
 #define MAX_PBL_LVL_1_PGS_SHIFT                9
 #define MAX_PBL_LVL_1_PGS_FOR_LVL_2    256
 #define MAX_PBL_LVL_2_PGS              (256 * 512)
+#define MAX_PDL_LVL_SHIFT               9
 
 enum bnxt_qplib_pbl_lvl {
        PBL_LVL_0,
@@ -78,6 +80,13 @@ enum bnxt_qplib_pbl_lvl {
 #define ROCE_PG_SIZE_8M                (8 * 1024 * 1024)
 #define ROCE_PG_SIZE_1G                (1024 * 1024 * 1024)
 
+struct bnxt_qplib_reg_desc {
+       u8              bar_id;
+       resource_size_t bar_base;
+       void __iomem    *bar_reg;
+       size_t          len;
+};
+
 struct bnxt_qplib_pbl {
        u32                             pg_count;
        u32                             pg_size;
@@ -85,17 +94,37 @@ struct bnxt_qplib_pbl {
        dma_addr_t                      *pg_map_arr;
 };
 
+struct bnxt_qplib_sg_info {
+       struct scatterlist              *sghead;
+       u32                             nmap;
+       u32                             npages;
+       u32                             pgshft;
+       u32                             pgsize;
+       bool                            nopte;
+};
+
+struct bnxt_qplib_hwq_attr {
+       struct bnxt_qplib_res           *res;
+       struct bnxt_qplib_sg_info       *sginfo;
+       enum bnxt_qplib_hwq_type        type;
+       u32                             depth;
+       u32                             stride;
+       u32                             aux_stride;
+       u32                             aux_depth;
+};
+
 struct bnxt_qplib_hwq {
        struct pci_dev                  *pdev;
        /* lock to protect qplib_hwq */
        spinlock_t                      lock;
-       struct bnxt_qplib_pbl           pbl[PBL_LVL_MAX];
+       struct bnxt_qplib_pbl           pbl[PBL_LVL_MAX + 1];
        enum bnxt_qplib_pbl_lvl         level;          /* 0, 1, or 2 */
        /* ptr for easy access to the PBL entries */
        void                            **pbl_ptr;
        /* ptr for easy access to the dma_addr */
        dma_addr_t                      *pbl_dma_ptr;
        u32                             max_elements;
+       u32                             depth;
        u16                             element_size;   /* Size of each entry */
 
        u32                             prod;           /* raw */
@@ -104,6 +133,13 @@ struct bnxt_qplib_hwq {
        u8                              is_user;
 };
 
+struct bnxt_qplib_db_info {
+       void __iomem            *db;
+       void __iomem            *priv_db;
+       struct bnxt_qplib_hwq   *hwq;
+       u32                     xid;
+};
+
 /* Tables */
 struct bnxt_qplib_pd_tbl {
        unsigned long                   *tbl;
@@ -159,6 +195,15 @@ struct bnxt_qplib_vf_res {
 #define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE       64
 #define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE      128
 
+#define MAX_TQM_ALLOC_REQ               48
+#define MAX_TQM_ALLOC_BLK_SIZE          8
+struct bnxt_qplib_tqm_ctx {
+       struct bnxt_qplib_hwq           pde;
+       u8                              pde_level; /* Original level */
+       struct bnxt_qplib_hwq           qtbl[MAX_TQM_ALLOC_REQ];
+       u8                              qcount[MAX_TQM_ALLOC_REQ];
+};
+
 struct bnxt_qplib_ctx {
        u32                             qpc_count;
        struct bnxt_qplib_hwq           qpc_tbl;
@@ -169,12 +214,7 @@ struct bnxt_qplib_ctx {
        u32                             cq_count;
        struct bnxt_qplib_hwq           cq_tbl;
        struct bnxt_qplib_hwq           tim_tbl;
-#define MAX_TQM_ALLOC_REQ              48
-#define MAX_TQM_ALLOC_BLK_SIZE         8
-       u8                              tqm_count[MAX_TQM_ALLOC_REQ];
-       struct bnxt_qplib_hwq           tqm_pde;
-       u32                             tqm_pde_level;
-       struct bnxt_qplib_hwq           tqm_tbl[MAX_TQM_ALLOC_REQ];
+       struct bnxt_qplib_tqm_ctx       tqm_ctx;
        struct bnxt_qplib_stats         stats;
        struct bnxt_qplib_vf_res        vf_res;
        u64                             hwrm_intf_ver;
@@ -223,11 +263,6 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
               RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
 }
 
-struct bnxt_qplib_sg_info {
-       struct scatterlist              *sglist;
-       u32                             nmap;
-       u32                             npages;
-};
 
 #define to_bnxt_qplib(ptr, type, member)       \
        container_of(ptr, type, member)
@@ -235,11 +270,10 @@ struct bnxt_qplib_sg_info {
 struct bnxt_qplib_pd;
 struct bnxt_qplib_dev_attr;
 
-void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq);
-int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
-                             struct bnxt_qplib_sg_info *sg_info, u32 *elements,
-                             u32 elements_per_page, u32 aux, u32 pg_size,
-                             enum bnxt_qplib_hwq_type hwq_type);
+void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
+                        struct bnxt_qplib_hwq *hwq);
+int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+                             struct bnxt_qplib_hwq_attr *hwq_attr);
 void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid);
 int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
                        struct bnxt_qplib_pd *pd);
@@ -258,9 +292,80 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
 int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
                         struct net_device *netdev,
                         struct bnxt_qplib_dev_attr *dev_attr);
-void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
                         struct bnxt_qplib_ctx *ctx);
-int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
                         struct bnxt_qplib_ctx *ctx,
                         bool virt_fn, bool is_p5);
+
+static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
+                                       bool arm)
+{
+       u32 key;
+
+       key = info->hwq->cons & (info->hwq->max_elements - 1);
+       key |= (CMPL_DOORBELL_IDX_VALID |
+               (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
+       if (!arm)
+               key |= CMPL_DOORBELL_MASK;
+       writel(key, info->db);
+}
+
+static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
+                                     u32 type)
+{
+       u64 key = 0;
+
+       key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
+       key <<= 32;
+       key |= (info->hwq->cons & (info->hwq->max_elements - 1)) &
+               DBC_DBC_INDEX_MASK;
+       writeq(key, info->db);
+}
+
+static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info,
+                                          u32 type)
+{
+       u64 key = 0;
+
+       key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
+       key <<= 32;
+       key |= (info->hwq->prod & (info->hwq->max_elements - 1)) &
+               DBC_DBC_INDEX_MASK;
+       writeq(key, info->db);
+}
+
+static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info,
+                                      u32 type)
+{
+       u64 key = 0;
+
+       key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
+       key <<= 32;
+       writeq(key, info->priv_db);
+}
+
+static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info,
+                                        u32 th)
+{
+       u64 key = 0;
+
+       key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | th;
+       key <<= 32;
+       key |=  th & DBC_DBC_INDEX_MASK;
+       key |= th & DBC_DBC_INDEX_MASK;
+}
+
+static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info,
+                                        struct bnxt_qplib_chip_ctx *cctx,
+                                        bool arm)
+{
+       u32 type;
+
+       type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
+       if (bnxt_qplib_is_chip_gen_p5(cctx))
+               bnxt_qplib_ring_db(info, type);
+       else
+               bnxt_qplib_ring_db32(info, arm);
+}
 #endif /* __BNXT_QPLIB_RES_H__ */
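
The doorbell helpers above are all keyed off a bnxt_qplib_db_info, so a queue fills the structure once and the helpers derive the producer/consumer index from the attached hwq. A minimal sketch of arming an NQ doorbell; the argument names are placeholders:

static void example_arm_nq(struct bnxt_qplib_hwq *nq_hwq,
                           struct bnxt_qplib_chip_ctx *cctx,
                           void __iomem *db, u32 ring_id)
{
        struct bnxt_qplib_db_info dbinfo = {};

        dbinfo.db = db;         /* mapped doorbell register */
        dbinfo.hwq = nq_hwq;    /* consumer index is read from here */
        dbinfo.xid = ring_id;   /* queue id used by the 64-bit format */

        /* picks the gen-P5 64-bit doorbell or the legacy 32-bit one */
        bnxt_qplib_ring_nq_db(&dbinfo, cctx, true);
}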
index 40296b9..66954ff 100644 (file)
@@ -585,7 +585,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 
        /* Free the qplib's MRW memory */
        if (mrw->hwq.max_elements)
-               bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+               bnxt_qplib_free_hwq(res, &mrw->hwq);
 
        return 0;
 }
@@ -646,7 +646,7 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
        if (mrw->hwq.max_elements) {
                mrw->va = 0;
                mrw->total_size = 0;
-               bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+               bnxt_qplib_free_hwq(res, &mrw->hwq);
        }
 
        return 0;
@@ -656,10 +656,12 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
                      u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-       struct cmdq_register_mr req;
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
+       struct bnxt_qplib_sg_info sginfo = {};
        struct creq_register_mr_resp resp;
-       u16 cmd_flags = 0, level;
+       struct cmdq_register_mr req;
        int pg_ptrs, pages, i, rc;
+       u16 cmd_flags = 0, level;
        dma_addr_t **pbl_ptr;
        u32 pg_size;
 
@@ -674,20 +676,23 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 
                if (pages > MAX_PBL_LVL_1_PGS) {
                        dev_err(&res->pdev->dev,
-                               "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
+                               "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n",
                                pages, MAX_PBL_LVL_1_PGS);
                        return -ENOMEM;
                }
                /* Free the hwq if it already exists, must be a rereg */
                if (mr->hwq.max_elements)
-                       bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
-
-               mr->hwq.max_elements = pages;
+                       bnxt_qplib_free_hwq(res, &mr->hwq);
                /* Use system PAGE_SIZE */
-               rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL,
-                                              &mr->hwq.max_elements,
-                                              PAGE_SIZE, 0, PAGE_SIZE,
-                                              HWQ_TYPE_CTX);
+               hwq_attr.res = res;
+               hwq_attr.depth = pages;
+               hwq_attr.stride = PAGE_SIZE;
+               hwq_attr.type = HWQ_TYPE_MR;
+               hwq_attr.sginfo = &sginfo;
+               hwq_attr.sginfo->npages = pages;
+               hwq_attr.sginfo->pgsize = PAGE_SIZE;
+               hwq_attr.sginfo->pgshft = PAGE_SHIFT;
+               rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
                if (rc) {
                        dev_err(&res->pdev->dev,
                                "SP: Reg MR memory allocation failed\n");
@@ -734,7 +739,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 
 fail:
        if (mr->hwq.max_elements)
-               bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+               bnxt_qplib_free_hwq(res, &mr->hwq);
        return rc;
 }
 
@@ -742,6 +747,8 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
                                        struct bnxt_qplib_frpl *frpl,
                                        int max_pg_ptrs)
 {
+       struct bnxt_qplib_hwq_attr hwq_attr = {};
+       struct bnxt_qplib_sg_info sginfo = {};
        int pg_ptrs, pages, rc;
 
        /* Re-calculate the max to fit the HWQ allocation model */
@@ -753,10 +760,15 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
        if (pages > MAX_PBL_LVL_1_PGS)
                return -ENOMEM;
 
-       frpl->hwq.max_elements = pages;
-       rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL,
-                                      &frpl->hwq.max_elements, PAGE_SIZE, 0,
-                                      PAGE_SIZE, HWQ_TYPE_CTX);
+       sginfo.pgsize = PAGE_SIZE;
+       sginfo.nopte = true;
+
+       hwq_attr.res = res;
+       hwq_attr.depth = pg_ptrs;
+       hwq_attr.stride = PAGE_SIZE;
+       hwq_attr.sginfo = &sginfo;
+       hwq_attr.type = HWQ_TYPE_CTX;
+       rc = bnxt_qplib_alloc_init_hwq(&frpl->hwq, &hwq_attr);
        if (!rc)
                frpl->max_pg_ptrs = pg_ptrs;
 
@@ -766,7 +778,7 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
 int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
                                       struct bnxt_qplib_frpl *frpl)
 {
-       bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
+       bnxt_qplib_free_hwq(res, &frpl->hwq);
        return 0;
 }
 
index 7d06b0f..e8e11bd 100644 (file)
@@ -707,7 +707,7 @@ struct mpa_message {
        u8 flags;
        u8 revision;
        __be16 private_data_size;
-       u8 private_data[0];
+       u8 private_data[];
 };
 
 struct mpa_v2_conn_params {
@@ -719,7 +719,7 @@ struct terminate_message {
        u8 layer_etype;
        u8 ecode;
        __be16 hdrct_rsvd;
-       u8 len_hdrs[0];
+       u8 len_hdrs[];
 };
 
 #define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
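
Replacing the zero-length arrays with C99 flexible array members here (and in t4fw_ri_api.h below) means sizeof() covers only the fixed header, so the trailing data must be sized explicitly at allocation time. A hedged sketch of how such a message could be sized, not taken from the cxgb4 code; 'plen' is a placeholder private-data length:

#include <linux/overflow.h>
#include <linux/slab.h>

static struct mpa_message *example_alloc_mpa(u16 plen)
{
        struct mpa_message *mpa;

        /* struct_size() = sizeof(*mpa) + plen * sizeof(mpa->private_data[0]),
         * with overflow checking; the flexible member itself adds no size.
         */
        mpa = kzalloc(struct_size(mpa, private_data, plen), GFP_KERNEL);
        return mpa;
}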
index 89ac2f9..ac48012 100644 (file)
@@ -2127,7 +2127,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        pr_debug("ib_pd %p\n", pd);
 
        if (attrs->qp_type != IB_QPT_RC)
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
 
        php = to_c4iw_pd(pd);
        rhp = php->rhp;
index cbdb300..a2f5e29 100644 (file)
@@ -123,7 +123,7 @@ struct fw_ri_dsgl {
        __be32  len0;
        __be64  addr0;
 #ifndef C99_NOT_SUPPORTED
-       struct fw_ri_dsge_pair sge[0];
+       struct fw_ri_dsge_pair sge[];
 #endif
 };
 
@@ -139,7 +139,7 @@ struct fw_ri_isgl {
        __be16  nsge;
        __be32  r2;
 #ifndef C99_NOT_SUPPORTED
-       struct fw_ri_sge sge[0];
+       struct fw_ri_sge sge[];
 #endif
 };
 
@@ -149,7 +149,7 @@ struct fw_ri_immd {
        __be16  r2;
        __be32  immdlen;
 #ifndef C99_NOT_SUPPORTED
-       __u8    data[0];
+       __u8    data[];
 #endif
 };
 
@@ -321,7 +321,7 @@ struct fw_ri_res_wr {
        __be32 len16_pkd;
        __u64  cookie;
 #ifndef C99_NOT_SUPPORTED
-       struct fw_ri_res res[0];
+       struct fw_ri_res res[];
 #endif
 };
 
index 74b787a..96b104a 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _EFA_ADMIN_CMDS_H_
@@ -801,21 +801,16 @@ struct efa_admin_mmio_req_read_less_resp {
 
 /* create_qp_cmd */
 #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK                BIT(0)
-#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT               1
 #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK                BIT(1)
 
 /* reg_mr_cmd */
 #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK      GENMASK(4, 0)
-#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT     7
 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK      BIT(7)
 #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK        BIT(0)
-#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT       2
 #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK        BIT(2)
 
 /* create_cq_cmd */
-#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
 #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
-#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT                  6
 #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK                   BIT(6)
 #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK    GENMASK(4, 0)
 
index c8e0c8b..29d53ed 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _EFA_ADMIN_H_
@@ -121,9 +121,7 @@ struct efa_admin_aenq_entry {
 /* aq_common_desc */
 #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK            GENMASK(11, 0)
 #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK                 BIT(0)
-#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT            1
 #define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK             BIT(1)
-#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT   2
 #define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK    BIT(2)
 
 /* acq_common_desc */
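
The _SHIFT constants can be dropped from these headers because the driver now reads and writes the fields through EFA_GET()/EFA_SET() accessors that derive the shift from the corresponding _MASK constant, FIELD_GET()/FIELD_PREP() style; their real definition lives in a driver header not shown in this section. A stand-in sketch with the same behaviour, used below as e.g. EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth):

#include <linux/bitfield.h>

/* Stand-in only: the field name is pasted onto "_MASK", so just the
 * GENMASK()/BIT() mask constants above are needed.
 */
#define EXAMPLE_EFA_GET(ptr, field) \
        FIELD_GET(field##_MASK, *(ptr))

#define EXAMPLE_EFA_SET(ptr, field, value) \
        (*(ptr) = (*(ptr) & ~(field##_MASK)) | FIELD_PREP(field##_MASK, (value)))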
index 0778f4f..7fce69f 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include "efa_com.h"
 #define EFA_ASYNC_QUEUE_DEPTH 16
 #define EFA_ADMIN_QUEUE_DEPTH 32
 
-#define MIN_EFA_VER\
-       ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
-        (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))
-
 #define EFA_CTRL_MAJOR          0
 #define EFA_CTRL_MINOR          0
 #define EFA_CTRL_SUB_MINOR      1
 
-#define MIN_EFA_CTRL_VER \
-       (((EFA_CTRL_MAJOR) << \
-       (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
-       ((EFA_CTRL_MINOR) << \
-       (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
-       (EFA_CTRL_SUB_MINOR))
-
 #define EFA_DMA_ADDR_TO_UINT32_LOW(x)   ((u32)((u64)(x)))
 #define EFA_DMA_ADDR_TO_UINT32_HIGH(x)  ((u32)(((u64)(x)) >> 32))
 
-#define EFA_REGS_ADMIN_INTR_MASK 1
-
 enum efa_cmd_status {
        EFA_CMD_SUBMITTED,
        EFA_CMD_COMPLETED,
@@ -84,7 +71,7 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
        struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
        struct efa_admin_mmio_req_read_less_resp *read_resp;
        unsigned long exp_time;
-       u32 mmio_read_reg;
+       u32 mmio_read_reg = 0;
        u32 err;
 
        read_resp = mmio_read->read_resp;
@@ -94,10 +81,9 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
 
        /* trash DMA req_id to identify when hardware is done */
        read_resp->req_id = mmio_read->seq_num + 0x9aL;
-       mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
-                       EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
-       mmio_read_reg |= mmio_read->seq_num &
-                        EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+       EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset);
+       EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID,
+               mmio_read->seq_num);
 
        writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
 
@@ -137,9 +123,9 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev)
        struct efa_com_admin_queue *aq = &edev->aq;
        struct efa_com_admin_sq *sq = &aq->sq;
        u16 size = aq->depth * sizeof(*sq->entries);
+       u32 aq_caps = 0;
        u32 addr_high;
        u32 addr_low;
-       u32 aq_caps;
 
        sq->entries =
                dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
@@ -160,10 +146,9 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev)
        writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
        writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
 
-       aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
-       aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
-                       EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
-                       EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+       EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth);
+       EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE,
+               sizeof(struct efa_admin_aq_entry));
 
        writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
 
@@ -175,9 +160,9 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev)
        struct efa_com_admin_queue *aq = &edev->aq;
        struct efa_com_admin_cq *cq = &aq->cq;
        u16 size = aq->depth * sizeof(*cq->entries);
+       u32 acq_caps = 0;
        u32 addr_high;
        u32 addr_low;
-       u32 acq_caps;
 
        cq->entries =
                dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
@@ -195,13 +180,11 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev)
        writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
        writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
 
-       acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
-       acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
-                       EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
-                       EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
-       acq_caps |= (aq->msix_vector_idx <<
-                       EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
-                       EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
+       EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth);
+       EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE,
+               sizeof(struct efa_admin_acq_entry));
+       EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR,
+               aq->msix_vector_idx);
 
        writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
 
@@ -212,7 +195,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
                                   struct efa_aenq_handlers *aenq_handlers)
 {
        struct efa_com_aenq *aenq = &edev->aenq;
-       u32 addr_low, addr_high, aenq_caps;
+       u32 addr_low, addr_high;
+       u32 aenq_caps = 0;
        u16 size;
 
        if (!aenq_handlers) {
@@ -237,13 +221,11 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
        writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
        writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
 
-       aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
-       aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
-               EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
-               EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
-       aenq_caps |= (aenq->msix_vector_idx
-                     << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
-                    EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
+       EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth);
+       EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE,
+               sizeof(struct efa_admin_aenq_entry));
+       EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR,
+               aenq->msix_vector_idx);
        writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
 
        /*
@@ -280,8 +262,8 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
 static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
                                        struct efa_comp_ctx *comp_ctx)
 {
-       u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
-                    EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+       u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command,
+                            EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
        u16 ctx_id = cmd_id & (aq->depth - 1);
 
        ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
@@ -335,8 +317,8 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
        cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
 
        cmd->aq_common_descriptor.command_id = cmd_id;
-       cmd->aq_common_descriptor.flags |= aq->sq.phase &
-               EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+       EFA_SET(&cmd->aq_common_descriptor.flags,
+               EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase);
 
        comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
        if (!comp_ctx) {
@@ -427,8 +409,8 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
        struct efa_comp_ctx *comp_ctx;
        u16 cmd_id;
 
-       cmd_id = cqe->acq_common_descriptor.command &
-                EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+       cmd_id = EFA_GET(&cqe->acq_common_descriptor.command,
+                        EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
 
        comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
        if (!comp_ctx) {
@@ -705,7 +687,7 @@ void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
        u32 mask_value = 0;
 
        if (polling)
-               mask_value = EFA_REGS_ADMIN_INTR_MASK;
+               EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1);
 
        writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
        if (polling)
@@ -743,7 +725,7 @@ int efa_com_admin_init(struct efa_com_dev *edev,
        int err;
 
        dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
-       if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
+       if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) {
                ibdev_err(edev->efa_dev,
                          "Device isn't ready, abort com init %#x\n", dev_sts);
                return -ENODEV;
@@ -778,8 +760,7 @@ int efa_com_admin_init(struct efa_com_dev *edev,
                goto err_destroy_cq;
 
        cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
-       timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
-                 EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+       timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
        if (timeout)
                /* the resolution of timeout reg is 100ms */
                aq->completion_timeout = timeout * 100000;
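/*
 * Illustrative arithmetic (editor's note, not part of the patch): the
 * ADMIN_CMD_TO field is expressed in units of 100 ms and completion_timeout
 * is kept in microseconds, so e.g. a register value of 5 becomes
 * 5 * 100000 = 500000 us. EFA_GET() pulls the 4-bit field straight out of
 * the caps register without the old *_SHIFT define.
 */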
@@ -940,7 +921,9 @@ void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
 
 int efa_com_validate_version(struct efa_com_dev *edev)
 {
+       u32 min_ctrl_ver = 0;
        u32 ctrl_ver_masked;
+       u32 min_ver = 0;
        u32 ctrl_ver;
        u32 ver;
 
@@ -953,33 +936,42 @@ int efa_com_validate_version(struct efa_com_dev *edev)
                                      EFA_REGS_CONTROLLER_VERSION_OFF);
 
        ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
-                 (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
-                         EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
-                 ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
-
-       if (ver < MIN_EFA_VER) {
+                 EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION),
+                 EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION));
+
+       EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION,
+               EFA_ADMIN_API_VERSION_MAJOR);
+       EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION,
+               EFA_ADMIN_API_VERSION_MINOR);
+       if (ver < min_ver) {
                ibdev_err(edev->efa_dev,
                          "EFA version is lower than the minimal version the driver supports\n");
                return -EOPNOTSUPP;
        }
 
-       ibdev_dbg(edev->efa_dev,
-                 "efa controller version: %d.%d.%d implementation version %d\n",
-                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
-                         EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
-                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
-                         EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
-                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
-                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
-                         EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+       ibdev_dbg(
+               edev->efa_dev,
+               "efa controller version: %d.%d.%d implementation version %d\n",
+               EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION),
+               EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION),
+               EFA_GET(&ctrl_ver,
+                       EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION),
+               EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID));
 
        ctrl_ver_masked =
-               (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
-               (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
-               (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
-
+               EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) |
+               EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) |
+               EFA_GET(&ctrl_ver,
+                       EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION);
+
+       EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION,
+               EFA_CTRL_MAJOR);
+       EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION,
+               EFA_CTRL_MINOR);
+       EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION,
+               EFA_CTRL_SUB_MINOR);
        /* Validate the ctrl version without the implementation ID */
-       if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
+       if (ctrl_ver_masked < min_ctrl_ver) {
                ibdev_err(edev->efa_dev,
                          "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
                return -EOPNOTSUPP;
@@ -1002,8 +994,7 @@ int efa_com_get_dma_width(struct efa_com_dev *edev)
        u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
        int width;
 
-       width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
-               EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+       width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH);
 
        ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
 
@@ -1017,16 +1008,14 @@ int efa_com_get_dma_width(struct efa_com_dev *edev)
        return width;
 }
 
-static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
-                               u16 exp_state)
+static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on)
 {
        u32 val, i;
 
        for (i = 0; i < timeout; i++) {
                val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
 
-               if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
-                   exp_state)
+               if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on)
                        return 0;
 
                ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
@@ -1046,36 +1035,34 @@ static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
 int efa_com_dev_reset(struct efa_com_dev *edev,
                      enum efa_regs_reset_reason_types reset_reason)
 {
-       u32 stat, timeout, cap, reset_val;
+       u32 stat, timeout, cap;
+       u32 reset_val = 0;
        int err;
 
        stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
        cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
 
-       if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
+       if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) {
                ibdev_err(edev->efa_dev,
                          "Device isn't ready, can't reset device\n");
                return -EINVAL;
        }
 
-       timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
-                 EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+       timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT);
        if (!timeout) {
                ibdev_err(edev->efa_dev, "Invalid timeout value\n");
                return -EINVAL;
        }
 
        /* start reset */
-       reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
-       reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
-                    EFA_REGS_DEV_CTL_RESET_REASON_MASK;
+       EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1);
+       EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason);
        writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
 
        /* reset clears the mmio readless address, restore it */
        efa_com_mmio_reg_read_resp_addr_init(edev);
 
-       err = wait_for_reset_state(edev, timeout,
-                                  EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+       err = wait_for_reset_state(edev, timeout, 1);
        if (err) {
                ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
                return err;
@@ -1089,8 +1076,7 @@ int efa_com_dev_reset(struct efa_com_dev *edev,
                return err;
        }
 
-       timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
-                 EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+       timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
        if (timeout)
                /* the resolution of timeout reg is 100ms */
                edev->aq.completion_timeout = timeout * 100000;
index e20bd84..eea5574 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include "efa_com.h"
@@ -161,8 +161,9 @@ int efa_com_create_cq(struct efa_com_dev *edev,
        int err;
 
        create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ;
-       create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) &
-                               EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+       EFA_SET(&create_cmd.cq_caps_2,
+               EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS,
+               params->entry_size_in_bytes / 4);
        create_cmd.cq_depth = params->cq_depth;
        create_cmd.num_sub_cqs = params->num_sub_cqs;
        create_cmd.uar = params->uarn;
@@ -227,8 +228,8 @@ int efa_com_register_mr(struct efa_com_dev *edev,
        mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR;
        mr_cmd.pd = params->pd;
        mr_cmd.mr_length = params->mr_length_in_bytes;
-       mr_cmd.flags |= params->page_shift &
-               EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK;
+       EFA_SET(&mr_cmd.flags, EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT,
+               params->page_shift);
        mr_cmd.iova = params->iova;
        mr_cmd.permissions = params->permissions;
 
@@ -242,11 +243,11 @@ int efa_com_register_mr(struct efa_com_dev *edev,
                        params->pbl.pbl.address.mem_addr_low;
                mr_cmd.pbl.pbl.address.mem_addr_high =
                        params->pbl.pbl.address.mem_addr_high;
-               mr_cmd.aq_common_desc.flags |=
-                       EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK;
+               EFA_SET(&mr_cmd.aq_common_desc.flags,
+                       EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
                if (params->indirect)
-                       mr_cmd.aq_common_desc.flags |=
-                               EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+                       EFA_SET(&mr_cmd.aq_common_desc.flags,
+                               EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1);
        }
 
        err = efa_com_cmd_exec(aq,
@@ -386,9 +387,8 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
        get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE;
 
        if (control_buff_size)
-               get_cmd.aq_common_descriptor.flags =
-                       EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
-
+               EFA_SET(&get_cmd.aq_common_descriptor.flags,
+                       EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1);
 
        efa_com_set_dma_addr(control_buf_dma_addr,
                             &get_cmd.control_buffer.address.mem_addr_high,
@@ -538,8 +538,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
 
        set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE;
        if (control_buff_size) {
-               set_cmd->aq_common_descriptor.flags =
-                       EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+               set_cmd->aq_common_descriptor.flags = 0;
+               EFA_SET(&set_cmd->aq_common_descriptor.flags,
+                       EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1);
                efa_com_set_dma_addr(control_buf_dma_addr,
                                     &set_cmd->control_buffer.address.mem_addr_high,
                                     &set_cmd->control_buffer.address.mem_addr_low);
index c559ec0..90af1c8 100644 (file)
@@ -1,14 +1,25 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _EFA_COMMON_H_
 #define _EFA_COMMON_H_
 
+#include <linux/bitfield.h>
+
 #define EFA_COMMON_SPEC_VERSION_MAJOR        2
 #define EFA_COMMON_SPEC_VERSION_MINOR        0
 
+#define EFA_GET(ptr, mask) FIELD_GET(mask##_MASK, *(ptr))
+
+#define EFA_SET(ptr, mask, value)                                              \
+       ({                                                                     \
+               typeof(ptr) _ptr = ptr;                                        \
+               *_ptr = (*_ptr & ~(mask##_MASK)) |                             \
+                       FIELD_PREP(mask##_MASK, value);                        \
+       })
+
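/*
 * Illustrative usage (editor's sketch, not part of the patch), relying only
 * on the *_MASK defines that remain in efa_regs_defs.h:
 *
 *	u32 caps = 0;
 *
 *	EFA_SET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH, 48);
 *	if (EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH) != 48)
 *		pr_warn("unexpected field value\n");
 *
 * FIELD_PREP()/FIELD_GET() from <linux/bitfield.h> derive the shift from the
 * mask at compile time, which is why the per-field *_SHIFT defines can be
 * deleted below.
 */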
 struct efa_common_mem_addr {
        u32 mem_addr_low;
 
index bb9cad3..4017982 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _EFA_REGS_H_
@@ -45,69 +45,52 @@ enum efa_regs_reset_reason_types {
 
 /* version register */
 #define EFA_REGS_VERSION_MINOR_VERSION_MASK                 0xff
-#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT                8
 #define EFA_REGS_VERSION_MAJOR_VERSION_MASK                 0xff00
 
 /* controller_version register */
 #define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK   0xff
-#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT     8
 #define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK      0xff00
-#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT     16
 #define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK      0xff0000
-#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT           24
 #define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK            0xff000000
 
 /* caps register */
 #define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK        0x1
-#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT                   1
 #define EFA_REGS_CAPS_RESET_TIMEOUT_MASK                    0x3e
-#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT                  8
 #define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK                   0xff00
-#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT                    16
 #define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK                     0xf0000
 
 /* aq_caps register */
 #define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK                      0xffff
-#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT                16
 #define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK                 0xffff0000
 
 /* acq_caps register */
 #define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK                    0xffff
-#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT              16
 #define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK               0xff0000
-#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT             24
 #define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK              0xff000000
 
 /* aenq_caps register */
 #define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK                  0xffff
-#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT            16
 #define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK             0xff0000
-#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT           24
 #define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK            0xff000000
 
+/* intr_mask register */
+#define EFA_REGS_INTR_MASK_EN_MASK                          0x1
+
 /* dev_ctl register */
 #define EFA_REGS_DEV_CTL_DEV_RESET_MASK                     0x1
-#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT                   1
 #define EFA_REGS_DEV_CTL_AQ_RESTART_MASK                    0x2
-#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT                 28
 #define EFA_REGS_DEV_CTL_RESET_REASON_MASK                  0xf0000000
 
 /* dev_sts register */
 #define EFA_REGS_DEV_STS_READY_MASK                         0x1
-#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT       1
 #define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK        0x2
-#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT          2
 #define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK           0x4
-#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT            3
 #define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK             0x8
-#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT               4
 #define EFA_REGS_DEV_STS_RESET_FINISHED_MASK                0x10
-#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT                  5
 #define EFA_REGS_DEV_STS_FATAL_ERROR_MASK                   0x20
 
 /* mmio_reg_read register */
 #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK                  0xffff
-#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT                16
 #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK                 0xffff0000
 
 #endif /* _EFA_REGS_H_ */
index ec55458..5c57098 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include <linux/vmalloc.h>
@@ -144,9 +144,6 @@ static inline bool is_rdma_read_cap(struct efa_dev *dev)
        return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
 }
 
-#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
-                                sizeof_field(typeof(x), fld) <= (sz))
-
 #define is_reserved_cleared(reserved) \
        !memchr_inv(reserved, 0, sizeof(reserved))
 
@@ -169,6 +166,14 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
        return addr;
 }
 
+static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
+                           dma_addr_t dma_addr,
+                           size_t size, enum dma_data_direction dir)
+{
+       dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
+       free_pages_exact(cpu_addr, size);
+}
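/*
 * Editor's note (not part of the patch): efa_free_mapped() is the teardown
 * counterpart of efa_zalloc_mapped() above -- unmap the DMA address first,
 * then hand the pages back, e.g. (names are placeholders for the sketch):
 *
 *	buf = efa_zalloc_mapped(dev, &dma_addr, size, DMA_TO_DEVICE);
 *	...
 *	efa_free_mapped(dev, buf, dma_addr, size, DMA_TO_DEVICE);
 *
 * The later hunks switch the QP/CQ destroy and error paths to this helper.
 */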
+
 int efa_query_device(struct ib_device *ibdev,
                     struct ib_device_attr *props,
                     struct ib_udata *udata)
@@ -402,6 +407,9 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
        int err;
 
        ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
+
+       efa_qp_user_mmap_entries_remove(qp);
+
        err = efa_destroy_qp_handle(dev, qp->qp_handle);
        if (err)
                return err;
@@ -411,11 +419,10 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
                          "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
                          qp->rq_cpu_addr, qp->rq_size,
                          &qp->rq_dma_addr);
-               dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
-                                DMA_TO_DEVICE);
+               efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
+                               qp->rq_size, DMA_TO_DEVICE);
        }
 
-       efa_qp_user_mmap_entries_remove(qp);
        kfree(qp);
        return 0;
 }
@@ -599,7 +606,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
        if (err)
                goto err_out;
 
-       if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
+       if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
                ibdev_dbg(&dev->ibdev,
                          "Incompatible ABI params, no input udata\n");
                err = -EINVAL;
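/*
 * Editor's note (not part of the patch): offsetofend(TYPE, MEMBER) expands
 * to offsetof() plus the member's size, so the check reads "does the udata
 * provided by user space extend far enough to contain driver_qp_type?".
 * It replaces the driver-local field_avail() helper with the generic macro:
 *
 *	if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen)
 *		// user-space ABI is too old to carry this field
 */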
@@ -720,13 +727,9 @@ err_remove_mmap_entries:
 err_destroy_qp:
        efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
 err_free_mapped:
-       if (qp->rq_size) {
-               dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
-                                DMA_TO_DEVICE);
-
-               if (!qp->rq_mmap_entry)
-                       free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
-       }
+       if (qp->rq_size)
+               efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
+                               qp->rq_size, DMA_TO_DEVICE);
 err_free_qp:
        kfree(qp);
 err_out:
@@ -845,10 +848,10 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
                  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
                  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
 
-       efa_destroy_cq_idx(dev, cq->cq_idx);
-       dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
-                        DMA_FROM_DEVICE);
        rdma_user_mmap_entry_remove(cq->mmap_entry);
+       efa_destroy_cq_idx(dev, cq->cq_idx);
+       efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
+                       DMA_FROM_DEVICE);
 }
 
 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
@@ -890,7 +893,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                goto err_out;
        }
 
-       if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
+       if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
                ibdev_dbg(ibdev,
                          "Incompatible ABI params, no input udata\n");
                err = -EINVAL;
@@ -985,10 +988,8 @@ err_remove_mmap:
 err_destroy_cq:
        efa_destroy_cq_idx(dev, cq->cq_idx);
 err_free_mapped:
-       dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
-                        DMA_FROM_DEVICE);
-       if (!cq->mmap_entry)
-               free_pages_exact(cq->cpu_addr, cq->size);
+       efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
+                       DMA_FROM_DEVICE);
 
 err_out:
        atomic64_inc(&dev->stats.sw_stats.create_cq_err);
@@ -1550,10 +1551,6 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 {
        struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
 
-       /* DMA mapping is already gone, now free the pages */
-       if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
-               free_pages_exact(phys_to_virt(entry->address),
-                                entry->rdma_entry.npages * PAGE_SIZE);
        kfree(entry);
 }
 
index 986c121..0dfbcfb 100644 (file)
@@ -222,11 +222,11 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
        while (bit < bitsize) {
                zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
                if (zero - 1 != bit)
-                       size += snprintf(data + size,
+                       size += scnprintf(data + size,
                                         datalen - size - 1,
                                         "0x%lx-0x%lx,", bit, zero - 1);
                else
-                       size += snprintf(data + size,
+                       size += scnprintf(data + size,
                                         datalen - size - 1, "0x%lx,",
                                         bit);
                bit = find_next_bit(fault->opcodes, bitsize, zero);
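/*
 * Editor's note (not part of the patch): snprintf() returns the length the
 * output *would* have had, so repeated calls can push "size" past "datalen"
 * and the "datalen - size - 1" bound wraps on the next iteration.
 * scnprintf() returns the number of characters actually written (excluding
 * the trailing NUL), keeping the running offset inside the buffer:
 *
 *	size += scnprintf(data + size, datalen - size - 1, "0x%lx,", bit);
 */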
index 2591158..e7fdd70 100644 (file)
@@ -209,7 +209,6 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
        fd->mm = current->mm;
        mmgrab(fd->mm);
        fd->dd = dd;
-       kobject_get(&fd->dd->kobj);
        fp->private_data = fd;
        return 0;
 nomem:
@@ -713,7 +712,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        deallocate_ctxt(uctxt);
 done:
        mmdrop(fdata->mm);
-       kobject_put(&dd->kobj);
 
        if (atomic_dec_and_test(&dd->user_refcount))
                complete(&dd->user_comp);
@@ -1696,7 +1694,7 @@ static int user_add(struct hfi1_devdata *dd)
        snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
        ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
                             &dd->user_cdev, &dd->user_device,
-                            true, &dd->kobj);
+                            true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
        if (ret)
                user_remove(dd);
 
index cae12f4..b06c259 100644 (file)
@@ -1413,8 +1413,6 @@ struct hfi1_devdata {
        bool aspm_enabled;      /* ASPM state: enabled/disabled */
        struct rhashtable *sdma_rht;
 
-       struct kobject kobj;
-
        /* vnic data */
        struct hfi1_vnic_data vnic;
        /* Lock to protect IRQ SRC register access */
index e3acda7..3759d92 100644 (file)
@@ -1198,13 +1198,13 @@ static void finalize_asic_data(struct hfi1_devdata *dd,
 }
 
 /**
- * hfi1_clean_devdata - cleans up per-unit data structure
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
  * @dd: pointer to a valid devdata structure
  *
- * It cleans up all data structures set up by
+ * It cleans up and frees all data structures set up
  * by hfi1_alloc_devdata().
  */
-static void hfi1_clean_devdata(struct hfi1_devdata *dd)
+void hfi1_free_devdata(struct hfi1_devdata *dd)
 {
        struct hfi1_asic_data *ad;
        unsigned long flags;
@@ -1231,23 +1231,6 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
        rvt_dealloc_device(&dd->verbs_dev.rdi);
 }
 
-static void __hfi1_free_devdata(struct kobject *kobj)
-{
-       struct hfi1_devdata *dd =
-               container_of(kobj, struct hfi1_devdata, kobj);
-
-       hfi1_clean_devdata(dd);
-}
-
-static struct kobj_type hfi1_devdata_type = {
-       .release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
-       kobject_put(&dd->kobj);
-}
-
 /**
  * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
  * @pdev: Valid PCI device
@@ -1333,11 +1316,10 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
                goto bail;
        }
 
-       kobject_init(&dd->kobj, &hfi1_devdata_type);
        return dd;
 
 bail:
-       hfi1_clean_devdata(dd);
+       hfi1_free_devdata(dd);
        return ERR_PTR(ret);
 }
 
index a51bcd2..7073f23 100644 (file)
@@ -2381,7 +2381,7 @@ struct opa_port_status_rsp {
                __be64 port_vl_rcv_bubble;
                __be64 port_vl_mark_fecn;
                __be64 port_vl_xmit_discards;
-       } vls[0]; /* real array size defined by # bits set in vl_select_mask */
+       } vls[]; /* real array size defined by # bits set in vl_select_mask */
 };
 
 enum counter_selects {
@@ -2423,7 +2423,7 @@ struct opa_aggregate {
        __be16 attr_id;
        __be16 err_reqlength;   /* 1 bit, 8 res, 7 bit */
        __be32 attr_mod;
-       u8 data[0];
+       u8 data[];
 };
 
 #define MSK_LLI 0x000000f0
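/*
 * Editor's note (not part of the patch): the [0] -> [] changes convert GNU
 * zero-length arrays to C99 flexible array members; sizeof(struct) and the
 * allocation math are unchanged. A hypothetical caller sizing such a struct
 * would use struct_size() from <linux/overflow.h>, e.g.:
 *
 *	rsp = kzalloc(struct_size(rsp, vls, num_vls), GFP_KERNEL);
 *
 * ("rsp" and "num_vls" are placeholder names for the sketch.)
 */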
index 2f48e69..889e63d 100644 (file)
@@ -165,7 +165,7 @@ struct opa_mad_notice_attr {
                } __packed ntc_2048;
 
        };
-       u8      class_data[0];
+       u8      class_data[];
 };
 
 #define IB_VLARB_LOWPRI_0_31    1
index c9a58b6..0102262 100644 (file)
@@ -243,7 +243,7 @@ struct sc_config_sizes {
  */
 struct pio_map_elem {
        u32 mask;
-       struct send_context *ksc[0];
+       struct send_context *ksc[];
 };
 
 /*
@@ -263,7 +263,7 @@ struct pio_vl_map {
        u32 mask;
        u8 actual_vls;
        u8 vls;
-       struct pio_map_elem *map[0];
+       struct pio_map_elem *map[];
 };
 
 int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
index a515256..c93ea02 100644 (file)
@@ -833,7 +833,7 @@ struct sdma_engine *sdma_select_engine_sc(
 struct sdma_rht_map_elem {
        u32 mask;
        u8 ctr;
-       struct sdma_engine *sde[0];
+       struct sdma_engine *sde[];
 };
 
 struct sdma_rht_node {
index 1e2e40f..7a85119 100644 (file)
@@ -1002,7 +1002,7 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
  */
 struct sdma_map_elem {
        u32 mask;
-       struct sdma_engine *sde[0];
+       struct sdma_engine *sde[];
 };
 
 /**
@@ -1024,7 +1024,7 @@ struct sdma_vl_map {
        u32 mask;
        u8 actual_vls;
        u8 vls;
-       struct sdma_map_elem *map[0];
+       struct sdma_map_elem *map[];
 };
 
 int sdma_map_init(
index 90f62c4..074ec71 100644 (file)
@@ -674,7 +674,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
                dd_dev_err(dd,
                           "Skipping sc2vl sysfs info, (err %d) port %u\n",
                           ret, port_num);
-               goto bail;
+               /*
+                * Based on the documentation for kobject_init_and_add(), the
+                * caller should call kobject_put even if this call fails.
+                */
+               goto bail_sc2vl;
        }
        kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD);
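/*
 * Editor's note (not part of the patch): kobject_init_and_add() takes a
 * reference even when it fails, so the error path must drop it with
 * kobject_put() instead of bailing out directly. The general pattern:
 *
 *	ret = kobject_init_and_add(&kobj, &ktype, parent, "name");
 *	if (ret) {
 *		kobject_put(&kobj);	// releases the ref taken by init
 *		return ret;
 *	}
 *
 * ("kobj", "ktype" and "parent" are placeholders for the sketch.)
 */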
 
@@ -684,7 +688,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
                dd_dev_err(dd,
                           "Skipping sl2sc sysfs info, (err %d) port %u\n",
                           ret, port_num);
-               goto bail_sc2vl;
+               goto bail_sl2sc;
        }
        kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD);
 
@@ -694,7 +698,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
                dd_dev_err(dd,
                           "Skipping vl2mtu sysfs info, (err %d) port %u\n",
                           ret, port_num);
-               goto bail_sl2sc;
+               goto bail_vl2mtu;
        }
        kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
 
@@ -704,7 +708,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
                dd_dev_err(dd,
                           "Skipping Congestion Control sysfs info, (err %d) port %u\n",
                           ret, port_num);
-               goto bail_vl2mtu;
+               goto bail_cc;
        }
 
        kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
@@ -742,7 +746,6 @@ bail_sl2sc:
        kobject_put(&ppd->sl2sc_kobj);
 bail_sc2vl:
        kobject_put(&ppd->sc2vl_kobj);
-bail:
        return ret;
 }
 
@@ -853,8 +856,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 
        return 0;
 bail:
-       for (i = 0; i < dd->num_sdma; i++)
-               kobject_del(&dd->per_sdma[i].kobj);
+       /*
+        * The function kobject_put() will call kobject_del() if the kobject
+        * has been added successfully. The sysfs files created under the
+        * kobject directory will also be removed during the process.
+        */
+       for (; i >= 0; i--)
+               kobject_put(&dd->per_sdma[i].kobj);
 
        return ret;
 }
@@ -867,6 +875,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd)
        struct hfi1_pportdata *ppd;
        int i;
 
+       /* Unwind operations in hfi1_verbs_register_sysfs() */
+       for (i = 0; i < dd->num_sdma; i++)
+               kobject_put(&dd->per_sdma[i].kobj);
+
        for (i = 0; i < dd->num_pports; i++) {
                ppd = &dd->pport[i];
 
index 6257eee..332abb4 100644 (file)
@@ -73,7 +73,7 @@ struct tid_rb_node {
        dma_addr_t dma_addr;
        bool freed;
        unsigned int npages;
-       struct page *pages[0];
+       struct page *pages[];
 };
 
 static inline int num_user_pages(unsigned long addr,
index 5ffe4c9..5bfb52f 100644 (file)
@@ -257,8 +257,8 @@ static int create_user_cq(struct hns_roce_dev *hr_dev,
                return ret;
        }
 
-       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-           (udata->outlen >= sizeof(*resp))) {
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB &&
+           udata->outlen >= offsetofend(typeof(*resp), cap_flags)) {
                ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
                                           &hr_cq->db);
                if (ret) {
@@ -321,8 +321,8 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev,
        struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
                                   udata, struct hns_roce_ucontext, ibucontext);
 
-       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-           (udata->outlen >= sizeof(*resp)))
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB &&
+           udata->outlen >= offsetofend(typeof(*resp), cap_flags))
                hns_roce_db_unmap_user(context, &hr_cq->db);
 
        hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
index a7c4ff9..f6b3cf6 100644 (file)
@@ -641,6 +641,19 @@ struct hns_roce_rinl_buf {
        u32                      wqe_cnt;
 };
 
+enum {
+       HNS_ROCE_FLUSH_FLAG = 0,
+};
+
+struct hns_roce_work {
+       struct hns_roce_dev *hr_dev;
+       struct work_struct work;
+       u32 qpn;
+       u32 cqn;
+       int event_type;
+       int sub_type;
+};
+
 struct hns_roce_qp {
        struct ib_qp            ibqp;
        struct hns_roce_buf     hr_buf;
@@ -656,11 +669,6 @@ struct hns_roce_qp {
        struct ib_umem          *umem;
        struct hns_roce_mtt     mtt;
        struct hns_roce_mtr     mtr;
-
-       /* this define must less than HNS_ROCE_MAX_BT_REGION */
-#define HNS_ROCE_WQE_REGION_MAX         3
-       struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX];
-       int                     region_cnt;
        int                     wqe_bt_pg_shift;
 
        u32                     buff_size;
@@ -684,6 +692,9 @@ struct hns_roce_qp {
        struct hns_roce_sge     sge;
        u32                     next_sge;
 
+       /* 0: flush needed, 1: unneeded */
+       unsigned long           flush_flag;
+       struct hns_roce_work    flush_work;
        struct hns_roce_rinl_buf rq_inl_buf;
        struct list_head        node;           /* all qps are on a list */
        struct list_head        rq_node;        /* all recv qps are on a list */
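/*
 * Editor's sketch (an assumption, not code from this patch): a flag bit
 * like HNS_ROCE_FLUSH_FLAG is typically consumed with the atomic bitops so
 * that at most one flush work is queued per QP, roughly:
 *
 *	if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag))
 *		init_flush_work(hr_dev, hr_qp);
 *
 * The exact call site is outside the hunks shown here.
 */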
@@ -762,14 +773,8 @@ struct hns_roce_eq {
        int                             eqe_ba_pg_sz;
        int                             eqe_buf_pg_sz;
        int                             hop_num;
-       u64                             *bt_l0; /* Base address table for L0 */
-       u64                             **bt_l1; /* Base address table for L1 */
-       u64                             **buf;
-       dma_addr_t                      l0_dma;
-       dma_addr_t                      *l1_dma;
-       dma_addr_t                      *buf_dma;
-       u32                             l0_last_num; /* L0 last chunk num */
-       u32                             l1_last_num; /* L1 last chunk num */
+       struct hns_roce_mtr             mtr;
+       struct hns_roce_buf             buf;
        int                             eq_max_cnt;
        int                             eq_period;
        int                             shift;
@@ -881,7 +886,7 @@ struct hns_roce_caps {
        u32             cqc_timer_ba_pg_sz;
        u32             cqc_timer_buf_pg_sz;
        u32             cqc_timer_hop_num;
-       u32             cqe_ba_pg_sz;
+       u32             cqe_ba_pg_sz;   /* page_size = 4K*(2^cqe_ba_pg_sz) */
        u32             cqe_buf_pg_sz;
        u32             cqe_hop_num;
        u32             srqwqe_ba_pg_sz;
@@ -906,15 +911,6 @@ struct hns_roce_caps {
        u16             default_ceq_arm_st;
 };
 
-struct hns_roce_work {
-       struct hns_roce_dev *hr_dev;
-       struct work_struct work;
-       u32 qpn;
-       u32 cqn;
-       int event_type;
-       int sub_type;
-};
-
 struct hns_roce_dfx_hw {
        int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn,
                              int *buffer);
@@ -1237,9 +1233,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
                                 struct ib_udata *udata);
 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                       int attr_mask, struct ib_udata *udata);
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n);
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n);
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n);
 bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
                          struct ib_cq *ib_cq);
 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
@@ -1248,9 +1245,8 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
 void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
                         struct hns_roce_cq *recv_cq);
 void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
-                              int cnt);
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                        struct ib_udata *udata);
 __be32 send_ieth(const struct ib_send_wr *wr);
 int to_hr_qp_type(int qp_type);
 
index e822157..263338b 100644 (file)
 #define DMA_ADDR_T_SHIFT               12
 #define BT_BA_SHIFT                    32
 
+#define HEM_INDEX_BUF                  BIT(0)
+#define HEM_INDEX_L0                   BIT(1)
+#define HEM_INDEX_L1                   BIT(2)
+struct hns_roce_hem_index {
+       u64 buf;
+       u64 l0;
+       u64 l1;
+       u32 inited; /* indicate which index is available */
+};
+
 bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
 {
        int hop_num = 0;
@@ -84,25 +94,27 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
        return hop_num ? true : false;
 }
 
-static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx,
-                           u32 bt_chunk_num, u64 hem_max_num)
+static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx,
+                                   u32 bt_chunk_num, u64 hem_max_num)
 {
+       u64 start_idx = round_down(hem_idx, bt_chunk_num);
        u64 check_max_num = start_idx + bt_chunk_num;
        u64 i;
 
        for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++)
-               if (hem[i])
+               if (i != hem_idx && hem[i])
                        return false;
 
        return true;
 }
 
-static bool hns_roce_check_bt_null(u64 **bt, u64 start_idx, u32 bt_chunk_num)
+static bool hns_roce_check_bt_null(u64 **bt, u64 ba_idx, u32 bt_chunk_num)
 {
+       u64 start_idx = round_down(ba_idx, bt_chunk_num);
        int i;
 
        for (i = 0; i < bt_chunk_num; i++)
-               if (bt[start_idx + i])
+               if (i != ba_idx && bt[start_idx + i])
                        return false;
 
        return true;
@@ -434,178 +446,235 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
        return ret;
 }
 
-static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
-                                  struct hns_roce_hem_table *table,
-                                  unsigned long obj)
+static int calc_hem_config(struct hns_roce_dev *hr_dev,
+                          struct hns_roce_hem_table *table, unsigned long obj,
+                          struct hns_roce_hem_mhop *mhop,
+                          struct hns_roce_hem_index *index)
 {
-       struct device *dev = hr_dev->dev;
-       struct hns_roce_hem_mhop mhop;
-       struct hns_roce_hem_iter iter;
-       u32 buf_chunk_size;
-       u32 bt_chunk_size;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       unsigned long mhop_obj = obj;
+       u32 l0_idx, l1_idx, l2_idx;
        u32 chunk_ba_num;
-       u32 hop_num;
-       u32 size;
        u32 bt_num;
-       u64 hem_idx;
-       u64 bt_l1_idx = 0;
-       u64 bt_l0_idx = 0;
-       u64 bt_ba;
-       unsigned long mhop_obj = obj;
-       int bt_l1_allocated = 0;
-       int bt_l0_allocated = 0;
-       int step_idx;
        int ret;
 
-       ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+       ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, mhop);
        if (ret)
                return ret;
 
-       buf_chunk_size = mhop.buf_chunk_size;
-       bt_chunk_size = mhop.bt_chunk_size;
-       hop_num = mhop.hop_num;
-       chunk_ba_num = bt_chunk_size / BA_BYTE_LEN;
-
-       bt_num = hns_roce_get_bt_num(table->type, hop_num);
+       l0_idx = mhop->l0_idx;
+       l1_idx = mhop->l1_idx;
+       l2_idx = mhop->l2_idx;
+       chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+       bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
        switch (bt_num) {
        case 3:
-               hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
-                         mhop.l1_idx * chunk_ba_num + mhop.l2_idx;
-               bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
-               bt_l0_idx = mhop.l0_idx;
+               index->l1 = l0_idx * chunk_ba_num + l1_idx;
+               index->l0 = l0_idx;
+               index->buf = l0_idx * chunk_ba_num * chunk_ba_num +
+                            l1_idx * chunk_ba_num + l2_idx;
                break;
        case 2:
-               hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
-               bt_l0_idx = mhop.l0_idx;
+               index->l0 = l0_idx;
+               index->buf = l0_idx * chunk_ba_num + l1_idx;
                break;
        case 1:
-               hem_idx = mhop.l0_idx;
+               index->buf = l0_idx;
                break;
        default:
-               dev_err(dev, "Table %d not support hop_num = %d!\n",
-                            table->type, hop_num);
+               ibdev_err(ibdev, "Table %d does not support mhop.hop_num = %d!\n",
+                         table->type, mhop->hop_num);
                return -EINVAL;
        }
 
-       if (unlikely(hem_idx >= table->num_hem)) {
-               dev_err(dev, "Table %d exceed hem limt idx = %llu,max = %lu!\n",
-                            table->type, hem_idx, table->num_hem);
+       if (unlikely(index->buf >= table->num_hem)) {
+               ibdev_err(ibdev, "Table %d exceeds hem limit, idx %llu, max %lu!\n",
+                         table->type, index->buf, table->num_hem);
                return -EINVAL;
        }
 
-       mutex_lock(&table->mutex);
+       return 0;
+}
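/*
 * Editor's note (not part of the patch): with the usual 4 KB BT chunk and
 * 8-byte base addresses (BA_BYTE_LEN), chunk_ba_num = 4096 / 8 = 512, so
 * for a 3-hop table the flat buffer index is the mixed-radix expansion
 *
 *	index->buf = l0_idx * 512 * 512 + l1_idx * 512 + l2_idx;
 *
 * with index->l1 and index->l0 addressing the intermediate BT chunks.
 */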
 
-       if (table->hem[hem_idx]) {
-               ++table->hem[hem_idx]->refcount;
-               goto out;
+static void free_mhop_hem(struct hns_roce_dev *hr_dev,
+                         struct hns_roce_hem_table *table,
+                         struct hns_roce_hem_mhop *mhop,
+                         struct hns_roce_hem_index *index)
+{
+       u32 bt_size = mhop->bt_chunk_size;
+       struct device *dev = hr_dev->dev;
+
+       if (index->inited & HEM_INDEX_BUF) {
+               hns_roce_free_hem(hr_dev, table->hem[index->buf]);
+               table->hem[index->buf] = NULL;
+       }
+
+       if (index->inited & HEM_INDEX_L1) {
+               dma_free_coherent(dev, bt_size, table->bt_l1[index->l1],
+                                 table->bt_l1_dma_addr[index->l1]);
+               table->bt_l1[index->l1] = NULL;
        }
 
+       if (index->inited & HEM_INDEX_L0) {
+               dma_free_coherent(dev, bt_size, table->bt_l0[index->l0],
+                                 table->bt_l0_dma_addr[index->l0]);
+               table->bt_l0[index->l0] = NULL;
+       }
+}
+
+static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
+                         struct hns_roce_hem_table *table,
+                         struct hns_roce_hem_mhop *mhop,
+                         struct hns_roce_hem_index *index)
+{
+       u32 bt_size = mhop->bt_chunk_size;
+       struct device *dev = hr_dev->dev;
+       struct hns_roce_hem_iter iter;
+       gfp_t flag;
+       u64 bt_ba;
+       u32 size;
+       int ret;
+
        /* alloc L1 BA's chunk */
-       if ((check_whether_bt_num_3(table->type, hop_num) ||
-               check_whether_bt_num_2(table->type, hop_num)) &&
-               !table->bt_l0[bt_l0_idx]) {
-               table->bt_l0[bt_l0_idx] = dma_alloc_coherent(dev, bt_chunk_size,
-                                           &(table->bt_l0_dma_addr[bt_l0_idx]),
+       if ((check_whether_bt_num_3(table->type, mhop->hop_num) ||
+            check_whether_bt_num_2(table->type, mhop->hop_num)) &&
+            !table->bt_l0[index->l0]) {
+               table->bt_l0[index->l0] = dma_alloc_coherent(dev, bt_size,
+                                           &table->bt_l0_dma_addr[index->l0],
                                            GFP_KERNEL);
-               if (!table->bt_l0[bt_l0_idx]) {
+               if (!table->bt_l0[index->l0]) {
                        ret = -ENOMEM;
                        goto out;
                }
-               bt_l0_allocated = 1;
-
-               /* set base address to hardware */
-               if (table->type < HEM_TYPE_MTT) {
-                       step_idx = 0;
-                       if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) {
-                               ret = -ENODEV;
-                               dev_err(dev, "set HEM base address to HW failed!\n");
-                               goto err_dma_alloc_l1;
-                       }
-               }
+               index->inited |= HEM_INDEX_L0;
        }
 
        /* alloc L2 BA's chunk */
-       if (check_whether_bt_num_3(table->type, hop_num) &&
-           !table->bt_l1[bt_l1_idx])  {
-               table->bt_l1[bt_l1_idx] = dma_alloc_coherent(dev, bt_chunk_size,
-                                           &(table->bt_l1_dma_addr[bt_l1_idx]),
+       if (check_whether_bt_num_3(table->type, mhop->hop_num) &&
+           !table->bt_l1[index->l1])  {
+               table->bt_l1[index->l1] = dma_alloc_coherent(dev, bt_size,
+                                           &table->bt_l1_dma_addr[index->l1],
                                            GFP_KERNEL);
-               if (!table->bt_l1[bt_l1_idx]) {
+               if (!table->bt_l1[index->l1]) {
                        ret = -ENOMEM;
-                       goto err_dma_alloc_l1;
-               }
-               bt_l1_allocated = 1;
-               *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) =
-                                              table->bt_l1_dma_addr[bt_l1_idx];
-
-               /* set base address to hardware */
-               step_idx = 1;
-               if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) {
-                       ret = -ENODEV;
-                       dev_err(dev, "set HEM base address to HW failed!\n");
-                       goto err_alloc_hem_buf;
+                       goto err_alloc_hem;
                }
+               index->inited |= HEM_INDEX_L1;
+               *(table->bt_l0[index->l0] + mhop->l1_idx) =
+                                              table->bt_l1_dma_addr[index->l1];
        }
 
        /*
         * alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
         * alloc bt space chunk for MTT/CQE.
         */
-       size = table->type < HEM_TYPE_MTT ? buf_chunk_size : bt_chunk_size;
-       table->hem[hem_idx] = hns_roce_alloc_hem(hr_dev,
-                                               size >> PAGE_SHIFT,
-                                               size,
-                                               (table->lowmem ? GFP_KERNEL :
-                                               GFP_HIGHUSER) | __GFP_NOWARN);
-       if (!table->hem[hem_idx]) {
+       size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
+       flag = (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN;
+       table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
+                                                   size, flag);
+       if (!table->hem[index->buf]) {
                ret = -ENOMEM;
-               goto err_alloc_hem_buf;
+               goto err_alloc_hem;
        }
 
-       hns_roce_hem_first(table->hem[hem_idx], &iter);
+       index->inited |= HEM_INDEX_BUF;
+       hns_roce_hem_first(table->hem[index->buf], &iter);
        bt_ba = hns_roce_hem_addr(&iter);
-
        if (table->type < HEM_TYPE_MTT) {
-               if (hop_num == 2) {
-                       *(table->bt_l1[bt_l1_idx] + mhop.l2_idx) = bt_ba;
-                       step_idx = 2;
-               } else if (hop_num == 1) {
-                       *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba;
-                       step_idx = 1;
-               } else if (hop_num == HNS_ROCE_HOP_NUM_0) {
-                       step_idx = 0;
-               } else {
-                       ret = -EINVAL;
-                       goto err_dma_alloc_l1;
+               if (mhop->hop_num == 2)
+                       *(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
+               else if (mhop->hop_num == 1)
+                       *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+       } else if (mhop->hop_num == 2) {
+               *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+       }
+
+       return 0;
+err_alloc_hem:
+       free_mhop_hem(hr_dev, table, mhop, index);
+out:
+       return ret;
+}
+
+static int set_mhop_hem(struct hns_roce_dev *hr_dev,
+                       struct hns_roce_hem_table *table, unsigned long obj,
+                       struct hns_roce_hem_mhop *mhop,
+                       struct hns_roce_hem_index *index)
+{
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       int step_idx;
+       int ret = 0;
+
+       if (index->inited & HEM_INDEX_L0) {
+               ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
+               if (ret) {
+                       ibdev_err(ibdev, "set HEM step 0 failed!\n");
+                       goto out;
                }
+       }
 
-               /* set HEM base address to hardware */
-               if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) {
-                       ret = -ENODEV;
-                       dev_err(dev, "set HEM base address to HW failed!\n");
-                       goto err_alloc_hem_buf;
+       if (index->inited & HEM_INDEX_L1) {
+               ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
+               if (ret) {
+                       ibdev_err(ibdev, "set HEM step 1 failed!\n");
+                       goto out;
                }
-       } else if (hop_num == 2) {
-               *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba;
        }
 
-       ++table->hem[hem_idx]->refcount;
-       goto out;
+       if (index->inited & HEM_INDEX_BUF) {
+               if (mhop->hop_num == HNS_ROCE_HOP_NUM_0)
+                       step_idx = 0;
+               else
+                       step_idx = mhop->hop_num;
+               ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
+               if (ret)
+                       ibdev_err(ibdev, "set HEM step last failed!\n");
+       }
+out:
+       return ret;
+}
 
-err_alloc_hem_buf:
-       if (bt_l1_allocated) {
-               dma_free_coherent(dev, bt_chunk_size, table->bt_l1[bt_l1_idx],
-                                 table->bt_l1_dma_addr[bt_l1_idx]);
-               table->bt_l1[bt_l1_idx] = NULL;
+static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
+                                  struct hns_roce_hem_table *table,
+                                  unsigned long obj)
+{
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       struct hns_roce_hem_index index = {};
+       struct hns_roce_hem_mhop mhop = {};
+       int ret;
+
+       ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+       if (ret) {
+               ibdev_err(ibdev, "calc hem config failed!\n");
+               return ret;
+       }
+
+       mutex_lock(&table->mutex);
+       if (table->hem[index.buf]) {
+               ++table->hem[index.buf]->refcount;
+               goto out;
        }
 
-err_dma_alloc_l1:
-       if (bt_l0_allocated) {
-               dma_free_coherent(dev, bt_chunk_size, table->bt_l0[bt_l0_idx],
-                                 table->bt_l0_dma_addr[bt_l0_idx]);
-               table->bt_l0[bt_l0_idx] = NULL;
+       ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
+       if (ret) {
+               ibdev_err(ibdev, "alloc mhop hem failed!\n");
+               goto out;
        }
 
+       /* set HEM base address to hardware */
+       if (table->type < HEM_TYPE_MTT) {
+               ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
+               if (ret) {
+                       ibdev_err(ibdev, "set HEM address to HW failed!\n");
+                       goto err_alloc;
+               }
+       }
+
+       ++table->hem[index.buf]->refcount;
+       goto out;
+
+err_alloc:
+       free_mhop_hem(hr_dev, table, &mhop, &index);
 out:
        mutex_unlock(&table->mutex);
        return ret;
@@ -656,116 +725,75 @@ out:
        return ret;
 }
 
+static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
+                          struct hns_roce_hem_table *table, unsigned long obj,
+                          struct hns_roce_hem_mhop *mhop,
+                          struct hns_roce_hem_index *index)
+{
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       u32 hop_num = mhop->hop_num;
+       u32 chunk_ba_num;
+       int step_idx;
+
+       index->inited = HEM_INDEX_BUF;
+       chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+       if (check_whether_bt_num_2(table->type, hop_num)) {
+               if (hns_roce_check_hem_null(table->hem, index->buf,
+                                           chunk_ba_num, table->num_hem))
+                       index->inited |= HEM_INDEX_L0;
+       } else if (check_whether_bt_num_3(table->type, hop_num)) {
+               if (hns_roce_check_hem_null(table->hem, index->buf,
+                                           chunk_ba_num, table->num_hem)) {
+                       index->inited |= HEM_INDEX_L1;
+                       if (hns_roce_check_bt_null(table->bt_l1, index->l1,
+                                                  chunk_ba_num))
+                               index->inited |= HEM_INDEX_L0;
+               }
+       }
+
+       if (table->type < HEM_TYPE_MTT) {
+               if (hop_num == HNS_ROCE_HOP_NUM_0)
+                       step_idx = 0;
+               else
+                       step_idx = hop_num;
+
+               if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx))
+                       ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num);
+
+               if (index->inited & HEM_INDEX_L1)
+                       if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
+                               ibdev_warn(ibdev, "Clear HEM step 1 failed.\n");
+
+               if (index->inited & HEM_INDEX_L0)
+                       if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
+                               ibdev_warn(ibdev, "Clear HEM step 0 failed.\n");
+       }
+}
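/*
 * Editor's note (not part of the patch): teardown mirrors setup in reverse
 * order -- clear_mhop_hem() detaches the base-address pointers from the
 * hardware first, and only then does free_mhop_hem() release the DMA
 * memory, as hns_roce_table_mhop_put() below now does:
 *
 *	clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
 *	free_mhop_hem(hr_dev, table, &mhop, &index);
 */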
+
 static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
                                    struct hns_roce_hem_table *table,
                                    unsigned long obj,
                                    int check_refcount)
 {
-       struct device *dev = hr_dev->dev;
-       struct hns_roce_hem_mhop mhop;
-       unsigned long mhop_obj = obj;
-       u32 bt_chunk_size;
-       u32 chunk_ba_num;
-       u32 hop_num;
-       u32 start_idx;
-       u32 bt_num;
-       u64 hem_idx;
-       u64 bt_l1_idx = 0;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       struct hns_roce_hem_index index = {};
+       struct hns_roce_hem_mhop mhop = {};
        int ret;
 
-       ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
-       if (ret)
-               return;
-
-       bt_chunk_size = mhop.bt_chunk_size;
-       hop_num = mhop.hop_num;
-       chunk_ba_num = bt_chunk_size / BA_BYTE_LEN;
-
-       bt_num = hns_roce_get_bt_num(table->type, hop_num);
-       switch (bt_num) {
-       case 3:
-               hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
-                         mhop.l1_idx * chunk_ba_num + mhop.l2_idx;
-               bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
-               break;
-       case 2:
-               hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
-               break;
-       case 1:
-               hem_idx = mhop.l0_idx;
-               break;
-       default:
-               dev_err(dev, "Table %d not support hop_num = %d!\n",
-                            table->type, hop_num);
+       ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+       if (ret) {
+               ibdev_err(ibdev, "calc hem config failed!\n");
                return;
        }
 
        mutex_lock(&table->mutex);
-
-       if (check_refcount && (--table->hem[hem_idx]->refcount > 0)) {
+       if (check_refcount && (--table->hem[index.buf]->refcount > 0)) {
                mutex_unlock(&table->mutex);
                return;
        }
 
-       if (table->type < HEM_TYPE_MTT && hop_num == 1) {
-               if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
-                       dev_warn(dev, "Clear HEM base address failed.\n");
-       } else if (table->type < HEM_TYPE_MTT && hop_num == 2) {
-               if (hr_dev->hw->clear_hem(hr_dev, table, obj, 2))
-                       dev_warn(dev, "Clear HEM base address failed.\n");
-       } else if (table->type < HEM_TYPE_MTT &&
-                  hop_num == HNS_ROCE_HOP_NUM_0) {
-               if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
-                       dev_warn(dev, "Clear HEM base address failed.\n");
-       }
-
-       /*
-        * free buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
-        * free bt space chunk for MTT/CQE.
-        */
-       hns_roce_free_hem(hr_dev, table->hem[hem_idx]);
-       table->hem[hem_idx] = NULL;
-
-       if (check_whether_bt_num_2(table->type, hop_num)) {
-               start_idx = mhop.l0_idx * chunk_ba_num;
-               if (hns_roce_check_hem_null(table->hem, start_idx,
-                                           chunk_ba_num, table->num_hem)) {
-                       if (table->type < HEM_TYPE_MTT &&
-                           hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
-                               dev_warn(dev, "Clear HEM base address failed.\n");
-
-                       dma_free_coherent(dev, bt_chunk_size,
-                                         table->bt_l0[mhop.l0_idx],
-                                         table->bt_l0_dma_addr[mhop.l0_idx]);
-                       table->bt_l0[mhop.l0_idx] = NULL;
-               }
-       } else if (check_whether_bt_num_3(table->type, hop_num)) {
-               start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
-                           mhop.l1_idx * chunk_ba_num;
-               if (hns_roce_check_hem_null(table->hem, start_idx,
-                                           chunk_ba_num, table->num_hem)) {
-                       if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
-                               dev_warn(dev, "Clear HEM base address failed.\n");
-
-                       dma_free_coherent(dev, bt_chunk_size,
-                                         table->bt_l1[bt_l1_idx],
-                                         table->bt_l1_dma_addr[bt_l1_idx]);
-                       table->bt_l1[bt_l1_idx] = NULL;
-
-                       start_idx = mhop.l0_idx * chunk_ba_num;
-                       if (hns_roce_check_bt_null(table->bt_l1, start_idx,
-                                                  chunk_ba_num)) {
-                               if (hr_dev->hw->clear_hem(hr_dev, table, obj,
-                                                         0))
-                                       dev_warn(dev, "Clear HEM base address failed.\n");
-
-                               dma_free_coherent(dev, bt_chunk_size,
-                                           table->bt_l0[mhop.l0_idx],
-                                           table->bt_l0_dma_addr[mhop.l0_idx]);
-                               table->bt_l0[mhop.l0_idx] = NULL;
-                       }
-               }
-       }
+       clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
+       free_mhop_hem(hr_dev, table, &mhop, &index);
 
        mutex_unlock(&table->mutex);
 }
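
The rewritten hns_roce_table_mhop_put() above reduces to a single pattern: take the table mutex, drop the refcount on the HEM entry, and only clear and free the backing chunks once the count reaches zero. Below is a minimal userspace sketch of that release pattern; the struct, field and helper names are illustrative stand-ins, not the driver's.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for one HEM table slot. */
struct entry {
        int refcount;
        void *buf;
};

struct table {
        pthread_mutex_t mutex;
        struct entry *slots[16];
};

/* Mirror of the put flow: drop the reference under the lock, free on zero. */
static void table_put(struct table *t, unsigned int idx)
{
        struct entry *e;

        pthread_mutex_lock(&t->mutex);
        e = t->slots[idx];
        if (e && --e->refcount == 0) {
                t->slots[idx] = NULL;   /* clear the slot first */
                free(e->buf);           /* then release the backing memory */
                free(e);
                printf("slot %u freed\n", idx);
        }
        pthread_mutex_unlock(&t->mutex);
}

int main(void)
{
        struct table t = { .mutex = PTHREAD_MUTEX_INITIALIZER };
        struct entry *e = calloc(1, sizeof(*e));

        e->refcount = 2;                /* two users share the chunk */
        e->buf = malloc(64);
        t.slots[3] = e;

        table_put(&t, 3);               /* still referenced, nothing freed */
        table_put(&t, 3);               /* last reference gone, slot 3 freed */
        return 0;
}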
@@ -1383,6 +1411,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
        void *cpu_base;
        u64 phy_base;
        int ret = 0;
+       int ba_num;
        int offset;
        int total;
        int step;
@@ -1393,12 +1422,16 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
        if (root_hem)
                return 0;
 
+       ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
+       if (ba_num < 1)
+               return -ENOMEM;
+
        INIT_LIST_HEAD(&temp_root);
-       total = r->offset;
+       offset = r->offset;
        /* point to the last region */
        r = &regions[region_cnt - 1];
-       root_hem = hem_list_alloc_item(hr_dev, total, r->offset + r->count - 1,
-                                      unit, true, 0);
+       root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
+                                      ba_num, true, 0);
        if (!root_hem)
                return -ENOMEM;
        list_add(&root_hem->list, &temp_root);
@@ -1410,7 +1443,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
                INIT_LIST_HEAD(&temp_list[i]);
 
        total = 0;
-       for (i = 0; i < region_cnt && total < unit; i++) {
+       for (i = 0; i < region_cnt && total < ba_num; i++) {
                r = &regions[i];
                if (!r->count)
                        continue;
@@ -1443,7 +1476,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
                        /* if exist mid bt, link L1 to L0 */
                        list_for_each_entry_safe(hem, temp_hem,
                                          &hem_list->mid_bt[i][1], list) {
-                               offset = hem->start / step * BA_BYTE_LEN;
+                               offset = (hem->start - r->offset) / step *
+                                         BA_BYTE_LEN;
                                hem_list_link_bt(hr_dev, cpu_base + offset,
                                                 hem->dma_addr);
                                total++;
index c6e6658..5ff028d 100644
@@ -69,7 +69,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
        struct hns_roce_wqe_data_seg *dseg = NULL;
        struct hns_roce_qp *qp = to_hr_qp(ibqp);
        struct device *dev = &hr_dev->pdev->dev;
-       struct hns_roce_sq_db sq_db;
+       struct hns_roce_sq_db sq_db = {};
        int ps_opcode = 0, i = 0;
        unsigned long flags = 0;
        void *wqe = NULL;
@@ -106,7 +106,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
                        goto out;
                }
 
-               wqe = get_send_wqe(qp, wqe_idx);
+               wqe = hns_roce_get_send_wqe(qp, wqe_idx);
                qp->sq.wrid[wqe_idx] = wr->wr_id;
 
                /* RC and RD type WQEs are processed separately */
@@ -318,8 +318,6 @@ out:
                /* Memory barrier */
                wmb();
 
-               sq_db.u32_4 = 0;
-               sq_db.u32_8 = 0;
                roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
                               SQ_DOORBELL_U32_4_SQ_HEAD_S,
                              (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
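
The "= {}" initializers added to sq_db above (and to rq_db further down) zero every member at declaration, which is what makes the explicit "u32_4 = 0" / "u32_8 = 0" assignments removable. A small standalone illustration; note that the empty-brace form is a GNU extension the kernel relies on (standardized in C23), while "{ 0 }" is the strictly portable spelling:

#include <assert.h>

struct db {
        unsigned int u32_4;
        unsigned int u32_8;
};

int main(void)
{
        struct db a = {};       /* GNU C / C23 empty-brace zero init */
        struct db b = { 0 };    /* portable equivalent */

        assert(a.u32_4 == 0 && a.u32_8 == 0);
        assert(b.u32_4 == 0 && b.u32_8 == 0);
        return 0;
}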
@@ -351,7 +349,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct device *dev = &hr_dev->pdev->dev;
-       struct hns_roce_rq_db rq_db;
+       struct hns_roce_rq_db rq_db = {};
        __le32 doorbell[2] = {0};
        unsigned long flags = 0;
        unsigned int wqe_idx;
@@ -380,7 +378,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
                        goto out;
                }
 
-               ctrl = get_recv_wqe(hr_qp, wqe_idx);
+               ctrl = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
 
                roce_set_field(ctrl->rwqe_byte_12,
                               RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
@@ -418,9 +416,6 @@ out:
                                   ROCEE_QP1C_CFG3_0_REG +
                                   QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
                } else {
-                       rq_db.u32_4 = 0;
-                       rq_db.u32_8 = 0;
-
                        roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
                                       RQ_DOORBELL_U32_4_RQ_HEAD_S,
                                       hr_qp->rq.head);
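
Both doorbell paths above build the doorbell words with roce_set_field(), which packs a value into a masked, shifted sub-field of a 32-bit word. The exact macro lives in the driver's common header; the sketch below only illustrates the general mask/shift pattern, and the helper name, macro names and field layout are assumptions made up for the example:

#include <assert.h>
#include <stdint.h>

/* Generic mask/shift field setter in the spirit of roce_set_field(). */
static inline void set_field(uint32_t *word, uint32_t mask, int shift,
                             uint32_t val)
{
        *word &= ~mask;                         /* clear the target field */
        *word |= (val << shift) & mask;         /* insert the new value */
}

#define RQ_HEAD_M       0x00007fffu     /* hypothetical 15-bit head field */
#define RQ_HEAD_S       0

int main(void)
{
        uint32_t db = 0xffff0000u;

        set_field(&db, RQ_HEAD_M, RQ_HEAD_S, 0x1234);
        assert((db & RQ_HEAD_M) == 0x1234);
        assert((db & 0xffff0000u) == 0xffff0000u);      /* other bits untouched */
        return 0;
}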
@@ -2289,9 +2284,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
 
        if (is_send) {
                /* SQ corresponds to CQE */
-               sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
+               sq_wqe = hns_roce_get_send_wqe(*cur_qp,
+                                               roce_get_field(cqe->cqe_byte_4,
                                                CQE_BYTE_4_WQE_INDEX_M,
-                                               CQE_BYTE_4_WQE_INDEX_S)&
+                                               CQE_BYTE_4_WQE_INDEX_S) &
                                                ((*cur_qp)->sq.wqe_cnt-1));
                switch (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) {
                case HNS_ROCE_WQE_OPCODE_SEND:
@@ -3623,26 +3619,11 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
                if (send_cq && send_cq != recv_cq)
                        __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
        }
-       hns_roce_unlock_cqs(send_cq, recv_cq);
-
        hns_roce_qp_remove(hr_dev, hr_qp);
-       hns_roce_qp_free(hr_dev, hr_qp);
-
-       /* RC QP, release QPN */
-       if (hr_qp->ibqp.qp_type == IB_QPT_RC)
-               hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
-
-       hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
-
-       ib_umem_release(hr_qp->umem);
-       if (!udata) {
-               kfree(hr_qp->sq.wrid);
-               kfree(hr_qp->rq.wrid);
+       hns_roce_unlock_cqs(send_cq, recv_cq);
 
-               hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
-       }
+       hns_roce_qp_destroy(hr_dev, hr_qp, udata);
 
-       kfree(hr_qp);
        return 0;
 }
 
@@ -3954,10 +3935,8 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
                eq->cons_index++;
                aeqes_found = 1;
 
-               if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
-                       dev_warn(dev, "cons_index overflow, set back to 0.\n");
+               if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1)
                        eq->cons_index = 0;
-               }
        }
 
        set_eq_cons_index_v1(eq, 0);
@@ -4007,11 +3986,8 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
                ceqes_found = 1;
 
                if (eq->cons_index >
-                   EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) {
-                       dev_warn(&eq->hr_dev->pdev->dev,
-                               "cons_index overflow, set back to 0.\n");
+                   EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1)
                        eq->cons_index = 0;
-               }
        }
 
        set_eq_cons_index_v1(eq, 0);
index 12c4cd8..c331667 100644
@@ -56,11 +56,45 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
        dseg->len  = cpu_to_le32(sg->length);
 }
 
+/*
+ * mapped-value = 1 + real-value
+ * The real hns wr opcode values start from 0. In order to distinguish
+ * initialized from uninitialized map values, we add 1 to the real value when
+ * defining the mapping, so that a mapping is valid if and only if the mapped
+ * value is greater than 0.
+ */
+#define HR_OPC_MAP(ib_key, hr_key) \
+               [IB_WR_ ## ib_key] = 1 + HNS_ROCE_V2_WQE_OP_ ## hr_key
+
+static const u32 hns_roce_op_code[] = {
+       HR_OPC_MAP(RDMA_WRITE,                  RDMA_WRITE),
+       HR_OPC_MAP(RDMA_WRITE_WITH_IMM,         RDMA_WRITE_WITH_IMM),
+       HR_OPC_MAP(SEND,                        SEND),
+       HR_OPC_MAP(SEND_WITH_IMM,               SEND_WITH_IMM),
+       HR_OPC_MAP(RDMA_READ,                   RDMA_READ),
+       HR_OPC_MAP(ATOMIC_CMP_AND_SWP,          ATOM_CMP_AND_SWAP),
+       HR_OPC_MAP(ATOMIC_FETCH_AND_ADD,        ATOM_FETCH_AND_ADD),
+       HR_OPC_MAP(SEND_WITH_INV,               SEND_WITH_INV),
+       HR_OPC_MAP(LOCAL_INV,                   LOCAL_INV),
+       HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP,   ATOM_MSK_CMP_AND_SWAP),
+       HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
+       HR_OPC_MAP(REG_MR,                      FAST_REG_PMR),
+};
+
+static u32 to_hr_opcode(u32 ib_opcode)
+{
+       if (ib_opcode >= ARRAY_SIZE(hns_roce_op_code))
+               return HNS_ROCE_V2_WQE_OP_MASK;
+
+       return hns_roce_op_code[ib_opcode] ? hns_roce_op_code[ib_opcode] - 1 :
+                                            HNS_ROCE_V2_WQE_OP_MASK;
+}
+
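
The HR_OPC_MAP()/to_hr_opcode() pair above is a sparse lookup table in which every valid entry is stored as "real value + 1", so a zero slot means "no mapping" and the lookup falls back to the mask sentinel for both unmapped and out-of-range opcodes. A compilable sketch of the same idiom, using made-up opcode names rather than the driver's:

#include <assert.h>
#include <stdint.h>

enum ib_op { IB_OP_WRITE, IB_OP_SEND, IB_OP_READ, IB_OP_MAX };
enum hw_op { HW_OP_WRITE = 0, HW_OP_SEND = 2, HW_OP_READ = 4,
             HW_OP_MASK = 0x1f };

/* Store "mapped value + 1" so an unset (zero) slot is distinguishable. */
#define OPC_MAP(ib, hw) [IB_OP_ ## ib] = 1 + HW_OP_ ## hw

static const uint32_t op_map[IB_OP_MAX] = {
        OPC_MAP(WRITE, WRITE),
        OPC_MAP(SEND,  SEND),
        /* IB_OP_READ deliberately left unmapped: its slot stays 0 */
};

static uint32_t to_hw_op(uint32_t ib_op)
{
        if (ib_op >= IB_OP_MAX)
                return HW_OP_MASK;
        return op_map[ib_op] ? op_map[ib_op] - 1 : HW_OP_MASK;
}

int main(void)
{
        assert(to_hw_op(IB_OP_WRITE) == HW_OP_WRITE);
        assert(to_hw_op(IB_OP_SEND) == HW_OP_SEND);
        assert(to_hw_op(IB_OP_READ) == HW_OP_MASK);     /* zero slot */
        assert(to_hw_op(12345) == HW_OP_MASK);          /* out of range */
        return 0;
}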
 static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
-                        struct hns_roce_wqe_frmr_seg *fseg,
-                        const struct ib_reg_wr *wr)
+                        void *wqe, const struct ib_reg_wr *wr)
 {
        struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+       struct hns_roce_wqe_frmr_seg *fseg = wqe;
 
        /* use ib_access_flags */
        roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
@@ -92,16 +126,26 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
                     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
 }
 
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
-                          const struct ib_atomic_wr *wr)
+static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
+                          struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+                          int valid_num_sge)
 {
-       if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-               aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
-               aseg->cmp_data  = cpu_to_le64(wr->compare_add);
+       struct hns_roce_wqe_atomic_seg *aseg;
+
+       set_data_seg_v2(wqe, wr->sg_list);
+       aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg);
+
+       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+               aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
+               aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add);
        } else {
-               aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
+               aseg->fetchadd_swap_data =
+                       cpu_to_le64(atomic_wr(wr)->compare_add);
                aseg->cmp_data  = 0;
        }
+
+       roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+                      V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
 }
 
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
@@ -127,7 +171,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
         * should calculate how many sges in the first page and the second
         * page.
         */
-       dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
+       dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
        fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
                      (uintptr_t)dseg) /
                      sizeof(struct hns_roce_v2_wqe_data_seg);
@@ -137,7 +181,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
                        set_data_seg_v2(dseg++, sg + i);
                        (*sge_ind)++;
                }
-               dseg = get_send_extend_sge(qp,
+               dseg = hns_roce_get_extend_sge(qp,
                                           (*sge_ind) & (qp->sge.sge_cnt - 1));
                for (i = 0; i < se_sge_num; i++) {
                        set_data_seg_v2(dseg++, sg + fi_sge_num + i);
@@ -154,11 +198,11 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                             struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
                             void *wqe, unsigned int *sge_ind,
-                            int valid_num_sge,
-                            const struct ib_send_wr **bad_wr)
+                            int valid_num_sge)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_qp *qp = to_hr_qp(ibqp);
        int j = 0;
        int i;
@@ -166,15 +210,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
                if (le32_to_cpu(rc_sq_wqe->msg_len) >
                    hr_dev->caps.max_sq_inline) {
-                       *bad_wr = wr;
-                       dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal",
-                               rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline);
+                       ibdev_err(ibdev, "inline len(1-%d)=%d, illegal",
+                                 hr_dev->caps.max_sq_inline,
+                                 le32_to_cpu(rc_sq_wqe->msg_len));
                        return -EINVAL;
                }
 
                if (wr->opcode == IB_WR_RDMA_READ) {
-                       *bad_wr =  wr;
-                       dev_err(hr_dev->dev, "Not support inline data!\n");
+                       ibdev_err(ibdev, "inline data is not supported by RDMA READ!\n");
                        return -EINVAL;
                }
 
@@ -220,62 +263,287 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        return 0;
 }
 
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
-                                const struct ib_qp_attr *attr,
-                                int attr_mask, enum ib_qp_state cur_state,
-                                enum ib_qp_state new_state);
-
 static int check_send_valid(struct hns_roce_dev *hr_dev,
                            struct hns_roce_qp *hr_qp)
 {
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct ib_qp *ibqp = &hr_qp->ibqp;
-       struct device *dev = hr_dev->dev;
 
        if (unlikely(ibqp->qp_type != IB_QPT_RC &&
                     ibqp->qp_type != IB_QPT_GSI &&
                     ibqp->qp_type != IB_QPT_UD)) {
-               dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
+               ibdev_err(ibdev, "Not supported QP type(0x%x)!\n",
+                         ibqp->qp_type);
                return -EOPNOTSUPP;
        } else if (unlikely(hr_qp->state == IB_QPS_RESET ||
                   hr_qp->state == IB_QPS_INIT ||
                   hr_qp->state == IB_QPS_RTR)) {
-               dev_err(dev, "Post WQE fail, QP state %d!\n", hr_qp->state);
+               ibdev_err(ibdev, "failed to post WQE, QP state %d!\n",
+                         hr_qp->state);
                return -EINVAL;
        } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
-               dev_err(dev, "Post WQE fail, dev state %d!\n", hr_dev->state);
+               ibdev_err(ibdev, "failed to post WQE, dev state %d!\n",
+                         hr_dev->state);
                return -EIO;
        }
 
        return 0;
 }
 
+static inline int calc_wr_sge_num(const struct ib_send_wr *wr, u32 *sge_len)
+{
+       int valid_num = 0;
+       u32 len = 0;
+       int i;
+
+       for (i = 0; i < wr->num_sge; i++) {
+               if (likely(wr->sg_list[i].length)) {
+                       len += wr->sg_list[i].length;
+                       valid_num++;
+               }
+       }
+
+       *sge_len = len;
+       return valid_num;
+}
+
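
calc_wr_sge_num() above folds in the per-WR loop that hns_roce_v2_post_send() used to carry inline: it sums only the non-zero-length SGEs and reports how many of them are valid. The toy version below, with a stand-in SGE type instead of struct ib_sge, shows the same skip-empty-entries behaviour:

#include <assert.h>
#include <stdint.h>

struct sge { uint32_t length; };        /* stand-in for struct ib_sge */

static int calc_sge_num(const struct sge *sg, int num, uint32_t *total)
{
        uint32_t len = 0;
        int valid = 0;
        int i;

        for (i = 0; i < num; i++) {
                if (sg[i].length) {     /* zero-length entries do not count */
                        len += sg[i].length;
                        valid++;
                }
        }

        *total = len;
        return valid;
}

int main(void)
{
        struct sge list[] = { { 512 }, { 0 }, { 256 } };
        uint32_t total;

        assert(calc_sge_num(list, 3, &total) == 2);
        assert(total == 768);
        return 0;
}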
+static inline int set_ud_wqe(struct hns_roce_qp *qp,
+                            const struct ib_send_wr *wr,
+                            void *wqe, unsigned int *sge_idx,
+                            unsigned int owner_bit)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+       struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+       struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe;
+       unsigned int curr_idx = *sge_idx;
+       int valid_num_sge;
+       u32 msg_len = 0;
+       bool loopback;
+       u8 *smac;
+
+       valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+       memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
+
+       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
+                      V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
+       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
+                      V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
+       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
+                      V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
+       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
+                      V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
+       roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
+                      V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]);
+       roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
+                      V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]);
+
+       /* MAC loopback */
+       smac = (u8 *)hr_dev->dev_addr[qp->port];
+       loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0;
+
+       roce_set_bit(ud_sq_wqe->byte_40,
+                    V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
+
+       roce_set_field(ud_sq_wqe->byte_4,
+                      V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+                      V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
+                      HNS_ROCE_V2_WQE_OP_SEND);
+
+       ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+       switch (wr->opcode) {
+       case IB_WR_SEND_WITH_IMM:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+               break;
+       default:
+               ud_sq_wqe->immtdata = 0;
+               break;
+       }
+
+       /* Set sig attr */
+       roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
+                    (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+       /* Set se attr */
+       roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
+                    (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+       roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
+                    owner_bit);
+
+       roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
+                      V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
+
+       roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+                      V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+
+       roce_set_field(ud_sq_wqe->byte_20,
+                      V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+                      V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+                      curr_idx & (qp->sge.sge_cnt - 1));
+
+       roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
+                      V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
+       ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
+                         qp->qkey : ud_wr(wr)->remote_qkey);
+       roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
+                      V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
+
+       roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
+                      V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
+       roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
+                      V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
+       roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
+                      V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
+       roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
+                      V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
+       roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
+                      V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
+       roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M,
+                      V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port);
+
+       roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+                    ah->av.vlan_en ? 1 : 0);
+       roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
+                      V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index);
+
+       memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2);
+
+       set_extend_sge(qp, wr, &curr_idx, valid_num_sge);
+
+       *sge_idx = curr_idx;
+
+       return 0;
+}
+
+static inline int set_rc_wqe(struct hns_roce_qp *qp,
+                            const struct ib_send_wr *wr,
+                            void *wqe, unsigned int *sge_idx,
+                            unsigned int owner_bit)
+{
+       struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+       unsigned int curr_idx = *sge_idx;
+       int valid_num_sge;
+       u32 msg_len = 0;
+       int ret = 0;
+
+       valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+       memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
+
+       rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+       switch (wr->opcode) {
+       case IB_WR_SEND_WITH_IMM:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+               break;
+       case IB_WR_SEND_WITH_INV:
+               rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+               break;
+       default:
+               rc_sq_wqe->immtdata = 0;
+               break;
+       }
+
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
+                    (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
+                    (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
+                    (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
+                    owner_bit);
+
+       wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
+       switch (wr->opcode) {
+       case IB_WR_RDMA_READ:
+       case IB_WR_RDMA_WRITE:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+               rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+               break;
+       case IB_WR_LOCAL_INV:
+               roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+               rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+               break;
+       case IB_WR_REG_MR:
+               set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
+               break;
+       case IB_WR_ATOMIC_CMP_AND_SWP:
+       case IB_WR_ATOMIC_FETCH_AND_ADD:
+               rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+               rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+               break;
+       default:
+               break;
+       }
+
+       roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+                      V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+                      to_hr_opcode(wr->opcode));
+
+       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+           wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+               set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge);
+       else if (wr->opcode != IB_WR_REG_MR)
+               ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
+                                       wqe, &curr_idx, valid_num_sge);
+
+       *sge_idx = curr_idx;
+
+       return ret;
+}
+
+static inline void update_sq_db(struct hns_roce_dev *hr_dev,
+                               struct hns_roce_qp *qp)
+{
+       /*
+        * Hip08 hardware cannot flush the WQEs in SQ if the QP state
+        * gets into errored mode. Hence, as a workaround to this hardware
+        * limitation, the driver needs to assist in flushing. But the
+        * flushing operation uses a mailbox to convey the QP state to the
+        * hardware, and the mailbox call can sleep because of the mutex
+        * protecting it. Hence, use the deferred flush for now.
+        */
+       if (qp->state == IB_QPS_ERR) {
+               if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
+                       init_flush_work(hr_dev, qp);
+       } else {
+               struct hns_roce_v2_db sq_db = {};
+
+               roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_TAG_M,
+                              V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
+               roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
+                              V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
+               roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
+                              V2_DB_PARAMETER_IDX_S,
+                              qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1));
+               roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
+                              V2_DB_PARAMETER_SL_S, qp->sl);
+
+               hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
+       }
+}
+
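
update_sq_db() above defers the error-state flush through init_flush_work() and guards it with test_and_set_bit() on HNS_ROCE_FLUSH_FLAG, so the flush work is queued at most once no matter how many posts observe the errored QP. The kernel bit helpers are not available in userspace, but the "only the first caller queues the work" idea can be sketched with C11 atomics; the flag and function names below are invented for the example:

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag flush_pending = ATOMIC_FLAG_INIT;

static void queue_flush_work(void)
{
        /* placeholder for handing the QP off to a worker/workqueue */
        printf("flush work queued\n");
}

/* Returns 1 if this call queued the work, 0 if it was already pending. */
static int maybe_queue_flush(void)
{
        if (atomic_flag_test_and_set(&flush_pending))
                return 0;       /* someone already queued it */
        queue_flush_work();
        return 1;
}

int main(void)
{
        printf("first:  %d\n", maybe_queue_flush());    /* queues the work */
        printf("second: %d\n", maybe_queue_flush());    /* no-op */
        return 0;
}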
 static int hns_roce_v2_post_send(struct ib_qp *ibqp,
                                 const struct ib_send_wr *wr,
                                 const struct ib_send_wr **bad_wr)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
-       struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
-       struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
-       struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_qp *qp = to_hr_qp(ibqp);
-       struct hns_roce_wqe_frmr_seg *fseg;
-       struct device *dev = hr_dev->dev;
-       struct hns_roce_v2_db sq_db;
-       struct ib_qp_attr attr;
+       unsigned long flags = 0;
        unsigned int owner_bit;
        unsigned int sge_idx;
        unsigned int wqe_idx;
-       unsigned long flags;
-       int valid_num_sge;
        void *wqe = NULL;
-       bool loopback;
-       int attr_mask;
-       u32 tmp_len;
-       u32 hr_op;
-       u8 *smac;
        int nreq;
        int ret;
-       int i;
 
        spin_lock_irqsave(&qp->sq.lock, flags);
 
@@ -298,327 +566,37 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
                wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
 
                if (unlikely(wr->num_sge > qp->sq.max_gs)) {
-                       dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
-                               wr->num_sge, qp->sq.max_gs);
+                       ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n",
+                                 wr->num_sge, qp->sq.max_gs);
                        ret = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }
 
-               wqe = get_send_wqe(qp, wqe_idx);
+               wqe = hns_roce_get_send_wqe(qp, wqe_idx);
                qp->sq.wrid[wqe_idx] = wr->wr_id;
                owner_bit =
                       ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
-               valid_num_sge = 0;
-               tmp_len = 0;
-
-               for (i = 0; i < wr->num_sge; i++) {
-                       if (likely(wr->sg_list[i].length)) {
-                               tmp_len += wr->sg_list[i].length;
-                               valid_num_sge++;
-                       }
-               }
 
                /* Build the WQE separately according to the QP type */
-               if (ibqp->qp_type == IB_QPT_GSI) {
-                       ud_sq_wqe = wqe;
-                       memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
-
-                       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
-                                      V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
-                       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
-                                      V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
-                       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
-                                      V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
-                       roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
-                                      V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
-                       roce_set_field(ud_sq_wqe->byte_48,
-                                      V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
-                                      V2_UD_SEND_WQE_BYTE_48_DMAC_4_S,
-                                      ah->av.mac[4]);
-                       roce_set_field(ud_sq_wqe->byte_48,
-                                      V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
-                                      V2_UD_SEND_WQE_BYTE_48_DMAC_5_S,
-                                      ah->av.mac[5]);
-
-                       /* MAC loopback */
-                       smac = (u8 *)hr_dev->dev_addr[qp->port];
-                       loopback = ether_addr_equal_unaligned(ah->av.mac,
-                                                             smac) ? 1 : 0;
-
-                       roce_set_bit(ud_sq_wqe->byte_40,
-                                    V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
-
-                       roce_set_field(ud_sq_wqe->byte_4,
-                                      V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
-                                      V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
-                                      HNS_ROCE_V2_WQE_OP_SEND);
-
-                       ud_sq_wqe->msg_len =
-                        cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
-
-                       switch (wr->opcode) {
-                       case IB_WR_SEND_WITH_IMM:
-                       case IB_WR_RDMA_WRITE_WITH_IMM:
-                               ud_sq_wqe->immtdata =
-                                     cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
-                               break;
-                       default:
-                               ud_sq_wqe->immtdata = 0;
-                               break;
-                       }
+               if (ibqp->qp_type == IB_QPT_GSI)
+                       ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+               else if (ibqp->qp_type == IB_QPT_RC)
+                       ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit);
 
-                       /* Set sig attr */
-                       roce_set_bit(ud_sq_wqe->byte_4,
-                                  V2_UD_SEND_WQE_BYTE_4_CQE_S,
-                                  (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
-
-                       /* Set se attr */
-                       roce_set_bit(ud_sq_wqe->byte_4,
-                                 V2_UD_SEND_WQE_BYTE_4_SE_S,
-                                 (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
-                       roce_set_bit(ud_sq_wqe->byte_4,
-                                    V2_UD_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
-
-                       roce_set_field(ud_sq_wqe->byte_16,
-                                      V2_UD_SEND_WQE_BYTE_16_PD_M,
-                                      V2_UD_SEND_WQE_BYTE_16_PD_S,
-                                      to_hr_pd(ibqp->pd)->pdn);
-
-                       roce_set_field(ud_sq_wqe->byte_16,
-                                      V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
-                                      V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
-                                      valid_num_sge);
-
-                       roce_set_field(ud_sq_wqe->byte_20,
-                                    V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
-                                    V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-                                    sge_idx & (qp->sge.sge_cnt - 1));
-
-                       roce_set_field(ud_sq_wqe->byte_24,
-                                      V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
-                                      V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
-                       ud_sq_wqe->qkey =
-                            cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
-                            qp->qkey : ud_wr(wr)->remote_qkey);
-                       roce_set_field(ud_sq_wqe->byte_32,
-                                      V2_UD_SEND_WQE_BYTE_32_DQPN_M,
-                                      V2_UD_SEND_WQE_BYTE_32_DQPN_S,
-                                      ud_wr(wr)->remote_qpn);
-
-                       roce_set_field(ud_sq_wqe->byte_36,
-                                      V2_UD_SEND_WQE_BYTE_36_VLAN_M,
-                                      V2_UD_SEND_WQE_BYTE_36_VLAN_S,
-                                      ah->av.vlan_id);
-                       roce_set_field(ud_sq_wqe->byte_36,
-                                      V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
-                                      V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
-                                      ah->av.hop_limit);
-                       roce_set_field(ud_sq_wqe->byte_36,
-                                      V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
-                                      V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
-                                      ah->av.tclass);
-                       roce_set_field(ud_sq_wqe->byte_40,
-                                      V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
-                                      V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
-                                      ah->av.flowlabel);
-                       roce_set_field(ud_sq_wqe->byte_40,
-                                      V2_UD_SEND_WQE_BYTE_40_SL_M,
-                                      V2_UD_SEND_WQE_BYTE_40_SL_S,
-                                      ah->av.sl);
-                       roce_set_field(ud_sq_wqe->byte_40,
-                                      V2_UD_SEND_WQE_BYTE_40_PORTN_M,
-                                      V2_UD_SEND_WQE_BYTE_40_PORTN_S,
-                                      qp->port);
-
-                       roce_set_bit(ud_sq_wqe->byte_40,
-                                    V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
-                                    ah->av.vlan_en ? 1 : 0);
-                       roce_set_field(ud_sq_wqe->byte_48,
-                                      V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
-                                      V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
-                                      hns_get_gid_index(hr_dev, qp->phy_port,
-                                                        ah->av.gid_index));
-
-                       memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
-                              GID_LEN_V2);
-
-                       set_extend_sge(qp, wr, &sge_idx, valid_num_sge);
-               } else if (ibqp->qp_type == IB_QPT_RC) {
-                       rc_sq_wqe = wqe;
-                       memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
-
-                       rc_sq_wqe->msg_len =
-                        cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
-
-                       switch (wr->opcode) {
-                       case IB_WR_SEND_WITH_IMM:
-                       case IB_WR_RDMA_WRITE_WITH_IMM:
-                               rc_sq_wqe->immtdata =
-                                     cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
-                               break;
-                       case IB_WR_SEND_WITH_INV:
-                               rc_sq_wqe->inv_key =
-                                       cpu_to_le32(wr->ex.invalidate_rkey);
-                               break;
-                       default:
-                               rc_sq_wqe->immtdata = 0;
-                               break;
-                       }
-
-                       roce_set_bit(rc_sq_wqe->byte_4,
-                                    V2_RC_SEND_WQE_BYTE_4_FENCE_S,
-                                    (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
-
-                       roce_set_bit(rc_sq_wqe->byte_4,
-                                 V2_RC_SEND_WQE_BYTE_4_SE_S,
-                                 (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
-                       roce_set_bit(rc_sq_wqe->byte_4,
-                                  V2_RC_SEND_WQE_BYTE_4_CQE_S,
-                                  (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
-
-                       roce_set_bit(rc_sq_wqe->byte_4,
-                                    V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
-
-                       wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
-                       switch (wr->opcode) {
-                       case IB_WR_RDMA_READ:
-                               hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
-                               rc_sq_wqe->rkey =
-                                       cpu_to_le32(rdma_wr(wr)->rkey);
-                               rc_sq_wqe->va =
-                                       cpu_to_le64(rdma_wr(wr)->remote_addr);
-                               break;
-                       case IB_WR_RDMA_WRITE:
-                               hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
-                               rc_sq_wqe->rkey =
-                                       cpu_to_le32(rdma_wr(wr)->rkey);
-                               rc_sq_wqe->va =
-                                       cpu_to_le64(rdma_wr(wr)->remote_addr);
-                               break;
-                       case IB_WR_RDMA_WRITE_WITH_IMM:
-                               hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
-                               rc_sq_wqe->rkey =
-                                       cpu_to_le32(rdma_wr(wr)->rkey);
-                               rc_sq_wqe->va =
-                                       cpu_to_le64(rdma_wr(wr)->remote_addr);
-                               break;
-                       case IB_WR_SEND:
-                               hr_op = HNS_ROCE_V2_WQE_OP_SEND;
-                               break;
-                       case IB_WR_SEND_WITH_INV:
-                               hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
-                               break;
-                       case IB_WR_SEND_WITH_IMM:
-                               hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
-                               break;
-                       case IB_WR_LOCAL_INV:
-                               hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
-                               roce_set_bit(rc_sq_wqe->byte_4,
-                                              V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
-                               rc_sq_wqe->inv_key =
-                                           cpu_to_le32(wr->ex.invalidate_rkey);
-                               break;
-                       case IB_WR_REG_MR:
-                               hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
-                               fseg = wqe;
-                               set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
-                               break;
-                       case IB_WR_ATOMIC_CMP_AND_SWP:
-                               hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
-                               rc_sq_wqe->rkey =
-                                       cpu_to_le32(atomic_wr(wr)->rkey);
-                               rc_sq_wqe->va =
-                                       cpu_to_le64(atomic_wr(wr)->remote_addr);
-                               break;
-                       case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
-                               rc_sq_wqe->rkey =
-                                       cpu_to_le32(atomic_wr(wr)->rkey);
-                               rc_sq_wqe->va =
-                                       cpu_to_le64(atomic_wr(wr)->remote_addr);
-                               break;
-                       case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
-                               hr_op =
-                                      HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
-                               break;
-                       case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
-                               hr_op =
-                                     HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
-                               break;
-                       default:
-                               hr_op = HNS_ROCE_V2_WQE_OP_MASK;
-                               break;
-                       }
-
-                       roce_set_field(rc_sq_wqe->byte_4,
-                                      V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-                                      V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
-
-                       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                           wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
-                               struct hns_roce_v2_wqe_data_seg *dseg;
-
-                               dseg = wqe;
-                               set_data_seg_v2(dseg, wr->sg_list);
-                               wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-                               set_atomic_seg(wqe, atomic_wr(wr));
-                               roce_set_field(rc_sq_wqe->byte_16,
-                                              V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
-                                              V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
-                                              valid_num_sge);
-                       } else if (wr->opcode != IB_WR_REG_MR) {
-                               ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
-                                                       wqe, &sge_idx,
-                                                       valid_num_sge, bad_wr);
-                               if (ret)
-                                       goto out;
-                       }
-               } else {
-                       dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
-                       spin_unlock_irqrestore(&qp->sq.lock, flags);
+               if (ret) {
                        *bad_wr = wr;
-                       return -EOPNOTSUPP;
+                       goto out;
                }
        }
 
 out:
        if (likely(nreq)) {
                qp->sq.head += nreq;
+               qp->next_sge = sge_idx;
                /* Memory barrier */
                wmb();
-
-               sq_db.byte_4 = 0;
-               sq_db.parameter = 0;
-
-               roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_TAG_M,
-                              V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
-               roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
-                              V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
-               roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
-                              V2_DB_PARAMETER_IDX_S,
-                              qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1));
-               roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
-                              V2_DB_PARAMETER_SL_S, qp->sl);
-
-               hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
-
-               qp->next_sge = sge_idx;
-
-               if (qp->state == IB_QPS_ERR) {
-                       attr_mask = IB_QP_STATE;
-                       attr.qp_state = IB_QPS_ERR;
-
-                       ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask,
-                                                   qp->state, IB_QPS_ERR);
-                       if (ret) {
-                               spin_unlock_irqrestore(&qp->sq.lock, flags);
-                               *bad_wr = wr;
-                               return ret;
-                       }
-               }
+               update_sq_db(hr_dev, qp);
        }
 
        spin_unlock_irqrestore(&qp->sq.lock, flags);
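
One detail hns_roce_v2_post_send() keeps from the old code is the owner bit, computed as ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1): the bit flips every time the producer index wraps the send queue, which lets the hardware tell freshly written WQEs from stale ones in the ring. A small standalone check of that toggling, assuming a power-of-two queue depth; the trailing "& 0x1" is added here only so the toy helper returns a clean 0/1, since in the driver only the low bit ends up packed into the WQE:

#include <assert.h>

/* Owner-bit parity for a power-of-two ring: flips on every wrap-around. */
static unsigned int owner_bit(unsigned int producer, unsigned int depth_log2)
{
        return ~((producer >> depth_log2) & 0x1) & 0x1;
}

int main(void)
{
        unsigned int depth_log2 = 3;                    /* ring of 8 entries */

        assert(owner_bit(0, depth_log2) == 1);          /* first lap */
        assert(owner_bit(7, depth_log2) == 1);
        assert(owner_bit(8, depth_log2) == 0);          /* second lap: flipped */
        assert(owner_bit(16, depth_log2) == 1);         /* third lap: back again */
        return 0;
}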
@@ -643,13 +621,11 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_v2_wqe_data_seg *dseg;
        struct hns_roce_rinl_sge *sge_list;
-       struct device *dev = hr_dev->dev;
-       struct ib_qp_attr attr;
        unsigned long flags;
        void *wqe = NULL;
-       int attr_mask;
        u32 wqe_idx;
        int nreq;
        int ret;
@@ -675,14 +651,14 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
                wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
 
                if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
-                       dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
-                               wr->num_sge, hr_qp->rq.max_gs);
+                       ibdev_err(ibdev, "rq: num_sge=%d > qp->rq.max_gs=%d\n",
+                                 wr->num_sge, hr_qp->rq.max_gs);
                        ret = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }
 
-               wqe = get_recv_wqe(hr_qp, wqe_idx);
+               wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
                dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
                for (i = 0; i < wr->num_sge; i++) {
                        if (!wr->sg_list[i].length)
@@ -717,20 +693,21 @@ out:
                /* Memory barrier */
                wmb();
 
-               *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
-
+               /*
+                * Hip08 hardware cannot flush the WQEs in RQ if the QP state
+                * gets into errored mode. Hence, as a workaround to this
+                * hardware limitation, the driver needs to assist in flushing.
+                * But the flushing operation uses a mailbox to convey the QP
+                * state to the hardware, and the mailbox call can sleep because
+                * of the mutex protecting it. Hence, use the deferred flush for
+                * now.
+                */
                if (hr_qp->state == IB_QPS_ERR) {
-                       attr_mask = IB_QP_STATE;
-                       attr.qp_state = IB_QPS_ERR;
-
-                       ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr,
-                                                   attr_mask, hr_qp->state,
-                                                   IB_QPS_ERR);
-                       if (ret) {
-                               spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
-                               *bad_wr = wr;
-                               return ret;
-                       }
+                       if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG,
+                                             &hr_qp->flush_flag))
+                               init_flush_work(hr_dev, hr_qp);
+               } else {
+                       *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
                }
        }
        spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -1448,82 +1425,63 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
                        desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
                else
                        desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
-
-               if (i == 0) {
-                       roce_set_field(req_a->vf_qpc_bt_idx_num,
-                                      VF_RES_A_DATA_1_VF_QPC_BT_IDX_M,
-                                      VF_RES_A_DATA_1_VF_QPC_BT_IDX_S, 0);
-                       roce_set_field(req_a->vf_qpc_bt_idx_num,
-                                      VF_RES_A_DATA_1_VF_QPC_BT_NUM_M,
-                                      VF_RES_A_DATA_1_VF_QPC_BT_NUM_S,
-                                      HNS_ROCE_VF_QPC_BT_NUM);
-
-                       roce_set_field(req_a->vf_srqc_bt_idx_num,
-                                      VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M,
-                                      VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S, 0);
-                       roce_set_field(req_a->vf_srqc_bt_idx_num,
-                                      VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M,
-                                      VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S,
-                                      HNS_ROCE_VF_SRQC_BT_NUM);
-
-                       roce_set_field(req_a->vf_cqc_bt_idx_num,
-                                      VF_RES_A_DATA_3_VF_CQC_BT_IDX_M,
-                                      VF_RES_A_DATA_3_VF_CQC_BT_IDX_S, 0);
-                       roce_set_field(req_a->vf_cqc_bt_idx_num,
-                                      VF_RES_A_DATA_3_VF_CQC_BT_NUM_M,
-                                      VF_RES_A_DATA_3_VF_CQC_BT_NUM_S,
-                                      HNS_ROCE_VF_CQC_BT_NUM);
-
-                       roce_set_field(req_a->vf_mpt_bt_idx_num,
-                                      VF_RES_A_DATA_4_VF_MPT_BT_IDX_M,
-                                      VF_RES_A_DATA_4_VF_MPT_BT_IDX_S, 0);
-                       roce_set_field(req_a->vf_mpt_bt_idx_num,
-                                      VF_RES_A_DATA_4_VF_MPT_BT_NUM_M,
-                                      VF_RES_A_DATA_4_VF_MPT_BT_NUM_S,
-                                      HNS_ROCE_VF_MPT_BT_NUM);
-
-                       roce_set_field(req_a->vf_eqc_bt_idx_num,
-                                      VF_RES_A_DATA_5_VF_EQC_IDX_M,
-                                      VF_RES_A_DATA_5_VF_EQC_IDX_S, 0);
-                       roce_set_field(req_a->vf_eqc_bt_idx_num,
-                                      VF_RES_A_DATA_5_VF_EQC_NUM_M,
-                                      VF_RES_A_DATA_5_VF_EQC_NUM_S,
-                                      HNS_ROCE_VF_EQC_NUM);
-               } else {
-                       roce_set_field(req_b->vf_smac_idx_num,
-                                      VF_RES_B_DATA_1_VF_SMAC_IDX_M,
-                                      VF_RES_B_DATA_1_VF_SMAC_IDX_S, 0);
-                       roce_set_field(req_b->vf_smac_idx_num,
-                                      VF_RES_B_DATA_1_VF_SMAC_NUM_M,
-                                      VF_RES_B_DATA_1_VF_SMAC_NUM_S,
-                                      HNS_ROCE_VF_SMAC_NUM);
-
-                       roce_set_field(req_b->vf_sgid_idx_num,
-                                      VF_RES_B_DATA_2_VF_SGID_IDX_M,
-                                      VF_RES_B_DATA_2_VF_SGID_IDX_S, 0);
-                       roce_set_field(req_b->vf_sgid_idx_num,
-                                      VF_RES_B_DATA_2_VF_SGID_NUM_M,
-                                      VF_RES_B_DATA_2_VF_SGID_NUM_S,
-                                      HNS_ROCE_VF_SGID_NUM);
-
-                       roce_set_field(req_b->vf_qid_idx_sl_num,
-                                      VF_RES_B_DATA_3_VF_QID_IDX_M,
-                                      VF_RES_B_DATA_3_VF_QID_IDX_S, 0);
-                       roce_set_field(req_b->vf_qid_idx_sl_num,
-                                      VF_RES_B_DATA_3_VF_SL_NUM_M,
-                                      VF_RES_B_DATA_3_VF_SL_NUM_S,
-                                      HNS_ROCE_VF_SL_NUM);
-
-                       roce_set_field(req_b->vf_sccc_idx_num,
-                                      VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M,
-                                      VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0);
-                       roce_set_field(req_b->vf_sccc_idx_num,
-                                      VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M,
-                                      VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S,
-                                      HNS_ROCE_VF_SCCC_BT_NUM);
-               }
        }
 
+       roce_set_field(req_a->vf_qpc_bt_idx_num,
+                      VF_RES_A_DATA_1_VF_QPC_BT_IDX_M,
+                      VF_RES_A_DATA_1_VF_QPC_BT_IDX_S, 0);
+       roce_set_field(req_a->vf_qpc_bt_idx_num,
+                      VF_RES_A_DATA_1_VF_QPC_BT_NUM_M,
+                      VF_RES_A_DATA_1_VF_QPC_BT_NUM_S, HNS_ROCE_VF_QPC_BT_NUM);
+
+       roce_set_field(req_a->vf_srqc_bt_idx_num,
+                      VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M,
+                      VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S, 0);
+       roce_set_field(req_a->vf_srqc_bt_idx_num,
+                      VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M,
+                      VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S,
+                      HNS_ROCE_VF_SRQC_BT_NUM);
+
+       roce_set_field(req_a->vf_cqc_bt_idx_num,
+                      VF_RES_A_DATA_3_VF_CQC_BT_IDX_M,
+                      VF_RES_A_DATA_3_VF_CQC_BT_IDX_S, 0);
+       roce_set_field(req_a->vf_cqc_bt_idx_num,
+                      VF_RES_A_DATA_3_VF_CQC_BT_NUM_M,
+                      VF_RES_A_DATA_3_VF_CQC_BT_NUM_S, HNS_ROCE_VF_CQC_BT_NUM);
+
+       roce_set_field(req_a->vf_mpt_bt_idx_num,
+                      VF_RES_A_DATA_4_VF_MPT_BT_IDX_M,
+                      VF_RES_A_DATA_4_VF_MPT_BT_IDX_S, 0);
+       roce_set_field(req_a->vf_mpt_bt_idx_num,
+                      VF_RES_A_DATA_4_VF_MPT_BT_NUM_M,
+                      VF_RES_A_DATA_4_VF_MPT_BT_NUM_S, HNS_ROCE_VF_MPT_BT_NUM);
+
+       roce_set_field(req_a->vf_eqc_bt_idx_num, VF_RES_A_DATA_5_VF_EQC_IDX_M,
+                      VF_RES_A_DATA_5_VF_EQC_IDX_S, 0);
+       roce_set_field(req_a->vf_eqc_bt_idx_num, VF_RES_A_DATA_5_VF_EQC_NUM_M,
+                      VF_RES_A_DATA_5_VF_EQC_NUM_S, HNS_ROCE_VF_EQC_NUM);
+
+       roce_set_field(req_b->vf_smac_idx_num, VF_RES_B_DATA_1_VF_SMAC_IDX_M,
+                      VF_RES_B_DATA_1_VF_SMAC_IDX_S, 0);
+       roce_set_field(req_b->vf_smac_idx_num, VF_RES_B_DATA_1_VF_SMAC_NUM_M,
+                      VF_RES_B_DATA_1_VF_SMAC_NUM_S, HNS_ROCE_VF_SMAC_NUM);
+
+       roce_set_field(req_b->vf_sgid_idx_num, VF_RES_B_DATA_2_VF_SGID_IDX_M,
+                      VF_RES_B_DATA_2_VF_SGID_IDX_S, 0);
+       roce_set_field(req_b->vf_sgid_idx_num, VF_RES_B_DATA_2_VF_SGID_NUM_M,
+                      VF_RES_B_DATA_2_VF_SGID_NUM_S, HNS_ROCE_VF_SGID_NUM);
+
+       roce_set_field(req_b->vf_qid_idx_sl_num, VF_RES_B_DATA_3_VF_QID_IDX_M,
+                      VF_RES_B_DATA_3_VF_QID_IDX_S, 0);
+       roce_set_field(req_b->vf_qid_idx_sl_num, VF_RES_B_DATA_3_VF_SL_NUM_M,
+                      VF_RES_B_DATA_3_VF_SL_NUM_S, HNS_ROCE_VF_SL_NUM);
+
+       roce_set_field(req_b->vf_sccc_idx_num, VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M,
+                      VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0);
+       roce_set_field(req_b->vf_sccc_idx_num, VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M,
+                      VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S,
+                      HNS_ROCE_VF_SCCC_BT_NUM);
+
        return hns_roce_cmq_send(hr_dev, desc, 2);
 }
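The block above packs the per-VF resource quotas into command descriptor words with repeated roce_set_field() calls, each taking a bit mask and a shift that locate the field inside a 32-bit word. A minimal sketch of that mask/shift packing idiom, assuming a plain uint32_t word; the driver's real helper additionally handles little-endian descriptor fields, and the field layout below is made up for illustration:

#include <stdint.h>

/* Simplified stand-in for the driver's field-packing helper: clear the bits
 * selected by 'mask', then insert 'val' shifted into position. */
static inline void demo_set_field(uint32_t *word, uint32_t mask,
                                  uint32_t shift, uint32_t val)
{
        *word &= ~mask;
        *word |= (val << shift) & mask;
}

/* Hypothetical descriptor word with a 10-bit "BT index" field at bit 0 and
 * an 8-bit "BT number" field at bit 16. */
static void demo_pack_vf_res(uint32_t *word)
{
        demo_set_field(word, 0x000003ffU, 0, 0);   /* index starts at 0   */
        demo_set_field(word, 0x00ff0000U, 16, 64); /* e.g. 64 BT entries  */
}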
 
@@ -1691,7 +1649,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->max_srq_wrs       = HNS_ROCE_V2_MAX_SRQ_WR;
        caps->max_srq_sges      = HNS_ROCE_V2_MAX_SRQ_SGE;
 
-       if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) {
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
                caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
                               HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
                               HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
@@ -1939,7 +1897,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
                   caps->srqc_bt_num, &caps->srqc_buf_pg_sz,
                   &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC);
 
-       if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) {
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
                caps->sccc_hop_num = ctx_hop_num;
                caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
                caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
@@ -1999,7 +1957,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
                return ret;
        }
 
-       if (hr_dev->pci_dev->revision == 0x21) {
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
                ret = hns_roce_query_pf_timer_resource(hr_dev);
                if (ret) {
                        dev_err(hr_dev->dev,
@@ -2007,16 +1965,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
                                ret);
                        return ret;
                }
-       }
-
-       ret = hns_roce_alloc_vf_resource(hr_dev);
-       if (ret) {
-               dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
-                       ret);
-               return ret;
-       }
 
-       if (hr_dev->pci_dev->revision == 0x21) {
                ret = hns_roce_set_vf_switch_param(hr_dev, 0);
                if (ret) {
                        dev_err(hr_dev->dev,
@@ -2046,6 +1995,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        if (ret)
                set_default_caps(hr_dev);
 
+       ret = hns_roce_alloc_vf_resource(hr_dev);
+       if (ret) {
+               dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
+                       ret);
+               return ret;
+       }
+
        ret = hns_roce_v2_set_bt(hr_dev);
        if (ret)
                dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -2298,7 +2254,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_v2_priv *priv = hr_dev->priv;
 
-       if (hr_dev->pci_dev->revision == 0x21)
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B)
                hns_roce_function_clear(hr_dev);
 
        hns_roce_free_link_table(hr_dev, &priv->tpq);
@@ -2461,7 +2417,9 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
 
        ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type);
        if (ret)
-               dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret);
+               ibdev_err(&hr_dev->ib_dev,
+                         "failed to configure sgid table, ret = %d!\n",
+                         ret);
 
        return ret;
 }
@@ -2757,7 +2715,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
 
 static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
 {
-       *hr_cq->set_ci_db = cons_index & 0xffffff;
+       *hr_cq->set_ci_db = cons_index & V2_CQ_DB_PARAMETER_CONS_IDX_M;
 }
 
 static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
@@ -2942,7 +2900,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
 
        sge_list = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
        sge_num = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
-       wqe_buf = get_recv_wqe(*cur_qp, wr_cnt);
+       wqe_buf = hns_roce_get_recv_wqe(*cur_qp, wr_cnt);
        data_len = wc->byte_len;
 
        for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
@@ -3013,13 +2971,11 @@ out:
 static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                                struct hns_roce_qp **cur_qp, struct ib_wc *wc)
 {
+       struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
        struct hns_roce_srq *srq = NULL;
-       struct hns_roce_dev *hr_dev;
        struct hns_roce_v2_cqe *cqe;
        struct hns_roce_qp *hr_qp;
        struct hns_roce_wq *wq;
-       struct ib_qp_attr attr;
-       int attr_mask;
        int is_send;
        u16 wqe_ctr;
        u32 opcode;
@@ -3043,16 +2999,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                                V2_CQE_BYTE_16_LCL_QPN_S);
 
        if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) {
-               hr_dev = to_hr_dev(hr_cq->ib_cq.device);
                hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
                if (unlikely(!hr_qp)) {
-                       dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n",
-                               hr_cq->cqn, (qpn & HNS_ROCE_V2_CQE_QPN_MASK));
+                       ibdev_err(&hr_dev->ib_dev,
+                                 "CQ %06lx with entry for unknown QPN %06x\n",
+                                 hr_cq->cqn, qpn & HNS_ROCE_V2_CQE_QPN_MASK);
                        return -EINVAL;
                }
                *cur_qp = hr_qp;
        }
 
+       hr_qp = *cur_qp;
        wc->qp = &(*cur_qp)->ibqp;
        wc->vendor_err = 0;
 
@@ -3137,14 +3094,24 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                break;
        }
 
-       /* flush cqe if wc status is error, excluding flush error */
-       if ((wc->status != IB_WC_SUCCESS) &&
-           (wc->status != IB_WC_WR_FLUSH_ERR)) {
-               attr_mask = IB_QP_STATE;
-               attr.qp_state = IB_QPS_ERR;
-               return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp,
-                                            &attr, attr_mask,
-                                            (*cur_qp)->state, IB_QPS_ERR);
+       /*
+        * Hip08 hardware cannot flush the WQEs in SQ/RQ when the QP state
+        * gets into the error state, so as a workaround for this hardware
+        * limitation the driver has to assist with the flush. The flush
+        * operation conveys the QP state to the hardware through a mailbox
+        * command, which may sleep because of the mutex protecting the
+        * mailbox calls, so the flush is deferred to a work item: once an
+        * error wc is detected, the flush work is scheduled.
+        */
+       if (wc->status != IB_WC_SUCCESS &&
+           wc->status != IB_WC_WR_FLUSH_ERR) {
+               ibdev_err(&hr_dev->ib_dev, "error cqe status is: 0x%x\n",
+                         status & HNS_ROCE_V2_CQE_STATUS_MASK);
+
+               if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag))
+                       init_flush_work(hr_dev, hr_qp);
+
+               return 0;
        }
 
        if (wc->status == IB_WC_WR_FLUSH_ERR)
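The error branch above no longer modifies the QP synchronously: it only marks the QP with HNS_ROCE_FLUSH_FLAG and lets init_flush_work() do the real flush later, because the mailbox command that moves the QP to the error state may sleep while CQ polling must not. A rough sketch of that mark-and-defer pattern, using the generic system workqueue as a stand-in for the driver's own flush work; every name below is illustrative rather than the driver's:

#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/workqueue.h>

#define DEMO_FLUSH_FLAG 0

struct demo_qp {
        unsigned long flush_flag;        /* bit DEMO_FLUSH_FLAG: flush queued */
        struct work_struct flush_work;
};

static void demo_flush_worker(struct work_struct *work)
{
        struct demo_qp *qp = container_of(work, struct demo_qp, flush_work);

        /*
         * Runs in process context and may sleep: this is where a mailbox
         * command would move the QP to the error state so that hardware
         * flushes the outstanding WQEs.
         */
        (void)qp;
}

static void demo_qp_init(struct demo_qp *qp)
{
        qp->flush_flag = 0;
        INIT_WORK(&qp->flush_work, demo_flush_worker);
}

/* Called from the non-sleeping CQ poll path on an error completion. */
static void demo_on_error_cqe(struct demo_qp *qp)
{
        /* Only the first error completion for a QP schedules the work. */
        if (!test_and_set_bit(DEMO_FLUSH_FLAG, &qp->flush_flag))
                schedule_work(&qp->flush_work);
}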
@@ -3262,14 +3229,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                wc->port_num = roce_get_field(cqe->byte_32,
                                V2_CQE_BYTE_32_PORTN_M, V2_CQE_BYTE_32_PORTN_S);
                wc->pkey_index = 0;
-               memcpy(wc->smac, cqe->smac, 4);
-               wc->smac[4] = roce_get_field(cqe->byte_28,
-                                            V2_CQE_BYTE_28_SMAC_4_M,
-                                            V2_CQE_BYTE_28_SMAC_4_S);
-               wc->smac[5] = roce_get_field(cqe->byte_28,
-                                            V2_CQE_BYTE_28_SMAC_5_M,
-                                            V2_CQE_BYTE_28_SMAC_5_S);
-               wc->wc_flags |= IB_WC_WITH_SMAC;
+
                if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
                        wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
                                                          V2_CQE_BYTE_28_VID_M,
@@ -3567,14 +3527,9 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
                               HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
                               ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
 
-       roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
-                      V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
        roce_set_field(context->byte_20_smac_sgid_idx,
                       V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
                       ilog2((unsigned int)hr_qp->sq.wqe_cnt));
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
 
        roce_set_field(context->byte_20_smac_sgid_idx,
                       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
@@ -3582,9 +3537,6 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
                       hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
                       hr_qp->ibqp.srq) ? 0 :
                       ilog2((unsigned int)hr_qp->rq.wqe_cnt));
-
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
 }
 
 static void modify_qp_reset_to_init(struct ib_qp *ibqp,
@@ -3604,280 +3556,53 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
         */
        roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
                       V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
-       roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
-                      V2_QPC_BYTE_4_TST_S, 0);
 
        roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
                       V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
-       roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
-                      V2_QPC_BYTE_4_SQPN_S, 0);
 
        roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
                       V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
-       roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
-                      V2_QPC_BYTE_16_PD_S, 0);
 
        roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
                       V2_QPC_BYTE_20_RQWS_S, ilog2(hr_qp->rq.max_gs));
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
-                      V2_QPC_BYTE_20_RQWS_S, 0);
 
        set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
 
        /* No VLAN need to set 0xFFF */
        roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
                       V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
-       roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
-                      V2_QPC_BYTE_24_VLAN_ID_S, 0);
-
-       /*
-        * Set some fields in context to zero, Because the default values
-        * of all fields in context are zero, we need not set them to 0 again.
-        * but we should set the relevant fields of context mask to 0.
-        */
-       roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_TX_ERR_S, 0);
-       roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_RX_ERR_S, 0);
-       roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
-       roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
 
-       roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
-                      V2_QPC_BYTE_60_TEMPID_S, 0);
-
-       roce_set_field(qpc_mask->byte_60_qpst_tempid,
-                      V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
-                      0);
-       roce_set_bit(qpc_mask->byte_60_qpst_tempid,
-                    V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
-       roce_set_bit(qpc_mask->byte_60_qpst_tempid,
-                    V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
-       roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
-       roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
-
-       if (hr_qp->rdb_en) {
+       if (hr_qp->rdb_en)
                roce_set_bit(context->byte_68_rq_db,
                             V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1);
-               roce_set_bit(qpc_mask->byte_68_rq_db,
-                            V2_QPC_BYTE_68_RQ_RECORD_EN_S, 0);
-       }
 
        roce_set_field(context->byte_68_rq_db,
                       V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
                       V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S,
                       ((u32)hr_qp->rdb.dma) >> 1);
-       roce_set_field(qpc_mask->byte_68_rq_db,
-                      V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
-                      V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0);
        context->rq_db_record_addr = cpu_to_le32(hr_qp->rdb.dma >> 32);
-       qpc_mask->rq_db_record_addr = 0;
 
        roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
                    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
-       roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
 
        roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
                       V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
-       roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
-                      V2_QPC_BYTE_80_RX_CQN_S, 0);
        if (ibqp->srq) {
                roce_set_field(context->byte_76_srqn_op_en,
                               V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S,
                               to_hr_srq(ibqp->srq)->srqn);
-               roce_set_field(qpc_mask->byte_76_srqn_op_en,
-                              V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0);
                roce_set_bit(context->byte_76_srqn_op_en,
                             V2_QPC_BYTE_76_SRQ_EN_S, 1);
-               roce_set_bit(qpc_mask->byte_76_srqn_op_en,
-                            V2_QPC_BYTE_76_SRQ_EN_S, 0);
        }
 
-       roce_set_field(qpc_mask->byte_84_rq_ci_pi,
-                      V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
-                      V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
-       roce_set_field(qpc_mask->byte_84_rq_ci_pi,
-                      V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M,
-                      V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0);
-
-       roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_SRQ_INFO_M,
-                      V2_QPC_BYTE_92_SRQ_INFO_S, 0);
-
-       roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
-                      V2_QPC_BYTE_96_RX_REQ_MSN_S, 0);
-
-       roce_set_field(qpc_mask->byte_104_rq_sge,
-                      V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_M,
-                      V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_S, 0);
-
-       roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
-                    V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
-       roce_set_field(qpc_mask->byte_108_rx_reqepsn,
-                      V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M,
-                      V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0);
-       roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
-                    V2_QPC_BYTE_108_RX_REQ_RNR_S, 0);
-
-       qpc_mask->rq_rnr_timer = 0;
-       qpc_mask->rx_msg_len = 0;
-       qpc_mask->rx_rkey_pkt_info = 0;
-       qpc_mask->rx_va = 0;
-
-       roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M,
-                      V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0);
-       roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
-                      V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
-
-       roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
-                    0);
-       roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
-                      V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
-       roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
-                      V2_QPC_BYTE_140_RAQ_TRRL_TAIL_S, 0);
-
-       roce_set_field(qpc_mask->byte_144_raq,
-                      V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
-                      V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
-       roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
-                      V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
-       roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
-
-       roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RQ_MSN_M,
-                      V2_QPC_BYTE_148_RQ_MSN_S, 0);
-       roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RAQ_SYNDROME_M,
-                      V2_QPC_BYTE_148_RAQ_SYNDROME_S, 0);
-
-       roce_set_field(qpc_mask->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M,
-                      V2_QPC_BYTE_152_RAQ_PSN_S, 0);
-       roce_set_field(qpc_mask->byte_152_raq,
-                      V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M,
-                      V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S, 0);
-
-       roce_set_field(qpc_mask->byte_156_raq, V2_QPC_BYTE_156_RAQ_USE_PKTN_M,
-                      V2_QPC_BYTE_156_RAQ_USE_PKTN_S, 0);
-
-       roce_set_field(qpc_mask->byte_160_sq_ci_pi,
-                      V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-                      V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
-       roce_set_field(qpc_mask->byte_160_sq_ci_pi,
-                      V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
-                      V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
-
-       roce_set_bit(qpc_mask->byte_168_irrl_idx,
-                    V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
-       roce_set_bit(qpc_mask->byte_168_irrl_idx,
-                    V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
-       roce_set_bit(qpc_mask->byte_168_irrl_idx,
-                    V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
-       roce_set_bit(qpc_mask->byte_168_irrl_idx,
-                    V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
-       roce_set_bit(qpc_mask->byte_168_irrl_idx,
-                    V2_QPC_BYTE_168_SQ_INVLD_FLG_S, 0);
-       roce_set_field(qpc_mask->byte_168_irrl_idx,
-                      V2_QPC_BYTE_168_IRRL_IDX_LSB_M,
-                      V2_QPC_BYTE_168_IRRL_IDX_LSB_S, 0);
-
        roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
                       V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4);
-       roce_set_field(qpc_mask->byte_172_sq_psn,
-                      V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
-                      V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0);
-
-       roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
-                    0);
 
        roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
-       roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
-
-       roce_set_field(qpc_mask->byte_176_msg_pktn,
-                      V2_QPC_BYTE_176_MSG_USE_PKTN_M,
-                      V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
-       roce_set_field(qpc_mask->byte_176_msg_pktn,
-                      V2_QPC_BYTE_176_IRRL_HEAD_PRE_M,
-                      V2_QPC_BYTE_176_IRRL_HEAD_PRE_S, 0);
-
-       roce_set_field(qpc_mask->byte_184_irrl_idx,
-                      V2_QPC_BYTE_184_IRRL_IDX_MSB_M,
-                      V2_QPC_BYTE_184_IRRL_IDX_MSB_S, 0);
-
-       qpc_mask->cur_sge_offset = 0;
-
-       roce_set_field(qpc_mask->byte_192_ext_sge,
-                      V2_QPC_BYTE_192_CUR_SGE_IDX_M,
-                      V2_QPC_BYTE_192_CUR_SGE_IDX_S, 0);
-       roce_set_field(qpc_mask->byte_192_ext_sge,
-                      V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_M,
-                      V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_S, 0);
-
-       roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M,
-                      V2_QPC_BYTE_196_IRRL_HEAD_S, 0);
-
-       roce_set_field(qpc_mask->byte_200_sq_max, V2_QPC_BYTE_200_SQ_MAX_IDX_M,
-                      V2_QPC_BYTE_200_SQ_MAX_IDX_S, 0);
-       roce_set_field(qpc_mask->byte_200_sq_max,
-                      V2_QPC_BYTE_200_LCL_OPERATED_CNT_M,
-                      V2_QPC_BYTE_200_LCL_OPERATED_CNT_S, 0);
-
-       roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RNR_FLG_S, 0);
-       roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RTY_FLG_S, 0);
-
-       roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M,
-                      V2_QPC_BYTE_212_CHECK_FLG_S, 0);
-
-       qpc_mask->sq_timer = 0;
-
-       roce_set_field(qpc_mask->byte_220_retry_psn_msn,
-                      V2_QPC_BYTE_220_RETRY_MSG_MSN_M,
-                      V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0);
-       roce_set_field(qpc_mask->byte_232_irrl_sge,
-                      V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
-                      V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
-
-       roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
-                    0);
-       roce_set_bit(qpc_mask->byte_232_irrl_sge,
-                    V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
-       roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
-                    0);
-
-       qpc_mask->irrl_cur_sge_offset = 0;
-
-       roce_set_field(qpc_mask->byte_240_irrl_tail,
-                      V2_QPC_BYTE_240_IRRL_TAIL_REAL_M,
-                      V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0);
-       roce_set_field(qpc_mask->byte_240_irrl_tail,
-                      V2_QPC_BYTE_240_IRRL_TAIL_RD_M,
-                      V2_QPC_BYTE_240_IRRL_TAIL_RD_S, 0);
-       roce_set_field(qpc_mask->byte_240_irrl_tail,
-                      V2_QPC_BYTE_240_RX_ACK_MSN_M,
-                      V2_QPC_BYTE_240_RX_ACK_MSN_S, 0);
-
-       roce_set_field(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_M,
-                      V2_QPC_BYTE_248_IRRL_PSN_S, 0);
-       roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_ACK_PSN_ERR_S,
-                    0);
-       roce_set_field(qpc_mask->byte_248_ack_psn,
-                      V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M,
-                      V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0);
-       roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_VLD_S,
-                    0);
-       roce_set_bit(qpc_mask->byte_248_ack_psn,
-                    V2_QPC_BYTE_248_RNR_RETRY_FLAG_S, 0);
-       roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_CQ_ERR_IND_S,
-                    0);
 
        hr_qp->access_flags = attr->qp_access_flags;
        roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
                       V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
-       roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
-                      V2_QPC_BYTE_252_TX_CQN_S, 0);
-
-       roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_ERR_TYPE_M,
-                      V2_QPC_BYTE_252_ERR_TYPE_S, 0);
-
-       roce_set_field(qpc_mask->byte_256_sqflush_rqcqe,
-                      V2_QPC_BYTE_256_RQ_CQE_IDX_M,
-                      V2_QPC_BYTE_256_RQ_CQE_IDX_S, 0);
-       roce_set_field(qpc_mask->byte_256_sqflush_rqcqe,
-                      V2_QPC_BYTE_256_SQ_FLUSH_IDX_M,
-                      V2_QPC_BYTE_256_SQ_FLUSH_IDX_S, 0);
 }
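The deletions above drop the field-by-field clearing of qpc_mask on the RESET-to-INIT path. The hw-v2 code drives QPC updates with a value/mask pair: a field is changed by writing it into the context buffer and clearing the matching bits in the mask. The per-field clears become redundant if the caller zeroes the whole mask for this transition, which is presumably what the rest of the series does (that call site is outside this hunk). A small sketch of the convention under that assumption; the types and helpers are illustrative only:

#include <stdint.h>
#include <string.h>

struct demo_qpc { uint32_t words[64]; };  /* stand-in for the QP context */

/* Selective update: put the new value in 'ctx' and clear the same bits in
 * 'msk' so that hardware takes exactly those bits from 'ctx'. */
static void demo_update_field(struct demo_qpc *ctx, struct demo_qpc *msk,
                              int word, uint32_t mask, uint32_t shift,
                              uint32_t val)
{
        ctx->words[word] = (ctx->words[word] & ~mask) | ((val << shift) & mask);
        msk->words[word] &= ~mask;
}

/* Full rewrite (e.g. RESET -> INIT): zeroing the whole mask once means every
 * bit is taken from 'ctx', so no per-field mask clearing is needed. */
static void demo_prepare_full_rewrite(struct demo_qpc *ctx,
                                      struct demo_qpc *msk)
{
        memset(ctx, 0, sizeof(*ctx));
        memset(msk, 0, sizeof(*msk));
}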
 
 static void modify_qp_init_to_init(struct ib_qp *ibqp,
@@ -3987,21 +3712,22 @@ static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev,
                                   struct hns_roce_qp *hr_qp, int mtt_cnt,
                                   u32 page_size)
 {
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
 
        if (hr_qp->rq.wqe_cnt < 1)
                return true;
 
        if (mtt_cnt < 1) {
-               dev_err(dev, "qp(0x%lx) rqwqe buf ba find failed\n",
-                       hr_qp->qpn);
+               ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n",
+                         hr_qp->qpn);
                return false;
        }
 
        if (mtt_cnt < MTT_MIN_COUNT &&
                (hr_qp->rq.offset + page_size) < hr_qp->buff_size) {
-               dev_err(dev, "qp(0x%lx) next rqwqe buf ba find failed\n",
-                       hr_qp->qpn);
+               ibdev_err(ibdev,
+                         "failed to find next RQWQE buf ba of QP(0x%lx)\n",
+                         hr_qp->qpn);
                return false;
        }
 
@@ -4016,7 +3742,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        u64 mtts[MTT_MIN_COUNT] = { 0 };
        dma_addr_t dma_handle_3;
        dma_addr_t dma_handle_2;
@@ -4043,7 +3769,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
                                     hr_qp->qpn, &dma_handle_2);
        if (!mtts_2) {
-               dev_err(dev, "qp irrl_table find failed\n");
+               ibdev_err(ibdev, "failed to find QP irrl_table\n");
                return -EINVAL;
        }
 
@@ -4051,12 +3777,13 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        mtts_3 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table,
                                     hr_qp->qpn, &dma_handle_3);
        if (!mtts_3) {
-               dev_err(dev, "qp trrl_table find failed\n");
+               ibdev_err(ibdev, "failed to find QP trrl_table\n");
                return -EINVAL;
        }
 
        if (attr_mask & IB_QP_ALT_PATH) {
-               dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask);
+               ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error\n",
+                         attr_mask);
                return -EINVAL;
        }
 
@@ -4201,7 +3928,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
 
        /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */
        roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
-                      V2_QPC_BYTE_56_LP_PKTN_INI_S, 4);
+                      V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
        roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
                       V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
 
@@ -4259,7 +3986,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        u64 sge_cur_blk = 0;
        u64 sq_cur_blk = 0;
        u32 page_size;
@@ -4268,7 +3995,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
        /* Search qp buf's mtts */
        count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
        if (count < 1) {
-               dev_err(dev, "qp(0x%lx) buf pa find failed\n", hr_qp->qpn);
+               ibdev_err(ibdev, "failed to find buf pa of QP(0x%lx)\n",
+                         hr_qp->qpn);
                return -EINVAL;
        }
 
@@ -4278,16 +4006,15 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
                                          hr_qp->sge.offset / page_size,
                                          &sge_cur_blk, 1, NULL);
                if (count < 1) {
-                       dev_err(dev, "qp(0x%lx) sge pa find failed\n",
-                               hr_qp->qpn);
+                       ibdev_err(ibdev, "failed to find sge pa of QP(0x%lx)\n",
+                                 hr_qp->qpn);
                        return -EINVAL;
                }
        }
 
        /* Not support alternate path and path migration */
-       if ((attr_mask & IB_QP_ALT_PATH) ||
-           (attr_mask & IB_QP_PATH_MIG_STATE)) {
-               dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
+       if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) {
+               ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
                return -EINVAL;
        }
 
@@ -4405,6 +4132,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
        const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        const struct ib_gid_attr *gid_attr = NULL;
        int is_roce_protocol;
        u16 vlan_id = 0xffff;
@@ -4446,13 +4174,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
                       V2_QPC_BYTE_24_VLAN_ID_S, 0);
 
        if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
-               dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n",
-                       grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+               ibdev_err(ibdev, "sgid_index(%u) too large. max is %d\n",
+                         grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
                return -EINVAL;
        }
 
        if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
-               dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
+               ibdev_err(ibdev, "ah attr is not RDMA roce type\n");
                return -EINVAL;
        }
 
@@ -4475,7 +4203,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
                       V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
 
-       if (hr_dev->pci_dev->revision == 0x21 && is_udp)
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp)
                roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
                               V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
        else
@@ -4530,7 +4258,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
                /* Nothing */
                ;
        } else {
-               dev_err(hr_dev->dev, "Illegal state for QP!\n");
+               ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
                ret = -EINVAL;
                goto out;
        }
@@ -4565,8 +4293,8 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
                                       V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
                                       0);
                } else {
-                       dev_warn(hr_dev->dev,
-                                "Local ACK timeout shall be 0 to 30.\n");
+                       ibdev_warn(&hr_dev->ib_dev,
+                                  "Local ACK timeout shall be 0 to 30.\n");
                }
        }
 
@@ -4734,7 +4462,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
        struct hns_roce_v2_qp_context ctx[2];
        struct hns_roce_v2_qp_context *context = ctx;
        struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       unsigned long sq_flag = 0;
+       unsigned long rq_flag = 0;
        int ret;
 
        /*
@@ -4752,6 +4482,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
        /* When QP state is err, SQ and RQ WQE should be flushed */
        if (new_state == IB_QPS_ERR) {
+               spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+               hr_qp->state = IB_QPS_ERR;
                roce_set_field(context->byte_160_sq_ci_pi,
                               V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
                               V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
@@ -4759,8 +4491,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                roce_set_field(qpc_mask->byte_160_sq_ci_pi,
                               V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
                               V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+               spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
 
                if (!ibqp->srq) {
+                       spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
                        roce_set_field(context->byte_84_rq_ci_pi,
                               V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
                               V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
@@ -4768,6 +4502,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                        roce_set_field(qpc_mask->byte_84_rq_ci_pi,
                               V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
                               V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
+                       spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
                }
        }
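When the QP is forced into the error state, the hunk above now samples sq.head and rq.head under their queue locks with interrupts disabled before writing them into the QPC as producer indices, so the snapshot stays consistent with WQEs posted concurrently from the verbs path. A minimal sketch of that sampling pattern; the structure and field names are illustrative:

#include <linux/spinlock.h>

struct demo_wq {
        spinlock_t lock;        /* assumed spin_lock_init()'d at creation */
        unsigned int head;      /* producer index advanced by post verbs  */
};

/* Snapshot the producer index consistently with any concurrent
 * post_send()/post_recv() running under the same lock. */
static unsigned int demo_sample_producer_index(struct demo_wq *wq)
{
        unsigned long flags;
        unsigned int head;

        spin_lock_irqsave(&wq->lock, flags);
        head = wq->head;
        spin_unlock_irqrestore(&wq->lock, flags);

        return head;
}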
 
@@ -4791,7 +4526,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
        /* SW pass context to HW */
        ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp);
        if (ret) {
-               dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
+               ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret);
                goto out;
        }
 
@@ -4848,10 +4583,8 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
        ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
                                HNS_ROCE_CMD_QUERY_QPC,
                                HNS_ROCE_CMD_TIMEOUT_MSECS);
-       if (ret) {
-               dev_err(hr_dev->dev, "QUERY QP cmd process error\n");
+       if (ret)
                goto out;
-       }
 
        memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
 
@@ -4867,7 +4600,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
        struct hns_roce_v2_qp_context context = {};
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        int tmp_qp_state;
        int state;
        int ret;
@@ -4885,7 +4618,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
        ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context);
        if (ret) {
-               dev_err(dev, "query qpc error\n");
+               ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret);
                ret = -EINVAL;
                goto out;
        }
@@ -4894,7 +4627,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                               V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
        tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
        if (tmp_qp_state == -1) {
-               dev_err(dev, "Illegal ib_qp_state\n");
+               ibdev_err(ibdev, "Illegal ib_qp_state\n");
                ret = -EINVAL;
                goto out;
        }
@@ -4992,8 +4725,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
                                         struct hns_roce_qp *hr_qp,
                                         struct ib_udata *udata)
 {
-       struct hns_roce_cq *send_cq, *recv_cq;
        struct ib_device *ibdev = &hr_dev->ib_dev;
+       struct hns_roce_cq *send_cq, *recv_cq;
        unsigned long flags;
        int ret = 0;
 
@@ -5002,7 +4735,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
                ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
                                            hr_qp->state, IB_QPS_RESET);
                if (ret)
-                       ibdev_err(ibdev, "modify QP to Reset failed.\n");
+                       ibdev_err(ibdev,
+                                 "failed to modify QP to RST, ret = %d\n",
+                                 ret);
        }
 
        send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
@@ -5011,10 +4746,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
        spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
        hns_roce_lock_cqs(send_cq, recv_cq);
 
-       list_del(&hr_qp->node);
-       list_del(&hr_qp->sq_node);
-       list_del(&hr_qp->rq_node);
-
        if (!udata) {
                if (recv_cq)
                        __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn,
@@ -5032,43 +4763,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
        hns_roce_unlock_cqs(send_cq, recv_cq);
        spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
 
-       hns_roce_qp_free(hr_dev, hr_qp);
-
-       /* Not special_QP, free their QPN */
-       if ((hr_qp->ibqp.qp_type == IB_QPT_RC) ||
-           (hr_qp->ibqp.qp_type == IB_QPT_UC) ||
-           (hr_qp->ibqp.qp_type == IB_QPT_UD))
-               hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
-
-       hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
-
-       if (udata) {
-               struct hns_roce_ucontext *context =
-                       rdma_udata_to_drv_context(
-                               udata,
-                               struct hns_roce_ucontext,
-                               ibucontext);
-
-               if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1))
-                       hns_roce_db_unmap_user(context, &hr_qp->sdb);
-
-               if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
-                       hns_roce_db_unmap_user(context, &hr_qp->rdb);
-       } else {
-               kfree(hr_qp->sq.wrid);
-               kfree(hr_qp->rq.wrid);
-               hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
-               if (hr_qp->rq.wqe_cnt)
-                       hns_roce_free_db(hr_dev, &hr_qp->rdb);
-       }
-       ib_umem_release(hr_qp->umem);
-
-       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
-            hr_qp->rq.wqe_cnt) {
-               kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
-               kfree(hr_qp->rq_inl_buf.wqe_list);
-       }
-
        return ret;
 }
 
@@ -5080,17 +4774,19 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
        ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
        if (ret)
-               ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n",
+               ibdev_err(&hr_dev->ib_dev,
+                         "failed to destroy QP 0x%06lx, ret = %d\n",
                          hr_qp->qpn, ret);
 
-       kfree(hr_qp);
+       hns_roce_qp_destroy(hr_dev, hr_qp, udata);
 
        return 0;
 }
 
 static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
-                                               struct hns_roce_qp *hr_qp)
+                                           struct hns_roce_qp *hr_qp)
 {
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_sccc_clr_done *resp;
        struct hns_roce_sccc_clr *clr;
        struct hns_roce_cmq_desc desc;
@@ -5102,7 +4798,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
        hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
        ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
        if (ret) {
-               dev_err(hr_dev->dev, "Reset SCC ctx  failed(%d)\n", ret);
+               ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret);
                goto out;
        }
 
@@ -5112,7 +4808,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
        clr->qpn = cpu_to_le32(hr_qp->qpn);
        ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
        if (ret) {
-               dev_err(hr_dev->dev, "Clear SCC ctx failed(%d)\n", ret);
+               ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret);
                goto out;
        }
 
@@ -5123,7 +4819,8 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
                                              HNS_ROCE_OPC_QUERY_SCCC, true);
                ret = hns_roce_cmq_send(hr_dev, &desc, 1);
                if (ret) {
-                       dev_err(hr_dev->dev, "Query clr cmq failed(%d)\n", ret);
+                       ibdev_err(ibdev, "failed to query clr cmq, ret = %d\n",
+                                 ret);
                        goto out;
                }
 
@@ -5133,7 +4830,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
                msleep(20);
        }
 
-       dev_err(hr_dev->dev, "Query SCC clr done flag overtime.\n");
+       ibdev_err(ibdev, "Query SCC clr done flag timed out.\n");
        ret = -ETIMEDOUT;
 
 out:
@@ -5177,99 +4874,65 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
                                HNS_ROCE_CMD_TIMEOUT_MSECS);
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
        if (ret)
-               dev_err(hr_dev->dev, "MODIFY CQ Failed to cmd mailbox.\n");
+               ibdev_err(&hr_dev->ib_dev,
+                         "failed to process cmd when modifying CQ, ret = %d\n",
+                         ret);
 
        return ret;
 }
 
-static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn)
-{
-       struct hns_roce_qp *hr_qp;
-       struct ib_qp_attr attr;
-       int attr_mask;
-       int ret;
-
-       hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
-       if (!hr_qp) {
-               dev_warn(hr_dev->dev, "no hr_qp can be found!\n");
-               return;
-       }
-
-       if (hr_qp->ibqp.uobject) {
-               if (hr_qp->sdb_en == 1) {
-                       hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
-                       if (hr_qp->rdb_en == 1)
-                               hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
-               } else {
-                       dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n");
-                       return;
-               }
-       }
-
-       attr_mask = IB_QP_STATE;
-       attr.qp_state = IB_QPS_ERR;
-       ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask,
-                                   hr_qp->state, IB_QPS_ERR);
-       if (ret)
-               dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n",
-                       qpn);
-}
-
 static void hns_roce_irq_work_handle(struct work_struct *work)
 {
        struct hns_roce_work *irq_work =
                                container_of(work, struct hns_roce_work, work);
-       struct device *dev = irq_work->hr_dev->dev;
+       struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
        u32 qpn = irq_work->qpn;
        u32 cqn = irq_work->cqn;
 
        switch (irq_work->event_type) {
        case HNS_ROCE_EVENT_TYPE_PATH_MIG:
-               dev_info(dev, "Path migrated succeeded.\n");
+               ibdev_info(ibdev, "Path migration succeeded.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
-               dev_warn(dev, "Path migration failed.\n");
+               ibdev_warn(ibdev, "Path migration failed.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_COMM_EST:
                break;
        case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
-               dev_warn(dev, "Send queue drained.\n");
+               ibdev_warn(ibdev, "Send queue drained.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
-               dev_err(dev, "Local work queue 0x%x catas error, sub_type:%d\n",
-                       qpn, irq_work->sub_type);
-               hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+               ibdev_err(ibdev, "Local work queue 0x%x catas error, sub_event type is: %d\n",
+                         qpn, irq_work->sub_type);
                break;
        case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
-               dev_err(dev, "Invalid request local work queue 0x%x error.\n",
-                       qpn);
-               hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+               ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n",
+                         qpn);
                break;
        case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
-               dev_err(dev, "Local access violation work queue 0x%x error, sub_type:%d\n",
-                       qpn, irq_work->sub_type);
-               hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+               ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n",
+                         qpn, irq_work->sub_type);
                break;
        case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
-               dev_warn(dev, "SRQ limit reach.\n");
+               ibdev_warn(ibdev, "SRQ limit reached.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
-               dev_warn(dev, "SRQ last wqe reach.\n");
+               ibdev_warn(ibdev, "SRQ last wqe reached.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
-               dev_err(dev, "SRQ catas error.\n");
+               ibdev_err(ibdev, "SRQ catas error.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
-               dev_err(dev, "CQ 0x%x access err.\n", cqn);
+               ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn);
                break;
        case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-               dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+               ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn);
                break;
        case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
-               dev_warn(dev, "DB overflow.\n");
+               ibdev_warn(ibdev, "DB overflow.\n");
                break;
        case HNS_ROCE_EVENT_TYPE_FLR:
-               dev_warn(dev, "Function level reset.\n");
+               ibdev_warn(ibdev, "Function level reset.\n");
                break;
        default:
                break;
@@ -5326,44 +4989,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
        hns_roce_write64(hr_dev, doorbell, eq->doorbell);
 }
 
-static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
+static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
 {
        u32 buf_chk_sz;
-       unsigned long off;
 
        buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-       off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
-
-       return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
-               off % buf_chk_sz);
-}
-
-static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
-{
-       u32 buf_chk_sz;
-       unsigned long off;
-
-       buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
-       off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
-
-       if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
-               return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
-                       off % buf_chk_sz);
+       if (eq->buf.nbufs == 1)
+               return eq->buf.direct.buf + offset % buf_chk_sz;
        else
-               return (struct hns_roce_aeqe *)((u8 *)
-                       (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
+               return eq->buf.page_list[offset / buf_chk_sz].buf +
+                      offset % buf_chk_sz;
 }
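get_eqe_buf() above folds the separate per-queue-type and per-hop-mode lookup helpers (the CEQ variants are removed further down) into one function: the caller passes a byte offset, entry index times entry size, and the helper resolves it either inside a single contiguous buffer or inside a page list. A simplified userspace model that mirrors the addressing above; the types and names are made up:

#include <stddef.h>
#include <stdint.h>

struct demo_eq_buf {
        size_t chunk_size;   /* bytes per chunk, 1 << (pg_sz + PAGE_SHIFT) */
        int nbufs;           /* 1: one contiguous buffer                   */
        void *direct;        /* used when nbufs == 1                       */
        void **pages;        /* per-chunk kernel addresses otherwise       */
};

static void *demo_get_eqe(const struct demo_eq_buf *buf, size_t offset)
{
        if (buf->nbufs == 1)
                return (uint8_t *)buf->direct + offset % buf->chunk_size;

        return (uint8_t *)buf->pages[offset / buf->chunk_size] +
               offset % buf->chunk_size;
}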
 
 static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
 {
        struct hns_roce_aeqe *aeqe;
 
-       if (!eq->hop_num)
-               aeqe = get_aeqe_v2(eq, eq->cons_index);
-       else
-               aeqe = mhop_get_aeqe(eq, eq->cons_index);
-
+       aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
+                          HNS_ROCE_AEQ_ENTRY_SIZE);
        return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
                !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
 }
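next_aeqe_sw_v2() above (and its CEQ counterpart later in the patch) decides whether an entry is new by comparing the owner bit hardware wrote into the EQE with the software wrap parity derived from the consumer index; entries is a power of two, so cons_index & entries flips on every wrap of the ring. A compact model of that check, following the expression above; names are illustrative:

#include <stdbool.h>
#include <stdint.h>

struct demo_eq {
        uint32_t cons_index;   /* ever-increasing software consumer index */
        uint32_t entries;      /* ring size, power of two                 */
};

/* 'owner' is the owner bit hardware wrote into the entry at slot
 * cons_index & (entries - 1). A mismatch with the software wrap parity
 * means the entry has been produced but not yet consumed. */
static bool demo_eqe_is_new(const struct demo_eq *eq, bool owner)
{
        bool sw_parity = !!(eq->cons_index & eq->entries);

        return owner != sw_parity;
}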
@@ -5456,44 +5099,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
        return aeqe_found;
 }
 
-static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
-{
-       u32 buf_chk_sz;
-       unsigned long off;
-
-       buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-       off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
-
-       return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
-               off % buf_chk_sz);
-}
-
-static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
-{
-       u32 buf_chk_sz;
-       unsigned long off;
-
-       buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
-       off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
-
-       if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
-               return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
-                       off % buf_chk_sz);
-       else
-               return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
-                       buf_chk_sz]) + off % buf_chk_sz);
-}
-
 static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
 {
        struct hns_roce_ceqe *ceqe;
 
-       if (!eq->hop_num)
-               ceqe = get_ceqe_v2(eq, eq->cons_index);
-       else
-               ceqe = mhop_get_ceqe(eq, eq->cons_index);
-
+       ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
+                          HNS_ROCE_CEQ_ENTRY_SIZE);
        return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
                (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
 }
@@ -5501,7 +5112,6 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
 static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
                               struct hns_roce_eq *eq)
 {
-       struct device *dev = hr_dev->dev;
        struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
        int ceqe_found = 0;
        u32 cqn;
@@ -5520,10 +5130,8 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
                ++eq->cons_index;
                ceqe_found = 1;
 
-               if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) {
-                       dev_warn(dev, "cons_index overflow, set back to 0.\n");
+               if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1))
                        eq->cons_index = 0;
-               }
 
                ceqe = next_ceqe_sw_v2(eq);
        }
@@ -5653,90 +5261,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
                dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
 }
 
-static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
-                                 struct hns_roce_eq *eq)
-{
-       struct device *dev = hr_dev->dev;
-       u64 idx;
-       u64 size;
-       u32 buf_chk_sz;
-       u32 bt_chk_sz;
-       u32 mhop_num;
-       int eqe_alloc;
-       int i = 0;
-       int j = 0;
-
-       mhop_num = hr_dev->caps.eqe_hop_num;
-       buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-       bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
-
-       if (mhop_num == HNS_ROCE_HOP_NUM_0) {
-               dma_free_coherent(dev, (unsigned int)(eq->entries *
-                                 eq->eqe_size), eq->bt_l0, eq->l0_dma);
-               return;
-       }
-
-       dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
-       if (mhop_num == 1) {
-               for (i = 0; i < eq->l0_last_num; i++) {
-                       if (i == eq->l0_last_num - 1) {
-                               eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
-                               size = (eq->entries - eqe_alloc) * eq->eqe_size;
-                               dma_free_coherent(dev, size, eq->buf[i],
-                                                 eq->buf_dma[i]);
-                               break;
-                       }
-                       dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
-                                         eq->buf_dma[i]);
-               }
-       } else if (mhop_num == 2) {
-               for (i = 0; i < eq->l0_last_num; i++) {
-                       dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
-                                         eq->l1_dma[i]);
-
-                       for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-                               idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
-                               if ((i == eq->l0_last_num - 1)
-                                    && j == eq->l1_last_num - 1) {
-                                       eqe_alloc = (buf_chk_sz / eq->eqe_size)
-                                                   * idx;
-                                       size = (eq->entries - eqe_alloc)
-                                               * eq->eqe_size;
-                                       dma_free_coherent(dev, size,
-                                                         eq->buf[idx],
-                                                         eq->buf_dma[idx]);
-                                       break;
-                               }
-                               dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
-                                                 eq->buf_dma[idx]);
-                       }
-               }
-       }
-       kfree(eq->buf_dma);
-       kfree(eq->buf);
-       kfree(eq->l1_dma);
-       kfree(eq->bt_l1);
-       eq->buf_dma = NULL;
-       eq->buf = NULL;
-       eq->l1_dma = NULL;
-       eq->bt_l1 = NULL;
-}
-
-static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
-                               struct hns_roce_eq *eq)
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
 {
-       u32 buf_chk_sz;
-
-       buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
-       if (hr_dev->caps.eqe_hop_num) {
-               hns_roce_mhop_free_eq(hr_dev, eq);
-               return;
-       }
-
-       dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
-                         eq->buf_list->map);
-       kfree(eq->buf_list);
+       if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
+               hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
+       hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
 }
 
 static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
@@ -5744,6 +5273,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
                                void *mb_buf)
 {
        struct hns_roce_eq_context *eqc;
+       u64 ba[MTT_MIN_COUNT] = { 0 };
+       int count;
 
        eqc = mb_buf;
        memset(eqc, 0, sizeof(struct hns_roce_eq_context));
@@ -5759,10 +5290,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
        eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
        eq->shift = ilog2((unsigned int)eq->entries);
 
-       if (!eq->hop_num)
-               eq->eqe_ba = eq->buf_list->map;
-       else
-               eq->eqe_ba = eq->l0_dma;
+       /* if not muti-hop, eqe buffer only use one trunk */
+       if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
+               eq->eqe_ba = eq->buf.direct.map;
+               eq->cur_eqe_ba = eq->eqe_ba;
+               if (eq->buf.npages > 1)
+                       eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
+               else
+                       eq->nxt_eqe_ba = eq->eqe_ba;
+       } else {
+               count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
+                                         MTT_MIN_COUNT, &eq->eqe_ba);
+               eq->cur_eqe_ba = ba[0];
+               if (count > 1)
+                       eq->nxt_eqe_ba = ba[1];
+               else
+                       eq->nxt_eqe_ba = ba[0];
+       }
 
        /* set eqc state */
        roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
@@ -5860,220 +5404,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
                       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
 }
 
-static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
-                                 struct hns_roce_eq *eq)
+static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+                     u32 page_shift)
 {
-       struct device *dev = hr_dev->dev;
-       int eq_alloc_done = 0;
-       int eq_buf_cnt = 0;
-       int eqe_alloc;
-       u32 buf_chk_sz;
-       u32 bt_chk_sz;
-       u32 mhop_num;
-       u64 size;
-       u64 idx;
+       struct hns_roce_buf_region region = {};
+       dma_addr_t *buf_list = NULL;
        int ba_num;
-       int bt_num;
-       int record_i;
-       int record_j;
-       int i = 0;
-       int j = 0;
-
-       mhop_num = hr_dev->caps.eqe_hop_num;
-       buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-       bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
+       int ret;
 
        ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
-                             buf_chk_sz);
-       bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
-
-       if (mhop_num == HNS_ROCE_HOP_NUM_0) {
-               if (eq->entries > buf_chk_sz / eq->eqe_size) {
-                       dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
-                               eq->entries);
-                       return -EINVAL;
-               }
-               eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
-                                              &(eq->l0_dma), GFP_KERNEL);
-               if (!eq->bt_l0)
-                       return -ENOMEM;
+                             1 << page_shift);
+       hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
 
-               eq->cur_eqe_ba = eq->l0_dma;
-               eq->nxt_eqe_ba = 0;
+       /* alloc a tmp list for storing eq buf addresses */
+       ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
+       if (ret) {
+               dev_err(hr_dev->dev, "alloc eq buf_list error\n");
+               return ret;
+       }
 
-               return 0;
+       ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count,
+                                       region.offset, &eq->buf);
+       if (ba_num != region.count) {
+               dev_err(hr_dev->dev, "get eqe buf err, expect %d, ret %d.\n",
+                       region.count, ba_num);
+               ret = -ENOBUFS;
+               goto done;
        }
 
-       eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL);
-       if (!eq->buf_dma)
-               return -ENOMEM;
-       eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL);
-       if (!eq->buf)
-               goto err_kcalloc_buf;
-
-       if (mhop_num == 2) {
-               eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL);
-               if (!eq->l1_dma)
-                       goto err_kcalloc_l1_dma;
-
-               eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL);
-               if (!eq->bt_l1)
-                       goto err_kcalloc_bt_l1;
-       }
-
-       /* alloc L0 BT */
-       eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL);
-       if (!eq->bt_l0)
-               goto err_dma_alloc_l0;
-
-       if (mhop_num == 1) {
-               if (ba_num > (bt_chk_sz / BA_BYTE_LEN))
-                       dev_err(dev, "ba_num %d is too large for 1 hop\n",
-                               ba_num);
-
-               /* alloc buf */
-               for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
-                       if (eq_buf_cnt + 1 < ba_num) {
-                               size = buf_chk_sz;
-                       } else {
-                               eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
-                               size = (eq->entries - eqe_alloc) * eq->eqe_size;
-                       }
-                       eq->buf[i] = dma_alloc_coherent(dev, size,
-                                                       &(eq->buf_dma[i]),
-                                                       GFP_KERNEL);
-                       if (!eq->buf[i])
-                               goto err_dma_alloc_buf;
+       hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz,
+                         page_shift);
+       ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, &region, 1);
+       if (ret)
+               dev_err(hr_dev->dev, "mtr attach error for eqe\n");
 
-                       *(eq->bt_l0 + i) = eq->buf_dma[i];
+       goto done;
 
-                       eq_buf_cnt++;
-                       if (eq_buf_cnt >= ba_num)
-                               break;
-               }
-               eq->cur_eqe_ba = eq->buf_dma[0];
-               if (ba_num > 1)
-                       eq->nxt_eqe_ba = eq->buf_dma[1];
-
-       } else if (mhop_num == 2) {
-               /* alloc L1 BT and buf */
-               for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
-                       eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
-                                                         &(eq->l1_dma[i]),
-                                                         GFP_KERNEL);
-                       if (!eq->bt_l1[i])
-                               goto err_dma_alloc_l1;
-                       *(eq->bt_l0 + i) = eq->l1_dma[i];
-
-                       for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-                               idx = i * bt_chk_sz / BA_BYTE_LEN + j;
-                               if (eq_buf_cnt + 1 < ba_num) {
-                                       size = buf_chk_sz;
-                               } else {
-                                       eqe_alloc = (buf_chk_sz / eq->eqe_size)
-                                                   * idx;
-                                       size = (eq->entries - eqe_alloc)
-                                               * eq->eqe_size;
-                               }
-                               eq->buf[idx] = dma_alloc_coherent(dev, size,
-                                                                 &(eq->buf_dma[idx]),
-                                                                 GFP_KERNEL);
-                               if (!eq->buf[idx])
-                                       goto err_dma_alloc_buf;
-
-                               *(eq->bt_l1[i] + j) = eq->buf_dma[idx];
-
-                               eq_buf_cnt++;
-                               if (eq_buf_cnt >= ba_num) {
-                                       eq_alloc_done = 1;
-                                       break;
-                               }
-                       }
+       hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
+done:
+       hns_roce_free_buf_list(&buf_list, 1);
 
-                       if (eq_alloc_done)
-                               break;
-               }
-               eq->cur_eqe_ba = eq->buf_dma[0];
-               if (ba_num > 1)
-                       eq->nxt_eqe_ba = eq->buf_dma[1];
-       }
+       return ret;
+}
 
-       eq->l0_last_num = i + 1;
-       if (mhop_num == 2)
-               eq->l1_last_num = j + 1;
+static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+       struct hns_roce_buf *buf = &eq->buf;
+       bool is_mhop = false;
+       u32 page_shift;
+       u32 mhop_num;
+       u32 max_size;
+       int ret;
 
-       return 0;
+       page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz;
+       mhop_num = hr_dev->caps.eqe_hop_num;
+       if (!mhop_num) {
+               max_size = 1 << page_shift;
+               buf->size = max_size;
+       } else if (mhop_num == HNS_ROCE_HOP_NUM_0) {
+               max_size = eq->entries * eq->eqe_size;
+               buf->size = max_size;
+       } else {
+               max_size = 1 << page_shift;
+               buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size);
+               is_mhop = true;
+       }
 
-err_dma_alloc_l1:
-       dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
-       eq->bt_l0 = NULL;
-       eq->l0_dma = 0;
-       for (i -= 1; i >= 0; i--) {
-               dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
-                                 eq->l1_dma[i]);
-
-               for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-                       idx = i * bt_chk_sz / BA_BYTE_LEN + j;
-                       dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
-                                         eq->buf_dma[idx]);
-               }
+       ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift);
+       if (ret) {
+               dev_err(hr_dev->dev, "alloc eq buf error\n");
+               return ret;
        }
-       goto err_dma_alloc_l0;
-
-err_dma_alloc_buf:
-       dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
-       eq->bt_l0 = NULL;
-       eq->l0_dma = 0;
-
-       if (mhop_num == 1)
-               for (i -= 1; i >= 0; i--)
-                       dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
-                                         eq->buf_dma[i]);
-       else if (mhop_num == 2) {
-               record_i = i;
-               record_j = j;
-               for (; i >= 0; i--) {
-                       dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
-                                         eq->l1_dma[i]);
-
-                       for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-                               if (i == record_i && j >= record_j)
-                                       break;
-
-                               idx = i * bt_chk_sz / BA_BYTE_LEN + j;
-                               dma_free_coherent(dev, buf_chk_sz,
-                                                 eq->buf[idx],
-                                                 eq->buf_dma[idx]);
-                       }
+
+       if (is_mhop) {
+               ret = map_eq_buf(hr_dev, eq, page_shift);
+               if (ret) {
+                       dev_err(hr_dev->dev, "map roce buf error\n");
+                       goto err_alloc;
                }
        }
 
-err_dma_alloc_l0:
-       kfree(eq->bt_l1);
-       eq->bt_l1 = NULL;
-
-err_kcalloc_bt_l1:
-       kfree(eq->l1_dma);
-       eq->l1_dma = NULL;
-
-err_kcalloc_l1_dma:
-       kfree(eq->buf);
-       eq->buf = NULL;
-
-err_kcalloc_buf:
-       kfree(eq->buf_dma);
-       eq->buf_dma = NULL;
-
-       return -ENOMEM;
+       return 0;
+err_alloc:
+       hns_roce_buf_free(hr_dev, buf->size, buf);
+       return ret;
 }
 
 static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
                                 struct hns_roce_eq *eq,
                                 unsigned int eq_cmd)
 {
-       struct device *dev = hr_dev->dev;
        struct hns_roce_cmd_mailbox *mailbox;
-       u32 buf_chk_sz = 0;
        int ret;
 
        /* Allocate mailbox memory */
@@ -6081,38 +5502,17 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
 
-       if (!hr_dev->caps.eqe_hop_num) {
-               buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-
-               eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list),
-                                      GFP_KERNEL);
-               if (!eq->buf_list) {
-                       ret = -ENOMEM;
-                       goto free_cmd_mbox;
-               }
-
-               eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
-                                                      &(eq->buf_list->map),
-                                                      GFP_KERNEL);
-               if (!eq->buf_list->buf) {
-                       ret = -ENOMEM;
-                       goto err_alloc_buf;
-               }
-
-       } else {
-               ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
-               if (ret) {
-                       ret = -ENOMEM;
-                       goto free_cmd_mbox;
-               }
+       ret = alloc_eq_buf(hr_dev, eq);
+       if (ret) {
+               ret = -ENOMEM;
+               goto free_cmd_mbox;
        }
-
        hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
 
        ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
                                eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
        if (ret) {
-               dev_err(dev, "[mailbox cmd] create eqc failed.\n");
+               dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
                goto err_cmd_mbox;
        }
 
@@ -6121,16 +5521,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
        return 0;
 
 err_cmd_mbox:
-       if (!hr_dev->caps.eqe_hop_num)
-               dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf,
-                                 eq->buf_list->map);
-       else {
-               hns_roce_mhop_free_eq(hr_dev, eq);
-               goto free_cmd_mbox;
-       }
-
-err_alloc_buf:
-       kfree(eq->buf_list);
+       free_eq_buf(hr_dev, eq);
 
 free_cmd_mbox:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -6292,8 +5683,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
                goto err_request_irq_fail;
        }
 
-       hr_dev->irq_workq =
-               create_singlethread_workqueue("hns_roce_irq_workqueue");
+       hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
        if (!hr_dev->irq_workq) {
                dev_err(dev, "Create irq workqueue failed!\n");
                ret = -ENOMEM;
@@ -6310,7 +5700,7 @@ err_request_irq_fail:
 
 err_create_eq_fail:
        for (i -= 1; i >= 0; i--)
-               hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
+               free_eq_buf(hr_dev, &eq_table->eq[i]);
        kfree(eq_table->eq);
 
        return ret;
@@ -6332,7 +5722,7 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
        for (i = 0; i < eq_num; i++) {
                hns_roce_v2_destroy_eqc(hr_dev, i);
 
-               hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
+               free_eq_buf(hr_dev, &eq_table->eq[i]);
        }
 
        kfree(eq_table->eq);
@@ -6472,8 +5862,9 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
                                        HNS_ROCE_CMD_TIMEOUT_MSECS);
                hns_roce_free_cmd_mailbox(hr_dev, mailbox);
                if (ret) {
-                       dev_err(hr_dev->dev,
-                               "MODIFY SRQ Failed to cmd mailbox.\n");
+                       ibdev_err(&hr_dev->ib_dev,
+                                 "failed to process cmd when modifying SRQ, ret = %d\n",
+                                 ret);
                        return ret;
                }
        }
@@ -6499,7 +5890,9 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
                                HNS_ROCE_CMD_QUERY_SRQC,
                                HNS_ROCE_CMD_TIMEOUT_MSECS);
        if (ret) {
-               dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
+               ibdev_err(&hr_dev->ib_dev,
+                         "failed to process cmd when querying SRQ, ret = %d\n",
+                         ret);
                goto out;
        }
 
index 2a117ff..82dd9f6 100644
 #define HNS_ROCE_V2_MAX_WQE_NUM                        0x8000
 #define        HNS_ROCE_V2_MAX_SRQ                     0x100000
 #define HNS_ROCE_V2_MAX_SRQ_WR                 0x8000
-#define HNS_ROCE_V2_MAX_SRQ_SGE                        0x100
+#define HNS_ROCE_V2_MAX_SRQ_SGE                        64
 #define HNS_ROCE_V2_MAX_CQ_NUM                 0x100000
 #define HNS_ROCE_V2_MAX_CQC_TIMER_NUM          0x100
 #define HNS_ROCE_V2_MAX_SRQ_NUM                        0x100000
 #define HNS_ROCE_V2_MAX_CQE_NUM                        0x400000
 #define HNS_ROCE_V2_MAX_SRQWQE_NUM             0x8000
-#define HNS_ROCE_V2_MAX_RQ_SGE_NUM             0x100
-#define HNS_ROCE_V2_MAX_SQ_SGE_NUM             0xff
-#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM            0x100
+#define HNS_ROCE_V2_MAX_RQ_SGE_NUM             64
+#define HNS_ROCE_V2_MAX_SQ_SGE_NUM             64
 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM         0x200000
 #define HNS_ROCE_V2_MAX_SQ_INLINE              0x20
 #define HNS_ROCE_V2_UAR_NUM                    256
@@ -163,7 +162,7 @@ enum {
 
 #define        GID_LEN_V2                              16
 
-#define HNS_ROCE_V2_CQE_QPN_MASK               0x3ffff
+#define HNS_ROCE_V2_CQE_QPN_MASK               0xfffff
 
 enum {
        HNS_ROCE_V2_WQE_OP_SEND                         = 0x0,
@@ -460,8 +459,8 @@ enum hns_roce_v2_qp_state {
        HNS_ROCE_QP_ST_INIT,
        HNS_ROCE_QP_ST_RTR,
        HNS_ROCE_QP_ST_RTS,
-       HNS_ROCE_QP_ST_SQER,
        HNS_ROCE_QP_ST_SQD,
+       HNS_ROCE_QP_ST_SQER,
        HNS_ROCE_QP_ST_ERR,
        HNS_ROCE_QP_ST_SQ_DRAINING,
        HNS_ROCE_QP_NUM_ST
@@ -1056,11 +1055,6 @@ struct hns_roce_v2_mpt_entry {
 #define V2_DB_PARAMETER_SL_S 16
 #define V2_DB_PARAMETER_SL_M GENMASK(18, 16)
 
-struct hns_roce_v2_cq_db {
-       __le32  byte_4;
-       __le32  parameter;
-};
-
 #define        V2_CQ_DB_BYTE_4_TAG_S 0
 #define V2_CQ_DB_BYTE_4_TAG_M GENMASK(23, 0)
 
index b9898e7..176f346 100644
@@ -243,7 +243,7 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
        /* Allocate MTT entry */
        ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
                                       mtt->mtt_type);
-       if (ret == -1)
+       if (ret)
                return -ENOMEM;
 
        return 0;
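
The one-line change above replaces the sentinel test (ret == -1) with the generic if (ret) check, following the kernel convention that helpers report failure through any non-zero (usually negative errno) return value. A minimal stand-alone sketch of why the generic form is the safer idiom; alloc_range() below is a hypothetical stand-in, not the driver's allocator:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in: today it fails with -ENOMEM, later maybe -EINVAL. */
static int alloc_range(unsigned long *first)
{
	(void)first;
	return -ENOMEM;
}

int main(void)
{
	unsigned long first;
	int ret = alloc_range(&first);

	if (ret == -1)
		printf("sentinel check: failure missed\n");
	if (ret)
		printf("generic check: failure caught (ret=%d)\n", ret);

	return 0;
}
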
index 780c780..b10c50b 100644
@@ -60,14 +60,12 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev)
 int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct ib_device *ib_dev = ibpd->device;
-       struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
-       struct device *dev = hr_dev->dev;
        struct hns_roce_pd *pd = to_hr_pd(ibpd);
        int ret;
 
        ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
        if (ret) {
-               dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n");
+               ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret);
                return ret;
        }
 
@@ -76,7 +74,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 
                if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
                        hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
-                       dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
+                       ibdev_err(ib_dev, "failed to copy to udata\n");
                        return -EFAULT;
                }
        }
index 3257ad1..6317901 100644
 
 #define SQP_NUM                                (2 * HNS_ROCE_MAX_PORTS)
 
+static void flush_work_handle(struct work_struct *work)
+{
+       struct hns_roce_work *flush_work = container_of(work,
+                                       struct hns_roce_work, work);
+       struct hns_roce_qp *hr_qp = container_of(flush_work,
+                                       struct hns_roce_qp, flush_work);
+       struct device *dev = flush_work->hr_dev->dev;
+       struct ib_qp_attr attr;
+       int attr_mask;
+       int ret;
+
+       attr_mask = IB_QP_STATE;
+       attr.qp_state = IB_QPS_ERR;
+
+       if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
+               ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
+               if (ret)
+                       dev_err(dev, "Modify QP to error state failed(%d) during CQE flush\n",
+                               ret);
+       }
+
+       /*
+        * make sure the QP destroy path knows the flush has completed,
+        * so that it can safely go ahead and destroy the QP
+        */
+       if (atomic_dec_and_test(&hr_qp->refcount))
+               complete(&hr_qp->free);
+}
+
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+       struct hns_roce_work *flush_work = &hr_qp->flush_work;
+
+       flush_work->hr_dev = hr_dev;
+       INIT_WORK(&flush_work->work, flush_work_handle);
+       atomic_inc(&hr_qp->refcount);
+       queue_work(hr_dev->irq_workq, &flush_work->work);
+}
+
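
The two helpers above pair a reference count with a completion: init_flush_work() takes an extra reference on the QP before queueing the work, and whoever drops the last reference completes hr_qp->free, which the destroy path waits on (the same dec-and-test/wait pattern is visible in the hns_roce_qp_free() lines removed later in this diff). A stand-alone user-space sketch of that hand-off, with pthreads standing in for the kernel completion API and all names made up for illustration:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int qp_ref = 2;   /* one ref for the destroy path, one for the flush work */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t freed_cond = PTHREAD_COND_INITIALIZER;
static int freed;               /* stands in for the struct completion */

static void put_qp(void)
{
	/* atomic_dec_and_test() analogue: the last reference signals completion */
	if (atomic_fetch_sub(&qp_ref, 1) == 1) {
		pthread_mutex_lock(&lock);
		freed = 1;
		pthread_cond_signal(&freed_cond);
		pthread_mutex_unlock(&lock);
	}
}

static void *flush_work(void *arg)
{
	(void)arg;
	/* ... the real work would move the QP to the error state here ... */
	put_qp();               /* flush done, drop the reference taken at queue time */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, flush_work, NULL);

	put_qp();                       /* destroy path drops its own reference */

	pthread_mutex_lock(&lock);      /* wait_for_completion() analogue */
	while (!freed)
		pthread_cond_wait(&freed_cond, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("flush completed, QP destroy may proceed\n");
	return 0;
}
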
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
 {
        struct device *dev = hr_dev->dev;
@@ -59,6 +98,15 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
                return;
        }
 
+       if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
+           (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
+            event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
+            event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR)) {
+               qp->state = IB_QPS_ERR;
+               if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
+                       init_flush_work(hr_dev, qp);
+       }
+
        qp->event(qp, (enum hns_roce_event)event_type);
 
        if (atomic_dec_and_test(&qp->refcount))
@@ -108,15 +156,34 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
        }
 }
 
-static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
-                                    int align, unsigned long *base)
+static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
-       struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+       unsigned long num = 0;
+       int ret;
 
-       return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
-                                          base) ?
-                      -ENOMEM :
-                      0;
+       if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+               /* when hw version is v1, the sqpn is derived from the phy port */
+               if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+                       num = HNS_ROCE_MAX_PORTS +
+                             hr_dev->iboe.phy_port[hr_qp->port];
+               else
+                       num = 1;
+
+               hr_qp->doorbell_qpn = 1;
+       } else {
+               ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap,
+                                                 1, 1, &num);
+               if (ret) {
+                       ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n");
+                       return -ENOMEM;
+               }
+
+               hr_qp->doorbell_qpn = (u32)num;
+       }
+
+       hr_qp->qpn = num;
+
+       return 0;
 }
 
 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
@@ -139,50 +206,75 @@ enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
        }
 }
 
-static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
-                                struct hns_roce_qp *hr_qp)
+static void add_qp_to_list(struct hns_roce_dev *hr_dev,
+                          struct hns_roce_qp *hr_qp,
+                          struct ib_cq *send_cq, struct ib_cq *recv_cq)
+{
+       struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
+       unsigned long flags;
+
+       hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
+       hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
+
+       spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+       hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
+
+       list_add_tail(&hr_qp->node, &hr_dev->qp_list);
+       if (hr_send_cq)
+               list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
+       if (hr_recv_cq)
+               list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
+
+       hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
+       spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
+static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
+                            struct hns_roce_qp *hr_qp,
+                            struct ib_qp_init_attr *init_attr)
 {
        struct xarray *xa = &hr_dev->qp_table_xa;
        int ret;
 
-       if (!qpn)
+       if (!hr_qp->qpn)
                return -EINVAL;
 
-       hr_qp->qpn = qpn;
-       atomic_set(&hr_qp->refcount, 1);
-       init_completion(&hr_qp->free);
-
-       ret = xa_err(xa_store_irq(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1),
-                               hr_qp, GFP_KERNEL));
+       ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
        if (ret)
-               dev_err(hr_dev->dev, "QPC xa_store failed\n");
+               dev_err(hr_dev->dev, "Failed to xa store for QPC\n");
+       else
+               /* add QP to device's QP list for software completion handling */
+               add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
+                              init_attr->recv_cq);
 
        return ret;
 }
 
-static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
-                            struct hns_roce_qp *hr_qp)
+static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
        struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
        struct device *dev = hr_dev->dev;
        int ret;
 
-       if (!qpn)
+       if (!hr_qp->qpn)
                return -EINVAL;
 
-       hr_qp->qpn = qpn;
+       /* In v1 engine, GSI QP context is saved in the RoCE hw's register */
+       if (hr_qp->ibqp.qp_type == IB_QPT_GSI &&
+           hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+               return 0;
 
        /* Alloc memory for QPC */
        ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
        if (ret) {
-               dev_err(dev, "QPC table get failed\n");
+               dev_err(dev, "Failed to get QPC table\n");
                goto err_out;
        }
 
        /* Alloc memory for IRRL */
        ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
        if (ret) {
-               dev_err(dev, "IRRL table get failed\n");
+               dev_err(dev, "Failed to get IRRL table\n");
                goto err_put_qp;
        }
 
@@ -191,7 +283,7 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
                ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
                                         hr_qp->qpn);
                if (ret) {
-                       dev_err(dev, "TRRL table get failed\n");
+                       dev_err(dev, "Failed to get TRRL table\n");
                        goto err_put_irrl;
                }
        }
@@ -201,22 +293,13 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
                ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
                                         hr_qp->qpn);
                if (ret) {
-                       dev_err(dev, "SCC CTX table get failed\n");
+                       dev_err(dev, "Failed to get SCC CTX table\n");
                        goto err_put_trrl;
                }
        }
 
-       ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
-       if (ret)
-               goto err_put_sccc;
-
        return 0;
 
-err_put_sccc:
-       if (hr_dev->caps.sccc_entry_sz)
-               hns_roce_table_put(hr_dev, &qp_table->sccc_table,
-                                  hr_qp->qpn);
-
 err_put_trrl:
        if (hr_dev->caps.trrl_entry_sz)
                hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
@@ -236,88 +319,84 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
        struct xarray *xa = &hr_dev->qp_table_xa;
        unsigned long flags;
 
+       list_del(&hr_qp->node);
+       list_del(&hr_qp->sq_node);
+       list_del(&hr_qp->rq_node);
+
        xa_lock_irqsave(xa, flags);
        __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
        xa_unlock_irqrestore(xa, flags);
 }
 
-void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
        struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 
-       if (atomic_dec_and_test(&hr_qp->refcount))
-               complete(&hr_qp->free);
-       wait_for_completion(&hr_qp->free);
+       /* In v1 engine, GSI QP context is saved in the RoCE hw's register */
+       if (hr_qp->ibqp.qp_type == IB_QPT_GSI &&
+           hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+               return;
 
-       if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
-               if (hr_dev->caps.trrl_entry_sz)
-                       hns_roce_table_put(hr_dev, &qp_table->trrl_table,
-                                          hr_qp->qpn);
-               hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
-       }
+       if (hr_dev->caps.trrl_entry_sz)
+               hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
+       hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
 }
 
-void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
-                              int cnt)
+static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
        struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 
-       if (base_qpn < hr_dev->caps.reserved_qps)
+       if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+               return;
+
+       if (hr_qp->qpn < hr_dev->caps.reserved_qps)
                return;
 
-       hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
+       hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR);
 }
 
-static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
+static int set_rq_size(struct hns_roce_dev *hr_dev,
                                struct ib_qp_cap *cap, bool is_user, int has_rq,
                                struct hns_roce_qp *hr_qp)
 {
-       struct device *dev = hr_dev->dev;
        u32 max_cnt;
 
-       /* Check the validity of QP support capacity */
-       if (cap->max_recv_wr > hr_dev->caps.max_wqes ||
-           cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
-               dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n",
-                       cap->max_recv_wr, cap->max_recv_sge);
-               return -EINVAL;
-       }
-
        /* If srq exist, set zero for relative number of rq */
        if (!has_rq) {
                hr_qp->rq.wqe_cnt = 0;
                hr_qp->rq.max_gs = 0;
                cap->max_recv_wr = 0;
                cap->max_recv_sge = 0;
-       } else {
-               if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
-                       dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
-                       return -EINVAL;
-               }
 
-               if (hr_dev->caps.min_wqes)
-                       max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes);
-               else
-                       max_cnt = cap->max_recv_wr;
+               return 0;
+       }
 
-               hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
+       /* Check the validity of QP support capacity */
+       if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
+           cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
+               ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
+                         cap->max_recv_wr, cap->max_recv_sge);
+               return -EINVAL;
+       }
 
-               if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
-                       dev_err(dev, "while setting rq size, rq.wqe_cnt too large\n");
-                       return -EINVAL;
-               }
+       max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes);
 
-               max_cnt = max(1U, cap->max_recv_sge);
-               hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
-               if (hr_dev->caps.max_rq_sg <= 2)
-                       hr_qp->rq.wqe_shift =
-                                       ilog2(hr_dev->caps.max_rq_desc_sz);
-               else
-                       hr_qp->rq.wqe_shift =
-                                       ilog2(hr_dev->caps.max_rq_desc_sz
-                                             * hr_qp->rq.max_gs);
+       hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
+       if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
+               ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
+                         cap->max_recv_wr);
+               return -EINVAL;
        }
 
+       max_cnt = max(1U, cap->max_recv_sge);
+       hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
+
+       if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
+               hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
+       else
+               hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
+                                           hr_qp->rq.max_gs);
+
        cap->max_recv_wr = hr_qp->rq.wqe_cnt;
        cap->max_recv_sge = hr_qp->rq.max_gs;
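
The rewritten set_rq_size() boils the RQ sizing down to a few steps: clamp the requested depth up to the device minimum and round it to a power of two, round the SGE count (at least one) up to a power of two, and derive the WQE stride from the descriptor size (multiplied by the SGE count on the multi-SGE path). A stand-alone sketch of that arithmetic; every numeric value below is a made-up example, not a real hip08 capability:

#include <stdint.h>
#include <stdio.h>

static uint32_t roundup_pow_of_two(uint32_t n)  /* like the kernel helper */
{
	uint32_t r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

static uint32_t ilog2(uint32_t n)               /* like the kernel helper */
{
	uint32_t s = 0;

	while (n > 1) {
		n >>= 1;
		s++;
	}
	return s;
}

int main(void)
{
	/* Hypothetical request and caps, for illustration only. */
	uint32_t max_recv_wr = 100, min_wqes = 64;
	uint32_t max_recv_sge = 3, max_rq_desc_sz = 16;

	uint32_t wqe_cnt = roundup_pow_of_two(max_recv_wr > min_wqes ?
					      max_recv_wr : min_wqes);
	uint32_t max_gs = roundup_pow_of_two(max_recv_sge ? max_recv_sge : 1);
	uint32_t wqe_shift = ilog2(max_rq_desc_sz * max_gs);    /* multi-SGE path */

	printf("wqe_cnt=%u max_gs=%u wqe_shift=%u\n", wqe_cnt, max_gs, wqe_shift);
	return 0;
}
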
 
@@ -334,12 +413,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
        /* Sanity check SQ size before proceeding */
        if (ucmd->log_sq_stride > max_sq_stride ||
            ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
-               ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n");
+               ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n");
                return -EINVAL;
        }
 
        if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
-               ibdev_err(&hr_dev->ib_dev, "SQ sge error! max_send_sge=%d\n",
+               ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n",
                          cap->max_send_sge);
                return -EINVAL;
        }
@@ -347,10 +426,9 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
        return 0;
 }
 
-static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
-                                    struct ib_qp_cap *cap,
-                                    struct hns_roce_qp *hr_qp,
-                                    struct hns_roce_ib_create_qp *ucmd)
+static int set_user_sq_size(struct hns_roce_dev *hr_dev,
+                           struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
+                           struct hns_roce_ib_create_qp *ucmd)
 {
        u32 ex_sge_num;
        u32 page_size;
@@ -363,27 +441,28 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 
        ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
        if (ret) {
-               ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n");
+               ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n");
                return ret;
        }
 
        hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
 
        max_cnt = max(1U, cap->max_send_sge);
-       if (hr_dev->caps.max_sq_sg <= 2)
+       if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
                hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
        else
                hr_qp->sq.max_gs = max_cnt;
 
-       if (hr_qp->sq.max_gs > 2)
+       if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
                hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
                                                        (hr_qp->sq.max_gs - 2));
 
-       if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
+       if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE &&
+           hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
                if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
-                       dev_err(hr_dev->dev,
-                               "The extended sge cnt error! sge_cnt=%d\n",
-                               hr_qp->sge.sge_cnt);
+                       ibdev_err(&hr_dev->ib_dev,
+                                 "Failed to check extended SGE size limit %d\n",
+                                 hr_qp->sge.sge_cnt);
                        return -EINVAL;
                }
        }
@@ -392,7 +471,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
        ex_sge_num = hr_qp->sge.sge_cnt;
 
        /* Get buf size, SQ and RQ are aligned to page_size */
-       if (hr_dev->caps.max_sq_sg <= 2) {
+       if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
                hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
                                             hr_qp->rq.wqe_shift), PAGE_SIZE) +
                                   round_up((hr_qp->sq.wqe_cnt <<
@@ -492,30 +571,6 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
        return region_cnt;
 }
 
-static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
-                                 struct hns_roce_buf_region *regions,
-                                 int region_cnt)
-{
-       int bt_pg_shift;
-       int ba_num;
-       int ret;
-
-       bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz;
-
-       /* all root ba entries must in one bt page */
-       do {
-               ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN;
-               ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt,
-                                                    ba_num);
-               if (ret <= ba_num)
-                       break;
-
-               bt_pg_shift++;
-       } while (ret > ba_num);
-
-       return bt_pg_shift - PAGE_SHIFT;
-}
-
 static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
                                struct hns_roce_qp *hr_qp)
 {
@@ -528,13 +583,15 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
        }
 
        /* ud sqwqe's sge use extend sge */
-       if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+       if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
+           hr_qp->ibqp.qp_type == IB_QPT_GSI) {
                hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
                                     hr_qp->sq.max_gs);
                hr_qp->sge.sge_shift = 4;
        }
 
-       if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
+       if (hr_qp->sq.max_gs > 2 &&
+           hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
                if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
                        dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
                                hr_qp->sge.sge_cnt);
@@ -545,46 +602,43 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
        return 0;
 }
 
-static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
-                                      struct ib_qp_cap *cap,
-                                      struct hns_roce_qp *hr_qp)
+static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
+                             struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
 {
-       struct device *dev = hr_dev->dev;
        u32 page_size;
        u32 max_cnt;
        int size;
        int ret;
 
-       if (cap->max_send_wr  > hr_dev->caps.max_wqes  ||
+       if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
            cap->max_send_sge > hr_dev->caps.max_sq_sg ||
            cap->max_inline_data > hr_dev->caps.max_sq_inline) {
-               dev_err(dev, "SQ WR or sge or inline data error!\n");
+               ibdev_err(&hr_dev->ib_dev,
+                         "SQ WR or sge or inline data error!\n");
                return -EINVAL;
        }
 
        hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
 
-       if (hr_dev->caps.min_wqes)
-               max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
-       else
-               max_cnt = cap->max_send_wr;
+       max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
 
        hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
        if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
-               dev_err(dev, "while setting kernel sq size, sq.wqe_cnt too large\n");
+               ibdev_err(&hr_dev->ib_dev,
+                         "while setting kernel sq size, sq.wqe_cnt too large\n");
                return -EINVAL;
        }
 
        /* Get data_seg numbers */
        max_cnt = max(1U, cap->max_send_sge);
-       if (hr_dev->caps.max_sq_sg <= 2)
+       if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
                hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
        else
                hr_qp->sq.max_gs = max_cnt;
 
        ret = set_extend_sge_param(hr_dev, hr_qp);
        if (ret) {
-               dev_err(dev, "set extend sge parameters fail\n");
+               ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n");
                return ret;
        }
 
@@ -593,7 +647,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
        hr_qp->sq.offset = 0;
        size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
 
-       if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
+       if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) {
                hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
                                         (u32)hr_qp->sge.sge_cnt);
                hr_qp->sge.offset = size;
@@ -677,362 +731,449 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
        kfree(hr_qp->rq_inl_buf.wqe_list);
 }
 
-static void add_qp_to_list(struct hns_roce_dev *hr_dev,
-                          struct hns_roce_qp *hr_qp,
-                          struct ib_cq *send_cq, struct ib_cq *recv_cq)
-{
-       struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
-       unsigned long flags;
-
-       hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
-       hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
-
-       spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
-       hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
-
-       list_add_tail(&hr_qp->node, &hr_dev->qp_list);
-       if (hr_send_cq)
-               list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
-       if (hr_recv_cq)
-               list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
-
-       hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
-       spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
-}
-
-static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
-                                    struct ib_pd *ib_pd,
-                                    struct ib_qp_init_attr *init_attr,
-                                    struct ib_udata *udata, unsigned long sqpn,
-                                    struct hns_roce_qp *hr_qp)
+static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                      u32 page_shift, bool is_user)
 {
-       dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL };
-       struct device *dev = hr_dev->dev;
-       struct hns_roce_ib_create_qp ucmd;
-       struct hns_roce_ib_create_qp_resp resp = {};
-       struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
-               udata, struct hns_roce_ucontext, ibucontext);
+/* WQE buffer includes 3 parts: SQ, extended SGE and RQ. */
+#define HNS_ROCE_WQE_REGION_MAX         3
+       struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {};
+       dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {};
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_buf_region *r;
-       unsigned long qpn = 0;
-       u32 page_shift;
+       int region_count;
        int buf_count;
        int ret;
        int i;
 
-       mutex_init(&hr_qp->mutex);
-       spin_lock_init(&hr_qp->sq.lock);
-       spin_lock_init(&hr_qp->rq.lock);
-
-       hr_qp->state = IB_QPS_RESET;
-
-       hr_qp->ibqp.qp_type = init_attr->qp_type;
-
-       if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
-               hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
-       else
-               hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+       region_count = split_wqe_buf_region(hr_dev, hr_qp, regions,
+                                           ARRAY_SIZE(regions), page_shift);
 
-       ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
-                                  hns_roce_qp_has_rq(init_attr), hr_qp);
+       /* alloc a tmp list to store WQE buffer addresses */
+       ret = hns_roce_alloc_buf_list(regions, buf_list, region_count);
        if (ret) {
-               dev_err(dev, "hns_roce_set_rq_size failed\n");
-               goto err_out;
+               ibdev_err(ibdev, "Failed to alloc WQE buffer list\n");
+               return ret;
        }
 
-       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
-           hns_roce_qp_has_rq(init_attr)) {
-               ret = alloc_rq_inline_buf(hr_qp, init_attr);
-               if (ret) {
-                       dev_err(dev, "allocate receive inline buffer failed\n");
-                       goto err_out;
+       for (i = 0; i < region_count; i++) {
+               r = &regions[i];
+               if (is_user)
+                       buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i],
+                                       r->count, r->offset, hr_qp->umem,
+                                       page_shift);
+               else
+                       buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i],
+                                       r->count, r->offset, &hr_qp->hr_buf);
+
+               if (buf_count != r->count) {
+                       ibdev_err(ibdev, "Failed to get %s WQE buf, expect %d, got %d.\n",
+                                 is_user ? "user" : "kernel",
+                                 r->count, buf_count);
+                       ret = -ENOBUFS;
+                       goto done;
                }
        }
 
-       page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
-       if (udata) {
-               if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
-                       dev_err(dev, "ib_copy_from_udata error for create qp\n");
-                       ret = -EFAULT;
-                       goto err_alloc_rq_inline_buf;
-               }
+       hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz;
+       hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
+                         page_shift);
+       ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions,
+                                 region_count);
+       if (ret)
+               ibdev_err(ibdev, "Failed to attach WQE's mtr\n");
+
+       goto done;
 
-               ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
-                                               &ucmd);
+       hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
+done:
+       hns_roce_free_buf_list(buf_list, region_count);
+
+       return ret;
+}
+
+static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                       struct ib_qp_init_attr *init_attr,
+                       struct ib_udata *udata, unsigned long addr)
+{
+       u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       bool is_rq_buf_inline;
+       int ret;
+
+       is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+                          hns_roce_qp_has_rq(init_attr);
+       if (is_rq_buf_inline) {
+               ret = alloc_rq_inline_buf(hr_qp, init_attr);
                if (ret) {
-                       dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
-                       goto err_alloc_rq_inline_buf;
+                       ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n");
+                       return ret;
                }
+       }
 
-               hr_qp->umem = ib_umem_get(ib_pd->device, ucmd.buf_addr,
-                                         hr_qp->buff_size, 0);
+       if (udata) {
+               hr_qp->umem = ib_umem_get(ibdev, addr, hr_qp->buff_size, 0);
                if (IS_ERR(hr_qp->umem)) {
-                       dev_err(dev, "ib_umem_get error for create qp\n");
                        ret = PTR_ERR(hr_qp->umem);
-                       goto err_alloc_rq_inline_buf;
-               }
-               hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
-                               hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
-                               page_shift);
-               ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
-                                             hr_qp->region_cnt);
-               if (ret) {
-                       dev_err(dev, "alloc buf_list error for create qp\n");
-                       goto err_alloc_list;
+                       goto err_inline;
                }
+       } else {
+               ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
+                                        (1 << page_shift) * 2,
+                                        &hr_qp->hr_buf, page_shift);
+               if (ret)
+                       goto err_inline;
+       }
 
-               for (i = 0; i < hr_qp->region_cnt; i++) {
-                       r = &hr_qp->regions[i];
-                       buf_count = hns_roce_get_umem_bufs(hr_dev,
-                                       buf_list[i], r->count, r->offset,
-                                       hr_qp->umem, page_shift);
-                       if (buf_count != r->count) {
-                               dev_err(dev,
-                                       "get umem buf err, expect %d,ret %d.\n",
-                                       r->count, buf_count);
-                               ret = -ENOBUFS;
-                               goto err_get_bufs;
-                       }
-               }
+       ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata);
+       if (ret)
+               goto err_alloc;
+
+       return 0;
+
+err_inline:
+       if (is_rq_buf_inline)
+               free_rq_inline_buf(hr_qp);
 
-               if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
-                   (udata->inlen >= sizeof(ucmd)) &&
-                   (udata->outlen >= sizeof(resp)) &&
-                   hns_roce_qp_has_sq(init_attr)) {
-                       ret = hns_roce_db_map_user(uctx, udata, ucmd.sdb_addr,
+err_alloc:
+       if (udata) {
+               ib_umem_release(hr_qp->umem);
+               hr_qp->umem = NULL;
+       } else {
+               hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+       }
+
+       ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret);
+
+       return ret;
+}
+
+static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+       hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
+       if (hr_qp->umem) {
+               ib_umem_release(hr_qp->umem);
+               hr_qp->umem = NULL;
+       }
+
+       if (hr_qp->hr_buf.nbufs > 0)
+               hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+
+       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+            hr_qp->rq.wqe_cnt)
+               free_rq_inline_buf(hr_qp);
+}
+
+static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
+                                  struct ib_qp_init_attr *init_attr,
+                                  struct ib_udata *udata,
+                                  struct hns_roce_ib_create_qp_resp *resp,
+                                  struct hns_roce_ib_create_qp *ucmd)
+{
+       return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
+               udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+               hns_roce_qp_has_sq(init_attr) &&
+               udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr));
+}
+
+static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev,
+                                  struct ib_qp_init_attr *init_attr,
+                                  struct ib_udata *udata,
+                                  struct hns_roce_ib_create_qp_resp *resp)
+{
+       return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+               udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+               hns_roce_qp_has_rq(init_attr));
+}
+
+static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
+                                    struct ib_qp_init_attr *init_attr)
+{
+       return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+               hns_roce_qp_has_rq(init_attr));
+}
+
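
user_qp_has_sdb() and user_qp_has_rdb() above gate the optional doorbell features on udata->inlen/outlen being large enough to actually contain the fields they rely on, using offsetofend(). A stand-alone sketch of that compatibility check; create_cmd and its fields are illustrative only, not the driver's real ABI struct:

#include <stddef.h>
#include <stdio.h>

/* Mirrors the kernel's offsetofend(): the end offset of a struct member. */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

/* Hypothetical ABI struct; new_field was appended in a later version. */
struct create_cmd {
	unsigned long long buf_addr;
	unsigned long long db_addr;
	unsigned long long new_field;
};

int main(void)
{
	/* Pretend userspace was built against the old two-field layout. */
	size_t inlen = 2 * sizeof(unsigned long long);

	printf("db_addr usable:   %s\n",
	       inlen >= offsetofend(struct create_cmd, db_addr) ? "yes" : "no");
	printf("new_field usable: %s\n",
	       inlen >= offsetofend(struct create_cmd, new_field) ? "yes" : "no");
	return 0;
}
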
+static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                      struct ib_qp_init_attr *init_attr,
+                      struct ib_udata *udata,
+                      struct hns_roce_ib_create_qp *ucmd,
+                      struct hns_roce_ib_create_qp_resp *resp)
+{
+       struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
+               udata, struct hns_roce_ucontext, ibucontext);
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       int ret;
+
+       if (udata) {
+               if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+                       ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr,
                                                   &hr_qp->sdb);
                        if (ret) {
-                               dev_err(dev, "sq record doorbell map failed!\n");
-                               goto err_get_bufs;
+                               ibdev_err(ibdev,
+                                         "Failed to map user SQ doorbell\n");
+                               goto err_out;
                        }
-
-                       /* indicate kernel supports sq record db */
-                       resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
                        hr_qp->sdb_en = 1;
+                       resp->cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
                }
 
-               if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-                   (udata->outlen >= sizeof(resp)) &&
-                   hns_roce_qp_has_rq(init_attr)) {
-                       ret = hns_roce_db_map_user(uctx, udata, ucmd.db_addr,
+               if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+                       ret = hns_roce_db_map_user(uctx, udata, ucmd->db_addr,
                                                   &hr_qp->rdb);
                        if (ret) {
-                               dev_err(dev, "rq record doorbell map failed!\n");
-                               goto err_sq_dbmap;
+                               ibdev_err(ibdev,
+                                         "Failed to map user RQ doorbell\n");
+                               goto err_sdb;
                        }
-
-                       /* indicate kernel supports rq record db */
-                       resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
                        hr_qp->rdb_en = 1;
+                       resp->cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
                }
        } else {
-               if (init_attr->create_flags &
-                   IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
-                       dev_err(dev, "init_attr->create_flags error!\n");
-                       ret = -EINVAL;
-                       goto err_alloc_rq_inline_buf;
-               }
-
-               if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
-                       dev_err(dev, "init_attr->create_flags error!\n");
-                       ret = -EINVAL;
-                       goto err_alloc_rq_inline_buf;
-               }
-
-               /* Set SQ size */
-               ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap,
-                                                 hr_qp);
-               if (ret) {
-                       dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
-                       goto err_alloc_rq_inline_buf;
-               }
-
                /* QP doorbell register address */
                hr_qp->sq.db_reg_l = hr_dev->reg_base + hr_dev->sdb_offset +
                                     DB_REG_OFFSET * hr_dev->priv_uar.index;
                hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset +
                                     DB_REG_OFFSET * hr_dev->priv_uar.index;
 
-               if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-                   hns_roce_qp_has_rq(init_attr)) {
+               if (kernel_qp_has_rdb(hr_dev, init_attr)) {
                        ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
                        if (ret) {
-                               dev_err(dev, "rq record doorbell alloc failed!\n");
-                               goto err_alloc_rq_inline_buf;
+                               ibdev_err(ibdev,
+                                         "Failed to alloc kernel RQ doorbell\n");
+                               goto err_out;
                        }
                        *hr_qp->rdb.db_record = 0;
                        hr_qp->rdb_en = 1;
                }
+       }
 
-               /* Allocate QP buf */
-               if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
-                                      (1 << page_shift) * 2,
-                                      &hr_qp->hr_buf, page_shift)) {
-                       dev_err(dev, "hns_roce_buf_alloc error!\n");
+       return 0;
+err_sdb:
+       if (udata && hr_qp->sdb_en)
+               hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+err_out:
+       return ret;
+}
+
+static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                      struct ib_udata *udata)
+{
+       struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
+               udata, struct hns_roce_ucontext, ibucontext);
+
+       if (udata) {
+               if (hr_qp->rdb_en)
+                       hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
+               if (hr_qp->sdb_en)
+                       hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+       } else {
+               if (hr_qp->rdb_en)
+                       hns_roce_free_db(hr_dev, &hr_qp->rdb);
+       }
+}
+
+static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
+                            struct hns_roce_qp *hr_qp)
+{
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       u64 *sq_wrid = NULL;
+       u64 *rq_wrid = NULL;
+       int ret;
+
+       sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+       if (ZERO_OR_NULL_PTR(sq_wrid)) {
+               ibdev_err(ibdev, "Failed to alloc SQ wrid\n");
+               return -ENOMEM;
+       }
+
+       if (hr_qp->rq.wqe_cnt) {
+               rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+               if (ZERO_OR_NULL_PTR(rq_wrid)) {
+                       ibdev_err(ibdev, "Failed to alloc RQ wrid\n");
                        ret = -ENOMEM;
-                       goto err_db;
-               }
-               hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
-                               hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
-                               page_shift);
-               ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
-                                             hr_qp->region_cnt);
-               if (ret) {
-                       dev_err(dev, "alloc buf_list error for create qp!\n");
-                       goto err_alloc_list;
+                       goto err_sq;
                }
+       }
 
-               for (i = 0; i < hr_qp->region_cnt; i++) {
-                       r = &hr_qp->regions[i];
-                       buf_count = hns_roce_get_kmem_bufs(hr_dev,
-                                       buf_list[i], r->count, r->offset,
-                                       &hr_qp->hr_buf);
-                       if (buf_count != r->count) {
-                               dev_err(dev,
-                                       "get kmem buf err, expect %d,ret %d.\n",
-                                       r->count, buf_count);
-                               ret = -ENOBUFS;
-                               goto err_get_bufs;
-                       }
+       hr_qp->sq.wrid = sq_wrid;
+       hr_qp->rq.wrid = rq_wrid;
+       return 0;
+err_sq:
+       kfree(sq_wrid);
+
+       return ret;
+}
+
+static void free_kernel_wrid(struct hns_roce_dev *hr_dev,
+                            struct hns_roce_qp *hr_qp)
+{
+       kfree(hr_qp->rq.wrid);
+       kfree(hr_qp->sq.wrid);
+}
+
+static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                       struct ib_qp_init_attr *init_attr,
+                       struct ib_udata *udata,
+                       struct hns_roce_ib_create_qp *ucmd)
+{
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       int ret;
+
+       hr_qp->ibqp.qp_type = init_attr->qp_type;
+
+       if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+               hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
+       else
+               hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+
+       ret = set_rq_size(hr_dev, &init_attr->cap, udata,
+                         hns_roce_qp_has_rq(init_attr), hr_qp);
+       if (ret) {
+               ibdev_err(ibdev, "Failed to set user RQ size\n");
+               return ret;
+       }
+
+       if (udata) {
+               if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) {
+                       ibdev_err(ibdev, "Failed to copy QP ucmd\n");
+                       return -EFAULT;
                }
 
-               hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
-                                        GFP_KERNEL);
-               if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) {
-                       ret = -ENOMEM;
-                       goto err_get_bufs;
+               ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
+               if (ret)
+                       ibdev_err(ibdev, "Failed to set user SQ size\n");
+       } else {
+               if (init_attr->create_flags &
+                   IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+                       ibdev_err(ibdev, "Failed to check multicast loopback\n");
+                       return -EINVAL;
                }
 
-               if (hr_qp->rq.wqe_cnt) {
-                       hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
-                                                GFP_KERNEL);
-                       if (ZERO_OR_NULL_PTR(hr_qp->rq.wrid)) {
-                               ret = -ENOMEM;
-                               goto err_sq_wrid;
-                       }
+               if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+                       ibdev_err(ibdev, "Failed to check ipoib ud lso\n");
+                       return -EINVAL;
                }
+
+               ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
+               if (ret)
+                       ibdev_err(ibdev, "Failed to set kernel SQ size\n");
        }
 
-       if (sqpn) {
-               qpn = sqpn;
-       } else {
-               /* Get QPN */
-               ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn);
+       return ret;
+}
+
+static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
+                                    struct ib_pd *ib_pd,
+                                    struct ib_qp_init_attr *init_attr,
+                                    struct ib_udata *udata,
+                                    struct hns_roce_qp *hr_qp)
+{
+       struct hns_roce_ib_create_qp_resp resp = {};
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       struct hns_roce_ib_create_qp ucmd;
+       int ret;
+
+       mutex_init(&hr_qp->mutex);
+       spin_lock_init(&hr_qp->sq.lock);
+       spin_lock_init(&hr_qp->rq.lock);
+
+       hr_qp->state = IB_QPS_RESET;
+       hr_qp->flush_flag = 0;
+
+       ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
+       if (ret) {
+               ibdev_err(ibdev, "Failed to set QP param\n");
+               return ret;
+       }
+
+       if (!udata) {
+               ret = alloc_kernel_wrid(hr_dev, hr_qp);
                if (ret) {
-                       dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n");
-                       goto err_wrid;
+                       ibdev_err(ibdev, "Failed to alloc wrid\n");
+                       return ret;
                }
        }
 
-       hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions,
-                                                       hr_qp->region_cnt);
-       hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
-                         page_shift);
-       ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list,
-                                 hr_qp->regions, hr_qp->region_cnt);
+       ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
        if (ret) {
-               dev_err(dev, "mtr attach error for create qp\n");
-               goto err_mtr;
+               ibdev_err(ibdev, "Failed to alloc QP doorbell\n");
+               goto err_wrid;
        }
 
-       if (init_attr->qp_type == IB_QPT_GSI &&
-           hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
-               /* In v1 engine, GSI QP context in RoCE engine's register */
-               ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
-               if (ret) {
-                       dev_err(dev, "hns_roce_qp_alloc failed!\n");
-                       goto err_qpn;
-               }
-       } else {
-               ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp);
-               if (ret) {
-                       dev_err(dev, "hns_roce_qp_alloc failed!\n");
-                       goto err_qpn;
-               }
+       ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+       if (ret) {
+               ibdev_err(ibdev, "Failed to alloc QP buffer\n");
+               goto err_db;
        }
 
-       if (sqpn)
-               hr_qp->doorbell_qpn = 1;
-       else
-               hr_qp->doorbell_qpn = (u32)hr_qp->qpn;
+       ret = alloc_qpn(hr_dev, hr_qp);
+       if (ret) {
+               ibdev_err(ibdev, "Failed to alloc QPN\n");
+               goto err_buf;
+       }
+
+       ret = alloc_qpc(hr_dev, hr_qp);
+       if (ret) {
+               ibdev_err(ibdev, "Failed to alloc QP context\n");
+               goto err_qpn;
+       }
+
+       ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
+       if (ret) {
+               ibdev_err(ibdev, "Failed to store QP\n");
+               goto err_qpc;
+       }
 
        if (udata) {
                ret = ib_copy_to_udata(udata, &resp,
                                       min(udata->outlen, sizeof(resp)));
-               if (ret)
-                       goto err_qp;
+               if (ret) {
+                       ibdev_err(ibdev, "copy qp resp failed!\n");
+                       goto err_store;
+               }
        }
 
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
                ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
                if (ret)
-                       goto err_qp;
+                       goto err_store;
        }
 
+       hr_qp->ibqp.qp_num = hr_qp->qpn;
        hr_qp->event = hns_roce_ib_qp_event;
-
-       add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, init_attr->recv_cq);
-
-       hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
+       atomic_set(&hr_qp->refcount, 1);
+       init_completion(&hr_qp->free);
 
        return 0;
 
-err_qp:
-       if (init_attr->qp_type == IB_QPT_GSI &&
-               hr_dev->hw_rev == HNS_ROCE_HW_VER1)
-               hns_roce_qp_remove(hr_dev, hr_qp);
-       else
-               hns_roce_qp_free(hr_dev, hr_qp);
-
+err_store:
+       hns_roce_qp_remove(hr_dev, hr_qp);
+err_qpc:
+       free_qpc(hr_dev, hr_qp);
 err_qpn:
-       if (!sqpn)
-               hns_roce_release_range_qp(hr_dev, qpn, 1);
-
-err_mtr:
-       hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
-
+       free_qpn(hr_dev, hr_qp);
+err_buf:
+       free_qp_buf(hr_dev, hr_qp);
+err_db:
+       free_qp_db(hr_dev, hr_qp, udata);
 err_wrid:
-       if (udata) {
-               if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-                   (udata->outlen >= sizeof(resp)) &&
-                   hns_roce_qp_has_rq(init_attr))
-                       hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
-       } else {
-               if (hr_qp->rq.wqe_cnt)
-                       kfree(hr_qp->rq.wrid);
-       }
-
-err_sq_dbmap:
-       if (udata)
-               if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
-                   (udata->inlen >= sizeof(ucmd)) &&
-                   (udata->outlen >= sizeof(resp)) &&
-                   hns_roce_qp_has_sq(init_attr))
-                       hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
-
-err_sq_wrid:
-       if (!udata)
-               kfree(hr_qp->sq.wrid);
-
-err_get_bufs:
-       hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
-
-err_alloc_list:
-       if (!hr_qp->umem)
-               hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
-       ib_umem_release(hr_qp->umem);
+       free_kernel_wrid(hr_dev, hr_qp);
+       return ret;
+}
 
-err_db:
-       if (!udata && hns_roce_qp_has_rq(init_attr) &&
-           (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
-               hns_roce_free_db(hr_dev, &hr_qp->rdb);
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                        struct ib_udata *udata)
+{
+       if (atomic_dec_and_test(&hr_qp->refcount))
+               complete(&hr_qp->free);
+       wait_for_completion(&hr_qp->free);
 
-err_alloc_rq_inline_buf:
-       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
-            hns_roce_qp_has_rq(init_attr))
-               free_rq_inline_buf(hr_qp);
+       free_qpc(hr_dev, hr_qp);
+       free_qpn(hr_dev, hr_qp);
+       free_qp_buf(hr_dev, hr_qp);
+       free_kernel_wrid(hr_dev, hr_qp);
+       free_qp_db(hr_dev, hr_qp, udata);
 
-err_out:
-       return ret;
+       kfree(hr_qp);
 }
 
 struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
@@ -1050,7 +1191,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                if (!hr_qp)
                        return ERR_PTR(-ENOMEM);
 
-               ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
+               ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
                                                hr_qp);
                if (ret) {
                        ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n",
@@ -1059,8 +1200,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                        return ERR_PTR(ret);
                }
 
-               hr_qp->ibqp.qp_num = hr_qp->qpn;
-
                break;
        }
        case IB_QPT_GSI: {
@@ -1077,15 +1216,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                hr_qp->port = init_attr->port_num - 1;
                hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
 
-               /* when hw version is v1, the sqpn is allocated */
-               if (hr_dev->caps.max_sq_sg <= 2)
-                       hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
-                                            hr_dev->iboe.phy_port[hr_qp->port];
-               else
-                       hr_qp->ibqp.qp_num = 1;
-
                ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
-                                               hr_qp->ibqp.qp_num, hr_qp);
+                                               hr_qp);
                if (ret) {
                        ibdev_err(ibdev, "Create GSI QP failed!\n");
                        kfree(hr_qp);
@@ -1097,7 +1229,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
        default:{
                ibdev_err(ibdev, "not support QP type %d\n",
                          init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
        }
        }
 
@@ -1230,11 +1362,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                goto out;
 
        if (cur_state == new_state && cur_state == IB_QPS_RESET) {
-               if (hr_dev->caps.min_wqes) {
+               if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
                        ret = -EPERM;
                        ibdev_err(&hr_dev->ib_dev,
-                               "cur_state=%d new_state=%d\n", cur_state,
-                               new_state);
+                                 "RST2RST state is not supported\n");
                } else {
                        ret = 0;
                }
@@ -1306,17 +1437,17 @@ static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
        return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
 }
 
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
 {
        return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
 }
 
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n)
 {
        return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
 }
 
-void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n)
 {
        return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
                                        (n << hr_qp->sge.sge_shift));
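
A note on the lifetime handling above: hns_roce_qp_destroy() pairs an atomic
reference count with a completion, so teardown blocks until the last concurrent
user drops its reference before any of the free_*() helpers run. A minimal
sketch of that idiom, with illustrative names rather than the driver's own
structures:

    #include <linux/atomic.h>
    #include <linux/completion.h>
    #include <linux/slab.h>

    struct obj {
            atomic_t refcount;              /* atomic_set(..., 1) at creation */
            struct completion free;         /* init_completion() at creation */
    };

    /* Asynchronous users take a reference with atomic_inc() and, on their way
     * out, do: if (atomic_dec_and_test(&obj->refcount)) complete(&obj->free);
     */
    static void obj_destroy(struct obj *obj)
    {
            /* drop the creation reference; whoever reaches zero completes */
            if (atomic_dec_and_test(&obj->refcount))
                    complete(&obj->free);

            /* wait until no user still holds a reference */
            wait_for_completion(&obj->free);

            /* now safe to release resources */
            kfree(obj);
    }
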
index c6d5f06..5b3dd1a 100644 (file)
@@ -381,7 +381,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
        srq->max_gs = init_attr->attr.max_sge;
 
-       srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
+       srq_desc_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+                                       HNS_ROCE_SGE_SIZE * srq->max_gs));
 
        srq->wqe_shift = ilog2(srq_desc_size);
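
For reference, the SRQ hunk above only swaps the bare literal 16 for
HNS_ROCE_SGE_SIZE; assuming that constant keeps the value 16 of the literal it
replaces, the computed descriptor size is unchanged. A small worked example of
the sizing:

    /* init_attr->attr.max_sge = 3:
     *   srq_desc_size = roundup_pow_of_two(max(16, 16 * 3))
     *                 = roundup_pow_of_two(48) = 64
     *   srq->wqe_shift = ilog2(64) = 6, i.e. 64-byte descriptor slots
     */
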
 
index 8feec35..3c62c93 100644 (file)
@@ -67,7 +67,7 @@
 #include "i40iw_user.h"
 #include "i40iw_puda.h"
 
-#define I40IW_FW_VERSION  2
+#define I40IW_FW_VER_DEFAULT 2
 #define I40IW_HW_VERSION  2
 
 #define I40IW_ARP_ADD     1
@@ -325,6 +325,26 @@ struct i40iw_handler {
        struct i40e_info ldev;
 };
 
+/**
+ * i40iw_fw_major_ver - get firmware major version
+ * @dev: iwarp device
+ **/
+static inline u64 i40iw_fw_major_ver(struct i40iw_sc_dev *dev)
+{
+       return RS_64(dev->feature_info[I40IW_FEATURE_FW_INFO],
+                    I40IW_FW_VER_MAJOR);
+}
+
+/**
+ * i40iw_fw_minor_ver - get firmware minor version
+ * @dev: iwarp device
+ **/
+static inline u64 i40iw_fw_minor_ver(struct i40iw_sc_dev *dev)
+{
+       return RS_64(dev->feature_info[I40IW_FEATURE_FW_INFO],
+                    I40IW_FW_VER_MINOR);
+}
+
 /**
  * to_iwdev - get device
  * @ibdev: ib device
index 66dc1ba..6e43e4d 100644 (file)
@@ -85,7 +85,7 @@ struct ietf_mpa_v1 {
        u8 flags;
        u8 rev;
        __be16 priv_data_len;
-       u8 priv_data[0];
+       u8 priv_data[];
 };
 
 #define ietf_mpa_req_resp_frame ietf_mpa_frame
@@ -101,7 +101,7 @@ struct ietf_mpa_v2 {
        u8 rev;
        __be16 priv_data_len;
        struct ietf_rtr_msg rtr_msg;
-       u8 priv_data[0];
+       u8 priv_data[];
 };
 
 struct i40iw_cm_node;
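
The priv_data[0] -> priv_data[] changes above are part of the kernel-wide move
to C99 flexible array members: the struct layout is unchanged, but the compiler
and bounds-checking tooling can now treat the trailing array correctly. A
generic sketch of how such a member is sized and filled, with made-up names:

    #include <stdlib.h>
    #include <string.h>

    struct msg {
            unsigned short priv_data_len;
            unsigned char priv_data[];      /* flexible array member */
    };

    static struct msg *msg_alloc(const void *priv, unsigned short len)
    {
            /* the flexible member adds nothing to sizeof(*m) */
            struct msg *m = malloc(sizeof(*m) + len);

            if (!m)
                    return NULL;
            m->priv_data_len = len;
            memcpy(m->priv_data, priv, len);
            return m;
    }
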
index 4d841a3..e8b4b37 100644 (file)
@@ -1021,6 +1021,95 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values(
        return ret_code;
 }
 
+/**
+ * i40iw_sc_query_rdma_features_done - poll cqp for query features done
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code
+i40iw_sc_query_rdma_features_done(struct i40iw_sc_cqp *cqp)
+{
+       return i40iw_sc_poll_for_cqp_op_done(
+               cqp, I40IW_CQP_OP_QUERY_RDMA_FEATURES, NULL);
+}
+
+/**
+ * i40iw_sc_query_rdma_features - query rdma features
+ * @cqp: struct for cqp hw
+ * @feat_mem: holds PA for HW to use
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code
+i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp,
+                            struct i40iw_dma_mem *feat_mem, u64 scratch)
+{
+       u64 *wqe;
+       u64 header;
+
+       wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+       if (!wqe)
+               return I40IW_ERR_RING_FULL;
+
+       set_64bit_val(wqe, 32, feat_mem->pa);
+
+       header = LS_64(I40IW_CQP_OP_QUERY_RDMA_FEATURES, I40IW_CQPSQ_OPCODE) |
+                LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) | feat_mem->size;
+
+       i40iw_insert_wqe_hdr(wqe, header);
+
+       i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY RDMA FEATURES WQE",
+                       wqe, I40IW_CQP_WQE_SIZE * 8);
+
+       i40iw_sc_cqp_post_sq(cqp);
+
+       return 0;
+}
+
+/**
+ * i40iw_get_rdma_features - get RDMA features
+ * @dev - sc device struct
+ */
+enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev)
+{
+       enum i40iw_status_code ret_code;
+       struct i40iw_dma_mem feat_buf;
+       u64 temp;
+       u16 byte_idx, feat_type, feat_cnt;
+
+       ret_code = i40iw_allocate_dma_mem(dev->hw, &feat_buf,
+                                         I40IW_FEATURE_BUF_SIZE,
+                                         I40IW_FEATURE_BUF_ALIGNMENT);
+
+       if (ret_code)
+               return I40IW_ERR_NO_MEMORY;
+
+       ret_code = i40iw_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
+       if (!ret_code)
+               ret_code = i40iw_sc_query_rdma_features_done(dev->cqp);
+
+       if (ret_code)
+               goto exit;
+
+       get_64bit_val(feat_buf.va, 0, &temp);
+       feat_cnt = RS_64(temp, I40IW_FEATURE_CNT);
+       if (feat_cnt < I40IW_MAX_FEATURES) {
+               ret_code = I40IW_ERR_INVALID_FEAT_CNT;
+               goto exit;
+       } else if (feat_cnt > I40IW_MAX_FEATURES) {
+               i40iw_debug(dev, I40IW_DEBUG_CQP,
+                           "features buf size insufficient\n");
+       }
+
+       for (byte_idx = 0, feat_type = 0; feat_type < I40IW_MAX_FEATURES;
+            feat_type++, byte_idx += 8) {
+               get_64bit_val((u64 *)feat_buf.va, byte_idx, &temp);
+               dev->feature_info[feat_type] = RS_64(temp, I40IW_FEATURE_INFO);
+       }
+exit:
+       i40iw_free_dma_mem(dev->hw, &feat_buf);
+
+       return ret_code;
+}
+
 /**
  * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm
  * @cqp: struct for cqp hw
@@ -4265,6 +4354,13 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
                                true,
                                I40IW_CQP_WAIT_EVENT);
                break;
+       case OP_QUERY_RDMA_FEATURES:
+               values_mem.pa = pcmdinfo->in.u.query_rdma_features.cap_pa;
+               values_mem.va = pcmdinfo->in.u.query_rdma_features.cap_va;
+               status = i40iw_sc_query_rdma_features(
+                       pcmdinfo->in.u.query_rdma_features.cqp, &values_mem,
+                       pcmdinfo->in.u.query_rdma_features.scratch);
+               break;
        default:
                status = I40IW_NOT_SUPPORTED;
                break;
index 6ddaeec..e8367d6 100644 (file)
 #define I40IW_CQP_OP_MANAGE_ARP                 0x0f
 #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP          0x10
 #define I40IW_CQP_OP_MANAGE_PUSH_PAGES          0x11
-#define I40IW_CQP_OP_MANAGE_PE_TEAM             0x12
+#define I40IW_CQP_OP_QUERY_RDMA_FEATURES       0x12
 #define I40IW_CQP_OP_UPLOAD_CONTEXT             0x13
 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
 #define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE   0x15
 #define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED       0x2b
 #define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE   0x2d
 
+#define I40IW_FEATURE_BUF_SIZE                  (8 * I40IW_MAX_FEATURES)
+
+#define I40IW_FW_VER_MINOR_SHIFT        0
+#define I40IW_FW_VER_MINOR_MASK         \
+       (0xffffULL << I40IW_FW_VER_MINOR_SHIFT)
+
+#define I40IW_FW_VER_MAJOR_SHIFT        16
+#define I40IW_FW_VER_MAJOR_MASK                \
+       (0xffffULL << I40IW_FW_VER_MAJOR_SHIFT)
+
+#define I40IW_FEATURE_INFO_SHIFT        0
+#define I40IW_FEATURE_INFO_MASK         \
+       (0xffffULL << I40IW_FEATURE_INFO_SHIFT)
+
+#define I40IW_FEATURE_CNT_SHIFT         32
+#define I40IW_FEATURE_CNT_MASK          \
+       (0xffffULL << I40IW_FEATURE_CNT_SHIFT)
+
 #define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16
 #define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT)
 
@@ -1529,7 +1547,8 @@ enum i40iw_alignment {
        I40IW_AEQ_ALIGNMENT =           0x100,
        I40IW_CEQ_ALIGNMENT =           0x100,
        I40IW_CQ0_ALIGNMENT =           0x100,
-       I40IW_SD_BUF_ALIGNMENT =        0x80
+       I40IW_SD_BUF_ALIGNMENT =        0x80,
+       I40IW_FEATURE_BUF_ALIGNMENT =   0x8
 };
 
 #define I40IW_WQE_SIZE_64      64
@@ -1732,6 +1751,7 @@ enum i40iw_alignment {
 #define OP_REQUESTED_COMMANDS                   31
 #define OP_COMPLETED_COMMANDS                   32
 #define OP_GEN_AE                               33
-#define OP_SIZE_CQP_STAT_ARRAY                  34
+#define OP_QUERY_RDMA_FEATURES                  34
+#define OP_SIZE_CQP_STAT_ARRAY                 35
 
 #endif
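
The new I40IW_FW_VER_* shift/mask pairs above carve a major.minor pair out of
the 64-bit FW_INFO feature word, with RS_64() doing the generic mask-then-shift.
A standalone userspace sketch of the decode and of the value that
i40iw_query_device() later packs into props->fw_ver (constants copied from the
hunk above, helper names invented here):

    #include <stdint.h>
    #include <stdio.h>

    #define FW_VER_MINOR_SHIFT 0
    #define FW_VER_MINOR_MASK  (0xffffULL << FW_VER_MINOR_SHIFT)
    #define FW_VER_MAJOR_SHIFT 16
    #define FW_VER_MAJOR_MASK  (0xffffULL << FW_VER_MAJOR_SHIFT)

    /* mask first, then shift down -- the same operation RS_64() performs */
    static uint64_t rs64(uint64_t val, uint64_t mask, unsigned int shift)
    {
            return (val & mask) >> shift;
    }

    int main(void)
    {
            uint64_t fw_info = 0x00020001;          /* e.g. major 2, minor 1 */
            uint64_t major = rs64(fw_info, FW_VER_MAJOR_MASK, FW_VER_MAJOR_SHIFT);
            uint64_t minor = rs64(fw_info, FW_VER_MINOR_MASK, FW_VER_MINOR_SHIFT);
            uint64_t fw_ver = major << 32 | minor;  /* what lands in props->fw_ver */

            printf("%llu.%llu (0x%llx)\n", (unsigned long long)major,
                   (unsigned long long)minor, (unsigned long long)fw_ver);
            return 0;
    }
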
index 2386143..9c96ece 100644 (file)
@@ -1212,22 +1212,19 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
 {
        struct net_device *dev;
        struct in_device *idev;
-       bool got_lock = true;
        u32 ip_addr;
 
-       if (!rtnl_trylock())
-               got_lock = false;
-
-       for_each_netdev(&init_net, dev) {
+       rcu_read_lock();
+       for_each_netdev_rcu(&init_net, dev) {
                if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) &&
                      (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
-                   (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+                   (dev == iwdev->netdev)) && (READ_ONCE(dev->flags) & IFF_UP)) {
                        const struct in_ifaddr *ifa;
 
-                       idev = in_dev_get(dev);
+                       idev = __in_dev_get_rcu(dev);
                        if (!idev)
                                continue;
-                       in_dev_for_each_ifa_rtnl(ifa, idev) {
+                       in_dev_for_each_ifa_rcu(ifa, idev) {
                                i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
                                            "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
                                             rdma_vlan_dev_vlan_id(dev), dev->dev_addr);
@@ -1239,12 +1236,9 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
                                                       true,
                                                       I40IW_ARP_ADD);
                        }
-
-                       in_dev_put(idev);
                }
        }
-       if (got_lock)
-               rtnl_unlock();
+       rcu_read_unlock();
 }
 
 /**
@@ -1689,6 +1683,12 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
                status = i40iw_setup_ceqs(iwdev, ldev);
                if (status)
                        break;
+
+               status = i40iw_get_rdma_features(dev);
+               if (status)
+                       dev->feature_info[I40IW_FEATURE_FW_INFO] =
+                               I40IW_FW_VER_DEFAULT;
+
                iwdev->init_state = CEQ_CREATED;
                status = i40iw_initialize_hw_resources(iwdev);
                if (status)
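
The i40iw_add_ipv4_addr() hunk above drops the best-effort rtnl_trylock() and
walks the netdev and in_ifaddr lists under RCU instead, which cannot fail and
does not sleep. A minimal sketch of that iteration pattern (kernel context
assumed; the per-address work is reduced to a placeholder comment):

    #include <linux/netdevice.h>
    #include <linux/inetdevice.h>
    #include <linux/rcupdate.h>

    static void walk_ipv4_addrs(void)
    {
            struct net_device *dev;
            struct in_device *idev;
            const struct in_ifaddr *ifa;

            rcu_read_lock();                        /* no sleeping below */
            for_each_netdev_rcu(&init_net, dev) {
                    idev = __in_dev_get_rcu(dev);
                    if (!idev)
                            continue;
                    in_dev_for_each_ifa_rcu(ifa, idev) {
                            /* use ifa->ifa_address here, e.g. program an ARP entry */
                    }
            }
            rcu_read_unlock();
    }
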
index 11d3a2a..4c42956 100644 (file)
@@ -105,6 +105,7 @@ enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp *
                                                           bool poll_registers);
 
 enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count);
+enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev);
 
 void free_sd_mem(struct i40iw_sc_dev *dev);
 
index f7013f1..d1c5855 100644 (file)
@@ -95,7 +95,8 @@ enum i40iw_status_code {
        I40IW_ERR_INVALID_MAC_ADDR = -65,
        I40IW_ERR_BAD_STAG      = -66,
        I40IW_ERR_CQ_COMPL_ERROR = -67,
-       I40IW_ERR_QUEUE_DESTROYED = -68
+       I40IW_ERR_QUEUE_DESTROYED = -68,
+       I40IW_ERR_INVALID_FEAT_CNT = -69
 
 };
 #endif
index adc8d2e..54c323c 100644 (file)
@@ -234,6 +234,11 @@ enum i40iw_hw_stats_index_64b {
        I40IW_HW_STAT_INDEX_MAX_64
 };
 
+enum i40iw_feature_type {
+       I40IW_FEATURE_FW_INFO = 0,
+       I40IW_MAX_FEATURES
+};
+
 struct i40iw_dev_hw_stats_offsets {
        u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
        u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
@@ -501,6 +506,7 @@ struct i40iw_sc_dev {
        const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
 
        struct i40iw_hmc_fpm_misc hmc_fpm_misc;
+       u64 feature_info[I40IW_MAX_FEATURES];
        u32 debug_mask;
        u8 hmc_fn_id;
        bool is_pf;
@@ -1340,6 +1346,12 @@ struct cqp_info {
                        struct i40iw_sc_qp *qp;
                        u64 scratch;
                } suspend_resume;
+               struct {
+                       struct i40iw_sc_cqp *cqp;
+                       void *cap_va;
+                       u64 cap_pa;
+                       u64 scratch;
+               } query_rdma_features;
        } u;
 };
 
index c335de9..1b6fb13 100644 (file)
@@ -64,7 +64,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
                return -EINVAL;
        memset(props, 0, sizeof(*props));
        ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
-       props->fw_ver = I40IW_FW_VERSION;
+       props->fw_ver = i40iw_fw_major_ver(&iwdev->sc_dev) << 32 |
+                       i40iw_fw_minor_ver(&iwdev->sc_dev);
        props->device_cap_flags = iwdev->device_cap_flags;
        props->vendor_id = iwdev->ldev->pcidev->vendor;
        props->vendor_part_id = iwdev->ldev->pcidev->device;
@@ -617,7 +618,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
        iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
 
        if (init_attr->qp_type != IB_QPT_RC) {
-               err_code = -EINVAL;
+               err_code = -EOPNOTSUPP;
                goto error;
        }
        if (iwdev->push_mode)
@@ -2534,10 +2535,11 @@ static const char * const i40iw_hw_stat_names[] = {
 
 static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str)
 {
-       u32 firmware_version = I40IW_FW_VERSION;
+       struct i40iw_device *iwdev = to_iwdev(dev);
 
-       snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", firmware_version,
-                (firmware_version & 0x000000ff));
+       snprintf(str, IB_FW_VERSION_NAME_MAX, "%llu.%llu",
+                i40iw_fw_major_ver(&iwdev->sc_dev),
+                i40iw_fw_minor_ver(&iwdev->sc_dev));
 }
 
 /**
index 2f5d9b1..a66518a 100644 (file)
@@ -434,9 +434,6 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        return real_index;
 }
 
-#define field_avail(type, fld, sz) (offsetof(type, fld) + \
-                                   sizeof(((type *)0)->fld) <= (sz))
-
 static int mlx4_ib_query_device(struct ib_device *ibdev,
                                struct ib_device_attr *props,
                                struct ib_udata *uhw)
@@ -447,7 +444,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
        int err;
        int have_ib_ports;
        struct mlx4_uverbs_ex_query_device cmd;
-       struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
+       struct mlx4_uverbs_ex_query_device_resp resp = {};
        struct mlx4_clock_params clock_params;
 
        if (uhw->inlen) {
@@ -602,7 +599,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                        sizeof(struct mlx4_wqe_data_seg);
        }
 
-       if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+       if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) {
                if (props->rss_caps.supported_qpts) {
                        resp.rss_caps.rx_hash_function =
                                MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
@@ -626,7 +623,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                                       sizeof(resp.rss_caps);
        }
 
-       if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+       if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) {
                if (dev->dev->caps.max_gso_sz &&
                    ((mlx4_ib_port_link_layer(ibdev, 1) ==
                    IB_LINK_LAYER_ETHERNET) ||
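
The field_avail() removal in the mlx4_ib_query_device() hunk above (and the
matching mlx5 conversions further down) replaces the open-coded
offsetof()+sizeof() check with the kernel's offsetofend() helper; both answer
the same question, namely whether the user-supplied response buffer is long
enough to hold everything up to and including a given field. A condensed
userspace sketch of the equivalence, using an invented response struct:

    #include <stddef.h>

    /* mirrors the kernel's offsetofend() helper */
    #define offsetofend(TYPE, MEMBER) \
            (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

    struct resp {
            unsigned int comp_mask;
            unsigned int response_length;
            unsigned long long rss_caps;    /* illustrative trailing field */
    };

    /* old style: field_avail(typeof(resp), rss_caps, outlen) */
    static int field_avail_rss(size_t outlen)
    {
            return offsetof(struct resp, rss_caps) +
                   sizeof(((struct resp *)0)->rss_caps) <= outlen;
    }

    /* new style: same result, clearer intent */
    static int field_fits_rss(size_t outlen)
    {
            return offsetofend(struct resp, rss_caps) <= outlen;
    }
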
index 26425dd..2f9f789 100644 (file)
@@ -1636,7 +1636,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
        }
        default:
                /* Don't support raw QPs */
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
        }
 
        return &qp->ibqp;
index d0a043c..2a33480 100644 (file)
@@ -8,3 +8,4 @@ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += qos.o
index 8ba439f..de4da92 100644 (file)
@@ -47,6 +47,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
        "rp_byte_reset",
        "rp_threshold",
        "rp_ai_rate",
+       "rp_max_rate",
        "rp_hai_rate",
        "rp_min_dec_fac",
        "rp_min_rate",
@@ -56,6 +57,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
        "rp_rate_reduce_monitor_period",
        "rp_initial_alpha_value",
        "rp_gd",
+       "np_min_time_between_cnps",
        "np_cnp_dscp",
        "np_cnp_prio_mode",
        "np_cnp_prio",
@@ -66,6 +68,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
 #define MLX5_IB_RP_TIME_RESET_ATTR                     BIT(3)
 #define MLX5_IB_RP_BYTE_RESET_ATTR                     BIT(4)
 #define MLX5_IB_RP_THRESHOLD_ATTR                      BIT(5)
+#define MLX5_IB_RP_MAX_RATE_ATTR                       BIT(6)
 #define MLX5_IB_RP_AI_RATE_ATTR                                BIT(7)
 #define MLX5_IB_RP_HAI_RATE_ATTR                       BIT(8)
 #define MLX5_IB_RP_MIN_DEC_FAC_ATTR                    BIT(9)
@@ -77,6 +80,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
 #define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR            BIT(15)
 #define MLX5_IB_RP_GD_ATTR                             BIT(16)
 
+#define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR          BIT(2)
 #define MLX5_IB_NP_CNP_DSCP_ATTR                       BIT(3)
 #define MLX5_IB_NP_CNP_PRIO_MODE_ATTR                  BIT(4)
 
@@ -111,6 +115,9 @@ static u32 mlx5_get_cc_param_val(void *field, int offset)
        case MLX5_IB_DBG_CC_RP_AI_RATE:
                return MLX5_GET(cong_control_r_roce_ecn_rp, field,
                                rpg_ai_rate);
+       case MLX5_IB_DBG_CC_RP_MAX_RATE:
+               return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+                               rpg_max_rate);
        case MLX5_IB_DBG_CC_RP_HAI_RATE:
                return MLX5_GET(cong_control_r_roce_ecn_rp, field,
                                rpg_hai_rate);
@@ -138,6 +145,9 @@ static u32 mlx5_get_cc_param_val(void *field, int offset)
        case MLX5_IB_DBG_CC_RP_GD:
                return MLX5_GET(cong_control_r_roce_ecn_rp, field,
                                rpg_gd);
+       case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
+               return MLX5_GET(cong_control_r_roce_ecn_np, field,
+                               min_time_between_cnps);
        case MLX5_IB_DBG_CC_NP_CNP_DSCP:
                return MLX5_GET(cong_control_r_roce_ecn_np, field,
                                cnp_dscp);
@@ -186,6 +196,11 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
                MLX5_SET(cong_control_r_roce_ecn_rp, field,
                         rpg_ai_rate, var);
                break;
+       case MLX5_IB_DBG_CC_RP_MAX_RATE:
+               *attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR;
+               MLX5_SET(cong_control_r_roce_ecn_rp, field,
+                        rpg_max_rate, var);
+               break;
        case MLX5_IB_DBG_CC_RP_HAI_RATE:
                *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
                MLX5_SET(cong_control_r_roce_ecn_rp, field,
@@ -231,6 +246,11 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
                MLX5_SET(cong_control_r_roce_ecn_rp, field,
                         rpg_gd, var);
                break;
+       case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
+               *attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR;
+               MLX5_SET(cong_control_r_roce_ecn_np, field,
+                        min_time_between_cnps, var);
+               break;
        case MLX5_IB_DBG_CC_NP_CNP_DSCP:
                *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
                MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
index 3dec3de..146ba29 100644 (file)
@@ -715,17 +715,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
        struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
                udata, struct mlx5_ib_ucontext, ibucontext);
 
-       ucmdlen = udata->inlen < sizeof(ucmd) ?
-                 (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd);
+       ucmdlen = min(udata->inlen, sizeof(ucmd));
+       if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
+               return -EINVAL;
 
        if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
                return -EFAULT;
 
-       if (ucmdlen == sizeof(ucmd) &&
-           (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD)))
+       if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
+                           MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)))
                return -EINVAL;
 
-       if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
+       if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
+           ucmd.reserved0 || ucmd.reserved1)
                return -EINVAL;
 
        *cqe_size = ucmd.cqe_size;
@@ -762,7 +764,14 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
        MLX5_SET(cqc, cqc, log_page_size,
                 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
-       *index = context->bfregi.sys_pages[0];
+       if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
+               *index = ucmd.uar_page_index;
+       } else if (context->bfregi.lib_uar_dyn) {
+               err = -EINVAL;
+               goto err_cqb;
+       } else {
+               *index = context->bfregi.sys_pages[0];
+       }
 
        if (ucmd.cqe_comp_en == 1) {
                int mini_cqe_format;
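
The create_cq_user() change above follows the usual pattern for extensible user
commands: copy at most sizeof(ucmd) bytes, refuse buffers too short to carry
the mandatory fields, and reject unknown flag bits and nonzero reserved fields
so they stay usable for future extensions. A compact sketch of that validation,
with an invented command layout (the real struct lives in the mlx5 uapi
headers):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct create_cmd {
            uint64_t buf_addr;      /* mandatory */
            uint32_t cqe_size;      /* mandatory */
            uint32_t reserved;      /* must be zero */
            uint64_t flags;         /* optional trailing extension */
    };

    #define CMD_FLAG_A (1ULL << 0)
    #define CMD_FLAG_B (1ULL << 1)

    static int parse_cmd(const void *udata, size_t inlen, struct create_cmd *cmd)
    {
            size_t len = inlen < sizeof(*cmd) ? inlen : sizeof(*cmd);

            /* older callers may legitimately omit the trailing flags field */
            if (len < offsetof(struct create_cmd, flags))
                    return -1;              /* -EINVAL in the kernel */

            memset(cmd, 0, sizeof(*cmd));   /* unsupplied tail reads as zero */
            memcpy(cmd, udata, len);

            if (cmd->flags & ~(CMD_FLAG_A | CMD_FLAG_B))
                    return -1;              /* unknown flag bits */
            if (cmd->reserved)
                    return -1;              /* reserved must be zero */
            return 0;
    }
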
index dbee17d..862b7bf 100644 (file)
@@ -35,6 +35,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
        case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
                *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
                break;
+       case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
+               *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
+               break;
        default:
                return -EINVAL;
        }
index 3efa749..6679756 100644 (file)
@@ -39,9 +39,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
-#if defined(CONFIG_X86)
-#include <asm/memtype.h>
-#endif
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
@@ -898,7 +895,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                        props->raw_packet_caps |=
                                IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
 
-               if (field_avail(typeof(resp), tso_caps, uhw_outlen)) {
+               if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) {
                        max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
                        if (max_tso) {
                                resp.tso_caps.max_tso = 1 << max_tso;
@@ -908,7 +905,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                        }
                }
 
-               if (field_avail(typeof(resp), rss_caps, uhw_outlen)) {
+               if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) {
                        resp.rss_caps.rx_hash_function =
                                                MLX5_RX_HASH_FUNC_TOEPLITZ;
                        resp.rss_caps.rx_hash_fields_mask =
@@ -928,9 +925,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                        resp.response_length += sizeof(resp.rss_caps);
                }
        } else {
-               if (field_avail(typeof(resp), tso_caps, uhw_outlen))
+               if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen)
                        resp.response_length += sizeof(resp.tso_caps);
-               if (field_avail(typeof(resp), rss_caps, uhw_outlen))
+               if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen)
                        resp.response_length += sizeof(resp.rss_caps);
        }
 
@@ -1072,7 +1069,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                                                MLX5_MAX_CQ_PERIOD;
        }
 
-       if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) {
+       if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) {
                resp.response_length += sizeof(resp.cqe_comp_caps);
 
                if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
@@ -1090,7 +1087,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                }
        }
 
-       if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) &&
+       if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen &&
            raw_support) {
                if (MLX5_CAP_QOS(mdev, packet_pacing) &&
                    MLX5_CAP_GEN(mdev, qos)) {
@@ -1108,8 +1105,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                resp.response_length += sizeof(resp.packet_pacing_caps);
        }
 
-       if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
-                       uhw_outlen)) {
+       if (offsetofend(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes) <=
+           uhw_outlen) {
                if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
                        resp.mlx5_ib_support_multi_pkt_send_wqes =
                                MLX5_IB_ALLOW_MPW;
@@ -1122,7 +1119,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                        sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
        }
 
-       if (field_avail(typeof(resp), flags, uhw_outlen)) {
+       if (offsetofend(typeof(resp), flags) <= uhw_outlen) {
                resp.response_length += sizeof(resp.flags);
 
                if (MLX5_CAP_GEN(mdev, cqe_compression_128))
@@ -1138,7 +1135,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
        }
 
-       if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) {
+       if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) {
                resp.response_length += sizeof(resp.sw_parsing_caps);
                if (MLX5_CAP_ETH(mdev, swp)) {
                        resp.sw_parsing_caps.sw_parsing_offloads |=
@@ -1158,7 +1155,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                }
        }
 
-       if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) &&
+       if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen &&
            raw_support) {
                resp.response_length += sizeof(resp.striding_rq_caps);
                if (MLX5_CAP_GEN(mdev, striding_rq)) {
@@ -1181,7 +1178,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                }
        }
 
-       if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) {
+       if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) {
                resp.response_length += sizeof(resp.tunnel_offloads_caps);
                if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
                        resp.tunnel_offloads_caps |=
@@ -1192,12 +1189,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
                        resp.tunnel_offloads_caps |=
                                MLX5_IB_TUNNELED_OFFLOADS_GRE;
-               if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
-                   MLX5_FLEX_PROTO_CW_MPLS_GRE)
+               if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre))
                        resp.tunnel_offloads_caps |=
                                MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
-               if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
-                   MLX5_FLEX_PROTO_CW_MPLS_UDP)
+               if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp))
                        resp.tunnel_offloads_caps |=
                                MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
        }
@@ -1791,6 +1786,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
                                     max_cqe_version);
        u32 dump_fill_mkey;
        bool lib_uar_4k;
+       bool lib_uar_dyn;
 
        if (!dev->ib_active)
                return -EAGAIN;
@@ -1849,8 +1845,14 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
        }
 
        lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+       lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
        bfregi = &context->bfregi;
 
+       if (lib_uar_dyn) {
+               bfregi->lib_uar_dyn = lib_uar_dyn;
+               goto uar_done;
+       }
+
        /* updates req->total_num_bfregs */
        err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
        if (err)
@@ -1877,6 +1879,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
        if (err)
                goto out_sys_pages;
 
+uar_done:
        if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
                err = mlx5_ib_devx_create(dev, true);
                if (err < 0)
@@ -1898,19 +1901,19 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
 
-       resp.tot_bfregs = req.total_num_bfregs;
+       resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
        resp.num_ports = dev->num_ports;
 
-       if (field_avail(typeof(resp), cqe_version, udata->outlen))
+       if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
                resp.response_length += sizeof(resp.cqe_version);
 
-       if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
+       if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) {
                resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
                                      MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
                resp.response_length += sizeof(resp.cmds_supp_uhw);
        }
 
-       if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
+       if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) {
                if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
                        mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
                        resp.eth_min_inline++;
@@ -1918,7 +1921,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
                resp.response_length += sizeof(resp.eth_min_inline);
        }
 
-       if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
+       if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) {
                if (mdev->clock_info)
                        resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
                resp.response_length += sizeof(resp.clock_info_versions);
@@ -1930,7 +1933,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
         * pretend we don't support reading the HCA's core clock. This is also
         * forced by mmap function.
         */
-       if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+       if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
                if (PAGE_SIZE <= 4096) {
                        resp.comp_mask |=
                                MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
@@ -1940,18 +1943,18 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
                resp.response_length += sizeof(resp.hca_core_clock_offset);
        }
 
-       if (field_avail(typeof(resp), log_uar_size, udata->outlen))
+       if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
                resp.response_length += sizeof(resp.log_uar_size);
 
-       if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
+       if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
                resp.response_length += sizeof(resp.num_uars_per_page);
 
-       if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
+       if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
                resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
                resp.response_length += sizeof(resp.num_dyn_bfregs);
        }
 
-       if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
+       if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) {
                if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
                        resp.dump_fill_mkey = dump_fill_mkey;
                        resp.comp_mask |=
@@ -2026,6 +2029,17 @@ static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
        return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
 }
 
+static u64 uar_index2paddress(struct mlx5_ib_dev *dev,
+                                int uar_idx)
+{
+       unsigned int fw_uars_per_page;
+
+       fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+                               MLX5_UARS_IN_PAGE : 1;
+
+       return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE);
+}
+
 static int get_command(unsigned long offset)
 {
        return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
@@ -2110,6 +2124,11 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
                mutex_unlock(&var_table->bitmap_lock);
                kfree(mentry);
                break;
+       case MLX5_IB_MMAP_TYPE_UAR_WC:
+       case MLX5_IB_MMAP_TYPE_UAR_NC:
+               mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
+               kfree(mentry);
+               break;
        default:
                WARN_ON(true);
        }
@@ -2130,6 +2149,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
        int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
                                bfregi->num_static_sys_pages;
 
+       if (bfregi->lib_uar_dyn)
+               return -EINVAL;
+
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;
 
@@ -2147,14 +2169,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
        switch (cmd) {
        case MLX5_IB_MMAP_WC_PAGE:
        case MLX5_IB_MMAP_ALLOC_WC:
-/* Some architectures don't support WC memory */
-#if defined(CONFIG_X86)
-               if (!pat_enabled())
-                       return -EPERM;
-#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
-                       return -EPERM;
-#endif
-       /* fall through */
        case MLX5_IB_MMAP_REGULAR_PAGE:
                /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
                prot = pgprot_writecombine(vma->vm_page_prot);
@@ -2269,7 +2283,8 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
 
        mentry = to_mmmap(entry);
        pfn = (mentry->address >> PAGE_SHIFT);
-       if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR)
+       if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
+           mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
                prot = pgprot_noncached(vma->vm_page_prot);
        else
                prot = pgprot_writecombine(vma->vm_page_prot);
@@ -2300,9 +2315,12 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
        command = get_command(vma->vm_pgoff);
        switch (command) {
        case MLX5_IB_MMAP_WC_PAGE:
+       case MLX5_IB_MMAP_ALLOC_WC:
+               if (!dev->wc_support)
+                       return -EPERM;
+               fallthrough;
        case MLX5_IB_MMAP_NC_PAGE:
        case MLX5_IB_MMAP_REGULAR_PAGE:
-       case MLX5_IB_MMAP_ALLOC_WC:
                return uar_mmap(dev, command, vma, context);
 
        case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
@@ -4046,6 +4064,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
                        BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
                                                       log_max_ft_size));
                priority = fs_matcher->priority;
+       } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+               max_table_size =
+                       BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+                                                      log_max_ft_size));
+               priority = fs_matcher->priority;
        }
 
        max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -4062,6 +4085,8 @@ _get_flow_table(struct mlx5_ib_dev *dev,
                prio = &dev->flow_db->fdb;
        else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
                prio = &dev->flow_db->rdma_rx[priority];
+       else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+               prio = &dev->flow_db->rdma_tx[priority];
 
        if (!prio)
                return ERR_PTR(-EINVAL);
@@ -6090,9 +6115,9 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
        mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-static int var_obj_cleanup(struct ib_uobject *uobject,
-                          enum rdma_remove_reason why,
-                          struct uverbs_attr_bundle *attrs)
+static int mmap_obj_cleanup(struct ib_uobject *uobject,
+                           enum rdma_remove_reason why,
+                           struct uverbs_attr_bundle *attrs)
 {
        struct mlx5_user_mmap_entry *obj = uobject->object;
 
@@ -6100,6 +6125,16 @@ static int var_obj_cleanup(struct ib_uobject *uobject,
        return 0;
 }
 
+static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c,
+                                           struct mlx5_user_mmap_entry *entry,
+                                           size_t length)
+{
+       return rdma_user_mmap_entry_insert_range(
+               &c->ibucontext, &entry->rdma_entry, length,
+               (MLX5_IB_MMAP_OFFSET_START << 16),
+               ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1));
+}
+
 static struct mlx5_user_mmap_entry *
 alloc_var_entry(struct mlx5_ib_ucontext *c)
 {
@@ -6130,10 +6165,8 @@ alloc_var_entry(struct mlx5_ib_ucontext *c)
        entry->page_idx = page_idx;
        entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
 
-       err = rdma_user_mmap_entry_insert_range(
-               &c->ibucontext, &entry->rdma_entry, var_table->stride_size,
-               MLX5_IB_MMAP_OFFSET_START << 16,
-               (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1);
+       err = mlx5_rdma_user_mmap_entry_insert(c, entry,
+                                              var_table->stride_size);
        if (err)
                goto err_insert;
 
@@ -6217,7 +6250,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
                        UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
-                           UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup),
+                           UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
                            &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
                            &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
 
@@ -6229,6 +6262,134 @@ static bool var_is_supported(struct ib_device *device)
                        MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
 }
 
+static struct mlx5_user_mmap_entry *
+alloc_uar_entry(struct mlx5_ib_ucontext *c,
+               enum mlx5_ib_uapi_uar_alloc_type alloc_type)
+{
+       struct mlx5_user_mmap_entry *entry;
+       struct mlx5_ib_dev *dev;
+       u32 uar_index;
+       int err;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return ERR_PTR(-ENOMEM);
+
+       dev = to_mdev(c->ibucontext.device);
+       err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+       if (err)
+               goto end;
+
+       entry->page_idx = uar_index;
+       entry->address = uar_index2paddress(dev, uar_index);
+       if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+               entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC;
+       else
+               entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC;
+
+       err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE);
+       if (err)
+               goto err_insert;
+
+       return entry;
+
+err_insert:
+       mlx5_cmd_free_uar(dev->mdev, uar_index);
+end:
+       kfree(entry);
+       return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
+       struct uverbs_attr_bundle *attrs)
+{
+       struct ib_uobject *uobj = uverbs_attr_get_uobject(
+               attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
+       enum mlx5_ib_uapi_uar_alloc_type alloc_type;
+       struct mlx5_ib_ucontext *c;
+       struct mlx5_user_mmap_entry *entry;
+       u64 mmap_offset;
+       u32 length;
+       int err;
+
+       c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+       if (IS_ERR(c))
+               return PTR_ERR(c);
+
+       err = uverbs_get_const(&alloc_type, attrs,
+                              MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE);
+       if (err)
+               return err;
+
+       if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF &&
+           alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
+               return -EOPNOTSUPP;
+
+       if (!to_mdev(c->ibucontext.device)->wc_support &&
+           alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+               return -EOPNOTSUPP;
+
+       entry = alloc_uar_entry(c, alloc_type);
+       if (IS_ERR(entry))
+               return PTR_ERR(entry);
+
+       mmap_offset = mlx5_entry_to_mmap_offset(entry);
+       length = entry->rdma_entry.npages * PAGE_SIZE;
+       uobj->object = entry;
+
+       err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+                            &mmap_offset, sizeof(mmap_offset));
+       if (err)
+               goto err;
+
+       err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+                            &entry->page_idx, sizeof(entry->page_idx));
+       if (err)
+               goto err;
+
+       err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+                            &length, sizeof(length));
+       if (err)
+               goto err;
+
+       return 0;
+
+err:
+       rdma_user_mmap_entry_remove(&entry->rdma_entry);
+       return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+       MLX5_IB_METHOD_UAR_OBJ_ALLOC,
+       UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE,
+                       MLX5_IB_OBJECT_UAR,
+                       UVERBS_ACCESS_NEW,
+                       UA_MANDATORY),
+       UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+                            enum mlx5_ib_uapi_uar_alloc_type,
+                            UA_MANDATORY),
+       UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+                          UVERBS_ATTR_TYPE(u32),
+                          UA_MANDATORY),
+       UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+                          UVERBS_ATTR_TYPE(u32),
+                          UA_MANDATORY),
+       UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+                           UVERBS_ATTR_TYPE(u64),
+                           UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+       MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+       UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE,
+                       MLX5_IB_OBJECT_UAR,
+                       UVERBS_ACCESS_DESTROY,
+                       UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
+                           UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
+                           &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC),
+                           &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
+
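For reference, the MMAP_OFFSET and MMAP_LENGTH values copied back above follow the generic rdma_user_mmap_entry convention: the byte offset handed to user space is the entry's page offset shifted by PAGE_SHIFT, and the length is npages * PAGE_SIZE. A minimal standalone sketch of that round trip, using invented model types rather than the kernel structures:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12
#define MODEL_PAGE_SIZE  (1UL << MODEL_PAGE_SHIFT)

/* stand-in for struct rdma_user_mmap_entry */
struct model_mmap_entry {
	uint64_t start_pgoff;	/* page offset assigned when the entry is inserted */
	size_t npages;		/* mapping length in pages */
};

/* mirrors rdma_user_mmap_get_offset(): byte offset reported to user space */
static uint64_t model_mmap_offset(const struct model_mmap_entry *e)
{
	return e->start_pgoff << MODEL_PAGE_SHIFT;
}

int main(void)
{
	struct model_mmap_entry uar = { .start_pgoff = 0x42, .npages = 1 };
	uint64_t offset = model_mmap_offset(&uar);
	uint32_t length = uar.npages * MODEL_PAGE_SIZE;

	/* user space passes these values straight to mmap() on the uverbs fd */
	printf("mmap offset 0x%llx, length %u\n",
	       (unsigned long long)offset, length);
	return 0;
}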
 ADD_UVERBS_ATTRIBUTES_SIMPLE(
        mlx5_ib_dm,
        UVERBS_OBJECT_DM,
@@ -6253,12 +6414,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 static const struct uapi_definition mlx5_ib_defs[] = {
        UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
        UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+       UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
 
        UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
                                &mlx5_ib_flow_action),
        UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
                                UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
        {}
 };
 
@@ -6392,7 +6555,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
        spin_lock_init(&dev->reset_flow_resource_lock);
        xa_init(&dev->odp_mkeys);
        xa_init(&dev->sig_mrs);
-       spin_lock_init(&dev->mkey_lock);
+       atomic_set(&dev->mkey_var, 0);
 
        spin_lock_init(&dev->dm.lock);
        dev->dm.dev = mdev;
@@ -6548,7 +6711,8 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
                                        doorbell_bar_offset);
        bar_size = (1ULL << log_doorbell_bar_size) * 4096;
        var_table->stride_size = 1ULL << log_doorbell_stride;
-       var_table->num_var_hw_entries = div64_u64(bar_size, var_table->stride_size);
+       var_table->num_var_hw_entries = div_u64(bar_size,
+                                               var_table->stride_size);
        mutex_init(&var_table->bitmap_lock);
        var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
                                          GFP_KERNEL);
@@ -7080,6 +7244,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
                     mlx5_ib_stage_counters_init,
                     mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
+                    mlx5_ib_stage_cong_debugfs_init,
+                    mlx5_ib_stage_cong_debugfs_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_UAR,
                     mlx5_ib_stage_uar_init,
                     mlx5_ib_stage_uar_cleanup),
index b90a364..c19ec9f 100644 (file)
@@ -316,7 +316,7 @@ int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
        if (!dev->mdev->roce.roce_en &&
            port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
                if (mlx5_core_is_pf(dev->mdev))
-                       dev->wc_support = true;
+                       dev->wc_support = arch_can_pci_mmap_wc();
                return 0;
        }
 
index fc19dc1..a4e5223 100644 (file)
@@ -64,8 +64,6 @@
        dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,     \
                 __LINE__, current->pid, ##arg)
 
-#define field_avail(type, fld, sz) (offsetof(type, fld) +              \
-                                   sizeof(((type *)0)->fld) <= (sz))
 #define MLX5_IB_DEFAULT_UIDX 0xffffff
 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
 
@@ -126,11 +124,27 @@ enum {
 enum mlx5_ib_mmap_type {
        MLX5_IB_MMAP_TYPE_MEMIC = 1,
        MLX5_IB_MMAP_TYPE_VAR = 2,
+       MLX5_IB_MMAP_TYPE_UAR_WC = 3,
+       MLX5_IB_MMAP_TYPE_UAR_NC = 4,
 };
 
-#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)                                        \
-       (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
-#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+struct mlx5_bfreg_info {
+       u32 *sys_pages;
+       int num_low_latency_bfregs;
+       unsigned int *count;
+
+       /*
+        * protect bfreg allocation data structs
+        */
+       struct mutex lock;
+       u32 ver;
+       u8 lib_uar_4k : 1;
+       u8 lib_uar_dyn : 1;
+       u32 num_sys_pages;
+       u32 num_static_sys_pages;
+       u32 total_num_bfregs;
+       u32 num_dyn_bfregs;
+};
 
 struct mlx5_ib_ucontext {
        struct ib_ucontext      ibucontext;
@@ -203,6 +217,11 @@ struct mlx5_ib_flow_matcher {
        u8                      match_criteria_enable;
 };
 
+struct mlx5_ib_pp {
+       u16 index;
+       struct mlx5_core_dev *mdev;
+};
+
 struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        prios[MLX5_IB_NUM_FLOW_FT];
        struct mlx5_ib_flow_prio        egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -210,6 +229,7 @@ struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        egress[MLX5_IB_NUM_EGRESS_FTS];
        struct mlx5_ib_flow_prio        fdb;
        struct mlx5_ib_flow_prio        rdma_rx[MLX5_IB_NUM_FLOW_FT];
+       struct mlx5_ib_flow_prio        rdma_tx[MLX5_IB_NUM_FLOW_FT];
        struct mlx5_flow_table          *lag_demux_ft;
        /* Protect flow steering bypass flow tables
         * when add/del flow rules.
@@ -618,8 +638,8 @@ struct mlx5_ib_mr {
        struct ib_umem         *umem;
        struct mlx5_shared_mr_info      *smr_info;
        struct list_head        list;
-       int                     order;
-       bool                    allocated_from_cache;
+       unsigned int            order;
+       struct mlx5_cache_ent  *cache_ent;
        int                     npages;
        struct mlx5_ib_dev     *dev;
        u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
@@ -701,22 +721,34 @@ struct mlx5_cache_ent {
        u32                     access_mode;
        u32                     page;
 
-       u32                     size;
-       u32                     cur;
+       u8 disabled:1;
+       u8 fill_to_high_water:1;
+
+       /*
+        * - available_mrs is the length of the list head, i.e. the number of
+        *   MRs available for immediate allocation.
+        * - total_mrs is available_mrs plus all in-use MRs that could be
+        *   returned to the cache.
+        * - limit is the low water mark for available_mrs; 2 * limit is the
+        *   upper water mark.
+        * - pending is the number of MRs currently being created.
+        */
+       u32 total_mrs;
+       u32 available_mrs;
+       u32 limit;
+       u32 pending;
+
+       /* Statistics */
        u32                     miss;
-       u32                     limit;
 
        struct mlx5_ib_dev     *dev;
        struct work_struct      work;
        struct delayed_work     dwork;
-       int                     pending;
-       struct completion       compl;
 };
 
 struct mlx5_mr_cache {
        struct workqueue_struct *wq;
        struct mlx5_cache_ent   ent[MAX_MR_CACHE_ENTRIES];
-       int                     stopped;
        struct dentry           *root;
        unsigned long           last_add;
 };
@@ -794,6 +826,7 @@ enum mlx5_ib_dbg_cc_types {
        MLX5_IB_DBG_CC_RP_BYTE_RESET,
        MLX5_IB_DBG_CC_RP_THRESHOLD,
        MLX5_IB_DBG_CC_RP_AI_RATE,
+       MLX5_IB_DBG_CC_RP_MAX_RATE,
        MLX5_IB_DBG_CC_RP_HAI_RATE,
        MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
        MLX5_IB_DBG_CC_RP_MIN_RATE,
@@ -803,6 +836,7 @@ enum mlx5_ib_dbg_cc_types {
        MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
        MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
        MLX5_IB_DBG_CC_RP_GD,
+       MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS,
        MLX5_IB_DBG_CC_NP_CNP_DSCP,
        MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
        MLX5_IB_DBG_CC_NP_CNP_PRIO,
@@ -986,19 +1020,16 @@ struct mlx5_ib_dev {
         */
        struct mutex                    cap_mask_mutex;
        u8                              ib_active:1;
-       u8                              fill_delay:1;
        u8                              is_rep:1;
        u8                              lag_active:1;
        u8                              wc_support:1;
+       u8                              fill_delay;
        struct umr_common               umrc;
        /* sync used page count stats
         */
        struct mlx5_ib_resources        devr;
 
-       /* protect mkey key part */
-       spinlock_t                      mkey_lock;
-       u8                              mkey_key;
-
+       atomic_t                        mkey_var;
        struct mlx5_mr_cache            cache;
        struct timer_list               delay_timer;
        /* Prevents soft lock on massive reg MRs */
@@ -1268,7 +1299,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+                                      unsigned int entry);
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
 
@@ -1388,6 +1420,7 @@ int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
 
 extern const struct uapi_definition mlx5_ib_devx_defs[];
 extern const struct uapi_definition mlx5_ib_flow_defs[];
+extern const struct uapi_definition mlx5_ib_qos_defs[];
 
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
@@ -1477,12 +1510,11 @@ static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
 {
        u8 cqe_version = ucontext->cqe_version;
 
-       if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
-           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+       if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version &&
+           (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
                return 0;
 
-       if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
-              !!cqe_version))
+       if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version)
                return -EINVAL;
 
        return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
@@ -1495,12 +1527,11 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
 {
        u8 cqe_version = ucontext->cqe_version;
 
-       if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
-           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+       if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version &&
+           (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
                return 0;
 
-       if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
-              !!cqe_version))
+       if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version)
                return -EINVAL;
 
        return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
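Both helpers above rely on the same offsetofend() idiom that replaces field_avail(): a field in a user command is trusted only when the length user space passed in covers the whole field. A self-contained illustration with a made-up command struct (not the mlx5 uAPI), using a macro equivalent to the kernel's offsetofend():

#include <stddef.h>
#include <stdio.h>

#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

struct demo_create_cmd {
	unsigned long long buf_addr;
	unsigned int flags;
	unsigned int uidx;	/* field added in a later ABI revision */
};

/* non-zero if 'uidx' is fully contained in the user-supplied length */
static int uidx_present(size_t inlen)
{
	return offsetofend(struct demo_create_cmd, uidx) <= inlen;
}

int main(void)
{
	/* old user space stops after 'flags': uidx must be ignored */
	printf("%d\n", uidx_present(offsetofend(struct demo_create_cmd, flags)));
	/* new user space passes the whole struct: uidx may be used */
	printf("%d\n", uidx_present(sizeof(struct demo_create_cmd)));
	return 0;
}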
@@ -1539,7 +1570,9 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
            MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
                return false;
 
-       if (access_flags & IB_ACCESS_RELAXED_ORDERING)
+       if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+           (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) ||
+            MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)))
                return false;
 
        return true;
index 8508af5..a401931 100644 (file)
@@ -54,12 +54,8 @@ static void
 assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
                    u32 *in)
 {
+       u8 key = atomic_inc_return(&dev->mkey_var);
        void *mkc;
-       u8 key;
-
-       spin_lock_irq(&dev->mkey_lock);
-       key = dev->mkey_key++;
-       spin_unlock_irq(&dev->mkey_lock);
 
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, mkey_7_0, key);
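The variant byte mixed into each mkey now comes from a plain atomic counter instead of a spinlock-protected u8. A rough userspace analogue of the new scheme (C11 atomics standing in for the kernel's atomic_t; helper name invented):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint mkey_var;	/* counterpart of dev->mkey_var */

static uint8_t next_mkey_variant(void)
{
	/* atomic_inc_return() equivalent; truncation to 8 bits mirrors the
	 * mkey_7_0 field that MLX5_SET() writes */
	return (uint8_t)(atomic_fetch_add(&mkey_var, 1) + 1);
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("mkey variant %u\n", (unsigned int)next_mkey_variant());
	return 0;
}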
@@ -90,6 +86,7 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
 
 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 {
@@ -103,16 +100,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
        return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 }
 
-static int order2idx(struct mlx5_ib_dev *dev, int order)
-{
-       struct mlx5_mr_cache *cache = &dev->cache;
-
-       if (order < cache->ent[0].order)
-               return 0;
-       else
-               return order - cache->ent[0].order;
-}
-
 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 {
        return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
@@ -124,18 +111,16 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
        struct mlx5_ib_mr *mr =
                container_of(context, struct mlx5_ib_mr, cb_work);
        struct mlx5_ib_dev *dev = mr->dev;
-       struct mlx5_mr_cache *cache = &dev->cache;
-       int c = order2idx(dev, mr->order);
-       struct mlx5_cache_ent *ent = &cache->ent[c];
+       struct mlx5_cache_ent *ent = mr->cache_ent;
        unsigned long flags;
 
-       spin_lock_irqsave(&ent->lock, flags);
-       ent->pending--;
-       spin_unlock_irqrestore(&ent->lock, flags);
        if (status) {
                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
                kfree(mr);
-               dev->fill_delay = 1;
+               spin_lock_irqsave(&ent->lock, flags);
+               ent->pending--;
+               WRITE_ONCE(dev->fill_delay, 1);
+               spin_unlock_irqrestore(&ent->lock, flags);
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }
@@ -144,23 +129,44 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
        mr->mmkey.key |= mlx5_idx_to_mkey(
                MLX5_GET(create_mkey_out, mr->out, mkey_index));
 
-       cache->last_add = jiffies;
+       WRITE_ONCE(dev->cache.last_add, jiffies);
 
        spin_lock_irqsave(&ent->lock, flags);
        list_add_tail(&mr->list, &ent->head);
-       ent->cur++;
-       ent->size++;
+       ent->available_mrs++;
+       ent->total_mrs++;
+       /* If we are doing fill_to_high_water then keep going. */
+       queue_adjust_cache_locked(ent);
+       ent->pending--;
        spin_unlock_irqrestore(&ent->lock, flags);
+}
 
-       if (!completion_done(&ent->compl))
-               complete(&ent->compl);
+static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
+{
+       struct mlx5_ib_mr *mr;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return NULL;
+       mr->order = ent->order;
+       mr->cache_ent = ent;
+       mr->dev = ent->dev;
+
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+       MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
+       MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
+
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+       MLX5_SET(mkc, mkc, log_page_size, ent->page);
+       return mr;
 }
 
-static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+/* Asynchronously schedule new MRs to be populated in the cache. */
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 {
-       struct mlx5_mr_cache *cache = &dev->cache;
-       struct mlx5_cache_ent *ent = &cache->ent[c];
-       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+       size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mr *mr;
        void *mkc;
        u32 *in;
@@ -173,42 +179,29 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        for (i = 0; i < num; i++) {
-               if (ent->pending >= MAX_PENDING_REG_MR) {
-                       err = -EAGAIN;
-                       break;
-               }
-
-               mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+               mr = alloc_cache_mr(ent, mkc);
                if (!mr) {
                        err = -ENOMEM;
                        break;
                }
-               mr->order = ent->order;
-               mr->allocated_from_cache = true;
-               mr->dev = dev;
-
-               MLX5_SET(mkc, mkc, free, 1);
-               MLX5_SET(mkc, mkc, umr_en, 1);
-               MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
-               MLX5_SET(mkc, mkc, access_mode_4_2,
-                        (ent->access_mode >> 2) & 0x7);
-
-               MLX5_SET(mkc, mkc, qpn, 0xffffff);
-               MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
-               MLX5_SET(mkc, mkc, log_page_size, ent->page);
-
                spin_lock_irq(&ent->lock);
+               if (ent->pending >= MAX_PENDING_REG_MR) {
+                       err = -EAGAIN;
+                       spin_unlock_irq(&ent->lock);
+                       kfree(mr);
+                       break;
+               }
                ent->pending++;
                spin_unlock_irq(&ent->lock);
-               err = mlx5_ib_create_mkey_cb(dev, &mr->mmkey,
-                                              &dev->async_ctx, in, inlen,
-                                              mr->out, sizeof(mr->out),
-                                              &mr->cb_work);
+               err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
+                                            &ent->dev->async_ctx, in, inlen,
+                                            mr->out, sizeof(mr->out),
+                                            &mr->cb_work);
                if (err) {
                        spin_lock_irq(&ent->lock);
                        ent->pending--;
                        spin_unlock_irq(&ent->lock);
-                       mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+                       mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
                        kfree(mr);
                        break;
                }
@@ -218,70 +211,128 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
        return err;
 }
 
-static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+/* Synchronously create an MR in the cache */
+static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
 {
-       struct mlx5_mr_cache *cache = &dev->cache;
-       struct mlx5_cache_ent *ent = &cache->ent[c];
-       struct mlx5_ib_mr *tmp_mr;
+       size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mr *mr;
-       LIST_HEAD(del_list);
-       int i;
+       void *mkc;
+       u32 *in;
+       int err;
 
-       for (i = 0; i < num; i++) {
-               spin_lock_irq(&ent->lock);
-               if (list_empty(&ent->head)) {
-                       spin_unlock_irq(&ent->lock);
-                       break;
-               }
-               mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
-               list_move(&mr->list, &del_list);
-               ent->cur--;
-               ent->size--;
-               spin_unlock_irq(&ent->lock);
-               mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
-       }
+       in = kzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return ERR_PTR(-ENOMEM);
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-       list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
-               list_del(&mr->list);
-               kfree(mr);
+       mr = alloc_cache_mr(ent, mkc);
+       if (!mr) {
+               err = -ENOMEM;
+               goto free_in;
        }
+
+       err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
+       if (err)
+               goto free_mr;
+
+       mr->mmkey.type = MLX5_MKEY_MR;
+       WRITE_ONCE(ent->dev->cache.last_add, jiffies);
+       spin_lock_irq(&ent->lock);
+       ent->total_mrs++;
+       spin_unlock_irq(&ent->lock);
+       kfree(in);
+       return mr;
+free_mr:
+       kfree(mr);
+free_in:
+       kfree(in);
+       return ERR_PTR(err);
 }
 
-static ssize_t size_write(struct file *filp, const char __user *buf,
-                         size_t count, loff_t *pos)
+static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
+{
+       struct mlx5_ib_mr *mr;
+
+       lockdep_assert_held(&ent->lock);
+       if (list_empty(&ent->head))
+               return;
+       mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+       list_del(&mr->list);
+       ent->available_mrs--;
+       ent->total_mrs--;
+       spin_unlock_irq(&ent->lock);
+       mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+       kfree(mr);
+       spin_lock_irq(&ent->lock);
+}
+
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+                               bool limit_fill)
 {
-       struct mlx5_cache_ent *ent = filp->private_data;
-       struct mlx5_ib_dev *dev = ent->dev;
-       char lbuf[20] = {0};
-       u32 var;
        int err;
-       int c;
 
-       count = min(count, sizeof(lbuf) - 1);
-       if (copy_from_user(lbuf, buf, count))
-               return -EFAULT;
+       lockdep_assert_held(&ent->lock);
 
-       c = order2idx(dev, ent->order);
+       while (true) {
+               if (limit_fill)
+                       target = ent->limit * 2;
+               if (target == ent->available_mrs + ent->pending)
+                       return 0;
+               if (target > ent->available_mrs + ent->pending) {
+                       u32 todo = target - (ent->available_mrs + ent->pending);
 
-       if (sscanf(lbuf, "%u", &var) != 1)
-               return -EINVAL;
+                       spin_unlock_irq(&ent->lock);
+                       err = add_keys(ent, todo);
+                       if (err == -EAGAIN)
+                               usleep_range(3000, 5000);
+                       spin_lock_irq(&ent->lock);
+                       if (err) {
+                               if (err != -EAGAIN)
+                                       return err;
+                       } else
+                               return 0;
+               } else {
+                       remove_cache_mr_locked(ent);
+               }
+       }
+}
 
-       if (var < ent->limit)
-               return -EINVAL;
+static ssize_t size_write(struct file *filp, const char __user *buf,
+                         size_t count, loff_t *pos)
+{
+       struct mlx5_cache_ent *ent = filp->private_data;
+       u32 target;
+       int err;
 
-       if (var > ent->size) {
-               do {
-                       err = add_keys(dev, c, var - ent->size);
-                       if (err && err != -EAGAIN)
-                               return err;
+       err = kstrtou32_from_user(buf, count, 0, &target);
+       if (err)
+               return err;
 
-                       usleep_range(3000, 5000);
-               } while (err);
-       } else if (var < ent->size) {
-               remove_keys(dev, c, ent->size - var);
+       /*
+        * Target is the new value of total_mrs the user requests; however, we
+        * cannot free MRs that are in use. Compute the target value for
+        * available_mrs.
+        */
+       spin_lock_irq(&ent->lock);
+       if (target < ent->total_mrs - ent->available_mrs) {
+               err = -EINVAL;
+               goto err_unlock;
        }
+       target = target - (ent->total_mrs - ent->available_mrs);
+       if (target < ent->limit || target > ent->limit*2) {
+               err = -EINVAL;
+               goto err_unlock;
+       }
+       err = resize_available_mrs(ent, target, false);
+       if (err)
+               goto err_unlock;
+       spin_unlock_irq(&ent->lock);
 
        return count;
+
+err_unlock:
+       spin_unlock_irq(&ent->lock);
+       return err;
 }
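Concretely, the debugfs "size" write is translated from a requested total into a target for the unused part of the cache and bounded by the water marks. A plain-C sketch of that arithmetic (no locking, invented helper name):

#include <stdio.h>

static int size_to_available_target(unsigned int requested_total,
				    unsigned int total_mrs,
				    unsigned int available_mrs,
				    unsigned int limit,
				    unsigned int *target_available)
{
	unsigned int in_use = total_mrs - available_mrs;

	if (requested_total < in_use)
		return -1;	/* cannot free MRs that are currently in use */
	*target_available = requested_total - in_use;
	if (*target_available < limit || *target_available > 2 * limit)
		return -1;	/* outside the low/high water marks */
	return 0;
}

int main(void)
{
	unsigned int target;

	/* 20 MRs total, 12 unused, limit 8: asking for 24 grows the pool to 16 unused */
	if (!size_to_available_target(24, 20, 12, 8, &target))
		printf("resize available_mrs toward %u\n", target);
	return 0;
}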
 
 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -291,7 +342,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
        char lbuf[20];
        int err;
 
-       err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+       err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
        if (err < 0)
                return err;
 
@@ -309,32 +360,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *pos)
 {
        struct mlx5_cache_ent *ent = filp->private_data;
-       struct mlx5_ib_dev *dev = ent->dev;
-       char lbuf[20] = {0};
        u32 var;
        int err;
-       int c;
-
-       count = min(count, sizeof(lbuf) - 1);
-       if (copy_from_user(lbuf, buf, count))
-               return -EFAULT;
-
-       c = order2idx(dev, ent->order);
-
-       if (sscanf(lbuf, "%u", &var) != 1)
-               return -EINVAL;
 
-       if (var > ent->size)
-               return -EINVAL;
+       err = kstrtou32_from_user(buf, count, 0, &var);
+       if (err)
+               return err;
 
+       /*
+        * Upon set, we immediately fill the cache up to the high water mark
+        * implied by the limit.
+        */
+       spin_lock_irq(&ent->lock);
        ent->limit = var;
-
-       if (ent->cur < ent->limit) {
-               err = add_keys(dev, c, 2 * ent->limit - ent->cur);
-               if (err)
-                       return err;
-       }
-
+       err = resize_available_mrs(ent, 0, true);
+       spin_unlock_irq(&ent->lock);
+       if (err)
+               return err;
        return count;
 }
 
@@ -359,68 +401,119 @@ static const struct file_operations limit_fops = {
        .read   = limit_read,
 };
 
-static int someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mr_cache *cache)
 {
-       int i;
+       unsigned int i;
 
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-               if (cache->ent[i].cur < cache->ent[i].limit)
-                       return 1;
+               struct mlx5_cache_ent *ent = &cache->ent[i];
+               bool ret;
+
+               spin_lock_irq(&ent->lock);
+               ret = ent->available_mrs < ent->limit;
+               spin_unlock_irq(&ent->lock);
+               if (ret)
+                       return true;
        }
+       return false;
+}
 
-       return 0;
+/*
+ * Check if the bucket is outside the high/low water marks and schedule an
+ * async update. The cache refill has hysteresis: once the low water mark is
+ * hit, it is refilled up to the high water mark.
+ */
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
+{
+       lockdep_assert_held(&ent->lock);
+
+       if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
+               return;
+       if (ent->available_mrs < ent->limit) {
+               ent->fill_to_high_water = true;
+               queue_work(ent->dev->cache.wq, &ent->work);
+       } else if (ent->fill_to_high_water &&
+                  ent->available_mrs + ent->pending < 2 * ent->limit) {
+               /*
+                * Once we start populating due to hitting a low water mark,
+                * continue until we pass the high water mark.
+                */
+               queue_work(ent->dev->cache.wq, &ent->work);
+       } else if (ent->available_mrs == 2 * ent->limit) {
+               ent->fill_to_high_water = false;
+       } else if (ent->available_mrs > 2 * ent->limit) {
+               /* Queue deletion of excess entries */
+               ent->fill_to_high_water = false;
+               if (ent->pending)
+                       queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
+                                          msecs_to_jiffies(1000));
+               else
+                       queue_work(ent->dev->cache.wq, &ent->work);
+       }
 }
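The water-mark hysteresis above can be modelled in isolation; a minimal userspace sketch of the same decision (invented types, no locking or work queues):

#include <stdbool.h>
#include <stdio.h>

struct cache_ent_model {
	unsigned int available, pending, limit;
	bool fill_to_high_water;
};

enum cache_action { CACHE_NONE, CACHE_FILL, CACHE_SHRINK };

static enum cache_action adjust(struct cache_ent_model *ent)
{
	if (ent->available < ent->limit) {
		/* below the low water mark: start filling toward 2 * limit */
		ent->fill_to_high_water = true;
		return CACHE_FILL;
	}
	if (ent->fill_to_high_water &&
	    ent->available + ent->pending < 2 * ent->limit)
		return CACHE_FILL;	/* keep going until the high mark */
	if (ent->available == 2 * ent->limit) {
		ent->fill_to_high_water = false;
		return CACHE_NONE;	/* exactly full: stop */
	}
	if (ent->available > 2 * ent->limit) {
		ent->fill_to_high_water = false;
		return CACHE_SHRINK;	/* above the high mark: release excess */
	}
	return CACHE_NONE;
}

int main(void)
{
	struct cache_ent_model ent = { .available = 3, .pending = 0, .limit = 8 };

	printf("%d\n", adjust(&ent));	/* 1 (CACHE_FILL): under the low water mark */
	ent.available = 20;
	printf("%d\n", adjust(&ent));	/* 2 (CACHE_SHRINK): over the high water mark */
	return 0;
}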
 
 static void __cache_work_func(struct mlx5_cache_ent *ent)
 {
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
-       int i = order2idx(dev, ent->order);
        int err;
 
-       if (cache->stopped)
-               return;
+       spin_lock_irq(&ent->lock);
+       if (ent->disabled)
+               goto out;
 
-       ent = &dev->cache.ent[i];
-       if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
-               err = add_keys(dev, i, 1);
-               if (ent->cur < 2 * ent->limit) {
-                       if (err == -EAGAIN) {
-                               mlx5_ib_dbg(dev, "returned eagain, order %d\n",
-                                           i + 2);
-                               queue_delayed_work(cache->wq, &ent->dwork,
-                                                  msecs_to_jiffies(3));
-                       } else if (err) {
-                               mlx5_ib_warn(dev, "command failed order %d, err %d\n",
-                                            i + 2, err);
+       if (ent->fill_to_high_water &&
+           ent->available_mrs + ent->pending < 2 * ent->limit &&
+           !READ_ONCE(dev->fill_delay)) {
+               spin_unlock_irq(&ent->lock);
+               err = add_keys(ent, 1);
+               spin_lock_irq(&ent->lock);
+               if (ent->disabled)
+                       goto out;
+               if (err) {
+                       /*
+                        * EAGAIN only happens if pending is positive, so we
+                        * will be rescheduled from create_mkey_callback(). The
+                        * only failure path here is ENOMEM.
+                        */
+                       if (err != -EAGAIN) {
+                               mlx5_ib_warn(
+                                       dev,
+                                       "command failed order %d, err %d\n",
+                                       ent->order, err);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(1000));
-                       } else {
-                               queue_work(cache->wq, &ent->work);
                        }
                }
-       } else if (ent->cur > 2 * ent->limit) {
+       } else if (ent->available_mrs > 2 * ent->limit) {
+               bool need_delay;
+
                /*
-                * The remove_keys() logic is performed as garbage collection
-                * task. Such task is intended to be run when no other active
-                * processes are running.
+                * The remove_cache_mr() logic is performed as a garbage
+                * collection task. Such a task is intended to be run when no
+                * other active processes are running.
                 *
                 * The need_resched() will return TRUE if there are user tasks
                 * to be activated in near future.
                 *
-                * In such case, we don't execute remove_keys() and postpone
-                * the garbage collection work to try to run in next cycle,
-                * in order to free CPU resources to other tasks.
+                * In such a case, we don't execute remove_cache_mr() and
+                * postpone the garbage collection work to try to run it in the
+                * next cycle, in order to free CPU resources for other tasks.
                 */
-               if (!need_resched() && !someone_adding(cache) &&
-                   time_after(jiffies, cache->last_add + 300 * HZ)) {
-                       remove_keys(dev, i, 1);
-                       if (ent->cur > ent->limit)
-                               queue_work(cache->wq, &ent->work);
-               } else {
+               spin_unlock_irq(&ent->lock);
+               need_delay = need_resched() || someone_adding(cache) ||
+                            time_after(jiffies,
+                                       READ_ONCE(cache->last_add) + 300 * HZ);
+               spin_lock_irq(&ent->lock);
+               if (ent->disabled)
+                       goto out;
+               if (need_delay)
                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
-               }
+               remove_cache_mr_locked(ent);
+               queue_adjust_cache_locked(ent);
        }
+out:
+       spin_unlock_irq(&ent->lock);
 }
 
 static void delayed_cache_work_func(struct work_struct *work)
@@ -439,117 +532,95 @@ static void cache_work_func(struct work_struct *work)
        __cache_work_func(ent);
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+/* Allocate a special entry from the cache */
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+                                      unsigned int entry)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        struct mlx5_ib_mr *mr;
-       int err;
 
-       if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
-               mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+       if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
+                   entry >= ARRAY_SIZE(cache->ent)))
                return ERR_PTR(-EINVAL);
-       }
 
        ent = &cache->ent[entry];
-       while (1) {
-               spin_lock_irq(&ent->lock);
-               if (list_empty(&ent->head)) {
-                       spin_unlock_irq(&ent->lock);
-
-                       err = add_keys(dev, entry, 1);
-                       if (err && err != -EAGAIN)
-                               return ERR_PTR(err);
-
-                       wait_for_completion(&ent->compl);
-               } else {
-                       mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
-                                             list);
-                       list_del(&mr->list);
-                       ent->cur--;
-                       spin_unlock_irq(&ent->lock);
-                       if (ent->cur < ent->limit)
-                               queue_work(cache->wq, &ent->work);
+       spin_lock_irq(&ent->lock);
+       if (list_empty(&ent->head)) {
+               spin_unlock_irq(&ent->lock);
+               mr = create_cache_mr(ent);
+               if (IS_ERR(mr))
                        return mr;
-               }
+       } else {
+               mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&mr->list);
+               ent->available_mrs--;
+               queue_adjust_cache_locked(ent);
+               spin_unlock_irq(&ent->lock);
        }
+       return mr;
 }
 
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+/* Return an MR already available in the cache */
+static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
 {
-       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_ib_dev *dev = req_ent->dev;
        struct mlx5_ib_mr *mr = NULL;
-       struct mlx5_cache_ent *ent;
-       int last_umr_cache_entry;
-       int c;
-       int i;
+       struct mlx5_cache_ent *ent = req_ent;
 
-       c = order2idx(dev, order);
-       last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
-       if (c < 0 || c > last_umr_cache_entry) {
-               mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
-               return NULL;
-       }
-
-       for (i = c; i <= last_umr_cache_entry; i++) {
-               ent = &cache->ent[i];
-
-               mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+       /* Try larger MR pools from the cache to satisfy the allocation */
+       for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
+               mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
+                           ent - dev->cache.ent);
 
                spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
-                       ent->cur--;
+                       ent->available_mrs--;
+                       queue_adjust_cache_locked(ent);
                        spin_unlock_irq(&ent->lock);
-                       if (ent->cur < ent->limit)
-                               queue_work(cache->wq, &ent->work);
                        break;
                }
+               queue_adjust_cache_locked(ent);
                spin_unlock_irq(&ent->lock);
-
-               queue_work(cache->wq, &ent->work);
        }
 
        if (!mr)
-               cache->ent[c].miss++;
+               req_ent->miss++;
 
        return mr;
 }
 
+static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
+{
+       struct mlx5_cache_ent *ent = mr->cache_ent;
+
+       mr->cache_ent = NULL;
+       spin_lock_irq(&ent->lock);
+       ent->total_mrs--;
+       spin_unlock_irq(&ent->lock);
+}
+
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-       struct mlx5_mr_cache *cache = &dev->cache;
-       struct mlx5_cache_ent *ent;
-       int shrink = 0;
-       int c;
+       struct mlx5_cache_ent *ent = mr->cache_ent;
 
-       if (!mr->allocated_from_cache)
+       if (!ent)
                return;
 
-       c = order2idx(dev, mr->order);
-       WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
-
        if (mlx5_mr_cache_invalidate(mr)) {
-               mr->allocated_from_cache = false;
+               detach_mr_from_cache(mr);
                destroy_mkey(dev, mr);
-               ent = &cache->ent[c];
-               if (ent->cur < ent->limit)
-                       queue_work(cache->wq, &ent->work);
                return;
        }
 
-       ent = &cache->ent[c];
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
-       ent->cur++;
-       if (ent->cur > 2 * ent->limit)
-               shrink = 1;
+       ent->available_mrs++;
+       queue_adjust_cache_locked(ent);
        spin_unlock_irq(&ent->lock);
-
-       if (shrink)
-               queue_work(cache->wq, &ent->work);
 }
 
 static void clean_keys(struct mlx5_ib_dev *dev, int c)
@@ -569,8 +640,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_move(&mr->list, &del_list);
-               ent->cur--;
-               ent->size--;
+               ent->available_mrs--;
+               ent->total_mrs--;
                spin_unlock_irq(&ent->lock);
                mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
        }
@@ -608,7 +679,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
                dir = debugfs_create_dir(ent->name, cache->root);
                debugfs_create_file("size", 0600, dir, ent, &size_fops);
                debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
-               debugfs_create_u32("cur", 0400, dir, &ent->cur);
+               debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
                debugfs_create_u32("miss", 0600, dir, &ent->miss);
        }
 }
@@ -617,7 +688,7 @@ static void delay_time_func(struct timer_list *t)
 {
        struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
 
-       dev->fill_delay = 0;
+       WRITE_ONCE(dev->fill_delay, 0);
 }
 
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
@@ -643,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                ent->dev = dev;
                ent->limit = 0;
 
-               init_completion(&ent->compl);
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 
@@ -665,7 +735,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                        ent->limit = dev->mdev->profile->mr_cache[i].limit;
                else
                        ent->limit = 0;
-               queue_work(cache->wq, &ent->work);
+               spin_lock_irq(&ent->lock);
+               queue_adjust_cache_locked(ent);
+               spin_unlock_irq(&ent->lock);
        }
 
        mlx5_mr_cache_debugfs_init(dev);
@@ -675,13 +747,20 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 {
-       int i;
+       unsigned int i;
 
        if (!dev->cache.wq)
                return 0;
 
-       dev->cache.stopped = 1;
-       flush_workqueue(dev->cache.wq);
+       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+               struct mlx5_cache_ent *ent = &dev->cache.ent[i];
+
+               spin_lock_irq(&ent->lock);
+               ent->disabled = true;
+               spin_unlock_irq(&ent->lock);
+               cancel_work_sync(&ent->work);
+               cancel_delayed_work_sync(&ent->dwork);
+       }
 
        mlx5_mr_cache_debugfs_cleanup(dev);
        mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
@@ -876,31 +955,37 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
        return err;
 }
 
-static struct mlx5_ib_mr *alloc_mr_from_cache(
-                                 struct ib_pd *pd, struct ib_umem *umem,
-                                 u64 virt_addr, u64 len, int npages,
-                                 int page_shift, int order, int access_flags)
+static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
+                                                     unsigned int order)
+{
+       struct mlx5_mr_cache *cache = &dev->cache;
+
+       if (order < cache->ent[0].order)
+               return &cache->ent[0];
+       order = order - cache->ent[0].order;
+       if (order > MR_CACHE_LAST_STD_ENTRY)
+               return NULL;
+       return &cache->ent[order];
+}
+
+static struct mlx5_ib_mr *
+alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
+                   u64 len, int npages, int page_shift, unsigned int order,
+                   int access_flags)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
        struct mlx5_ib_mr *mr;
-       int err = 0;
-       int i;
-
-       for (i = 0; i < 1; i++) {
-               mr = alloc_cached_mr(dev, order);
-               if (mr)
-                       break;
 
-               err = add_keys(dev, order2idx(dev, order), 1);
-               if (err && err != -EAGAIN) {
-                       mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
-                       break;
-               }
+       if (!ent)
+               return ERR_PTR(-E2BIG);
+       mr = get_cache_mr(ent);
+       if (!mr) {
+               mr = create_cache_mr(ent);
+               if (IS_ERR(mr))
+                       return mr;
        }
 
-       if (!mr)
-               return ERR_PTR(-EAGAIN);
-
        mr->ibmr.pd = pd;
        mr->umem = umem;
        mr->access_flags = access_flags;
@@ -1474,10 +1559,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                /*
                 * UMR can't be used - MKey needs to be replaced.
                 */
-               if (mr->allocated_from_cache)
-                       err = mlx5_mr_cache_invalidate(mr);
-               else
-                       err = destroy_mkey(dev, mr);
+               if (mr->cache_ent)
+                       detach_mr_from_cache(mr);
+               err = destroy_mkey(dev, mr);
                if (err)
                        goto err;
 
@@ -1489,8 +1573,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                        mr = to_mmr(ib_mr);
                        goto err;
                }
-
-               mr->allocated_from_cache = false;
        } else {
                /*
                 * Send a UMR WQE
@@ -1577,8 +1659,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-       int allocated_from_cache = mr->allocated_from_cache;
-
        if (mr->sig) {
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_memory.psv_idx))
@@ -1593,7 +1673,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                mr->sig = NULL;
        }
 
-       if (!allocated_from_cache) {
+       if (!mr->cache_ent) {
                destroy_mkey(dev, mr);
                mlx5_free_priv_descs(mr);
        }
@@ -1610,7 +1690,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
        else
                clean_mr(dev, mr);
 
-       if (mr->allocated_from_cache)
+       if (mr->cache_ent)
                mlx5_mr_cache_free(dev, mr);
        else
                kfree(mr);
index bf50cd9..3de7606 100644 (file)
@@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
        odp->private = NULL;
        mutex_unlock(&odp->umem_mutex);
 
-       if (!mr->allocated_from_cache) {
+       if (!mr->cache_ent) {
                mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
                WARN_ON(mr->descs);
        }
diff --git a/drivers/infiniband/hw/mlx5/qos.c b/drivers/infiniband/hw/mlx5/qos.c
new file mode 100644 (file)
index 0000000..cac878a
--- /dev/null
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static bool pp_is_supported(struct ib_device *device)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+
+       return (MLX5_CAP_GEN(dev->mdev, qos) &&
+               MLX5_CAP_QOS(dev->mdev, packet_pacing) &&
+               MLX5_CAP_QOS(dev->mdev, packet_pacing_uid));
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PP_OBJ_ALLOC)(
+       struct uverbs_attr_bundle *attrs)
+{
+       u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
+       struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+               MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE);
+       struct mlx5_ib_dev *dev;
+       struct mlx5_ib_ucontext *c;
+       struct mlx5_ib_pp *pp_entry;
+       void *in_ctx;
+       u16 uid;
+       int inlen;
+       u32 flags;
+       int err;
+
+       c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+       if (IS_ERR(c))
+               return PTR_ERR(c);
+
+       /* The allocated entry can be used only by a DEVX context */
+       if (!c->devx_uid)
+               return -EINVAL;
+
+       dev = to_mdev(c->ibucontext.device);
+       pp_entry = kzalloc(sizeof(*pp_entry), GFP_KERNEL);
+       if (!pp_entry)
+               return -ENOMEM;
+
+       in_ctx = uverbs_attr_get_alloced_ptr(attrs,
+                                            MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX);
+       inlen = uverbs_attr_get_len(attrs,
+                                   MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX);
+       memcpy(rl_raw, in_ctx, inlen);
+       err = uverbs_get_flags32(&flags, attrs,
+               MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+               MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX);
+       if (err)
+               goto err;
+
+       uid = (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX) ?
+               c->devx_uid : MLX5_SHARED_RESOURCE_UID;
+
+       err = mlx5_rl_add_rate_raw(dev->mdev, rl_raw, uid,
+                       (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX),
+                       &pp_entry->index);
+       if (err)
+               goto err;
+
+       err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+                            &pp_entry->index, sizeof(pp_entry->index));
+       if (err)
+               goto clean;
+
+       pp_entry->mdev = dev->mdev;
+       uobj->object = pp_entry;
+       return 0;
+
+clean:
+       mlx5_rl_remove_rate_raw(dev->mdev, pp_entry->index);
+err:
+       kfree(pp_entry);
+       return err;
+}
+
+static int pp_obj_cleanup(struct ib_uobject *uobject,
+                         enum rdma_remove_reason why,
+                         struct uverbs_attr_bundle *attrs)
+{
+       struct mlx5_ib_pp *pp_entry = uobject->object;
+
+       mlx5_rl_remove_rate_raw(pp_entry->mdev, pp_entry->index);
+       kfree(pp_entry);
+       return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+       MLX5_IB_METHOD_PP_OBJ_ALLOC,
+       UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE,
+                       MLX5_IB_OBJECT_PP,
+                       UVERBS_ACCESS_NEW,
+                       UA_MANDATORY),
+       UVERBS_ATTR_PTR_IN(
+               MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX,
+               UVERBS_ATTR_SIZE(1,
+                       MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)),
+               UA_MANDATORY,
+               UA_ALLOC_AND_COPY),
+       UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+                       enum mlx5_ib_uapi_pp_alloc_flags,
+                       UA_MANDATORY),
+       UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+                          UVERBS_ATTR_TYPE(u16),
+                          UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+       MLX5_IB_METHOD_PP_OBJ_DESTROY,
+       UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE,
+                       MLX5_IB_OBJECT_PP,
+                       UVERBS_ACCESS_DESTROY,
+                       UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_PP,
+                           UVERBS_TYPE_ALLOC_IDR(pp_obj_cleanup),
+                           &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_ALLOC),
+                           &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_DESTROY));
+
+
+const struct uapi_definition mlx5_ib_qos_defs[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+               MLX5_IB_OBJECT_PP,
+               UAPI_DEF_IS_OBJ_SUPPORTED(pp_is_supported)),
+       {},
+};
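The one policy decision in the allocation path above is whether the rate-limit index is dedicated to the caller's DEVX UID or charged to the shared-resource UID so it can be shared. A tiny standalone model of that choice (constants are invented, not the kernel values):

#include <stdint.h>
#include <stdio.h>

#define SHARED_RESOURCE_UID_MODEL     0		/* stand-in for MLX5_SHARED_RESOURCE_UID */
#define PP_FLAG_DEDICATED_INDEX_MODEL (1u << 0)	/* stand-in for the uAPI flag */

static uint16_t pp_select_uid(uint16_t devx_uid, uint32_t flags)
{
	/* a dedicated entry is owned by (and accounted to) the caller's UID */
	if (flags & PP_FLAG_DEDICATED_INDEX_MODEL)
		return devx_uid;
	/* otherwise the index lives in the shared pool */
	return SHARED_RESOURCE_UID_MODEL;
}

int main(void)
{
	printf("shared uid %u, dedicated uid %u\n",
	       (unsigned int)pp_select_uid(7, 0),
	       (unsigned int)pp_select_uid(7, PP_FLAG_DEDICATED_INDEX_MODEL));
	return 0;
}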
index 8fe149e..1456db4 100644 (file)
@@ -697,6 +697,9 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
 {
        int bfregn = -ENOMEM;
 
+       if (bfregi->lib_uar_dyn)
+               return -EINVAL;
+
        mutex_lock(&bfregi->lock);
        if (bfregi->ver >= 2) {
                bfregn = alloc_high_class_bfreg(dev, bfregi);
@@ -768,6 +771,9 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
        u32 index_of_sys_page;
        u32 offset;
 
+       if (bfregi->lib_uar_dyn)
+               return -EINVAL;
+
        bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
                                MLX5_NON_FP_BFREGS_PER_UAR;
        index_of_sys_page = bfregn / bfregs_per_sys_page;
@@ -919,6 +925,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        void *qpc;
        int err;
        u16 uid;
+       u32 uar_flags;
 
        err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
        if (err) {
@@ -928,24 +935,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
        context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
                                            ibucontext);
-       if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
+       uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX |
+                                 MLX5_QP_FLAG_BFREG_INDEX);
+       switch (uar_flags) {
+       case MLX5_QP_FLAG_UAR_PAGE_INDEX:
+               uar_index = ucmd.bfreg_index;
+               bfregn = MLX5_IB_INVALID_BFREG;
+               break;
+       case MLX5_QP_FLAG_BFREG_INDEX:
                uar_index = bfregn_to_uar_index(dev, &context->bfregi,
                                                ucmd.bfreg_index, true);
                if (uar_index < 0)
                        return uar_index;
-
                bfregn = MLX5_IB_INVALID_BFREG;
-       } else if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) {
-               /*
-                * TBD: should come from the verbs when we have the API
-                */
-               /* In CROSS_CHANNEL CQ and QP must use the same UAR */
-               bfregn = MLX5_CROSS_CHANNEL_BFREG;
-       }
-       else {
+               break;
+       case 0:
+               if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+                       return -EINVAL;
                bfregn = alloc_bfreg(dev, &context->bfregi);
                if (bfregn < 0)
                        return bfregn;
+               break;
+       default:
+               return -EINVAL;
        }
 
        mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -2100,6 +2112,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                                      MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC |
                                      MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
                                      MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+                                     MLX5_QP_FLAG_UAR_PAGE_INDEX |
                                      MLX5_QP_FLAG_TYPE_DCI |
                                      MLX5_QP_FLAG_TYPE_DCT))
                        return -EINVAL;
@@ -2789,7 +2802,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                mlx5_ib_dbg(dev, "unsupported qp type %d\n",
                            init_attr->qp_type);
                /* Don't support raw QPs */
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
        }
 
        if (verbs_init_attr->qp_type == IB_QPT_DRIVER)
index 78a48ae..fa80858 100644 (file)
@@ -58,7 +58,7 @@ struct mthca_user_db_table {
                u64                uvirt;
                struct scatterlist mem;
                int                refcount;
-       }                page[0];
+       } page[];
 };
 
 static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
index da9b8f9..f9a2e65 100644 (file)
@@ -68,7 +68,7 @@ struct mthca_icm_table {
        int               lowmem;
        int               coherent;
        struct mutex      mutex;
-       struct mthca_icm *icm[0];
+       struct mthca_icm *icm[];
 };
 
 struct mthca_icm_iter {
index ac19d57..69a3e4f 100644 (file)
@@ -561,7 +561,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
        }
        default:
                /* Don't support raw QPs */
-               return ERR_PTR(-ENOSYS);
+               return ERR_PTR(-EOPNOTSUPP);
        }
 
        if (err) {
index d47ea67..10e3438 100644 (file)
@@ -1111,7 +1111,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
            (attrs->qp_type != IB_QPT_UD)) {
                pr_err("%s(%d) unsupported qp type=0x%x requested\n",
                       __func__, dev->id, attrs->qp_type);
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
        /* Skip the check for QP1 to support CM size of 128 */
        if ((attrs->qp_type != IB_QPT_GSI) &&
index 484b555..a5bd3ad 100644 (file)
@@ -1186,7 +1186,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
                DP_DEBUG(dev, QEDR_MSG_QP,
                         "create qp: unsupported qp type=0x%x requested\n",
                         attrs->qp_type);
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
 
        if (attrs->cap.max_send_wr > qattr->max_sqe) {
index 5ef93f8..7508abb 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/utsname.h>
 #include <linux/rculist.h>
 #include <linux/mm.h>
-#include <linux/random.h>
 #include <linux/vmalloc.h>
 #include <rdma/rdma_vt.h>
 
@@ -1503,7 +1502,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
        unsigned i, ctxt;
        int ret;
 
-       get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
        for (i = 0; i < dd->num_pports; i++)
                init_ibport(ppd + i);
 
index 8bf414b..dc0e81f 100644 (file)
@@ -177,7 +177,6 @@ struct qib_ibdev {
        struct timer_list mem_timer;
        struct qib_pio_header *pio_hdrs;
        dma_addr_t pio_hdrs_phys;
-       u32 qp_rnd; /* random bytes for hash */
 
        u32 n_piowait;
        u32 n_txwait;
index 556b8e4..71f8233 100644 (file)
@@ -504,7 +504,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
        if (init_attr->qp_type != IB_QPT_UD) {
                usnic_err("%s asked to make a non-UD QP: %d\n",
                          dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
        }
 
        trans_spec = cmd.spec;
index 70be49b..7ec8991 100644 (file)
@@ -77,7 +77,7 @@ struct usnic_uiom_reg {
 struct usnic_uiom_chunk {
        struct list_head                list;
        int                             nents;
-       struct scatterlist              page_list[0];
+       struct scatterlist              page_list[];
 };
 
 struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
index 9de1281..afcc2ab 100644 (file)
@@ -217,7 +217,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
            init_attr->qp_type != IB_QPT_GSI) {
                dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
                         init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
        }
 
        if (is_srq && !dev->dsr->caps.max_srq) {
index 7858d49..0e1b291 100644 (file)
@@ -1220,7 +1220,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 
        default:
                /* Don't support raw QPs */
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(-EOPNOTSUPP);
        }
 
        init_attr->cap.max_inline_data = 0;
index 986265a..72b031a 100644 (file)
@@ -284,12 +284,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
                                         &gid->global.interface_id);
 }
 
-static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
-                                               *ibucontext)
-{
-       return container_of(ibucontext, struct rvt_ucontext, ibucontext);
-}
-
 /**
  * rvt_alloc_ucontext - Allocate a user context
  * @uctx: Verbs context
index 0946a30..4afdd2e 100644 (file)
@@ -103,6 +103,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
        rxe->attr.max_fast_reg_page_list_len    = RXE_MAX_FMR_PAGE_LIST_LEN;
        rxe->attr.max_pkeys                     = RXE_MAX_PKEYS;
        rxe->attr.local_ca_ack_delay            = RXE_LOCAL_CA_ACK_DELAY;
+       addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid,
+                       rxe->ndev->dev_addr);
 
        rxe->max_ucontext                       = RXE_MAX_UCONTEXT;
 }
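
The hunk above derives rxe's sys_image_guid from the attached netdev's MAC address via addrconf_addr_eui48(), i.e. the standard EUI-48 to modified EUI-64 mapping: insert 0xFF,0xFE between the two MAC halves and flip the universal/local bit. A small userspace sketch of that mapping (helper name invented for illustration):

#include <stdint.h>
#include <string.h>

/* Expand a 6-byte MAC into a modified EUI-64, as addrconf_addr_eui48() does. */
static void mac_to_eui64(uint8_t eui[8], const uint8_t mac[6])
{
        memcpy(eui, mac, 3);
        eui[3] = 0xFF;
        eui[4] = 0xFE;
        memcpy(eui + 5, mac + 3, 3);
        eui[0] ^= 0x02;                 /* toggle the universal/local bit */
}
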
index ec21f61..6c11c3a 100644 (file)
@@ -590,15 +590,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
        int err;
 
        if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
-               int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
+               int max_rd_atomic = attr->max_rd_atomic ?
+                       roundup_pow_of_two(attr->max_rd_atomic) : 0;
 
                qp->attr.max_rd_atomic = max_rd_atomic;
                atomic_set(&qp->req.rd_atomic, max_rd_atomic);
        }
 
        if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-               int max_dest_rd_atomic =
-                       __roundup_pow_of_two(attr->max_dest_rd_atomic);
+               int max_dest_rd_atomic = attr->max_dest_rd_atomic ?
+                       roundup_pow_of_two(attr->max_dest_rd_atomic) : 0;
 
                qp->attr.max_dest_rd_atomic = max_dest_rd_atomic;
 
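
The rxe hunk above special-cases zero before rounding up because roundup_pow_of_two() is undefined for an input of 0, while a zero max_rd_atomic / max_dest_rd_atomic is a legitimate request meaning "no RDMA read/atomic resources". A userspace sketch of the guarded round-up (helper name invented for illustration):

#include <stdio.h>

/* Round v up to the next power of two, but map 0 to 0 instead of
 * hitting the undefined-for-zero case of an unguarded round-up. */
static unsigned int roundup_pow_of_two_or_zero(unsigned int v)
{
        unsigned int p = 1;

        if (!v)
                return 0;
        while (p < v)
                p <<= 1;
        return p;
}

int main(void)
{
        printf("%u %u %u\n", roundup_pow_of_two_or_zero(0),
               roundup_pow_of_two_or_zero(3), roundup_pow_of_two_or_zero(8));
        return 0;                       /* prints: 0 4 8 */
}
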
index acd0a92..8ef17d6 100644 (file)
@@ -63,7 +63,7 @@ struct rxe_queue_buf {
        __u32                   pad_2[31];
        __u32                   consumer_index;
        __u32                   pad_3[31];
-       __u8                    data[0];
+       __u8                    data[];
 };
 
 struct rxe_queue {
index c5651a9..559e5fd 100644 (file)
@@ -1769,14 +1769,23 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
        return 0;
 }
 
-static int siw_listen_address(struct iw_cm_id *id, int backlog,
-                             struct sockaddr *laddr, int addr_family)
+/*
+ * siw_create_listen - Create resources for a listener's IWCM ID @id
+ *
+ * Starts listen on the socket address id->local_addr.
+ *
+ */
+int siw_create_listen(struct iw_cm_id *id, int backlog)
 {
        struct socket *s;
        struct siw_cep *cep = NULL;
        struct siw_device *sdev = to_siw_dev(id->device);
+       int addr_family = id->local_addr.ss_family;
        int rv = 0, s_val;
 
+       if (addr_family != AF_INET && addr_family != AF_INET6)
+               return -EAFNOSUPPORT;
+
        rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
        if (rv < 0)
                return rv;
@@ -1791,9 +1800,25 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog,
                siw_dbg(id->device, "setsockopt error: %d\n", rv);
                goto error;
        }
-       rv = s->ops->bind(s, laddr, addr_family == AF_INET ?
-                                   sizeof(struct sockaddr_in) :
-                                   sizeof(struct sockaddr_in6));
+       if (addr_family == AF_INET) {
+               struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
+
+               /* For wildcard addr, limit binding to current device only */
+               if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
+                       s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
+
+               rv = s->ops->bind(s, (struct sockaddr *)laddr,
+                                 sizeof(struct sockaddr_in));
+       } else {
+               struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr);
+
+               /* For wildcard addr, limit binding to current device only */
+               if (ipv6_addr_any(&laddr->sin6_addr))
+                       s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
+
+               rv = s->ops->bind(s, (struct sockaddr *)laddr,
+                                 sizeof(struct sockaddr_in6));
+       }
        if (rv) {
                siw_dbg(id->device, "socket bind error: %d\n", rv);
                goto error;
@@ -1852,7 +1877,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog,
        list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
        cep->state = SIW_EPSTATE_LISTENING;
 
-       siw_dbg(id->device, "Listen at laddr %pISp\n", laddr);
+       siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
 
        return 0;
 
@@ -1910,106 +1935,6 @@ static void siw_drop_listeners(struct iw_cm_id *id)
        }
 }
 
-/*
- * siw_create_listen - Create resources for a listener's IWCM ID @id
- *
- * Listens on the socket address id->local_addr.
- *
- * If the listener's @id provides a specific local IP address, at most one
- * listening socket is created and associated with @id.
- *
- * If the listener's @id provides the wildcard (zero) local IP address,
- * a separate listen is performed for each local IP address of the device
- * by creating a listening socket and binding to that local IP address.
- *
- */
-int siw_create_listen(struct iw_cm_id *id, int backlog)
-{
-       struct net_device *dev = to_siw_dev(id->device)->netdev;
-       int rv = 0, listeners = 0;
-
-       siw_dbg(id->device, "backlog %d\n", backlog);
-
-       /*
-        * For each attached address of the interface, create a
-        * listening socket, if id->local_addr is the wildcard
-        * IP address or matches the IP address.
-        */
-       if (id->local_addr.ss_family == AF_INET) {
-               struct in_device *in_dev = in_dev_get(dev);
-               struct sockaddr_in s_laddr;
-               const struct in_ifaddr *ifa;
-
-               if (!in_dev) {
-                       rv = -ENODEV;
-                       goto out;
-               }
-               memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr));
-
-               siw_dbg(id->device, "laddr %pISp\n", &s_laddr);
-
-               rtnl_lock();
-               in_dev_for_each_ifa_rtnl(ifa, in_dev) {
-                       if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) ||
-                           s_laddr.sin_addr.s_addr == ifa->ifa_address) {
-                               s_laddr.sin_addr.s_addr = ifa->ifa_address;
-
-                               rv = siw_listen_address(id, backlog,
-                                               (struct sockaddr *)&s_laddr,
-                                               AF_INET);
-                               if (!rv)
-                                       listeners++;
-                       }
-               }
-               rtnl_unlock();
-               in_dev_put(in_dev);
-       } else if (id->local_addr.ss_family == AF_INET6) {
-               struct inet6_dev *in6_dev = in6_dev_get(dev);
-               struct inet6_ifaddr *ifp;
-               struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr);
-
-               if (!in6_dev) {
-                       rv = -ENODEV;
-                       goto out;
-               }
-               siw_dbg(id->device, "laddr %pISp\n", &s_laddr);
-
-               rtnl_lock();
-               list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
-                       if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
-                               continue;
-                       if (ipv6_addr_any(&s_laddr->sin6_addr) ||
-                           ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) {
-                               struct sockaddr_in6 bind_addr  = {
-                                       .sin6_family = AF_INET6,
-                                       .sin6_port = s_laddr->sin6_port,
-                                       .sin6_flowinfo = 0,
-                                       .sin6_addr = ifp->addr,
-                                       .sin6_scope_id = dev->ifindex };
-
-                               rv = siw_listen_address(id, backlog,
-                                               (struct sockaddr *)&bind_addr,
-                                               AF_INET6);
-                               if (!rv)
-                                       listeners++;
-                       }
-               }
-               rtnl_unlock();
-               in6_dev_put(in6_dev);
-       } else {
-               rv = -EAFNOSUPPORT;
-       }
-out:
-       if (listeners)
-               rv = 0;
-       else if (!rv)
-               rv = -EINVAL;
-
-       siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK");
-
-       return rv;
-}
-
 int siw_destroy_listen(struct iw_cm_id *id)
 {
        if (!id->provider_data) {
index 9ccce29..6505202 100644 (file)
@@ -332,7 +332,7 @@ static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
        struct siw_srq *srq;
        struct siw_wqe *wqe = NULL;
        bool srq_event = false;
-       unsigned long flags;
+       unsigned long uninitialized_var(flags);
 
        srq = qp->srq;
        if (srq) {
index 07e3013..aeb842b 100644 (file)
@@ -165,15 +165,16 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
                   struct ib_port_attr *attr)
 {
        struct siw_device *sdev = to_siw_dev(base_dev);
+       int rv;
 
        memset(attr, 0, sizeof(*attr));
 
-       attr->active_mtu = attr->max_mtu;
-       attr->active_speed = 2;
-       attr->active_width = 2;
+       rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
+                        &attr->active_width);
        attr->gid_tbl_len = 1;
        attr->max_msg_sz = -1;
        attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+       attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
        attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
                IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
        attr->pkey_tbl_len = 1;
@@ -192,7 +193,7 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
         * attr->subnet_timeout = 0;
         * attr->init_type_repy = 0;
         */
-       return 0;
+       return rv;
 }
 
 int siw_get_port_immutable(struct ib_device *base_dev, u8 port,
@@ -322,7 +323,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
        }
        if (attrs->qp_type != IB_QPT_RC) {
                siw_dbg(base_dev, "only RC QP's supported\n");
-               rv = -EINVAL;
+               rv = -EOPNOTSUPP;
                goto err_out;
        }
        if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) ||
index 2aa3457..e188a95 100644 (file)
@@ -838,6 +838,4 @@ extern int ipoib_debug_level;
 
 #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
 
-extern const char ipoib_driver_version[];
-
 #endif /* _IPOIB_H */
index a10a0c2..67a21fd 100644 (file)
@@ -68,9 +68,6 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
        strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
                sizeof(drvinfo->bus_info));
 
-       strlcpy(drvinfo->version, ipoib_driver_version,
-               sizeof(drvinfo->version));
-
        strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver));
 }
 
index 4a0d3a9..81b8227 100644 (file)
 #include <linux/inetdevice.h>
 #include <rdma/ib_cache.h>
 
-#define DRV_VERSION "1.0.0"
-
-const char ipoib_driver_version[] = DRV_VERSION;
-
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
index 7a8f24d..999ef7c 100644 (file)
@@ -292,12 +292,27 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
 {
        struct iser_device *device = iser_task->iser_conn->ib_conn.device;
        struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+       struct iser_fr_desc *desc;
+       struct ib_mr_status mr_status;
 
-       if (!reg->mem_h)
+       desc = reg->mem_h;
+       if (!desc)
                return;
 
-       device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn,
-                                    reg->mem_h);
+       /*
+        * The signature MR cannot be invalidated and reused without checking.
+        * libiscsi calls the check_protection transport handler only if
+        * SCSI-Response is received. And the signature MR is not checked if
+        * the task is completed for some other reason like a timeout or error
+        * handling. That's why we must check the signature MR here before
+        * putting it to the free pool.
+        */
+       if (unlikely(desc->sig_protected)) {
+               desc->sig_protected = false;
+               ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS,
+                                  &mr_status);
+       }
+       device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, desc);
        reg->mem_h = NULL;
 }
 
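
The iser hunk above checks a signature-protected MR with ib_check_mr_status() before handing it back to the free pool, because the MR cannot simply be invalidated and reused while a protection check is still pending. A minimal kernel-context sketch of the same query, assuming a caller that only needs a pass/fail answer (the helper name is hypothetical):

#include <rdma/ib_verbs.h>

/* Return true if the signature MR reports no protection error and may be reused. */
static bool sig_mr_clean(struct ib_mr *sig_mr)
{
        struct ib_mr_status mr_status;

        if (ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status))
                return false;           /* the query itself failed */

        /* fail_status carries IB_MR_CHECK_SIG_STATUS when the check failed */
        return !(mr_status.fail_status & IB_MR_CHECK_SIG_STATUS);
}
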
index 4480092..d324312 100644 (file)
@@ -239,7 +239,7 @@ struct opa_veswport_mactable_entry {
  * @offset: mac table starting offset
  * @num_entries: Number of entries to get or set
  * @mac_tbl_digest: mac table digest
- * @tbl_entries[]: Array of table entries
+ * @tbl_entries: Array of table entries
  *
  * The EM sends down this structure in a MAD indicating
  * the starting offset in the forwarding table that this
@@ -258,7 +258,7 @@ struct opa_veswport_mactable {
        __be16                              offset;
        __be16                              num_entries;
        __be32                              mac_tbl_digest;
-       struct opa_veswport_mactable_entry  tbl_entries[0];
+       struct opa_veswport_mactable_entry  tbl_entries[];
 } __packed;
 
 /**
@@ -440,7 +440,7 @@ struct opa_veswport_iface_macs {
        __be16 num_macs_in_msg;
        __be16 tot_macs_in_lst;
        __be16 gen_count;
-       struct opa_vnic_iface_mac_entry entry[0];
+       struct opa_vnic_iface_mac_entry entry[];
 } __packed;
 
 /**
index 8ad7da9..42d557d 100644 (file)
@@ -125,8 +125,6 @@ static void vnic_get_drvinfo(struct net_device *netdev,
                             struct ethtool_drvinfo *drvinfo)
 {
        strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
-       strlcpy(drvinfo->version, opa_vnic_driver_version,
-               sizeof(drvinfo->version));
        strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
                sizeof(drvinfo->bus_info));
 }
index 6dbc08e..dd942dd 100644 (file)
@@ -292,7 +292,6 @@ struct opa_vnic_mac_tbl_node {
                hlist_for_each_entry(obj, &name[bkt], member)
 
 extern char opa_vnic_driver_name[];
-extern const char opa_vnic_driver_version[];
 
 struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
                                             u8 port_num, u8 vport_num);
index be5befd..6e8d650 100644 (file)
@@ -59,9 +59,7 @@
 
 #include "opa_vnic_internal.h"
 
-#define DRV_VERSION "1.0"
 char opa_vnic_driver_name[] = "opa_vnic";
-const char opa_vnic_driver_version[] = DRV_VERSION;
 
 /*
  * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
@@ -1041,9 +1039,6 @@ static int __init opa_vnic_init(void)
 {
        int rc;
 
-       pr_info("OPA Virtual Network Driver - v%s\n",
-               opa_vnic_driver_version);
-
        rc = ib_register_client(&opa_vnic_client);
        if (rc)
                pr_err("VNIC driver register failed %d\n", rc);
index 5359ece..6fabcc2 100644 (file)
@@ -309,7 +309,7 @@ struct srp_fr_pool {
        int                     max_page_list_len;
        spinlock_t              lock;
        struct list_head        free_list;
-       struct srp_fr_desc      desc[0];
+       struct srp_fr_desc      desc[];
 };
 
 /**
index b25465d..9004869 100644 (file)
@@ -904,6 +904,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
        case FS_FT_SNIFFER_TX:
        case FS_FT_NIC_TX:
        case FS_FT_RDMA_RX:
+       case FS_FT_RDMA_TX:
                return mlx5_fs_cmd_get_fw_cmds();
        default:
                return mlx5_fs_cmd_get_stub_cmds();
index 62ce2b9..d5defe0 100644 (file)
                               .identified_miss_table_mode),                   \
                FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
 
+#define FS_CHAINING_CAPS_RDMA_TX                                                \
+       FS_REQUIRED_CAPS(                                                       \
+               FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \
+               FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root),    \
+               FS_CAP(flow_table_properties_nic_transmit_rdma                  \
+                              .identified_miss_table_mode),                    \
+               FS_CAP(flow_table_properties_nic_transmit_rdma                  \
+                              .flow_table_modify))
+
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
@@ -202,6 +211,18 @@ static struct init_tree_node rdma_rx_root_fs = {
        }
 };
 
+static struct init_tree_node rdma_tx_root_fs = {
+       .type = FS_TYPE_NAMESPACE,
+       .ar_size = 1,
+       .children = (struct init_tree_node[]) {
+               ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+                        FS_CHAINING_CAPS_RDMA_TX,
+                        ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+                               ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+                                                 BY_PASS_PRIO_NUM_LEVELS))),
+       }
+};
+
 enum fs_i_lock_class {
        FS_LOCK_GRANDPARENT,
        FS_LOCK_PARENT,
@@ -2121,6 +2142,8 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
        } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
                root_ns = steering->rdma_rx_root_ns;
                prio = RDMA_RX_KERNEL_PRIO;
+       } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+               root_ns = steering->rdma_tx_root_ns;
        } else { /* Must be NIC RX */
                root_ns = steering->root_ns;
                prio = type;
@@ -2524,6 +2547,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
        cleanup_root_ns(steering->sniffer_rx_root_ns);
        cleanup_root_ns(steering->sniffer_tx_root_ns);
        cleanup_root_ns(steering->rdma_rx_root_ns);
+       cleanup_root_ns(steering->rdma_tx_root_ns);
        cleanup_root_ns(steering->egress_root_ns);
        mlx5_cleanup_fc_stats(dev);
        kmem_cache_destroy(steering->ftes_cache);
@@ -2580,6 +2604,29 @@ out_err:
        return err;
 }
 
+static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+       int err;
+
+       steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX);
+       if (!steering->rdma_tx_root_ns)
+               return -ENOMEM;
+
+       err = init_root_tree(steering, &rdma_tx_root_fs,
+                            &steering->rdma_tx_root_ns->ns.node);
+       if (err)
+               goto out_err;
+
+       set_prio_attrs(steering->rdma_tx_root_ns);
+
+       return 0;
+
+out_err:
+       cleanup_root_ns(steering->rdma_tx_root_ns);
+       steering->rdma_tx_root_ns = NULL;
+       return err;
+}
+
 /* FT and tc chains are stored in the same array so we can re-use the
  * mlx5_get_fdb_sub_ns() and tc api for FT chains.
  * When creating a new ns for each chain store it in the first available slot.
@@ -2890,6 +2937,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
                        goto err;
        }
 
+       if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) {
+               err = init_rdma_tx_root_ns(steering);
+               if (err)
+                       goto err;
+       }
+
        if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
                err = init_egress_root_ns(steering);
                if (err)
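
The fs_core.c changes above add an RDMA TX steering root namespace next to the existing RDMA RX one, gated on the new ft_support capability. A hedged sketch of how an in-kernel consumer could reach the new namespace and create a table in it; the table attributes below are illustrative only:

#include <linux/err.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>

static struct mlx5_flow_table *create_rdma_tx_table(struct mlx5_core_dev *mdev)
{
        struct mlx5_flow_table_attr ft_attr = {
                .prio    = 0,           /* illustrative values */
                .max_fte = 1,
        };
        struct mlx5_flow_namespace *ns;

        ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX);
        if (!ns)                        /* capability not present */
                return ERR_PTR(-EOPNOTSUPP);

        return mlx5_create_flow_table(ns, &ft_attr);
}
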
index be5f5e3..508108c 100644 (file)
@@ -86,7 +86,8 @@ enum fs_flow_table_type {
        FS_FT_SNIFFER_RX        = 0X5,
        FS_FT_SNIFFER_TX        = 0X6,
        FS_FT_RDMA_RX           = 0X7,
-       FS_FT_MAX_TYPE = FS_FT_RDMA_RX,
+       FS_FT_RDMA_TX           = 0X8,
+       FS_FT_MAX_TYPE = FS_FT_RDMA_TX,
 };
 
 enum fs_flow_table_op_mod {
@@ -116,6 +117,7 @@ struct mlx5_flow_steering {
        struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
        struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
        struct mlx5_flow_root_namespace *rdma_rx_root_ns;
+       struct mlx5_flow_root_namespace *rdma_tx_root_ns;
        struct mlx5_flow_root_namespace *egress_root_ns;
 };
 
@@ -316,7 +318,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
        (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) :         \
        (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) :         \
        (type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) :               \
-       (BUILD_BUG_ON_ZERO(FS_FT_RDMA_RX != FS_FT_MAX_TYPE))\
+       (type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) :      \
+       (BUILD_BUG_ON_ZERO(FS_FT_RDMA_TX != FS_FT_MAX_TYPE))\
        )
 
 #endif
index 0e62c3d..2b90097 100644 (file)
@@ -1211,6 +1211,12 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \
        MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap)
 
+#define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \
+       MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap)
+
+#define MLX5_CAP_FLOWTABLE_RDMA_TX_MAX(mdev, cap) \
+       MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_rdma.cap)
+
 #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
        MLX5_GET(flow_table_eswitch_cap, \
                 mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
index d143b8b..6f8f79e 100644 (file)
@@ -213,23 +213,6 @@ enum mlx5_port_status {
        MLX5_PORT_DOWN      = 2,
 };
 
-struct mlx5_bfreg_info {
-       u32                    *sys_pages;
-       int                     num_low_latency_bfregs;
-       unsigned int           *count;
-
-       /*
-        * protect bfreg allocation data structs
-        */
-       struct mutex            lock;
-       u32                     ver;
-       bool                    lib_uar_4k;
-       u32                     num_sys_pages;
-       u32                     num_static_sys_pages;
-       u32                     total_num_bfregs;
-       u32                     num_dyn_bfregs;
-};
-
 struct mlx5_cmd_first {
        __be32          data[4];
 };
index a5cf5c7..e2d13e0 100644 (file)
@@ -77,6 +77,7 @@ enum mlx5_flow_namespace_type {
        MLX5_FLOW_NAMESPACE_EGRESS,
        MLX5_FLOW_NAMESPACE_RDMA_RX,
        MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
+       MLX5_FLOW_NAMESPACE_RDMA_TX,
 };
 
 enum {
index cc55cee..69b27c7 100644 (file)
@@ -709,7 +709,7 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit;
 
-       u8         reserved_at_a00[0x200];
+       struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_rdma;
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
@@ -879,7 +879,11 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
        u8         swp_csum[0x1];
        u8         swp_lso[0x1];
        u8         cqe_checksum_full[0x1];
-       u8         reserved_at_24[0x5];
+       u8         tunnel_stateless_geneve_tx[0x1];
+       u8         tunnel_stateless_mpls_over_udp[0x1];
+       u8         tunnel_stateless_mpls_over_gre[0x1];
+       u8         tunnel_stateless_vxlan_gpe[0x1];
+       u8         tunnel_stateless_ipv4_over_vxlan[0x1];
        u8         tunnel_stateless_ip_over_ip[0x1];
        u8         reserved_at_2a[0x6];
        u8         max_vxlan_udp_ports[0x8];
index 870b5e6..e06d133 100644 (file)
@@ -39,6 +39,7 @@
 
 int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
                   union ib_gid *gid);
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr);
 const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
                                        const union ib_gid *gid,
                                        enum ib_gid_type gid_type,
index 8ec482e..058cfbc 100644 (file)
@@ -360,7 +360,6 @@ struct ib_cm_req_param {
        u32                     starting_psn;
        const void              *private_data;
        u8                      private_data_len;
-       u8                      peer_to_peer;
        u8                      responder_resources;
        u8                      initiator_depth;
        u8                      remote_cm_response_timeout;
index f8982e4..2fd9bfb 100644 (file)
@@ -73,7 +73,7 @@ struct ib_pool_fmr {
        int                 remap_count;
        u64                 io_virtual_address;
        int                 page_list_len;
-       u64                 page_list[0];
+       u64                 page_list[];
 };
 
 struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
index 1f779fa..bbc5cfb 100644 (file)
@@ -1876,7 +1876,7 @@ struct ib_flow_eth_filter {
        __be16  ether_type;
        __be16  vlan_tag;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_eth {
@@ -1890,7 +1890,7 @@ struct ib_flow_ib_filter {
        __be16 dlid;
        __u8   sl;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_ib {
@@ -1915,7 +1915,7 @@ struct ib_flow_ipv4_filter {
        u8      ttl;
        u8      flags;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_ipv4 {
@@ -1933,7 +1933,7 @@ struct ib_flow_ipv6_filter {
        u8      traffic_class;
        u8      hop_limit;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_ipv6 {
@@ -1947,7 +1947,7 @@ struct ib_flow_tcp_udp_filter {
        __be16  dst_port;
        __be16  src_port;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_tcp_udp {
@@ -1959,7 +1959,7 @@ struct ib_flow_spec_tcp_udp {
 
 struct ib_flow_tunnel_filter {
        __be32  tunnel_id;
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 /* ib_flow_spec_tunnel describes the Vxlan tunnel
@@ -1976,7 +1976,7 @@ struct ib_flow_esp_filter {
        __be32  spi;
        __be32  seq;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_esp {
@@ -1991,7 +1991,7 @@ struct ib_flow_gre_filter {
        __be16 protocol;
        __be32 key;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_gre {
@@ -2004,7 +2004,7 @@ struct ib_flow_spec_gre {
 struct ib_flow_mpls_filter {
        __be32 tag;
        /* Must be last */
-       u8      real_sz[0];
+       u8      real_sz[];
 };
 
 struct ib_flow_spec_mpls {
@@ -3627,35 +3627,8 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
                                              bad_recv_wr ? : &dummy);
 }
 
-/**
- * ib_create_qp_user - Creates a QP associated with the specified protection
- *   domain.
- * @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
- *   QP.  If QP creation succeeds, then the attributes are updated to
- *   the actual capabilities of the created QP.
- * @udata: Valid user data or NULL for kernel objects
- */
-struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
-                               struct ib_qp_init_attr *qp_init_attr,
-                               struct ib_udata *udata);
-
-/**
- * ib_create_qp - Creates a kernel QP associated with the specified protection
- *   domain.
- * @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
- *   QP.  If QP creation succeeds, then the attributes are updated to
- *   the actual capabilities of the created QP.
- * @udata: Valid user data or NULL for kernel objects
- *
- * NOTE: for user qp use ib_create_qp_user with valid udata!
- */
-static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
-                                        struct ib_qp_init_attr *qp_init_attr)
-{
-       return ib_create_qp_user(pd, qp_init_attr, NULL);
-}
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+                          struct ib_qp_init_attr *qp_init_attr);
 
 /**
  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
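
With the hunk above, ib_create_qp() is again a plain exported function rather than an inline wrapper around ib_create_qp_user(), and kernel ULPs keep calling it exactly as before. A minimal caller sketch; the CQ wiring and capability values are chosen only for illustration:

#include <rdma/ib_verbs.h>

static struct ib_qp *make_rc_qp(struct ib_pd *pd, struct ib_cq *cq)
{
        struct ib_qp_init_attr init_attr = {
                .send_cq     = cq,
                .recv_cq     = cq,
                .qp_type     = IB_QPT_RC,
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .cap = {
                        .max_send_wr  = 16,     /* illustrative sizes */
                        .max_recv_wr  = 16,
                        .max_send_sge = 1,
                        .max_recv_sge = 1,
                },
        };

        return ib_create_qp(pd, &init_attr);    /* ERR_PTR() on failure */
}
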
index 0c07a70..e90b149 100644 (file)
@@ -75,7 +75,7 @@
 struct opa_vnic_rdma_netdev {
        struct rdma_netdev rn;  /* keep this first */
        /* followed by device private data */
-       char *dev_priv[0];
+       char *dev_priv[];
 };
 
 static inline void *opa_vnic_priv(const struct net_device *dev)
index 72a3856..ce6c888 100644 (file)
@@ -85,7 +85,7 @@ struct rvt_mregion {
        u8  lkey_published;     /* in global table */
        struct percpu_ref refcount;
        struct completion comp; /* complete when refcount goes to zero */
-       struct rvt_segarray *map[0];    /* the segments */
+       struct rvt_segarray *map[];    /* the segments */
 };
 
 #define RVT_MAX_LKEY_TABLE_BITS 23
index 0d5c70e..5fc1010 100644 (file)
@@ -191,7 +191,7 @@ struct rvt_swqe {
        u32 ssn;                /* send sequence number */
        u32 length;             /* total length of data in sg_list */
        void *priv;             /* driver dependent field */
-       struct rvt_sge sg_list[0];
+       struct rvt_sge sg_list[];
 };
 
 /**
index 28570ac..9f3b1e0 100644 (file)
@@ -173,7 +173,7 @@ enum uapi_radix_data {
        UVERBS_API_OBJ_KEY_BITS = 5,
        UVERBS_API_OBJ_KEY_SHIFT =
                UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
-       UVERBS_API_OBJ_KEY_NUM_CORE = 24,
+       UVERBS_API_OBJ_KEY_NUM_CORE = 20,
        UVERBS_API_OBJ_KEY_NUM_DRIVER =
                (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
        UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
index 624f5b5..df1cc36 100644 (file)
@@ -49,6 +49,7 @@ enum {
        MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
        MLX5_QP_FLAG_ALLOW_SCATTER_CQE  = 1 << 8,
        MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE   = 1 << 9,
+       MLX5_QP_FLAG_UAR_PAGE_INDEX = 1 << 10,
 };
 
 enum {
@@ -78,6 +79,7 @@ struct mlx5_ib_alloc_ucontext_req {
 
 enum mlx5_lib_caps {
        MLX5_LIB_CAP_4K_UAR     = (__u64)1 << 0,
+       MLX5_LIB_CAP_DYN_UAR    = (__u64)1 << 1,
 };
 
 enum mlx5_ib_alloc_uctx_v2_flags {
@@ -266,6 +268,7 @@ struct mlx5_ib_query_device_resp {
 
 enum mlx5_ib_create_cq_flags {
        MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD    = 1 << 0,
+       MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX  = 1 << 1,
 };
 
 struct mlx5_ib_create_cq {
@@ -275,6 +278,9 @@ struct mlx5_ib_create_cq {
        __u8    cqe_comp_en;
        __u8    cqe_comp_res_format;
        __u16   flags;
+       __u16   uar_page_index;
+       __u16   reserved0;
+       __u32   reserved1;
 };
 
 struct mlx5_ib_create_cq_resp {
index afe7da6..24f3388 100644 (file)
@@ -131,6 +131,23 @@ enum mlx5_ib_var_obj_methods {
        MLX5_IB_METHOD_VAR_OBJ_DESTROY,
 };
 
+enum mlx5_ib_uar_alloc_attrs {
+       MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+       MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+       MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+       MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+       MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_uar_obj_destroy_attrs {
+       MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_uar_obj_methods {
+       MLX5_IB_METHOD_UAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+       MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+};
+
 enum mlx5_ib_devx_umem_reg_attrs {
        MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
        MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
@@ -143,6 +160,22 @@ enum mlx5_ib_devx_umem_dereg_attrs {
        MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
 };
 
+enum mlx5_ib_pp_obj_methods {
+       MLX5_IB_METHOD_PP_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+       MLX5_IB_METHOD_PP_OBJ_DESTROY,
+};
+
+enum mlx5_ib_pp_alloc_attrs {
+       MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+       MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX,
+       MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+       MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+};
+
+enum mlx5_ib_pp_obj_destroy_attrs {
+       MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
 enum mlx5_ib_devx_umem_methods {
        MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
        MLX5_IB_METHOD_DEVX_UMEM_DEREG,
@@ -173,6 +206,8 @@ enum mlx5_ib_objects {
        MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
        MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
        MLX5_IB_OBJECT_VAR,
+       MLX5_IB_OBJECT_PP,
+       MLX5_IB_OBJECT_UAR,
 };
 
 enum mlx5_ib_flow_matcher_create_attrs {
index 88b6ca7..56b26ea 100644 (file)
@@ -44,6 +44,7 @@ enum mlx5_ib_uapi_flow_table_type {
        MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX     = 0x1,
        MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB        = 0x2,
        MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX    = 0x3,
+       MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX    = 0x4,
 };
 
 enum mlx5_ib_uapi_flow_action_packet_reformat_type {
@@ -73,5 +74,14 @@ struct mlx5_ib_uapi_devx_async_event_hdr {
        __u8            out_data[];
 };
 
+enum mlx5_ib_uapi_pp_alloc_flags {
+       MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX = 1 << 0,
+};
+
+enum mlx5_ib_uapi_uar_alloc_type {
+       MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF = 0x0,
+       MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC = 0x1,
+};
+
 #endif