Merge branch 'dynamic_sg' into rdma.git for-next
author Jason Gunthorpe <jgg@nvidia.com>
Fri, 9 Oct 2020 15:56:02 +0000 (12:56 -0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Fri, 16 Oct 2020 15:40:58 +0000 (12:40 -0300)
Maor Gottlieb says:

====================
This series extends __sg_alloc_table_from_pages to allow chaining of new
pages to an already initialized SG table.

This allows drivers to take advantage of the contiguous-page merging
optimization without needing to preallocate all the pages and hold them in a
very large temporary buffer before initializing the SG table; a usage sketch
of the extended helper follows the quoted cover letter.

The last patch changes the InfiniBand core to use the new API. It removes
duplicated functionality from the code and benefits from the optimization of
dynamically allocating the SG table from pages.

On a system using 2MB huge pages, without this change the SG table would
contain 512x as many SG entries (one per 4KB page instead of one merged
entry per contiguous 2MB region).
====================
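
For reference, below is a minimal sketch (not part of the merged diff) of how
the extended helper can be used to build an SG table incrementally, pinning
user pages in fixed-size batches rather than in one huge temporary array. The
__sg_alloc_table_from_pages() call follows the lib/scatterlist patch in this
series (prv and left_pages parameters, returning the last entry for chaining);
the batching wrapper build_sgt_in_batches() itself is hypothetical and is not
the exact RDMA/umem code.

#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>

/*
 * Illustrative sketch only: chain batches of pinned user pages into a
 * single sg_table.  'prv' carries the last scatterlist entry returned by
 * the previous call so the next batch is appended, and contiguous pages
 * across batches are still merged.  'sgt' is assumed to be
 * zero-initialized by the caller.
 */
static int build_sgt_in_batches(struct sg_table *sgt, unsigned long uaddr,
				unsigned long npages, unsigned int max_seg)
{
	struct scatterlist *prv = NULL;		/* NULL on the first call */
	struct page **batch;
	int ret;

	batch = (struct page **)__get_free_page(GFP_KERNEL);
	if (!batch)
		return -ENOMEM;

	while (npages) {
		unsigned long n = min_t(unsigned long, npages,
					PAGE_SIZE / sizeof(struct page *));

		ret = pin_user_pages_fast(uaddr, n,
					  FOLL_WRITE | FOLL_LONGTERM, batch);
		if (ret <= 0)
			goto err;

		uaddr += (unsigned long)ret << PAGE_SHIFT;
		npages -= ret;

		/* left_pages > 0 tells the helper more calls will follow */
		prv = __sg_alloc_table_from_pages(sgt, batch, ret, 0,
						  (unsigned long)ret << PAGE_SHIFT,
						  max_seg, prv, npages,
						  GFP_KERNEL);
		if (IS_ERR(prv)) {
			unpin_user_pages(batch, ret);
			ret = PTR_ERR(prv);
			goto err;
		}
	}

	free_page((unsigned long)batch);
	return 0;

err:
	/* real callers must also unpin the pages already chained into sgt */
	sg_free_table(sgt);
	free_page((unsigned long)batch);
	return ret < 0 ? ret : -EFAULT;
}

The last patch in the series applies this same pattern inside ib_umem_get(),
which lets it drop its private page-merging helper.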

* branch 'dynamic_sg':
  RDMA/umem: Move to allocate SG table from pages
  lib/scatterlist: Add support in dynamic allocation of SG table from pages
  tools/testing/scatterlist: Show errors in human readable form
  tools/testing/scatterlist: Rejuvenate bit-rotten test

211 files changed:
.clang-format
Documentation/ABI/stable/sysfs-class-infiniband
MAINTAINERS
drivers/infiniband/Kconfig
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cm_trace.c [new file with mode: 0644]
drivers/infiniband/core/cm_trace.h [new file with mode: 0644]
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_configfs.c
drivers/infiniband/core/cma_trace.h
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/counters.c
drivers/infiniband/core/cq.c
drivers/infiniband/core/device.c
drivers/infiniband/core/rdma_core.c
drivers/infiniband/core/restrack.c
drivers/infiniband/core/restrack.h
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_std_types.c
drivers/infiniband/core/uverbs_std_types_counters.c
drivers/infiniband/core/uverbs_std_types_cq.c
drivers/infiniband/core/uverbs_std_types_device.c
drivers/infiniband/core/uverbs_std_types_wq.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/bnxt_re/bnxt_re.h
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.h
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/bnxt_re/qplib_res.h
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/efa/efa.h
drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
drivers/infiniband/hw/efa/efa_com_cmd.c
drivers/infiniband/hw/efa/efa_com_cmd.h
drivers/infiniband/hw/efa/efa_verbs.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/hns_roce_ah.c
drivers/infiniband/hw/hns/hns_roce_alloc.c
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hem.c
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/hns/hns_roce_hw_v1.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_mr.c
drivers/infiniband/hw/hns/hns_roce_pd.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/hns/hns_roce_srq.c
drivers/infiniband/hw/i40iw/i40iw.h
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_hw.c
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_pble.c
drivers/infiniband/hw/i40iw/i40iw_type.h
drivers/infiniband/hw/i40iw/i40iw_utils.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/i40iw/i40iw_verbs.h
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cm.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mlx5/ah.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/counters.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/fs.c
drivers/infiniband/hw/mlx5/gsi.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/qp.h
drivers/infiniband/hw/mlx5/qpc.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/srq.h
drivers/infiniband/hw/mlx5/srq_cmd.c
drivers/infiniband/hw/mlx5/wr.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_provider.h
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_ah.h
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qedr/qedr.h
drivers/infiniband/hw/qedr/qedr_iw_cm.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/qedr/verbs.h
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/hw/qib/qib_sdma.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.h
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
drivers/infiniband/sw/rdmavt/ah.c
drivers/infiniband/sw/rdmavt/ah.h
drivers/infiniband/sw/rdmavt/cq.c
drivers/infiniband/sw/rdmavt/cq.h
drivers/infiniband/sw/rdmavt/pd.c
drivers/infiniband/sw/rdmavt/pd.h
drivers/infiniband/sw/rdmavt/srq.c
drivers/infiniband/sw/rdmavt/srq.h
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rxe/rxe.c
drivers/infiniband/sw/rxe/rxe.h
drivers/infiniband/sw/rxe/rxe_av.c
drivers/infiniband/sw/rxe/rxe_comp.c
drivers/infiniband/sw/rxe/rxe_cq.c
drivers/infiniband/sw/rxe/rxe_hdr.h
drivers/infiniband/sw/rxe/rxe_hw_counters.c
drivers/infiniband/sw/rxe/rxe_hw_counters.h
drivers/infiniband/sw/rxe/rxe_icrc.c
drivers/infiniband/sw/rxe/rxe_loc.h
drivers/infiniband/sw/rxe/rxe_mcast.c
drivers/infiniband/sw/rxe/rxe_mmap.c
drivers/infiniband/sw/rxe/rxe_mr.c
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_net.h
drivers/infiniband/sw/rxe/rxe_opcode.c
drivers/infiniband/sw/rxe/rxe_opcode.h
drivers/infiniband/sw/rxe/rxe_param.h
drivers/infiniband/sw/rxe/rxe_pool.c
drivers/infiniband/sw/rxe/rxe_pool.h
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_queue.c
drivers/infiniband/sw/rxe/rxe_queue.h
drivers/infiniband/sw/rxe/rxe_recv.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/infiniband/sw/rxe/rxe_srq.c
drivers/infiniband/sw/rxe/rxe_sysfs.c
drivers/infiniband/sw/rxe/rxe_task.c
drivers/infiniband/sw/rxe/rxe_task.h
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/sw/rxe/rxe_verbs.h
drivers/infiniband/sw/siw/siw_verbs.c
drivers/infiniband/sw/siw/siw_verbs.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_fs.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_netlink.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
drivers/infiniband/ulp/rtrs/rtrs-pri.h
drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
drivers/infiniband/ulp/rtrs/rtrs-srv.c
drivers/infiniband/ulp/rtrs/rtrs-srv.h
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/ethernet/qlogic/qed/qed_rdma.c
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_rdma.c
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/port.h
include/linux/overflow.h
include/linux/qed/qed_rdma_if.h
include/linux/qed/qede_rdma.h
include/rdma/ib_cache.h
include/rdma/ib_cm.h
include/rdma/ib_umem.h
include/rdma/ib_umem_odp.h
include/rdma/ib_verbs.h
include/rdma/rdma_cm.h
include/rdma/restrack.h
include/trace/events/rdma.h
include/trace/events/rpcrdma.h
include/uapi/rdma/efa-abi.h
include/uapi/rdma/hns-abi.h
include/uapi/rdma/ib_user_ioctl_cmds.h
include/uapi/rdma/ib_user_ioctl_verbs.h
include/uapi/rdma/ib_user_verbs.h
include/uapi/rdma/rdma_user_rxe.h

index badfc1b..95ec5da 100644 (file)
@@ -426,6 +426,7 @@ ForEachMacros:
   - 'rbtree_postorder_for_each_entry_safe'
   - 'rdma_for_each_block'
   - 'rdma_for_each_port'
+  - 'rdma_umem_for_each_dma_block'
   - 'resource_list_for_each_entry'
   - 'resource_list_for_each_entry_safe'
   - 'rhl_for_each_entry_rcu'
index 96dfe19..87b11f9 100644 (file)
@@ -258,23 +258,6 @@ Description:
                userspace ABI compatibility of umad & issm devices.
 
 
-What:          /sys/class/infiniband_cm/ucmN/ibdev
-Date:          Oct, 2005
-KernelVersion: v2.6.14
-Contact:       linux-rdma@vger.kernel.org
-Description:
-               (RO) Display Infiniband (IB) device name
-
-
-What:          /sys/class/infiniband_cm/abi_version
-Date:          Oct, 2005
-KernelVersion: v2.6.14
-Contact:       linux-rdma@vger.kernel.org
-Description:
-               (RO) Value is incremented if any changes are made that break
-               userspace ABI compatibility of ucm devices.
-
-
 What:          /sys/class/infiniband_verbs/uverbsN/ibdev
 What:          /sys/class/infiniband_verbs/uverbsN/abi_version
 Date:          Sept, 2005
index 33b27e6..3361460 100644 (file)
@@ -4247,7 +4247,6 @@ F:        drivers/net/ethernet/cisco/enic/
 CISCO VIC LOW LATENCY NIC DRIVER
 M:     Christian Benvenuti <benve@cisco.com>
 M:     Nelson Escobar <neescoba@cisco.com>
-M:     Parvi Kaustubhi <pkaustub@cisco.com>
 S:     Supported
 F:     drivers/infiniband/hw/usnic/
 
@@ -7745,8 +7744,8 @@ F:        include/linux/cciss*.h
 F:     include/uapi/linux/cciss*.h
 
 HFI1 DRIVER
-M:     Mike Marciniszyn <mike.marciniszyn@intel.com>
-M:     Dennis Dalessandro <dennis.dalessandro@intel.com>
+M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/hw/hfi1
@@ -12883,8 +12882,8 @@ S:      Maintained
 F:     drivers/char/hw_random/optee-rng.c
 
 OPA-VNIC DRIVER
-M:     Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:     Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/ulp/opa_vnic
@@ -14183,8 +14182,8 @@ F:      drivers/firmware/qemu_fw_cfg.c
 F:     include/uapi/linux/qemu_fw_cfg.h
 
 QIB DRIVER
-M:     Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:     Mike Marciniszyn <mike.marciniszyn@intel.com>
+M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/hw/qib/
@@ -14606,8 +14605,8 @@ S:      Maintained
 F:     drivers/net/ethernet/rdc/r6040.c
 
 RDMAVT - RDMA verbs software
-M:     Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:     Mike Marciniszyn <mike.marciniszyn@intel.com>
+M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/sw/rdmavt
index 91b0233..32a5143 100644 (file)
@@ -48,6 +48,7 @@ config INFINIBAND_ON_DEMAND_PAGING
        depends on INFINIBAND_USER_MEM
        select MMU_NOTIFIER
        select INTERVAL_TREE
+       select HMM_MIRROR
        default y
        help
          On demand paging support for the InfiniBand subsystem.
index 24cb71a..ccf2670 100644 (file)
@@ -17,7 +17,7 @@ ib_core-y :=                  packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
 ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
 ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
 
-ib_cm-y :=                     cm.o
+ib_cm-y :=                     cm.o cm_trace.o
 
 iw_cm-y :=                     iwcm.o iwpm_util.o iwpm_msg.o
 
index 3a98439..0abce00 100644 (file)
@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work)
        req->callback = NULL;
 
        spin_lock_bh(&lock);
+       /*
+        * Although the work will normally have been canceled by the workqueue,
+        * it can still be requeued as long as it is on the req_list.
+        */
+       cancel_delayed_work(&req->work);
        if (!list_empty(&req->list)) {
-               /*
-                * Although the work will normally have been canceled by the
-                * workqueue, it can still be requeued as long as it is on the
-                * req_list.
-                */
-               cancel_delayed_work(&req->work);
                list_del_init(&req->list);
                kfree(req);
        }
index ffad73b..3a86a10 100644 (file)
@@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
 }
 
 static const char * const gid_type_str[] = {
+       /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+        * user space compatibility reasons.
+        */
        [IB_GID_TYPE_IB]        = "IB/RoCE v1",
+       [IB_GID_TYPE_ROCE]      = "IB/RoCE v1",
        [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
 };
 
@@ -1220,7 +1224,7 @@ EXPORT_SYMBOL(ib_get_cached_port_state);
 const struct ib_gid_attr *
 rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
 {
-       const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+       const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
        struct ib_gid_table *table;
        unsigned long flags;
 
@@ -1243,6 +1247,67 @@ done:
 }
 EXPORT_SYMBOL(rdma_get_gid_attr);
 
+/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ * @num_entries: Updated to the number of entries that were successfully read.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+                            struct ib_uverbs_gid_entry *entries,
+                            size_t max_entries)
+{
+       const struct ib_gid_attr *gid_attr;
+       ssize_t num_entries = 0, ret;
+       struct ib_gid_table *table;
+       unsigned int port_num, i;
+       struct net_device *ndev;
+       unsigned long flags;
+
+       rdma_for_each_port(device, port_num) {
+               if (!rdma_ib_or_roce(device, port_num))
+                       continue;
+
+               table = rdma_gid_table(device, port_num);
+               read_lock_irqsave(&table->rwlock, flags);
+               for (i = 0; i < table->sz; i++) {
+                       if (!is_gid_entry_valid(table->data_vec[i]))
+                               continue;
+                       if (num_entries >= max_entries) {
+                               ret = -EINVAL;
+                               goto err;
+                       }
+
+                       gid_attr = &table->data_vec[i]->attr;
+
+                       memcpy(&entries->gid, &gid_attr->gid,
+                              sizeof(gid_attr->gid));
+                       entries->gid_index = gid_attr->index;
+                       entries->port_num = gid_attr->port_num;
+                       entries->gid_type = gid_attr->gid_type;
+                       ndev = rcu_dereference_protected(
+                               gid_attr->ndev,
+                               lockdep_is_held(&table->rwlock));
+                       if (ndev)
+                               entries->netdev_ifindex = ndev->ifindex;
+
+                       num_entries++;
+                       entries++;
+               }
+               read_unlock_irqrestore(&table->rwlock, flags);
+       }
+
+       return num_entries;
+err:
+       read_unlock_irqrestore(&table->rwlock, flags);
+       return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
 /**
  * rdma_put_gid_attr - Release reference to the GID attribute
  * @attr:              Pointer to the GID attribute whose reference
@@ -1299,7 +1364,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
        struct ib_gid_table_entry *entry =
                        container_of(attr, struct ib_gid_table_entry, attr);
        struct ib_device *device = entry->attr.device;
-       struct net_device *ndev = ERR_PTR(-ENODEV);
+       struct net_device *ndev = ERR_PTR(-EINVAL);
        u8 port_num = entry->attr.port_num;
        struct ib_gid_table *table;
        unsigned long flags;
@@ -1311,8 +1376,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
        valid = is_gid_entry_valid(table->data_vec[attr->index]);
        if (valid) {
                ndev = rcu_dereference(attr->ndev);
-               if (!ndev ||
-                   (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+               if (!ndev)
                        ndev = ERR_PTR(-ENODEV);
        }
        read_unlock_irqrestore(&table->rwlock, flags);
index fbc28f1..5740d1b 100644 (file)
@@ -27,6 +27,7 @@
 #include <rdma/ib_cm.h>
 #include "cm_msgs.h"
 #include "core_priv.h"
+#include "cm_trace.h"
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
@@ -201,7 +202,6 @@ static struct attribute *cm_counter_default_attrs[] = {
 struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
-       struct kobject port_obj;
        u8 port_num;
        struct list_head cm_priv_prim_list;
        struct list_head cm_priv_altr_list;
@@ -1563,6 +1563,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
        cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
        cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
 
+       trace_icm_send_req(&cm_id_priv->id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        ret = ib_post_send_mad(cm_id_priv->msg, NULL);
        if (ret) {
@@ -1610,6 +1611,9 @@ static int cm_issue_rej(struct cm_port *port,
                IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
        }
 
+       trace_icm_issue_rej(
+               IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+               IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);
@@ -1961,6 +1965,7 @@ static void cm_dup_req_handler(struct cm_work *work,
        }
        spin_unlock_irq(&cm_id_priv->lock);
 
+       trace_icm_send_dup_req(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                goto free;
@@ -2124,8 +2129,7 @@ static int cm_req_handler(struct cm_work *work)
 
        listen_cm_id_priv = cm_match_req(work, cm_id_priv);
        if (!listen_cm_id_priv) {
-               pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
-                        be32_to_cpu(cm_id_priv->id.local_id));
+               trace_icm_no_listener_err(&cm_id_priv->id);
                cm_id_priv->id.state = IB_CM_IDLE;
                ret = -EINVAL;
                goto destroy;
@@ -2274,8 +2278,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_REQ_RCVD &&
            cm_id->state != IB_CM_MRA_REQ_SENT) {
-               pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
-                        be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+               trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
                ret = -EINVAL;
                goto out;
        }
@@ -2289,6 +2292,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
        msg->timeout_ms = cm_id_priv->timeout_ms;
        msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
 
+       trace_icm_send_rep(cm_id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2348,8 +2352,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_REP_RCVD &&
            cm_id->state != IB_CM_MRA_REP_SENT) {
-               pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
-                        be32_to_cpu(cm_id->local_id), cm_id->state);
+               trace_icm_send_cm_rtu_err(cm_id);
                ret = -EINVAL;
                goto error;
        }
@@ -2361,6 +2364,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
        cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
                      private_data, private_data_len);
 
+       trace_icm_send_rtu(cm_id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2442,6 +2446,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
                goto unlock;
        spin_unlock_irq(&cm_id_priv->lock);
 
+       trace_icm_send_dup_rep(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                goto free;
@@ -2465,7 +2470,7 @@ static int cm_rep_handler(struct cm_work *work)
                cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
        if (!cm_id_priv) {
                cm_dup_rep_handler(work);
-               pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+               trace_icm_remote_no_priv_err(
                         IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
                return -EINVAL;
        }
@@ -2479,11 +2484,10 @@ static int cm_rep_handler(struct cm_work *work)
                break;
        default:
                ret = -EINVAL;
-               pr_debug(
-                       "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
-                       __func__, cm_id_priv->id.state,
+               trace_icm_rep_unknown_err(
                        IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
-                       IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+                       IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+                       cm_id_priv->id.state);
                spin_unlock_irq(&cm_id_priv->lock);
                goto error;
        }
@@ -2500,7 +2504,7 @@ static int cm_rep_handler(struct cm_work *work)
                spin_unlock(&cm.lock);
                spin_unlock_irq(&cm_id_priv->lock);
                ret = -EINVAL;
-               pr_debug("%s: Failed to insert remote id %d\n", __func__,
+               trace_icm_insert_failed_err(
                         IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
                goto error;
        }
@@ -2517,9 +2521,8 @@ static int cm_rep_handler(struct cm_work *work)
                             IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
                             NULL, 0);
                ret = -EINVAL;
-               pr_debug(
-                       "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
-                       __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+               trace_icm_staleconn_err(
+                       IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
                        IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 
                if (cur_cm_id_priv) {
@@ -2646,9 +2649,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
                return -EINVAL;
 
        if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
-               pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-                        be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_dreq_skipped(&cm_id_priv->id);
                return -EINVAL;
        }
 
@@ -2667,6 +2668,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
        msg->timeout_ms = cm_id_priv->timeout_ms;
        msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
 
+       trace_icm_send_dreq(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_enter_timewait(cm_id_priv);
@@ -2722,10 +2724,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
                return -EINVAL;
 
        if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
-               pr_debug(
-                       "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
-                       __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                       cm_id_priv->id.state);
+               trace_icm_send_drep_err(&cm_id_priv->id);
                kfree(private_data);
                return -EINVAL;
        }
@@ -2740,6 +2739,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
        cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
                       private_data, private_data_len);
 
+       trace_icm_send_drep(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_free_msg(msg);
@@ -2789,6 +2789,9 @@ static int cm_issue_drep(struct cm_port *port,
        IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
                IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 
+       trace_icm_issue_drep(
+               IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+               IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);
@@ -2810,9 +2813,8 @@ static int cm_dreq_handler(struct cm_work *work)
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_DREQ_COUNTER]);
                cm_issue_drep(work->port, work->mad_recv_wc);
-               pr_debug(
-                       "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
-                       __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+               trace_icm_no_priv_err(
+                       IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
                        IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
                return -EINVAL;
        }
@@ -2858,9 +2860,7 @@ static int cm_dreq_handler(struct cm_work *work)
                                counter[CM_DREQ_COUNTER]);
                goto unlock;
        default:
-               pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_dreq_unknown_err(&cm_id_priv->id);
                goto unlock;
        }
        cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2945,12 +2945,11 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
                              state);
                break;
        default:
-               pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-                        be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_send_unknown_rej_err(&cm_id_priv->id);
                return -EINVAL;
        }
 
+       trace_icm_send_rej(&cm_id_priv->id, reason);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_free_msg(msg);
@@ -3060,9 +3059,7 @@ static int cm_rej_handler(struct cm_work *work)
                }
                fallthrough;
        default:
-               pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_rej_unknown_err(&cm_id_priv->id);
                spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
@@ -3118,9 +3115,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
                }
                fallthrough;
        default:
-               pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_send_mra_unknown_err(&cm_id_priv->id);
                ret = -EINVAL;
                goto error1;
        }
@@ -3133,6 +3128,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
                              msg_response, service_timeout,
                              private_data, private_data_len);
+               trace_icm_send_mra(cm_id);
                ret = ib_post_send_mad(msg, NULL);
                if (ret)
                        goto error2;
@@ -3229,9 +3225,7 @@ static int cm_mra_handler(struct cm_work *work)
                                counter[CM_MRA_COUNTER]);
                fallthrough;
        default:
-               pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_mra_unknown_err(&cm_id_priv->id);
                goto out;
        }
 
@@ -3505,10 +3499,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
        msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
 
        spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id->state == IB_CM_IDLE)
+       if (cm_id->state == IB_CM_IDLE) {
+               trace_icm_send_sidr_req(&cm_id_priv->id);
                ret = ib_post_send_mad(msg, NULL);
-       else
+       } else {
                ret = -EINVAL;
+       }
 
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3670,6 +3666,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
 
        cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
                           param);
+       trace_icm_send_sidr_rep(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_free_msg(msg);
@@ -3767,8 +3764,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
        if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
                goto discard;
 
-       pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
-                            state, ib_wc_status_msg(wc_status));
+       trace_icm_mad_send_err(state, wc_status);
        switch (state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
@@ -3891,7 +3887,7 @@ static void cm_work_handler(struct work_struct *_work)
                ret = cm_timewait_handler(work);
                break;
        default:
-               pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+               trace_icm_handler_err(work->cm_event.event);
                ret = -EINVAL;
                break;
        }
@@ -3927,8 +3923,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
                ret = -EISCONN;
                break;
        default:
-               pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-                        be32_to_cpu(cm_id->local_id), cm_id->state);
+               trace_icm_establish_err(cm_id);
                ret = -EINVAL;
                break;
        }
@@ -4125,9 +4120,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
                ret = 0;
                break;
        default:
-               pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_qp_init_err(&cm_id_priv->id);
                ret = -EINVAL;
                break;
        }
@@ -4175,9 +4168,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
                ret = 0;
                break;
        default:
-               pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_qp_rtr_err(&cm_id_priv->id);
                ret = -EINVAL;
                break;
        }
@@ -4237,9 +4228,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
                ret = 0;
                break;
        default:
-               pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-                        __func__, be32_to_cpu(cm_id_priv->id.local_id),
-                        cm_id_priv->id.state);
+               trace_icm_qp_rts_err(&cm_id_priv->id);
                ret = -EINVAL;
                break;
        }
@@ -4295,20 +4284,6 @@ static struct kobj_type cm_counter_obj_type = {
        .default_attrs = cm_counter_default_attrs
 };
 
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
-       if (mode)
-               *mode = 0666;
-       return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
-}
-
-struct class cm_class = {
-       .owner   = THIS_MODULE,
-       .name    = "infiniband_cm",
-       .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
 static int cm_create_port_fs(struct cm_port *port)
 {
        int i, ret;
@@ -4511,12 +4486,6 @@ static int __init ib_cm_init(void)
        get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
        INIT_LIST_HEAD(&cm.timewait_list);
 
-       ret = class_register(&cm_class);
-       if (ret) {
-               ret = -ENOMEM;
-               goto error1;
-       }
-
        cm.wq = alloc_workqueue("ib_cm", 0, 1);
        if (!cm.wq) {
                ret = -ENOMEM;
@@ -4531,8 +4500,6 @@ static int __init ib_cm_init(void)
 error3:
        destroy_workqueue(cm.wq);
 error2:
-       class_unregister(&cm_class);
-error1:
        return ret;
 }
 
@@ -4553,7 +4520,6 @@ static void __exit ib_cm_cleanup(void)
                kfree(timewait_info);
        }
 
-       class_unregister(&cm_class);
        WARN_ON(!xa_empty(&cm.local_id_table));
 }
 
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644 (file)
index 0000000..8f3482f
--- /dev/null
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644 (file)
index 0000000..e9d2826
--- /dev/null
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST                                       \
+       ib_cm_state(IDLE)                                       \
+       ib_cm_state(LISTEN)                                     \
+       ib_cm_state(REQ_SENT)                                   \
+       ib_cm_state(REQ_RCVD)                                   \
+       ib_cm_state(MRA_REQ_SENT)                               \
+       ib_cm_state(MRA_REQ_RCVD)                               \
+       ib_cm_state(REP_SENT)                                   \
+       ib_cm_state(REP_RCVD)                                   \
+       ib_cm_state(MRA_REP_SENT)                               \
+       ib_cm_state(MRA_REP_RCVD)                               \
+       ib_cm_state(ESTABLISHED)                                \
+       ib_cm_state(DREQ_SENT)                                  \
+       ib_cm_state(DREQ_RCVD)                                  \
+       ib_cm_state(TIMEWAIT)                                   \
+       ib_cm_state(SIDR_REQ_SENT)                              \
+       ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef  ib_cm_state
+#undef  ib_cm_state_end
+#define ib_cm_state(x)         TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x)     TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef  ib_cm_state
+#undef  ib_cm_state_end
+#define ib_cm_state(x)         { IB_CM_##x, #x },
+#define ib_cm_state_end(x)     { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+               __print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST                                   \
+       ib_cm_lap_state(LAP_UNINIT)                             \
+       ib_cm_lap_state(LAP_IDLE)                               \
+       ib_cm_lap_state(LAP_SENT)                               \
+       ib_cm_lap_state(LAP_RCVD)                               \
+       ib_cm_lap_state(MRA_LAP_SENT)                           \
+       ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef  ib_cm_lap_state
+#undef  ib_cm_lap_state_end
+#define ib_cm_lap_state(x)     TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef  ib_cm_lap_state
+#undef  ib_cm_lap_state_end
+#define ib_cm_lap_state(x)     { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+               __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST                                  \
+       ib_cm_rej_reason(REJ_NO_QP)                             \
+       ib_cm_rej_reason(REJ_NO_EEC)                            \
+       ib_cm_rej_reason(REJ_NO_RESOURCES)                      \
+       ib_cm_rej_reason(REJ_TIMEOUT)                           \
+       ib_cm_rej_reason(REJ_UNSUPPORTED)                       \
+       ib_cm_rej_reason(REJ_INVALID_COMM_ID)                   \
+       ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE)             \
+       ib_cm_rej_reason(REJ_INVALID_SERVICE_ID)                \
+       ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE)            \
+       ib_cm_rej_reason(REJ_STALE_CONN)                        \
+       ib_cm_rej_reason(REJ_RDC_NOT_EXIST)                     \
+       ib_cm_rej_reason(REJ_INVALID_GID)                       \
+       ib_cm_rej_reason(REJ_INVALID_LID)                       \
+       ib_cm_rej_reason(REJ_INVALID_SL)                        \
+       ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS)             \
+       ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT)                 \
+       ib_cm_rej_reason(REJ_INVALID_PACKET_RATE)               \
+       ib_cm_rej_reason(REJ_INVALID_ALT_GID)                   \
+       ib_cm_rej_reason(REJ_INVALID_ALT_LID)                   \
+       ib_cm_rej_reason(REJ_INVALID_ALT_SL)                    \
+       ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS)         \
+       ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT)             \
+       ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE)           \
+       ib_cm_rej_reason(REJ_PORT_CM_REDIRECT)                  \
+       ib_cm_rej_reason(REJ_PORT_REDIRECT)                     \
+       ib_cm_rej_reason(REJ_INVALID_MTU)                       \
+       ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES)       \
+       ib_cm_rej_reason(REJ_CONSUMER_DEFINED)                  \
+       ib_cm_rej_reason(REJ_INVALID_RNR_RETRY)                 \
+       ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID)           \
+       ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION)             \
+       ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL)                \
+       ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL)            \
+       ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef  ib_cm_rej_reason
+#undef  ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x)    TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x)        TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef  ib_cm_rej_reason
+#undef  ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x)    { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x)        { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+               __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+       TP_PROTO(
+               const struct ib_cm_id *cm_id
+       ),
+
+       TP_ARGS(cm_id),
+
+       TP_STRUCT__entry(
+               __field(const void *, cm_id)    /* for eBPF scripts */
+               __field(unsigned int, local_id)
+               __field(unsigned int, remote_id)
+               __field(unsigned long, state)
+               __field(unsigned long, lap_state)
+       ),
+
+       TP_fast_assign(
+               __entry->cm_id = cm_id;
+               __entry->local_id = be32_to_cpu(cm_id->local_id);
+               __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+               __entry->state = cm_id->state;
+               __entry->lap_state = cm_id->lap_state;
+       ),
+
+       TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+               __entry->local_id, __entry->remote_id,
+               show_ib_cm_state(__entry->state),
+               show_ib_cm_lap_state(__entry->lap_state)
+       )
+);
+
+#define DEFINE_CM_SEND_EVENT(name)                                     \
+               DEFINE_EVENT(icm_id_class,                              \
+                               icm_send_##name,                                \
+                               TP_PROTO(                               \
+                                       const struct ib_cm_id *cm_id    \
+                               ),                                      \
+                               TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+       TP_PROTO(
+               const struct ib_cm_id *cm_id,
+               enum ib_cm_rej_reason reason
+       ),
+
+       TP_ARGS(cm_id, reason),
+
+       TP_STRUCT__entry(
+               __field(const void *, cm_id)
+               __field(u32, local_id)
+               __field(u32, remote_id)
+               __field(unsigned long, state)
+               __field(unsigned long, reason)
+       ),
+
+       TP_fast_assign(
+               __entry->cm_id = cm_id;
+               __entry->local_id = be32_to_cpu(cm_id->local_id);
+               __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+               __entry->state = cm_id->state;
+               __entry->reason = reason;
+       ),
+
+       TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+               __entry->local_id, __entry->remote_id,
+               show_ib_cm_state(__entry->state),
+               show_ib_cm_rej_reason(__entry->reason)
+       )
+);
+
+#define DEFINE_CM_ERR_EVENT(name)                                      \
+               DEFINE_EVENT(icm_id_class,                              \
+                               icm_##name##_err,                       \
+                               TP_PROTO(                               \
+                                       const struct ib_cm_id *cm_id    \
+                               ),                                      \
+                               TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class,                                             \
+       icm_dreq_skipped,                                               \
+       TP_PROTO(                                                       \
+               const struct ib_cm_id *cm_id                            \
+       ),                                                              \
+       TP_ARGS(cm_id)                                                  \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+       TP_PROTO(
+               unsigned int local_id,
+               unsigned int remote_id
+       ),
+
+       TP_ARGS(local_id, remote_id),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, local_id)
+               __field(unsigned int, remote_id)
+       ),
+
+       TP_fast_assign(
+               __entry->local_id = local_id;
+               __entry->remote_id = remote_id;
+       ),
+
+       TP_printk("local_id=%u remote_id=%u",
+               __entry->local_id, __entry->remote_id
+       )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name)                                    \
+               DEFINE_EVENT(icm_local_class,                           \
+                               icm_##name,                             \
+                               TP_PROTO(                               \
+                                       unsigned int local_id,                  \
+                                       unsigned int remote_id                  \
+                               ),                                      \
+                               TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+       TP_PROTO(
+               u32 remote_id
+       ),
+
+       TP_ARGS(remote_id),
+
+       TP_STRUCT__entry(
+               __field(u32, remote_id)
+       ),
+
+       TP_fast_assign(
+               __entry->remote_id = remote_id;
+       ),
+
+       TP_printk("remote_id=%u",
+               __entry->remote_id
+       )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name)                                   \
+               DEFINE_EVENT(icm_remote_class,                          \
+                               icm_##name,                             \
+                               TP_PROTO(                               \
+                                       u32 remote_id                   \
+                               ),                                      \
+                               TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+       TP_PROTO(
+               __be32 local_id,
+               enum ib_cm_state state
+       ),
+
+       TP_ARGS(local_id, state),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, local_id)
+               __field(unsigned long, state)
+       ),
+
+       TP_fast_assign(
+               __entry->local_id = be32_to_cpu(local_id);
+               __entry->state = state;
+       ),
+
+       TP_printk("local_id=%u state=%s",
+               __entry->local_id, show_ib_cm_state(__entry->state)
+       )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+       TP_PROTO(
+               unsigned int local_id,
+               unsigned int remote_id,
+               enum ib_cm_state state
+       ),
+
+       TP_ARGS(local_id, remote_id, state),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, local_id)
+               __field(unsigned int, remote_id)
+               __field(unsigned long, state)
+       ),
+
+       TP_fast_assign(
+               __entry->local_id = local_id;
+               __entry->remote_id = remote_id;
+               __entry->state = state;
+       ),
+
+       TP_printk("local_id=%u remote_id=%u state=%s",
+               __entry->local_id, __entry->remote_id,
+               show_ib_cm_state(__entry->state)
+       )
+);
+
+TRACE_EVENT(icm_handler_err,
+       TP_PROTO(
+               enum ib_cm_event_type event
+       ),
+
+       TP_ARGS(event),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, event)
+       ),
+
+       TP_fast_assign(
+               __entry->event = event;
+       ),
+
+       TP_printk("unhandled event=%s",
+               rdma_show_ib_cm_event(__entry->event)
+       )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+       TP_PROTO(
+               enum ib_cm_state state,
+               enum ib_wc_status wc_status
+       ),
+
+       TP_ARGS(state, wc_status),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, state)
+               __field(unsigned long, wc_status)
+       ),
+
+       TP_fast_assign(
+               __entry->state = state;
+               __entry->wc_status = wc_status;
+       ),
+
+       TP_printk("state=%s completion status=%s",
+               show_ib_cm_state(__entry->state),
+               rdma_show_wc_status(__entry->wc_status)
+       )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
index 7f0e91e..09a8447 100644 (file)
@@ -68,6 +68,9 @@ static const char * const cma_events[] = {
        [RDMA_CM_EVENT_TIMEWAIT_EXIT]    = "timewait exit",
 };
 
+static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
+                        union ib_gid *mgid);
+
 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 {
        size_t index = event;
@@ -301,6 +304,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
        if (!rdma_is_port_valid(cma_dev->device, port))
                return -EINVAL;
 
+       if (default_gid_type == IB_GID_TYPE_IB &&
+           rdma_protocol_roce_eth_encap(cma_dev->device, port))
+               default_gid_type = IB_GID_TYPE_ROCE;
+
        supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
 
        if (!(supported_gids & 1 << default_gid_type))
@@ -345,13 +352,10 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
 
 struct cma_multicast {
        struct rdma_id_private *id_priv;
-       union {
-               struct ib_sa_multicast *ib;
-       } multicast;
+       struct ib_sa_multicast *sa_mc;
        struct list_head        list;
        void                    *context;
        struct sockaddr_storage addr;
-       struct kref             mcref;
        u8                      join_state;
 };
 
@@ -363,18 +367,6 @@ struct cma_work {
        struct rdma_cm_event    event;
 };
 
-struct cma_ndev_work {
-       struct work_struct      work;
-       struct rdma_id_private  *id;
-       struct rdma_cm_event    event;
-};
-
-struct iboe_mcast_work {
-       struct work_struct       work;
-       struct rdma_id_private  *id;
-       struct cma_multicast    *mc;
-};
-
 union cma_ip_addr {
        struct in6_addr ip6;
        struct {
@@ -404,23 +396,21 @@ struct cma_req_info {
        u16 pkey;
 };
 
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&id_priv->lock, flags);
-       ret = (id_priv->state == comp);
-       spin_unlock_irqrestore(&id_priv->lock, flags);
-       return ret;
-}
-
 static int cma_comp_exch(struct rdma_id_private *id_priv,
                         enum rdma_cm_state comp, enum rdma_cm_state exch)
 {
        unsigned long flags;
        int ret;
 
+       /*
+        * The FSM uses a funny double locking where state is protected by both
+        * the handler_mutex and the spinlock. State is not allowed to change
+        * away from a handler_mutex protected value without also holding
+        * handler_mutex.
+        */
+       if (comp == RDMA_CM_CONNECT)
+               lockdep_assert_held(&id_priv->handler_mutex);
+
        spin_lock_irqsave(&id_priv->lock, flags);
        if ((ret = (id_priv->state == comp)))
                id_priv->state = exch;
@@ -467,10 +457,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
        id_priv->id.route.addr.dev_addr.transport =
                rdma_node_get_transport(cma_dev->device->node_type);
        list_add_tail(&id_priv->list, &cma_dev->id_list);
-       if (id_priv->res.kern_name)
-               rdma_restrack_kadd(&id_priv->res);
-       else
-               rdma_restrack_uadd(&id_priv->res);
+       rdma_restrack_add(&id_priv->res);
+
        trace_cm_id_attach(id_priv, cma_dev->device);
 }
 
@@ -483,14 +471,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
                                          rdma_start_port(cma_dev->device)];
 }
 
-static inline void release_mc(struct kref *kref)
-{
-       struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
-       kfree(mc->multicast.ib);
-       kfree(mc);
-}
-
 static void cma_release_dev(struct rdma_id_private *id_priv)
 {
        mutex_lock(&lock);
@@ -844,10 +824,10 @@ static void cma_id_put(struct rdma_id_private *id_priv)
                complete(&id_priv->comp);
 }
 
-struct rdma_cm_id *__rdma_create_id(struct net *net,
-                                   rdma_cm_event_handler event_handler,
-                                   void *context, enum rdma_ucm_port_space ps,
-                                   enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+                void *context, enum rdma_ucm_port_space ps,
+                enum ib_qp_type qp_type, const struct rdma_id_private *parent)
 {
        struct rdma_id_private *id_priv;
 
@@ -855,8 +835,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
        if (!id_priv)
                return ERR_PTR(-ENOMEM);
 
-       rdma_restrack_set_task(&id_priv->res, caller);
-       id_priv->res.type = RDMA_RESTRACK_CM_ID;
        id_priv->state = RDMA_CM_IDLE;
        id_priv->id.context = context;
        id_priv->id.event_handler = event_handler;
@@ -876,9 +854,45 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
        id_priv->id.route.addr.dev_addr.net = get_net(net);
        id_priv->seq_num &= 0x00ffffff;
 
-       return &id_priv->id;
+       rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+       if (parent)
+               rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+       return id_priv;
+}
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+                       void *context, enum rdma_ucm_port_space ps,
+                       enum ib_qp_type qp_type, const char *caller)
+{
+       struct rdma_id_private *ret;
+
+       ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+       if (IS_ERR(ret))
+               return ERR_CAST(ret);
+
+       rdma_restrack_set_name(&ret->res, caller);
+       return &ret->id;
 }
-EXPORT_SYMBOL(__rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+                                      void *context,
+                                      enum rdma_ucm_port_space ps,
+                                      enum ib_qp_type qp_type)
+{
+       struct rdma_id_private *ret;
+
+       ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+                              ps, qp_type, NULL);
+       if (IS_ERR(ret))
+               return ERR_CAST(ret);
+
+       rdma_restrack_set_name(&ret->res, NULL);
+       return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
 
 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
@@ -1783,19 +1797,30 @@ static void cma_release_port(struct rdma_id_private *id_priv)
        mutex_unlock(&lock);
 }
 
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
-                                   struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+                      struct cma_multicast *mc)
 {
-       struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-       struct net_device *ndev = NULL;
+       if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+               ib_sa_free_multicast(mc->sa_mc);
 
-       if (dev_addr->bound_dev_if)
-               ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
-       if (ndev) {
-               cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
-               dev_put(ndev);
+       if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+               struct rdma_dev_addr *dev_addr =
+                       &id_priv->id.route.addr.dev_addr;
+               struct net_device *ndev = NULL;
+
+               if (dev_addr->bound_dev_if)
+                       ndev = dev_get_by_index(dev_addr->net,
+                                               dev_addr->bound_dev_if);
+               if (ndev) {
+                       union ib_gid mgid;
+
+                       cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
+                                    &mgid);
+                       cma_igmp_send(ndev, &mgid, false);
+                       dev_put(ndev);
+               }
        }
-       kref_put(&mc->mcref, release_mc);
+       kfree(mc);
 }
 
 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -1803,16 +1828,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
        struct cma_multicast *mc;
 
        while (!list_empty(&id_priv->mc_list)) {
-               mc = container_of(id_priv->mc_list.next,
-                                 struct cma_multicast, list);
+               mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+                                     list);
                list_del(&mc->list);
-               if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
-                                     id_priv->id.port_num)) {
-                       ib_sa_free_multicast(mc->multicast.ib);
-                       kfree(mc);
-               } else {
-                       cma_leave_roce_mc_group(id_priv, mc);
-               }
+               destroy_mc(id_priv, mc);
        }
 }
 
@@ -1821,7 +1840,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 {
        cma_cancel_operation(id_priv, state);
 
-       rdma_restrack_del(&id_priv->res);
        if (id_priv->cma_dev) {
                if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
                        if (id_priv->cm_id.ib)
@@ -1847,6 +1865,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
                rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
 
        put_net(id_priv->id.route.addr.dev_addr.net);
+       rdma_restrack_del(&id_priv->res);
        kfree(id_priv);
 }
 
@@ -1949,13 +1968,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
 {
        struct rdma_id_private *id_priv = cm_id->context;
        struct rdma_cm_event event = {};
+       enum rdma_cm_state state;
        int ret;
 
        mutex_lock(&id_priv->handler_mutex);
+       state = READ_ONCE(id_priv->state);
        if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-            id_priv->state != RDMA_CM_CONNECT) ||
+            state != RDMA_CM_CONNECT) ||
            (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-            id_priv->state != RDMA_CM_DISCONNECT))
+            state != RDMA_CM_DISCONNECT))
                goto out;
 
        switch (ib_event->event) {
@@ -1965,7 +1986,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
                event.status = -ETIMEDOUT;
                break;
        case IB_CM_REP_RECEIVED:
-               if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+               if (state == RDMA_CM_CONNECT &&
                    (id_priv->id.qp_type != IB_QPT_UD)) {
                        trace_cm_send_mra(id_priv);
                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
@@ -2043,14 +2064,15 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
        int ret;
 
        listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
-       id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
-                           listen_id->event_handler, listen_id->context,
-                           listen_id->ps, ib_event->param.req_rcvd.qp_type,
-                           listen_id_priv->res.kern_name);
-       if (IS_ERR(id))
+       id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+                                  listen_id->event_handler, listen_id->context,
+                                  listen_id->ps,
+                                  ib_event->param.req_rcvd.qp_type,
+                                  listen_id_priv);
+       if (IS_ERR(id_priv))
                return NULL;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
+       id = &id_priv->id;
        if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
                              (struct sockaddr *)&id->route.addr.dst_addr,
                              listen_id, ib_event, ss_family, service_id))
@@ -2104,13 +2126,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
        int ret;
 
        listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
-       id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
-                             listen_id->ps, IB_QPT_UD,
-                             listen_id_priv->res.kern_name);
-       if (IS_ERR(id))
+       id_priv = __rdma_create_id(net, listen_id->event_handler,
+                                  listen_id->context, listen_id->ps, IB_QPT_UD,
+                                  listen_id_priv);
+       if (IS_ERR(id_priv))
                return NULL;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
+       id = &id_priv->id;
        if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
                              (struct sockaddr *)&id->route.addr.dst_addr,
                              listen_id, ib_event, ss_family,
@@ -2184,7 +2206,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
        }
 
        mutex_lock(&listen_id->handler_mutex);
-       if (listen_id->state != RDMA_CM_LISTEN) {
+       if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
                ret = -ECONNABORTED;
                goto err_unlock;
        }
@@ -2226,8 +2248,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
                goto net_dev_put;
        }
 
-       if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
-           (conn_id->id.qp_type != IB_QPT_UD)) {
+       if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+           conn_id->id.qp_type != IB_QPT_UD) {
                trace_cm_send_mra(cm_id->context);
                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
        }
@@ -2288,7 +2310,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
        mutex_lock(&id_priv->handler_mutex);
-       if (id_priv->state != RDMA_CM_CONNECT)
+       if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
                goto out;
 
        switch (iw_event->event) {
@@ -2346,7 +2368,6 @@ out:
 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                               struct iw_cm_event *iw_event)
 {
-       struct rdma_cm_id *new_cm_id;
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event = {};
        int ret = -ECONNABORTED;
@@ -2362,20 +2383,18 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        listen_id = cm_id->context;
 
        mutex_lock(&listen_id->handler_mutex);
-       if (listen_id->state != RDMA_CM_LISTEN)
+       if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
                goto out;
 
        /* Create a new RDMA id for the new IW CM ID */
-       new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
-                                    listen_id->id.event_handler,
-                                    listen_id->id.context,
-                                    RDMA_PS_TCP, IB_QPT_RC,
-                                    listen_id->res.kern_name);
-       if (IS_ERR(new_cm_id)) {
+       conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+                                  listen_id->id.event_handler,
+                                  listen_id->id.context, RDMA_PS_TCP,
+                                  IB_QPT_RC, listen_id);
+       if (IS_ERR(conn_id)) {
                ret = -ENOMEM;
                goto out;
        }
-       conn_id = container_of(new_cm_id, struct rdma_id_private, id);
        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        conn_id->state = RDMA_CM_CONNECT;
 
@@ -2480,7 +2499,6 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
                              struct cma_device *cma_dev)
 {
        struct rdma_id_private *dev_id_priv;
-       struct rdma_cm_id *id;
        struct net *net = id_priv->id.route.addr.dev_addr.net;
        int ret;
 
@@ -2489,13 +2507,12 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
                return;
 
-       id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
-                             id_priv->id.qp_type, id_priv->res.kern_name);
-       if (IS_ERR(id))
+       dev_id_priv =
+               __rdma_create_id(net, cma_listen_handler, id_priv,
+                                id_priv->id.ps, id_priv->id.qp_type, id_priv);
+       if (IS_ERR(dev_id_priv))
                return;
 
-       dev_id_priv = container_of(id, struct rdma_id_private, id);
-
        dev_id_priv->state = RDMA_CM_ADDR_BOUND;
        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
               rdma_addr_size(cma_src_addr(id_priv)));
@@ -2508,7 +2525,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        dev_id_priv->tos_set = id_priv->tos_set;
        dev_id_priv->tos = id_priv->tos;
 
-       ret = rdma_listen(id, id_priv->backlog);
+       ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
        if (ret)
                dev_warn(&cma_dev->device->dev,
                         "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
@@ -2647,32 +2664,14 @@ static void cma_work_handler(struct work_struct *_work)
        struct rdma_id_private *id_priv = work->id;
 
        mutex_lock(&id_priv->handler_mutex);
-       if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+       if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+           READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
                goto out_unlock;
-
-       if (cma_cm_event_handler(id_priv, &work->event)) {
-               cma_id_put(id_priv);
-               destroy_id_handler_unlock(id_priv);
-               goto out_free;
+       if (work->old_state != 0 || work->new_state != 0) {
+               if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+                       goto out_unlock;
        }
 
-out_unlock:
-       mutex_unlock(&id_priv->handler_mutex);
-       cma_id_put(id_priv);
-out_free:
-       kfree(work);
-}
-
-static void cma_ndev_work_handler(struct work_struct *_work)
-{
-       struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
-       struct rdma_id_private *id_priv = work->id;
-
-       mutex_lock(&id_priv->handler_mutex);
-       if (id_priv->state == RDMA_CM_DESTROYING ||
-           id_priv->state == RDMA_CM_DEVICE_REMOVAL)
-               goto out_unlock;
-
        if (cma_cm_event_handler(id_priv, &work->event)) {
                cma_id_put(id_priv);
                destroy_id_handler_unlock(id_priv);
@@ -2683,6 +2682,8 @@ out_unlock:
        mutex_unlock(&id_priv->handler_mutex);
        cma_id_put(id_priv);
 out_free:
+       if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+               rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
        kfree(work);
 }
 
@@ -3237,32 +3238,54 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
        return rdma_bind_addr(id, src_addr);
 }
 
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-                     const struct sockaddr *dst_addr, unsigned long timeout_ms)
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by the ULP; a previously bound ID will not be re-bound and
+ * src_addr is ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+                              struct sockaddr *src_addr,
+                              const struct sockaddr *dst_addr)
 {
-       struct rdma_id_private *id_priv;
        int ret;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
        memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
-       if (id_priv->state == RDMA_CM_IDLE) {
-               ret = cma_bind_addr(id, src_addr, dst_addr);
-               if (ret) {
-                       memset(cma_dst_addr(id_priv), 0,
-                              rdma_addr_size(dst_addr));
-                       return ret;
+       if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+               /* For a well behaved ULP state will be RDMA_CM_IDLE */
+               ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+               if (ret)
+                       goto err_dst;
+               if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+                                          RDMA_CM_ADDR_QUERY))) {
+                       ret = -EINVAL;
+                       goto err_dst;
                }
        }
 
        if (cma_family(id_priv) != dst_addr->sa_family) {
-               memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
-               return -EINVAL;
+               ret = -EINVAL;
+               goto err_state;
        }
+       return 0;
 
-       if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
-               memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
-               return -EINVAL;
-       }
+err_state:
+       cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+err_dst:
+       memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+       return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+                     const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
+       int ret;
+
+       ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+       if (ret)
+               return ret;
 
        if (cma_any_addr(dst_addr)) {
                ret = cma_resolve_loopback(id_priv);
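
For orientation, the resolve_prepare_src() comment above implies the usual
kernel ULP pattern: either pass a NULL src_addr and let the CM bind for you,
or bind beforehand and have src_addr ignored. A minimal sketch follows; the
ulp_* name and the 2000 ms timeout are illustrative, not part of this patch.

#include <rdma/rdma_cm.h>

/* Illustrative only: start address resolution on a new or pre-bound ID. */
static int ulp_start_resolve(struct rdma_cm_id *id, struct sockaddr *dst)
{
	int ret;

	/*
	 * src_addr == NULL lets the CM pick the source; if the ID was
	 * already bound, the existing binding is kept and this argument
	 * is ignored, exactly as the comment above describes.
	 */
	ret = rdma_resolve_addr(id, NULL, dst, 2000);
	if (ret)
		return ret;

	/*
	 * The result arrives asynchronously as RDMA_CM_EVENT_ADDR_RESOLVED
	 * (or RDMA_CM_EVENT_ADDR_ERROR) in the ID's event handler.
	 */
	return 0;
}
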
@@ -3294,7 +3317,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
 
        id_priv = container_of(id, struct rdma_id_private, id);
        spin_lock_irqsave(&id_priv->lock, flags);
-       if (reuse || id_priv->state == RDMA_CM_IDLE) {
+       if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+           id_priv->state == RDMA_CM_IDLE) {
                id_priv->reuseaddr = reuse;
                ret = 0;
        } else {
@@ -3488,8 +3512,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
                if (id_priv == cur_id)
                        continue;
 
-               if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
-                   cur_id->reuseaddr)
+               if (reuseaddr && cur_id->reuseaddr)
                        continue;
 
                cur_addr = cma_src_addr(cur_id);
@@ -3530,18 +3553,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
        return ret;
 }
 
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
-       struct rdma_bind_list *bind_list = id_priv->bind_list;
-       int ret = 0;
-
-       mutex_lock(&lock);
-       if (bind_list->owners.first->next)
-               ret = cma_check_port(bind_list, id_priv, 0);
-       mutex_unlock(&lock);
-       return ret;
-}
-
 static enum rdma_ucm_port_space
 cma_select_inet_ps(struct rdma_id_private *id_priv)
 {
@@ -3635,22 +3646,31 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
 
 int rdma_listen(struct rdma_cm_id *id, int backlog)
 {
-       struct rdma_id_private *id_priv;
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
        int ret;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
-       if (id_priv->state == RDMA_CM_IDLE) {
+       if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+               /* For a well behaved ULP state will be RDMA_CM_IDLE */
                id->route.addr.src_addr.ss_family = AF_INET;
                ret = rdma_bind_addr(id, cma_src_addr(id_priv));
                if (ret)
                        return ret;
+               if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+                                          RDMA_CM_LISTEN)))
+                       return -EINVAL;
        }
 
-       if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
-               return -EINVAL;
-
+       /*
+        * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+        * any more, and has to be unique in the bind list.
+        */
        if (id_priv->reuseaddr) {
-               ret = cma_bind_listen(id_priv);
+               mutex_lock(&lock);
+               ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+               if (!ret)
+                       id_priv->reuseaddr = 0;
+               mutex_unlock(&lock);
                if (ret)
                        goto err;
        }
@@ -3675,6 +3695,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
        return 0;
 err:
        id_priv->backlog = 0;
+       /*
+        * All the failure paths that lead here will not allow the req_handler
+        * to have run.
+        */
        cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
        return ret;
 }
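
The new comments above spell out the reuseaddr contract: it may be requested
any time before the ID starts listening, and rdma_listen() clears it again
once the port is confirmed to be uniquely owned. A sketch of the listener-side
ordering this implies; the ulp_* name and the backlog value are hypothetical.

#include <rdma/rdma_cm.h>

static int ulp_start_listen(struct rdma_cm_id *id, struct sockaddr *addr)
{
	int ret;

	/* Allowed while the ID is idle or merely bound, rejected once it
	 * has reached RDMA_CM_LISTEN. */
	ret = rdma_set_reuseaddr(id, 1);
	if (ret)
		return ret;

	ret = rdma_bind_addr(id, addr);
	if (ret)
		return ret;

	/* Drops reuseaddr again if this ID ends up sole owner of the port. */
	return rdma_listen(id, 128);
}
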
@@ -3729,7 +3753,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
        return 0;
 err2:
-       rdma_restrack_del(&id_priv->res);
        if (id_priv->cma_dev)
                cma_release_dev(id_priv);
 err1:
@@ -3778,7 +3801,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
        int ret;
 
        mutex_lock(&id_priv->handler_mutex);
-       if (id_priv->state != RDMA_CM_CONNECT)
+       if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
                goto out;
 
        switch (ib_event->event) {
@@ -4014,12 +4037,15 @@ out:
 
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
-       struct rdma_id_private *id_priv;
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
        int ret;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
-       if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
-               return -EINVAL;
+       mutex_lock(&id_priv->handler_mutex);
+       if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) {
+               ret = -EINVAL;
+               goto err_unlock;
+       }
 
        if (!id->qp) {
                id_priv->qp_num = conn_param->qp_num;
@@ -4036,11 +4062,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
        else
                ret = -ENOSYS;
        if (ret)
-               goto err;
-
+               goto err_state;
+       mutex_unlock(&id_priv->handler_mutex);
        return 0;
-err:
+err_state:
        cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
+err_unlock:
+       mutex_unlock(&id_priv->handler_mutex);
        return ret;
 }
 EXPORT_SYMBOL(rdma_connect);
@@ -4152,17 +4180,33 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
        return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
 }
 
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
-                 const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection.  This must be
+ *   provided if accepting a connection request.  If accepting a connection
+ *   response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request.  It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
-       struct rdma_id_private *id_priv;
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
        int ret;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
-
-       rdma_restrack_set_task(&id_priv->res, caller);
+       lockdep_assert_held(&id_priv->handler_mutex);
 
-       if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+       if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
                return -EINVAL;
 
        if (!id->qp && conn_param) {
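
The kernel-doc above introduces a locking contract: rdma_accept() now asserts
that the CM ID's handler_mutex is held. Accepting from inside the cm_event
handler needs nothing extra, while a ULP that defers the accept to another
context can take the lock explicitly with the rdma_lock_handler() /
rdma_unlock_handler() pair added further down in this patch. A hedged sketch;
the ulp_* name is hypothetical.

#include <rdma/rdma_cm.h>

/* Deferred accept, called from outside the event handler. */
static int ulp_accept_deferred(struct rdma_cm_id *id,
			       struct rdma_conn_param *param)
{
	int ret;

	rdma_lock_handler(id);		/* satisfies lockdep_assert_held() */
	ret = rdma_accept(id, param);	/* on failure a reject is sent and the QP is errored */
	rdma_unlock_handler(id);
	return ret;
}
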
@@ -4200,10 +4244,10 @@ reject:
        rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
        return ret;
 }
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
 
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
-                     const char *caller, struct rdma_ucm_ece *ece)
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+                   struct rdma_ucm_ece *ece)
 {
        struct rdma_id_private *id_priv =
                container_of(id, struct rdma_id_private, id);
@@ -4211,9 +4255,27 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
        id_priv->ece.vendor_id = ece->vendor_id;
        id_priv->ece.attr_mod = ece->attr_mod;
 
-       return __rdma_accept(id, conn_param, caller);
+       return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
+
+       mutex_lock(&id_priv->handler_mutex);
 }
-EXPORT_SYMBOL(__rdma_accept_ece);
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
+
+       mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
 
 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
@@ -4296,63 +4358,66 @@ out:
 }
 EXPORT_SYMBOL(rdma_disconnect);
 
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+                             struct ib_sa_multicast *multicast,
+                             struct rdma_cm_event *event,
+                             struct cma_multicast *mc)
 {
-       struct rdma_id_private *id_priv;
-       struct cma_multicast *mc = multicast->context;
-       struct rdma_cm_event event = {};
-       int ret = 0;
-
-       id_priv = mc->id_priv;
-       mutex_lock(&id_priv->handler_mutex);
-       if (id_priv->state != RDMA_CM_ADDR_BOUND &&
-           id_priv->state != RDMA_CM_ADDR_RESOLVED)
-               goto out;
+       struct rdma_dev_addr *dev_addr;
+       enum ib_gid_type gid_type;
+       struct net_device *ndev;
 
        if (!status)
                status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
        else
                pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
                                     status);
-       mutex_lock(&id_priv->qp_mutex);
-       if (!status && id_priv->id.qp) {
-               status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
-                                        be16_to_cpu(multicast->rec.mlid));
-               if (status)
-                       pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
-                                            status);
+
+       event->status = status;
+       event->param.ud.private_data = mc->context;
+       if (status) {
+               event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+               return;
        }
-       mutex_unlock(&id_priv->qp_mutex);
 
-       event.status = status;
-       event.param.ud.private_data = mc->context;
-       if (!status) {
-               struct rdma_dev_addr *dev_addr =
-                       &id_priv->id.route.addr.dev_addr;
-               struct net_device *ndev =
-                       dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
-               enum ib_gid_type gid_type =
-                       id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
-                       rdma_start_port(id_priv->cma_dev->device)];
-
-               event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
-               ret = ib_init_ah_from_mcmember(id_priv->id.device,
-                                              id_priv->id.port_num,
-                                              &multicast->rec,
-                                              ndev, gid_type,
-                                              &event.param.ud.ah_attr);
-               if (ret)
-                       event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+       dev_addr = &id_priv->id.route.addr.dev_addr;
+       ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+       gid_type =
+               id_priv->cma_dev
+                       ->default_gid_type[id_priv->id.port_num -
+                                          rdma_start_port(
+                                                  id_priv->cma_dev->device)];
+
+       event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+       if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+                                    &multicast->rec, ndev, gid_type,
+                                    &event->param.ud.ah_attr)) {
+               event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+               goto out;
+       }
 
-               event.param.ud.qp_num = 0xFFFFFF;
-               event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
-               if (ndev)
-                       dev_put(ndev);
-       } else
-               event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+       event->param.ud.qp_num = 0xFFFFFF;
+       event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
 
-       ret = cma_cm_event_handler(id_priv, &event);
+out:
+       if (ndev)
+               dev_put(ndev);
+}
 
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+       struct cma_multicast *mc = multicast->context;
+       struct rdma_id_private *id_priv = mc->id_priv;
+       struct rdma_cm_event event = {};
+       int ret = 0;
+
+       mutex_lock(&id_priv->handler_mutex);
+       if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+           READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+               goto out;
+
+       cma_make_mc_event(status, id_priv, multicast, &event, mc);
+       ret = cma_cm_event_handler(id_priv, &event);
        rdma_destroy_ah_attr(&event.param.ud.ah_attr);
        if (ret) {
                destroy_id_handler_unlock(id_priv);
@@ -4442,23 +4507,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
                             IB_SA_MCMEMBER_REC_MTU |
                             IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
-       mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
-                                               id_priv->id.port_num, &rec,
-                                               comp_mask, GFP_KERNEL,
-                                               cma_ib_mc_handler, mc);
-       return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
-       struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
-       struct cma_multicast *mc = mw->mc;
-       struct ib_sa_multicast *m = mc->multicast.ib;
-
-       mc->multicast.ib->context = mc;
-       cma_ib_mc_handler(0, m);
-       kref_put(&mc->mcref, release_mc);
-       kfree(mw);
+       mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+                                        id_priv->id.port_num, &rec, comp_mask,
+                                        GFP_KERNEL, cma_ib_mc_handler, mc);
+       return PTR_ERR_OR_ZERO(mc->sa_mc);
 }
 
 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
@@ -4493,52 +4545,47 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
                                   struct cma_multicast *mc)
 {
-       struct iboe_mcast_work *work;
+       struct cma_work *work;
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        int err = 0;
        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
        struct net_device *ndev = NULL;
+       struct ib_sa_multicast ib;
        enum ib_gid_type gid_type;
        bool send_only;
 
        send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
 
-       if (cma_zero_addr((struct sockaddr *)&mc->addr))
+       if (cma_zero_addr(addr))
                return -EINVAL;
 
        work = kzalloc(sizeof *work, GFP_KERNEL);
        if (!work)
                return -ENOMEM;
 
-       mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
-       if (!mc->multicast.ib) {
-               err = -ENOMEM;
-               goto out1;
-       }
-
        gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
                   rdma_start_port(id_priv->cma_dev->device)];
-       cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
+       cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
 
-       mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+       ib.rec.pkey = cpu_to_be16(0xffff);
        if (id_priv->id.ps == RDMA_PS_UDP)
-               mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+               ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
 
        if (dev_addr->bound_dev_if)
                ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
        if (!ndev) {
                err = -ENODEV;
-               goto out2;
+               goto err_free;
        }
-       mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
-       mc->multicast.ib->rec.hop_limit = 1;
-       mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+       ib.rec.rate = iboe_get_rate(ndev);
+       ib.rec.hop_limit = 1;
+       ib.rec.mtu = iboe_get_mtu(ndev->mtu);
 
        if (addr->sa_family == AF_INET) {
                if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
-                       mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+                       ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
                        if (!send_only) {
-                               err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+                               err = cma_igmp_send(ndev, &ib.rec.mgid,
                                                    true);
                        }
                }
@@ -4547,24 +4594,22 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
                        err = -ENOTSUPP;
        }
        dev_put(ndev);
-       if (err || !mc->multicast.ib->rec.mtu) {
+       if (err || !ib.rec.mtu) {
                if (!err)
                        err = -EINVAL;
-               goto out2;
+               goto err_free;
        }
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
-                   &mc->multicast.ib->rec.port_gid);
+                   &ib.rec.port_gid);
        work->id = id_priv;
-       work->mc = mc;
-       INIT_WORK(&work->work, iboe_mcast_work_handler);
-       kref_get(&mc->mcref);
+       INIT_WORK(&work->work, cma_work_handler);
+       cma_make_mc_event(0, id_priv, &ib, &work->event, mc);
+       /* Balances with cma_id_put() in cma_work_handler */
+       cma_id_get(id_priv);
        queue_work(cma_wq, &work->work);
-
        return 0;
 
-out2:
-       kfree(mc->multicast.ib);
-out1:
+err_free:
        kfree(work);
        return err;
 }
@@ -4572,19 +4617,21 @@ out1:
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
                        u8 join_state, void *context)
 {
-       struct rdma_id_private *id_priv;
+       struct rdma_id_private *id_priv =
+               container_of(id, struct rdma_id_private, id);
        struct cma_multicast *mc;
        int ret;
 
-       if (!id->device)
+       /* Not supported for kernel QPs */
+       if (WARN_ON(id->qp))
                return -EINVAL;
 
-       id_priv = container_of(id, struct rdma_id_private, id);
-       if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
-           !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+       /* ULP is calling this wrong. */
+       if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+                           READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
                return -EINVAL;
 
-       mc = kmalloc(sizeof *mc, GFP_KERNEL);
+       mc = kzalloc(sizeof(*mc), GFP_KERNEL);
        if (!mc)
                return -ENOMEM;
 
@@ -4594,7 +4641,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
        mc->join_state = join_state;
 
        if (rdma_protocol_roce(id->device, id->port_num)) {
-               kref_init(&mc->mcref);
                ret = cma_iboe_join_multicast(id_priv, mc);
                if (ret)
                        goto out_err;
@@ -4626,25 +4672,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
        id_priv = container_of(id, struct rdma_id_private, id);
        spin_lock_irq(&id_priv->lock);
        list_for_each_entry(mc, &id_priv->mc_list, list) {
-               if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
-                       list_del(&mc->list);
-                       spin_unlock_irq(&id_priv->lock);
-
-                       if (id->qp)
-                               ib_detach_mcast(id->qp,
-                                               &mc->multicast.ib->rec.mgid,
-                                               be16_to_cpu(mc->multicast.ib->rec.mlid));
-
-                       BUG_ON(id_priv->cma_dev->device != id->device);
-
-                       if (rdma_cap_ib_mcast(id->device, id->port_num)) {
-                               ib_sa_free_multicast(mc->multicast.ib);
-                               kfree(mc);
-                       } else if (rdma_protocol_roce(id->device, id->port_num)) {
-                               cma_leave_roce_mc_group(id_priv, mc);
-                       }
-                       return;
-               }
+               if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+                       continue;
+               list_del(&mc->list);
+               spin_unlock_irq(&id_priv->lock);
+
+               WARN_ON(id_priv->cma_dev->device != id->device);
+               destroy_mc(id_priv, mc);
+               return;
        }
        spin_unlock_irq(&id_priv->lock);
 }
@@ -4653,7 +4688,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
 static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
 {
        struct rdma_dev_addr *dev_addr;
-       struct cma_ndev_work *work;
+       struct cma_work *work;
 
        dev_addr = &id_priv->id.route.addr.dev_addr;
 
@@ -4666,7 +4701,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
                if (!work)
                        return -ENOMEM;
 
-               INIT_WORK(&work->work, cma_ndev_work_handler);
+               INIT_WORK(&work->work, cma_work_handler);
                work->id = id_priv;
                work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
                cma_id_get(id_priv);
index 3c1e2ca..7ec4af2 100644 (file)
@@ -123,16 +123,17 @@ static ssize_t default_roce_mode_store(struct config_item *item,
 {
        struct cma_device *cma_dev;
        struct cma_dev_port_group *group;
-       int gid_type = ib_cache_gid_parse_type_str(buf);
+       int gid_type;
        ssize_t ret;
 
-       if (gid_type < 0)
-               return -EINVAL;
-
        ret = cma_configfs_params_get(item, &cma_dev, &group);
        if (ret)
                return ret;
 
+       gid_type = ib_cache_gid_parse_type_str(buf);
+       if (gid_type < 0)
+               return -EINVAL;
+
        ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
 
        cma_configfs_params_put(cma_dev);
index e6e20c3..e452642 100644 (file)
 #include <linux/tracepoint.h>
 #include <trace/events/rdma.h>
 
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST                       \
-       ib_cm_event(REQ_ERROR)                  \
-       ib_cm_event(REQ_RECEIVED)               \
-       ib_cm_event(REP_ERROR)                  \
-       ib_cm_event(REP_RECEIVED)               \
-       ib_cm_event(RTU_RECEIVED)               \
-       ib_cm_event(USER_ESTABLISHED)           \
-       ib_cm_event(DREQ_ERROR)                 \
-       ib_cm_event(DREQ_RECEIVED)              \
-       ib_cm_event(DREP_RECEIVED)              \
-       ib_cm_event(TIMEWAIT_EXIT)              \
-       ib_cm_event(MRA_RECEIVED)               \
-       ib_cm_event(REJ_RECEIVED)               \
-       ib_cm_event(LAP_ERROR)                  \
-       ib_cm_event(LAP_RECEIVED)               \
-       ib_cm_event(APR_RECEIVED)               \
-       ib_cm_event(SIDR_REQ_ERROR)             \
-       ib_cm_event(SIDR_REQ_RECEIVED)          \
-       ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x)         TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x)     TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x)         { IB_CM_##x, #x },
-#define ib_cm_event_end(x)     { IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
-               __print_symbolic(x, IB_CM_EVENT_LIST)
-
 
 DECLARE_EVENT_CLASS(cma_fsm_class,
        TP_PROTO(
index a1e6a67..e84b0fe 100644 (file)
@@ -44,6 +44,7 @@
 #include <rdma/ib_mad.h>
 #include <rdma/restrack.h>
 #include "mad_priv.h"
+#include "restrack.h"
 
 /* Total number of ports combined across all struct ib_devices's */
 #define RDMA_MAX_PORTS 8192
@@ -352,6 +353,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
        INIT_LIST_HEAD(&qp->rdma_mrs);
        INIT_LIST_HEAD(&qp->sig_mrs);
 
+       rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
        /*
         * We don't track XRC QPs for now, because they don't have PD
         * and more importantly they are created internaly by driver,
@@ -359,14 +361,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
         */
        is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
        if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
-               qp->res.type = RDMA_RESTRACK_QP;
-               if (uobj)
-                       rdma_restrack_uadd(&qp->res);
-               else
-                       rdma_restrack_kadd(&qp->res);
-       } else
-               qp->res.valid = false;
-
+               rdma_restrack_parent_name(&qp->res, &pd->res);
+               rdma_restrack_add(&qp->res);
+       }
        return qp;
 }
 
index 6361668..e4ff0d3 100644 (file)
@@ -80,8 +80,9 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
 
        counter->device    = dev;
        counter->port      = port;
-       counter->res.type  = RDMA_RESTRACK_COUNTER;
-       counter->stats     = dev->ops.counter_alloc_stats(counter);
+
+       rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+       counter->stats = dev->ops.counter_alloc_stats(counter);
        if (!counter->stats)
                goto err_stats;
 
@@ -107,6 +108,7 @@ err_mode:
        mutex_unlock(&port_counter->lock);
        kfree(counter->stats);
 err_stats:
+       rdma_restrack_put(&counter->res);
        kfree(counter);
        return NULL;
 }
@@ -248,13 +250,8 @@ next:
 static void rdma_counter_res_add(struct rdma_counter *counter,
                                 struct ib_qp *qp)
 {
-       if (rdma_is_kernel_res(&qp->res)) {
-               rdma_restrack_set_task(&counter->res, qp->res.kern_name);
-               rdma_restrack_kadd(&counter->res);
-       } else {
-               rdma_restrack_attach_task(&counter->res, qp->res.task);
-               rdma_restrack_uadd(&counter->res);
-       }
+       rdma_restrack_parent_name(&counter->res, &qp->res);
+       rdma_restrack_add(&counter->res);
 }
 
 static void counter_release(struct kref *kref)
index a92fc3f..12ebacf 100644 (file)
@@ -197,24 +197,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 }
 
 /**
- * __ib_alloc_cq_user - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
  * @dev:               device to allocate the CQ for
  * @private:           driver private data, accessible from cq->cq_context
  * @nr_cqe:            number of CQEs to allocate
  * @comp_vector:       HCA completion vectors for this CQ
  * @poll_ctx:          context to poll the CQ from.
  * @caller:            module owner name.
- * @udata:             Valid user data or NULL for kernel object
  *
  * This is the proper interface to allocate a CQ for in-kernel users. A
  * CQ allocated with this interface will automatically be polled from the
  * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
  * to use this CQ abstraction.
  */
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
-                                int nr_cqe, int comp_vector,
-                                enum ib_poll_context poll_ctx,
-                                const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+                           int comp_vector, enum ib_poll_context poll_ctx,
+                           const char *caller)
 {
        struct ib_cq_init_attr cq_attr = {
                .cqe            = nr_cqe,
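
To make the kernel-doc above concrete: in-kernel users call the ib_alloc_cq()
wrapper (which supplies KBUILD_MODNAME as @caller) and get completions
dispatched through wr_cqe->done instead of wr_id. A minimal sketch; the ulp_*
names, queue depth and comp_vector are assumptions.

#include <rdma/ib_verbs.h>

struct ulp_req {
	struct ib_cqe	cqe;	/* wr->wr_cqe points here instead of wr_id */
	/* ... ULP private state ... */
};

static void ulp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ulp_req *req = container_of(wc->wr_cqe, struct ulp_req, cqe);

	/* handle the completed request 'req' */
}

static struct ib_cq *ulp_create_cq(struct ib_device *dev)
{
	/* Polled automatically from softirq context; no ib_poll_cq() needed. */
	return ib_alloc_cq(dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
}

static int ulp_post_send(struct ib_qp *qp, struct ulp_req *req,
			 struct ib_sge *sge)
{
	struct ib_send_wr wr = {
		.wr_cqe	    = &req->cqe,
		.sg_list    = sge,
		.num_sge    = 1,
		.opcode	    = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};

	req->cqe.done = ulp_send_done;	/* dispatched by the CQ abstraction */
	return ib_post_send(qp, &wr, NULL);
}

The matching teardown is the plain ib_free_cq() this patch converges on.
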
@@ -237,15 +235,13 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
        if (!cq->wc)
                goto out_free_cq;
 
-       cq->res.type = RDMA_RESTRACK_CQ;
-       rdma_restrack_set_task(&cq->res, caller);
+       rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+       rdma_restrack_set_name(&cq->res, caller);
 
        ret = dev->ops.create_cq(cq, &cq_attr, NULL);
        if (ret)
                goto out_free_wc;
 
-       rdma_restrack_kadd(&cq->res);
-
        rdma_dim_init(cq);
 
        switch (cq->poll_ctx) {
@@ -271,21 +267,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
                goto out_destroy_cq;
        }
 
+       rdma_restrack_add(&cq->res);
        trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
        return cq;
 
 out_destroy_cq:
        rdma_dim_destroy(cq);
-       rdma_restrack_del(&cq->res);
-       cq->device->ops.destroy_cq(cq, udata);
+       cq->device->ops.destroy_cq(cq, NULL);
 out_free_wc:
+       rdma_restrack_put(&cq->res);
        kfree(cq->wc);
 out_free_cq:
        kfree(cq);
        trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
        return ERR_PTR(ret);
 }
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
 
 /**
  * __ib_alloc_cq_any - allocate a completion queue
@@ -310,18 +307,19 @@ struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
                        atomic_inc_return(&counter) %
                        min_t(int, dev->num_comp_vectors, num_online_cpus());
 
-       return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
-                                 caller, NULL);
+       return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+                            caller);
 }
 EXPORT_SYMBOL(__ib_alloc_cq_any);
 
 /**
- * ib_free_cq_user - free a completion queue
+ * ib_free_cq - free a completion queue
  * @cq:                completion queue to free.
- * @udata:     User data or NULL for kernel object
  */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
 {
+       int ret;
+
        if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
                return;
        if (WARN_ON_ONCE(cq->cqe_used))
@@ -343,12 +341,13 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
 
        rdma_dim_destroy(cq);
        trace_cq_free(cq);
+       ret = cq->device->ops.destroy_cq(cq, NULL);
+       WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
        rdma_restrack_del(&cq->res);
-       cq->device->ops.destroy_cq(cq, udata);
        kfree(cq->wc);
        kfree(cq);
 }
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_free_cq);
 
 void ib_cq_pool_init(struct ib_device *dev)
 {
index 23ee65a..dab1f9d 100644 (file)
@@ -2697,7 +2697,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_OBJ_SIZE(dev_ops, ib_ah);
        SET_OBJ_SIZE(dev_ops, ib_counters);
        SET_OBJ_SIZE(dev_ops, ib_cq);
+       SET_OBJ_SIZE(dev_ops, ib_mw);
        SET_OBJ_SIZE(dev_ops, ib_pd);
+       SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
        SET_OBJ_SIZE(dev_ops, ib_srq);
        SET_OBJ_SIZE(dev_ops, ib_ucontext);
        SET_OBJ_SIZE(dev_ops, ib_xrcd);
index 6d3ed7c..ffe11b0 100644 (file)
@@ -130,17 +130,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
        lockdep_assert_held(&ufile->hw_destroy_rwsem);
        assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
 
-       if (reason == RDMA_REMOVE_ABORT_HWOBJ) {
-               reason = RDMA_REMOVE_ABORT;
-               ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
-                                                               attrs);
-               /*
-                * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see
-                * ib_is_destroy_retryable, cleanup_retryable == false here.
-                */
-               WARN_ON(ret);
-       }
-
        if (reason == RDMA_REMOVE_ABORT) {
                WARN_ON(!list_empty(&uobj->list));
                WARN_ON(!uobj->context);
@@ -674,11 +663,22 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
                              bool hw_obj_valid)
 {
        struct ib_uverbs_file *ufile = uobj->ufile;
+       int ret;
+
+       if (hw_obj_valid) {
+               ret = uobj->uapi_object->type_class->destroy_hw(
+                       uobj, RDMA_REMOVE_ABORT, attrs);
+               /*
+                * If the driver couldn't destroy the object then go ahead and
+                * commit it. Leaking objects that can't be destroyed is only
+                * done during FD close after the driver has a few more tries to
+                * destroy it.
+                */
+               if (WARN_ON(ret))
+                       return rdma_alloc_commit_uobject(uobj, attrs);
+       }
 
-       uverbs_destroy_uobject(uobj,
-                              hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ :
-                                             RDMA_REMOVE_ABORT,
-                              attrs);
+       uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
 
        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);
@@ -889,14 +889,14 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
        if (!ufile->ucontext)
                goto done;
 
-       ufile->ucontext->closing = true;
        ufile->ucontext->cleanup_retryable = true;
        while (!list_empty(&ufile->uobjects))
                if (__uverbs_cleanup_ufile(ufile, reason)) {
                        /*
                         * No entry was cleaned-up successfully during this
-                        * iteration
+                        * iteration. It is a driver bug to fail destruction.
                         */
+                       WARN_ON(!list_empty(&ufile->uobjects));
                        break;
                }
 
index 62fbb0a..4aeeaae 100644 (file)
@@ -123,32 +123,6 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
 }
 EXPORT_SYMBOL(rdma_restrack_count);
 
-static void set_kern_name(struct rdma_restrack_entry *res)
-{
-       struct ib_pd *pd;
-
-       switch (res->type) {
-       case RDMA_RESTRACK_QP:
-               pd = container_of(res, struct ib_qp, res)->pd;
-               if (!pd) {
-                       WARN_ONCE(true, "XRC QPs are not supported\n");
-                       /* Survive, despite the programmer's error */
-                       res->kern_name = " ";
-               }
-               break;
-       case RDMA_RESTRACK_MR:
-               pd = container_of(res, struct ib_mr, res)->pd;
-               break;
-       default:
-               /* Other types set kern_name directly */
-               pd = NULL;
-               break;
-       }
-
-       if (pd)
-               res->kern_name = pd->res.kern_name;
-}
-
 static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 {
        switch (res->type) {
@@ -173,36 +147,77 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
        }
 }
 
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
-                           const char *caller)
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res:  resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+                                     struct task_struct *task)
 {
-       if (caller) {
-               res->kern_name = caller;
+       if (WARN_ON_ONCE(!task))
                return;
-       }
 
        if (res->task)
                put_task_struct(res->task);
-       get_task_struct(current);
-       res->task = current;
+       get_task_struct(task);
+       res->task = task;
+       res->user = true;
 }
-EXPORT_SYMBOL(rdma_restrack_set_task);
 
 /**
- * rdma_restrack_attach_task() - attach the task onto this resource
+ * rdma_restrack_set_name() - set the task for this resource
  * @res:  resource entry
- * @task: the task to attach, the current task will be used if it is NULL.
+ * @caller: kernel name, the current task will be used if the caller is NULL.
  */
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
-                              struct task_struct *task)
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
 {
-       if (res->task)
-               put_task_struct(res->task);
-       get_task_struct(task);
-       res->task = task;
+       if (caller) {
+               res->kern_name = caller;
+               return;
+       }
+
+       rdma_restrack_attach_task(res, current);
+}
+EXPORT_SYMBOL(rdma_restrack_set_name);
+
+/**
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
+ */
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+                              const struct rdma_restrack_entry *parent)
+{
+       if (rdma_is_kernel_res(parent))
+               dst->kern_name = parent->kern_name;
+       else
+               rdma_restrack_attach_task(dst, parent->task);
+}
+EXPORT_SYMBOL(rdma_restrack_parent_name);
+
+/**
+ * rdma_restrack_new() - Initializes a new restrack entry to allow the _put()
+ * interface to release memory in a fully automatic way.
+ * @res: Entry to initialize
+ * @type: Restrack type
+ */
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+                      enum rdma_restrack_type type)
+{
+       kref_init(&res->kref);
+       init_completion(&res->comp);
+       res->type = type;
 }
+EXPORT_SYMBOL(rdma_restrack_new);
 
-static void rdma_restrack_add(struct rdma_restrack_entry *res)
+/**
+ * rdma_restrack_add() - add an object to the resource tracking database
+ * @res:  resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
        struct ib_device *dev = res_to_dev(res);
        struct rdma_restrack_root *rt;
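
Taken together, these entry points replace the old rdma_restrack_kadd() /
rdma_restrack_uadd() pair with an explicit lifecycle, which the counters.c and
cq.c hunks earlier in this diff already follow. A condensed sketch of the
ordering, loosely modeled on the cq.c changes; details such as poll-context
setup are trimmed and the helper name is illustrative.

#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include "restrack.h"	/* core-private, as in the real callers */

static struct ib_cq *cq_lifecycle_sketch(struct ib_device *dev, int nr_cqe,
					 const char *caller)
{
	struct ib_cq_init_attr cq_attr = { .cqe = nr_cqe };
	struct ib_cq *cq;
	int ret;

	cq = rdma_zalloc_drv_obj(dev, ib_cq);
	if (!cq)
		return ERR_PTR(-ENOMEM);
	cq->device = dev;
	cq->cqe = nr_cqe;

	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);	/* init kref + completion */
	rdma_restrack_set_name(&cq->res, caller);	/* or rdma_restrack_parent_name() */

	ret = dev->ops.create_cq(cq, &cq_attr, NULL);
	if (ret) {
		rdma_restrack_put(&cq->res);	/* balances rdma_restrack_new() */
		kfree(cq);
		return ERR_PTR(ret);
	}

	rdma_restrack_add(&cq->res);	/* entry becomes visible to nldev */
	return cq;
}

On teardown the order is reversed, as the ib_free_cq() hunk shows: destroy the
HW object first, then rdma_restrack_del(), then free the memory.
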
@@ -213,8 +228,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
 
        rt = &dev->res[res->type];
 
-       kref_init(&res->kref);
-       init_completion(&res->comp);
        if (res->type == RDMA_RESTRACK_QP) {
                /* Special case to ensure that LQPN points to right QP */
                struct ib_qp *qp = container_of(res, struct ib_qp, res);
@@ -236,38 +249,7 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
        if (!ret)
                res->valid = true;
 }
-
-/**
- * rdma_restrack_kadd() - add kernel object to the reource tracking database
- * @res:  resource entry
- */
-void rdma_restrack_kadd(struct rdma_restrack_entry *res)
-{
-       res->task = NULL;
-       set_kern_name(res);
-       res->user = false;
-       rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_kadd);
-
-/**
- * rdma_restrack_uadd() - add user object to the reource tracking database
- * @res:  resource entry
- */
-void rdma_restrack_uadd(struct rdma_restrack_entry *res)
-{
-       if ((res->type != RDMA_RESTRACK_CM_ID) &&
-           (res->type != RDMA_RESTRACK_COUNTER))
-               res->task = NULL;
-
-       if (!res->task)
-               rdma_restrack_set_task(res, NULL);
-       res->kern_name = NULL;
-
-       res->user = true;
-       rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_uadd);
+EXPORT_SYMBOL(rdma_restrack_add);
 
 int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
 {
@@ -305,6 +287,10 @@ static void restrack_release(struct kref *kref)
        struct rdma_restrack_entry *res;
 
        res = container_of(kref, struct rdma_restrack_entry, kref);
+       if (res->task) {
+               put_task_struct(res->task);
+               res->task = NULL;
+       }
        complete(&res->comp);
 }
 
@@ -314,14 +300,23 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
 }
 EXPORT_SYMBOL(rdma_restrack_put);
 
+/**
+ * rdma_restrack_del() - delete an object from the resource tracking database
+ * @res:  resource entry
+ */
 void rdma_restrack_del(struct rdma_restrack_entry *res)
 {
        struct rdma_restrack_entry *old;
        struct rdma_restrack_root *rt;
        struct ib_device *dev;
 
-       if (!res->valid)
-               goto out;
+       if (!res->valid) {
+               if (res->task) {
+                       put_task_struct(res->task);
+                       res->task = NULL;
+               }
+               return;
+       }
 
        dev = res_to_dev(res);
        if (WARN_ON(!dev))
@@ -330,16 +325,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
        rt = &dev->res[res->type];
 
        old = xa_erase(&rt->xa, res->id);
+       if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
+               return;
        WARN_ON(old != res);
        res->valid = false;
 
        rdma_restrack_put(res);
        wait_for_completion(&res->comp);
-
-out:
-       if (res->task) {
-               put_task_struct(res->task);
-               res->task = NULL;
-       }
 }
 EXPORT_SYMBOL(rdma_restrack_del);
index d084e5f..6a04fc4 100644 (file)
@@ -25,6 +25,12 @@ struct rdma_restrack_root {
 
 int rdma_restrack_init(struct ib_device *dev);
 void rdma_restrack_clean(struct ib_device *dev);
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
-                              struct task_struct *task);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+                      enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+                           const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+                              const struct rdma_restrack_entry *parent);
 #endif /* _RDMA_CORE_RESTRACK_H_ */
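
For orientation, the split API above replaces rdma_restrack_kadd()/rdma_restrack_uadd() with an explicit lifecycle. Below is a minimal sketch, not taken from the patch, of how a creation path is expected to drive it; the surrounding function names are hypothetical, only the rdma_restrack_*() calls and RDMA_RESTRACK_PD come from this series.

/* Illustrative only: hypothetical create/destroy helpers. */
static int example_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
        /* 1. Initialize the tracking entry (kref, completion, type). */
        rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
        /* 2. NULL marks a user object; kernel callers pass their name. */
        rdma_restrack_set_name(&pd->res, udata ? NULL : "example_caller");

        /* ... driver-specific initialization ... */

        /* 3. Publish to the restrack xarray once the object is usable. */
        rdma_restrack_add(&pd->res);
        return 0;
}

static void example_dealloc_pd(struct ib_pd *pd)
{
        /* 4. Unpublish; waits for readers and drops the task reference. */
        rdma_restrack_del(&pd->res);
}
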
index c11e505..914cdde 100644 (file)
@@ -59,7 +59,7 @@ struct ib_port {
        struct gid_attr_group *gid_attr_group;
        struct attribute_group gid_group;
        struct attribute_group *pkey_group;
-       struct attribute_group *pma_table;
+       const struct attribute_group *pma_table;
        struct attribute_group *hw_stats_ag;
        struct rdma_hw_stats   *hw_stats;
        u8                     port_num;
@@ -387,7 +387,8 @@ static ssize_t _show_port_gid_attr(
 
        gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
        if (IS_ERR(gid_attr))
-               return PTR_ERR(gid_attr);
+               /* -EINVAL is returned for user space compatibility reasons. */
+               return -EINVAL;
 
        ret = print(gid_attr, buf);
        rdma_put_gid_attr(gid_attr);
@@ -653,17 +654,17 @@ static struct attribute *pma_attrs_noietf[] = {
        NULL
 };
 
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
        .name  = "counters",
        .attrs  = pma_attrs
 };
 
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
        .name  = "counters",
        .attrs  = pma_attrs_ext
 };
 
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
        .name  = "counters",
        .attrs  = pma_attrs_noietf
 };
@@ -778,8 +779,8 @@ err:
  * Figure out which counter table to use depending on
  * the device capabilities.
  */
-static struct attribute_group *get_counter_table(struct ib_device *dev,
-                                                int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+                                                      int port_num)
 {
        struct ib_class_port_info cpi;
 
index 1d184ea..08a6282 100644 (file)
@@ -80,7 +80,6 @@ struct ucma_file {
        struct list_head        ctx_list;
        struct list_head        event_list;
        wait_queue_head_t       poll_wait;
-       struct workqueue_struct *close_wq;
 };
 
 struct ucma_context {
@@ -88,7 +87,7 @@ struct ucma_context {
        struct completion       comp;
        refcount_t              ref;
        int                     events_reported;
-       int                     backlog;
+       atomic_t                backlog;
 
        struct ucma_file        *file;
        struct rdma_cm_id       *cm_id;
@@ -96,11 +95,6 @@ struct ucma_context {
        u64                     uid;
 
        struct list_head        list;
-       struct list_head        mc_list;
-       /* mark that device is in process of destroying the internal HW
-        * resources, protected by the ctx_table lock
-        */
-       int                     closing;
        /* sync between removal event and id destroy, protected by file mut */
        int                     destroying;
        struct work_struct      close_work;
@@ -113,23 +107,22 @@ struct ucma_multicast {
 
        u64                     uid;
        u8                      join_state;
-       struct list_head        list;
        struct sockaddr_storage addr;
 };
 
 struct ucma_event {
        struct ucma_context     *ctx;
+       struct ucma_context     *listen_ctx;
        struct ucma_multicast   *mc;
        struct list_head        list;
-       struct rdma_cm_id       *cm_id;
        struct rdma_ucm_event_resp resp;
-       struct work_struct      close_work;
 };
 
 static DEFINE_XARRAY_ALLOC(ctx_table);
 static DEFINE_XARRAY_ALLOC(multicast_table);
 
 static const struct file_operations ucma_fops;
+static int __destroy_id(struct ucma_context *ctx);
 
 static inline struct ucma_context *_ucma_find_context(int id,
                                                      struct ucma_file *file)
@@ -139,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
        ctx = xa_load(&ctx_table, id);
        if (!ctx)
                ctx = ERR_PTR(-ENOENT);
-       else if (ctx->file != file || !ctx->cm_id)
+       else if (ctx->file != file)
                ctx = ERR_PTR(-EINVAL);
        return ctx;
 }
@@ -150,12 +143,9 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 
        xa_lock(&ctx_table);
        ctx = _ucma_find_context(id, file);
-       if (!IS_ERR(ctx)) {
-               if (ctx->closing)
-                       ctx = ERR_PTR(-EIO);
-               else
-                       refcount_inc(&ctx->ref);
-       }
+       if (!IS_ERR(ctx))
+               if (!refcount_inc_not_zero(&ctx->ref))
+                       ctx = ERR_PTR(-ENXIO);
        xa_unlock(&ctx_table);
        return ctx;
 }
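
The lookup above is the usual "find under lock, then refcount_inc_not_zero()" pattern: the xa_lock only keeps the pointer stable, while the failed refcount increment is what rejects a ctx whose teardown has already begun. A generic sketch of the pattern; "struct obj" and "obj_get" are illustrative names, not from the patch.

/* Illustrative only; not kernel symbols. */
struct obj {
        refcount_t ref;
};

static struct obj *obj_get(struct xarray *obj_table, unsigned long id)
{
        struct obj *o;

        xa_lock(obj_table);
        o = xa_load(obj_table, id);
        if (o && !refcount_inc_not_zero(&o->ref))
                o = NULL;               /* found, but teardown already started */
        xa_unlock(obj_table);
        return o;                       /* caller must drop the reference */
}
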
@@ -183,14 +173,6 @@ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
        return ctx;
 }
 
-static void ucma_close_event_id(struct work_struct *work)
-{
-       struct ucma_event *uevent_close =  container_of(work, struct ucma_event, close_work);
-
-       rdma_destroy_id(uevent_close->cm_id);
-       kfree(uevent_close);
-}
-
 static void ucma_close_id(struct work_struct *work)
 {
        struct ucma_context *ctx =  container_of(work, struct ucma_context, close_work);
@@ -203,6 +185,14 @@ static void ucma_close_id(struct work_struct *work)
        wait_for_completion(&ctx->comp);
        /* No new events will be generated after destroying the id. */
        rdma_destroy_id(ctx->cm_id);
+
+       /*
+        * At this point ctx->ref is zero so the only place the ctx can be is in
+        * a uevent or in __destroy_id(). Since the former doesn't touch
+        * ctx->cm_id and the latter synchronously cancels this work, there
+        * are no races with this store.
+        */
+       ctx->cm_id = NULL;
 }
 
 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -216,39 +206,23 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
        INIT_WORK(&ctx->close_work, ucma_close_id);
        refcount_set(&ctx->ref, 1);
        init_completion(&ctx->comp);
-       INIT_LIST_HEAD(&ctx->mc_list);
+       /* So list_del() will work if we don't do ucma_finish_ctx() */
+       INIT_LIST_HEAD(&ctx->list);
        ctx->file = file;
        mutex_init(&ctx->mutex);
 
-       if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
-               goto error;
-
-       list_add_tail(&ctx->list, &file->ctx_list);
+       if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+               kfree(ctx);
+               return NULL;
+       }
        return ctx;
-
-error:
-       kfree(ctx);
-       return NULL;
 }
 
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_finish_ctx(struct ucma_context *ctx)
 {
-       struct ucma_multicast *mc;
-
-       mc = kzalloc(sizeof(*mc), GFP_KERNEL);
-       if (!mc)
-               return NULL;
-
-       mc->ctx = ctx;
-       if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
-               goto error;
-
-       list_add_tail(&mc->list, &ctx->mc_list);
-       return mc;
-
-error:
-       kfree(mc);
-       return NULL;
+       lockdep_assert_held(&ctx->file->mut);
+       list_add_tail(&ctx->list, &ctx->file->ctx_list);
+       xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
 }
 
 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
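
ucma_alloc_ctx()/ucma_finish_ctx() split ID handling into reserve and publish steps: xa_alloc() with a NULL entry hands out an ID that _ucma_find_context() cannot yet resolve, and xa_store() later makes the fully initialized ctx visible. A condensed sketch of the two-phase pattern; the object and helper names are illustrative, only the xarray calls are real.

/* Illustrative only; not kernel symbols. */
struct obj {
        u32 id;
};

static int obj_reserve_id(struct xarray *table, struct obj *o)
{
        /* Reserve an ID; lookups return NULL until the object is published. */
        return xa_alloc(table, &o->id, NULL, xa_limit_32b, GFP_KERNEL);
}

static void obj_publish(struct xarray *table, struct obj *o)
{
        /* The object is fully initialized; make it findable. */
        xa_store(table, o->id, o, GFP_KERNEL);
}
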
@@ -280,10 +254,15 @@ static void ucma_copy_ud_event(struct ib_device *device,
        dst->qkey = src->qkey;
 }
 
-static void ucma_set_event_context(struct ucma_context *ctx,
-                                  struct rdma_cm_event *event,
-                                  struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+                                            struct rdma_cm_event *event)
 {
+       struct ucma_event *uevent;
+
+       uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+       if (!uevent)
+               return NULL;
+
        uevent->ctx = ctx;
        switch (event->event) {
        case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -298,44 +277,56 @@ static void ucma_set_event_context(struct ucma_context *ctx,
                uevent->resp.id = ctx->id;
                break;
        }
+       uevent->resp.event = event->event;
+       uevent->resp.status = event->status;
+       if (ctx->cm_id->qp_type == IB_QPT_UD)
+               ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+                                  &event->param.ud);
+       else
+               ucma_copy_conn_event(&uevent->resp.param.conn,
+                                    &event->param.conn);
+
+       uevent->resp.ece.vendor_id = event->ece.vendor_id;
+       uevent->resp.ece.attr_mod = event->ece.attr_mod;
+       return uevent;
 }
 
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+                                     struct rdma_cm_event *event)
 {
-       struct ucma_context *ctx = cm_id->context;
-       struct ucma_event *con_req_eve;
-       int event_found = 0;
+       struct ucma_context *listen_ctx = cm_id->context;
+       struct ucma_context *ctx;
+       struct ucma_event *uevent;
 
-       if (ctx->destroying)
-               return;
+       if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+               return -ENOMEM;
+       ctx = ucma_alloc_ctx(listen_ctx->file);
+       if (!ctx)
+               goto err_backlog;
+       ctx->cm_id = cm_id;
 
-       /* only if context is pointing to cm_id that it owns it and can be
-        * queued to be closed, otherwise that cm_id is an inflight one that
-        * is part of that context event list pending to be detached and
-        * reattached to its new context as part of ucma_get_event,
-        * handled separately below.
-        */
-       if (ctx->cm_id == cm_id) {
-               xa_lock(&ctx_table);
-               ctx->closing = 1;
-               xa_unlock(&ctx_table);
-               queue_work(ctx->file->close_wq, &ctx->close_work);
-               return;
-       }
+       uevent = ucma_create_uevent(listen_ctx, event);
+       if (!uevent)
+               goto err_alloc;
+       uevent->listen_ctx = listen_ctx;
+       uevent->resp.id = ctx->id;
 
-       list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
-               if (con_req_eve->cm_id == cm_id &&
-                   con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-                       list_del(&con_req_eve->list);
-                       INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
-                       queue_work(ctx->file->close_wq, &con_req_eve->close_work);
-                       event_found = 1;
-                       break;
-               }
-       }
-       if (!event_found)
-               pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+       ctx->cm_id->context = ctx;
+
+       mutex_lock(&ctx->file->mut);
+       ucma_finish_ctx(ctx);
+       list_add_tail(&uevent->list, &ctx->file->event_list);
+       mutex_unlock(&ctx->file->mut);
+       wake_up_interruptible(&ctx->file->poll_wait);
+       return 0;
+
+err_alloc:
+       xa_erase(&ctx_table, ctx->id);
+       kfree(ctx);
+err_backlog:
+       atomic_inc(&listen_ctx->backlog);
+       /* Returning error causes the new ID to be destroyed */
+       return -ENOMEM;
 }
 
 static int ucma_event_handler(struct rdma_cm_id *cm_id,
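
The connect-request path above replaces the mutex-protected backlog counter with an atomic_t: atomic_add_unless(..., -1, 0) consumes a slot only while one is available, and the slot is returned with atomic_inc() on failure or once ucma_get_event() delivers the CONNECT_REQUEST. Reduced to the accounting alone (a fragment, not a complete function; control flow simplified):

        if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
                return -ENOMEM;                 /* listen backlog exhausted */

        /* ... allocate the child ctx and queue the CONNECT_REQUEST uevent ... */

        /* On any error, or after the event is reported to userspace: */
        atomic_inc(&listen_ctx->backlog);
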
@@ -343,66 +334,38 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 {
        struct ucma_event *uevent;
        struct ucma_context *ctx = cm_id->context;
-       int ret = 0;
-
-       uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
-       if (!uevent)
-               return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
 
-       mutex_lock(&ctx->file->mut);
-       uevent->cm_id = cm_id;
-       ucma_set_event_context(ctx, event, uevent);
-       uevent->resp.event = event->event;
-       uevent->resp.status = event->status;
-       if (cm_id->qp_type == IB_QPT_UD)
-               ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
-                                  &event->param.ud);
-       else
-               ucma_copy_conn_event(&uevent->resp.param.conn,
-                                    &event->param.conn);
-
-       uevent->resp.ece.vendor_id = event->ece.vendor_id;
-       uevent->resp.ece.attr_mod = event->ece.attr_mod;
-
-       if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-               if (!ctx->backlog) {
-                       ret = -ENOMEM;
-                       kfree(uevent);
-                       goto out;
-               }
-               ctx->backlog--;
-       } else if (!ctx->uid || ctx->cm_id != cm_id) {
-               /*
-                * We ignore events for new connections until userspace has set
-                * their context.  This can only happen if an error occurs on a
-                * new connection before the user accepts it.  This is okay,
-                * since the accept will just fail later. However, we do need
-                * to release the underlying HW resources in case of a device
-                * removal event.
-                */
-               if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
-                       ucma_removal_event_handler(cm_id);
+       if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+               return ucma_connect_event_handler(cm_id, event);
 
-               kfree(uevent);
-               goto out;
+       /*
+        * We ignore events for new connections until userspace has set their
+        * context.  This can only happen if an error occurs on a new connection
+        * before the user accepts it.  This is okay, since the accept will just
+        * fail later. However, we do need to release the underlying HW
+        * resources in case of a device removal event.
+        */
+       if (ctx->uid) {
+               uevent = ucma_create_uevent(ctx, event);
+               if (!uevent)
+                       return 0;
+
+               mutex_lock(&ctx->file->mut);
+               list_add_tail(&uevent->list, &ctx->file->event_list);
+               mutex_unlock(&ctx->file->mut);
+               wake_up_interruptible(&ctx->file->poll_wait);
        }
 
-       list_add_tail(&uevent->list, &ctx->file->event_list);
-       wake_up_interruptible(&ctx->file->poll_wait);
-       if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
-               ucma_removal_event_handler(cm_id);
-out:
-       mutex_unlock(&ctx->file->mut);
-       return ret;
+       if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
+               queue_work(system_unbound_wq, &ctx->close_work);
+       return 0;
 }
 
 static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
                              int in_len, int out_len)
 {
-       struct ucma_context *ctx;
        struct rdma_ucm_get_event cmd;
        struct ucma_event *uevent;
-       int ret = 0;
 
        /*
         * Old 32 bit user space does not send the 4 byte padding in the
@@ -429,35 +392,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
                mutex_lock(&file->mut);
        }
 
-       uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
-       if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-               ctx = ucma_alloc_ctx(file);
-               if (!ctx) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-               uevent->ctx->backlog++;
-               ctx->cm_id = uevent->cm_id;
-               ctx->cm_id->context = ctx;
-               uevent->resp.id = ctx->id;
-       }
+       uevent = list_first_entry(&file->event_list, struct ucma_event, list);
 
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &uevent->resp,
                         min_t(size_t, out_len, sizeof(uevent->resp)))) {
-               ret = -EFAULT;
-               goto done;
+               mutex_unlock(&file->mut);
+               return -EFAULT;
        }
 
        list_del(&uevent->list);
        uevent->ctx->events_reported++;
        if (uevent->mc)
                uevent->mc->events_reported++;
-       kfree(uevent);
-done:
+       if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+               atomic_inc(&uevent->ctx->backlog);
        mutex_unlock(&file->mut);
-       return ret;
+
+       kfree(uevent);
+       return 0;
 }
 
 static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -498,58 +451,60 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
        if (ret)
                return ret;
 
-       mutex_lock(&file->mut);
        ctx = ucma_alloc_ctx(file);
-       mutex_unlock(&file->mut);
        if (!ctx)
                return -ENOMEM;
 
        ctx->uid = cmd.uid;
-       cm_id = __rdma_create_id(current->nsproxy->net_ns,
-                                ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+       cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
        if (IS_ERR(cm_id)) {
                ret = PTR_ERR(cm_id);
                goto err1;
        }
+       ctx->cm_id = cm_id;
 
        resp.id = ctx->id;
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp))) {
-               ret = -EFAULT;
-               goto err2;
+               xa_erase(&ctx_table, ctx->id);
+               __destroy_id(ctx);
+               return -EFAULT;
        }
 
-       ctx->cm_id = cm_id;
+       mutex_lock(&file->mut);
+       ucma_finish_ctx(ctx);
+       mutex_unlock(&file->mut);
        return 0;
 
-err2:
-       rdma_destroy_id(cm_id);
 err1:
        xa_erase(&ctx_table, ctx->id);
-       mutex_lock(&file->mut);
-       list_del(&ctx->list);
-       mutex_unlock(&file->mut);
        kfree(ctx);
        return ret;
 }
 
 static void ucma_cleanup_multicast(struct ucma_context *ctx)
 {
-       struct ucma_multicast *mc, *tmp;
+       struct ucma_multicast *mc;
+       unsigned long index;
 
-       mutex_lock(&ctx->file->mut);
-       list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
-               list_del(&mc->list);
-               xa_erase(&multicast_table, mc->id);
+       xa_for_each(&multicast_table, index, mc) {
+               if (mc->ctx != ctx)
+                       continue;
+               /*
+                * At this point mc->ctx->ref is 0 so ucma_leave_multicast()
+                * cannot obtain the ctx and free the mc concurrently; this is
+                * enough serialization
+                */
+               xa_erase(&multicast_table, index);
                kfree(mc);
        }
-       mutex_unlock(&ctx->file->mut);
 }
 
 static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 {
        struct ucma_event *uevent, *tmp;
 
+       rdma_lock_handler(mc->ctx->cm_id);
+       mutex_lock(&mc->ctx->file->mut);
        list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
                if (uevent->mc != mc)
                        continue;
@@ -557,6 +512,8 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
                list_del(&uevent->list);
                kfree(uevent);
        }
+       mutex_unlock(&mc->ctx->file->mut);
+       rdma_unlock_handler(mc->ctx->cm_id);
 }
 
 /*
@@ -564,10 +521,6 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
  * this point, no new events will be reported from the hardware. However, we
  * still need to cleanup the UCMA context for this ID. Specifically, there
  * might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing.  We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
  * mutex. After that we release them as needed.
  */
 static int ucma_free_ctx(struct ucma_context *ctx)
@@ -576,31 +529,56 @@ static int ucma_free_ctx(struct ucma_context *ctx)
        struct ucma_event *uevent, *tmp;
        LIST_HEAD(list);
 
-
        ucma_cleanup_multicast(ctx);
 
        /* Cleanup events not yet reported to the user. */
        mutex_lock(&ctx->file->mut);
        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
-               if (uevent->ctx == ctx)
+               if (uevent->ctx == ctx || uevent->listen_ctx == ctx)
                        list_move_tail(&uevent->list, &list);
        }
        list_del(&ctx->list);
+       events_reported = ctx->events_reported;
        mutex_unlock(&ctx->file->mut);
 
+       /*
+        * If this was a listening ID then any connections spawned from it
+        * that have not been delivered to userspace are cleaned up too.
+        * Must be done outside any locks.
+        */
        list_for_each_entry_safe(uevent, tmp, &list, list) {
                list_del(&uevent->list);
                if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
-                       rdma_destroy_id(uevent->cm_id);
+                       __destroy_id(uevent->ctx);
                kfree(uevent);
        }
 
-       events_reported = ctx->events_reported;
        mutex_destroy(&ctx->mutex);
        kfree(ctx);
        return events_reported;
 }
 
+static int __destroy_id(struct ucma_context *ctx)
+{
+       /*
+        * If the refcount is already 0 then ucma_close_id() has already
+        * destroyed the cm_id, otherwise holding the refcount keeps cm_id
+        * valid. Prevent queue_work() from being called.
+        */
+       if (refcount_inc_not_zero(&ctx->ref)) {
+               rdma_lock_handler(ctx->cm_id);
+               ctx->destroying = 1;
+               rdma_unlock_handler(ctx->cm_id);
+               ucma_put_ctx(ctx);
+       }
+
+       cancel_work_sync(&ctx->close_work);
+       /* At this point it's guaranteed that there is no inflight closing task */
+       if (ctx->cm_id)
+               ucma_close_id(&ctx->close_work);
+       return ucma_free_ctx(ctx);
+}
+
 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
 {
@@ -624,24 +602,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       mutex_lock(&ctx->file->mut);
-       ctx->destroying = 1;
-       mutex_unlock(&ctx->file->mut);
-
-       flush_workqueue(ctx->file->close_wq);
-       /* At this point it's guaranteed that there is no inflight
-        * closing task */
-       xa_lock(&ctx_table);
-       if (!ctx->closing) {
-               xa_unlock(&ctx_table);
-               ucma_put_ctx(ctx);
-               wait_for_completion(&ctx->comp);
-               rdma_destroy_id(ctx->cm_id);
-       } else {
-               xa_unlock(&ctx_table);
-       }
-
-       resp.events_reported = ucma_free_ctx(ctx);
+       resp.events_reported = __destroy_id(ctx);
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
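
__destroy_id() is what lets the per-file close_wq go away; the ordering is the interesting part, so here it is restated with annotations (the code itself appears in the hunk above, nothing new is added):

        if (refcount_inc_not_zero(&ctx->ref)) {        /* cm_id is still valid */
                rdma_lock_handler(ctx->cm_id);
                ctx->destroying = 1;                   /* stops further queue_work() */
                rdma_unlock_handler(ctx->cm_id);
                ucma_put_ctx(ctx);
        }
        cancel_work_sync(&ctx->close_work);            /* wait out an already queued close */
        if (ctx->cm_id)                                /* close work never ran */
                ucma_close_id(&ctx->close_work);
        return ucma_free_ctx(ctx);                     /* reap events, free the ctx */
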
@@ -1124,10 +1085,12 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
-                      cmd.backlog : max_backlog;
+       if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+               cmd.backlog = max_backlog;
+       atomic_set(&ctx->backlog, cmd.backlog);
+
        mutex_lock(&ctx->mutex);
-       ret = rdma_listen(ctx->cm_id, ctx->backlog);
+       ret = rdma_listen(ctx->cm_id, cmd.backlog);
        mutex_unlock(&ctx->mutex);
        ucma_put_ctx(ctx);
        return ret;
@@ -1160,16 +1123,20 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
 
        if (cmd.conn_param.valid) {
                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
-               mutex_lock(&file->mut);
                mutex_lock(&ctx->mutex);
-               ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece);
-               mutex_unlock(&ctx->mutex);
-               if (!ret)
+               rdma_lock_handler(ctx->cm_id);
+               ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+               if (!ret) {
+                       /* The uid must be set atomically with the handler */
                        ctx->uid = cmd.uid;
-               mutex_unlock(&file->mut);
+               }
+               rdma_unlock_handler(ctx->cm_id);
+               mutex_unlock(&ctx->mutex);
        } else {
                mutex_lock(&ctx->mutex);
-               ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece);
+               rdma_lock_handler(ctx->cm_id);
+               ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+               rdma_unlock_handler(ctx->cm_id);
                mutex_unlock(&ctx->mutex);
        }
        ucma_put_ctx(ctx);
@@ -1482,44 +1449,52 @@ static ssize_t ucma_process_join(struct ucma_file *file,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       mutex_lock(&file->mut);
-       mc = ucma_alloc_multicast(ctx);
+       mc = kzalloc(sizeof(*mc), GFP_KERNEL);
        if (!mc) {
                ret = -ENOMEM;
-               goto err1;
+               goto err_put_ctx;
        }
+
+       mc->ctx = ctx;
        mc->join_state = join_state;
        mc->uid = cmd->uid;
        memcpy(&mc->addr, addr, cmd->addr_size);
+
+       if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+                    GFP_KERNEL)) {
+               ret = -ENOMEM;
+               goto err_free_mc;
+       }
+
        mutex_lock(&ctx->mutex);
        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
                                  join_state, mc);
        mutex_unlock(&ctx->mutex);
        if (ret)
-               goto err2;
+               goto err_xa_erase;
 
        resp.id = mc->id;
        if (copy_to_user(u64_to_user_ptr(cmd->response),
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
-               goto err3;
+               goto err_leave_multicast;
        }
 
        xa_store(&multicast_table, mc->id, mc, 0);
 
-       mutex_unlock(&file->mut);
        ucma_put_ctx(ctx);
        return 0;
 
-err3:
+err_leave_multicast:
+       mutex_lock(&ctx->mutex);
        rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+       mutex_unlock(&ctx->mutex);
        ucma_cleanup_mc_events(mc);
-err2:
+err_xa_erase:
        xa_erase(&multicast_table, mc->id);
-       list_del(&mc->list);
+err_free_mc:
        kfree(mc);
-err1:
-       mutex_unlock(&file->mut);
+err_put_ctx:
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1581,7 +1556,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
        mc = xa_load(&multicast_table, cmd.id);
        if (!mc)
                mc = ERR_PTR(-ENOENT);
-       else if (mc->ctx->file != file)
+       else if (READ_ONCE(mc->ctx->file) != file)
                mc = ERR_PTR(-EINVAL);
        else if (!refcount_inc_not_zero(&mc->ctx->ref))
                mc = ERR_PTR(-ENXIO);
@@ -1598,10 +1573,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
        mutex_unlock(&mc->ctx->mutex);
 
-       mutex_lock(&mc->ctx->file->mut);
        ucma_cleanup_mc_events(mc);
-       list_del(&mc->list);
-       mutex_unlock(&mc->ctx->file->mut);
 
        ucma_put_ctx(mc->ctx);
        resp.events_reported = mc->events_reported;
@@ -1614,45 +1586,15 @@ out:
        return ret;
 }
 
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
-       /* Acquire mutex's based on pointer comparison to prevent deadlock. */
-       if (file1 < file2) {
-               mutex_lock(&file1->mut);
-               mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
-       } else {
-               mutex_lock(&file2->mut);
-               mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
-       }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
-       if (file1 < file2) {
-               mutex_unlock(&file2->mut);
-               mutex_unlock(&file1->mut);
-       } else {
-               mutex_unlock(&file1->mut);
-               mutex_unlock(&file2->mut);
-       }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
-       struct ucma_event *uevent, *tmp;
-
-       list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
-               if (uevent->ctx == ctx)
-                       list_move_tail(&uevent->list, &file->event_list);
-}
-
 static ssize_t ucma_migrate_id(struct ucma_file *new_file,
                               const char __user *inbuf,
                               int in_len, int out_len)
 {
        struct rdma_ucm_migrate_id cmd;
        struct rdma_ucm_migrate_resp resp;
+       struct ucma_event *uevent, *tmp;
        struct ucma_context *ctx;
+       LIST_HEAD(event_list);
        struct fd f;
        struct ucma_file *cur_file;
        int ret = 0;
@@ -1668,40 +1610,53 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
                ret = -EINVAL;
                goto file_put;
        }
+       cur_file = f.file->private_data;
 
        /* Validate current fd and prevent destruction of id. */
-       ctx = ucma_get_ctx(f.file->private_data, cmd.id);
+       ctx = ucma_get_ctx(cur_file, cmd.id);
        if (IS_ERR(ctx)) {
                ret = PTR_ERR(ctx);
                goto file_put;
        }
 
-       cur_file = ctx->file;
-       if (cur_file == new_file) {
-               resp.events_reported = ctx->events_reported;
-               goto response;
-       }
-
+       rdma_lock_handler(ctx->cm_id);
        /*
-        * Migrate events between fd's, maintaining order, and avoiding new
-        * events being added before existing events.
+        * ctx->file can only be changed under the handler & xa_lock. xa_load()
+        * must be checked again to ensure the ctx hasn't begun destruction
+        * since the ucma_get_ctx().
         */
-       ucma_lock_files(cur_file, new_file);
        xa_lock(&ctx_table);
-
-       list_move_tail(&ctx->list, &new_file->ctx_list);
-       ucma_move_events(ctx, new_file);
+       if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+               xa_unlock(&ctx_table);
+               ret = -ENOENT;
+               goto err_unlock;
+       }
        ctx->file = new_file;
+       xa_unlock(&ctx_table);
+
+       mutex_lock(&cur_file->mut);
+       list_del(&ctx->list);
+       /*
+        * At this point lock_handler() prevents addition of new uevents for
+        * this ctx.
+        */
+       list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+               if (uevent->ctx == ctx)
+                       list_move_tail(&uevent->list, &event_list);
        resp.events_reported = ctx->events_reported;
+       mutex_unlock(&cur_file->mut);
 
-       xa_unlock(&ctx_table);
-       ucma_unlock_files(cur_file, new_file);
+       mutex_lock(&new_file->mut);
+       list_add_tail(&ctx->list, &new_file->ctx_list);
+       list_splice_tail(&event_list, &new_file->event_list);
+       mutex_unlock(&new_file->mut);
 
-response:
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
 
+err_unlock:
+       rdma_unlock_handler(ctx->cm_id);
        ucma_put_ctx(ctx);
 file_put:
        fdput(f);
@@ -1801,13 +1756,6 @@ static int ucma_open(struct inode *inode, struct file *filp)
        if (!file)
                return -ENOMEM;
 
-       file->close_wq = alloc_ordered_workqueue("ucma_close_id",
-                                                WQ_MEM_RECLAIM);
-       if (!file->close_wq) {
-               kfree(file);
-               return -ENOMEM;
-       }
-
        INIT_LIST_HEAD(&file->event_list);
        INIT_LIST_HEAD(&file->ctx_list);
        init_waitqueue_head(&file->poll_wait);
@@ -1822,37 +1770,22 @@ static int ucma_open(struct inode *inode, struct file *filp)
 static int ucma_close(struct inode *inode, struct file *filp)
 {
        struct ucma_file *file = filp->private_data;
-       struct ucma_context *ctx, *tmp;
 
-       mutex_lock(&file->mut);
-       list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
-               ctx->destroying = 1;
-               mutex_unlock(&file->mut);
+       /*
+        * All paths that touch ctx_list starting from write() are prevented
+        * by this being an FD release function. The list_add_tail() in
+        * ucma_connect_event_handler() can run concurrently, however it only
+        * adds to the list *after* a listening ID. By only reading the first of
+        * the list, and relying on __destroy_id() to block
+        * ucma_connect_event_handler(), no additional locking is needed.
+        */
+       while (!list_empty(&file->ctx_list)) {
+               struct ucma_context *ctx = list_first_entry(
+                       &file->ctx_list, struct ucma_context, list);
 
                xa_erase(&ctx_table, ctx->id);
-               flush_workqueue(file->close_wq);
-               /* At that step once ctx was marked as destroying and workqueue
-                * was flushed we are safe from any inflights handlers that
-                * might put other closing task.
-                */
-               xa_lock(&ctx_table);
-               if (!ctx->closing) {
-                       xa_unlock(&ctx_table);
-                       ucma_put_ctx(ctx);
-                       wait_for_completion(&ctx->comp);
-                       /* rdma_destroy_id ensures that no event handlers are
-                        * inflight for that id before releasing it.
-                        */
-                       rdma_destroy_id(ctx->cm_id);
-               } else {
-                       xa_unlock(&ctx_table);
-               }
-
-               ucma_free_ctx(ctx);
-               mutex_lock(&file->mut);
+               __destroy_id(ctx);
        }
-       mutex_unlock(&file->mut);
-       destroy_workqueue(file->close_wq);
        kfree(file);
        return 0;
 }
index 52c55e9..e9fecbd 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/count_zeros.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
@@ -79,18 +80,28 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
                                     unsigned long virt)
 {
        struct scatterlist *sg;
-       unsigned int best_pg_bit;
        unsigned long va, pgoff;
        dma_addr_t mask;
        int i;
 
+       /* rdma_for_each_block() has a bug if the page size is smaller than the
+        * page size used to build the umem. For now prevent smaller page sizes
+        * from being returned.
+        */
+       pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
+
        /* At minimum, drivers must support PAGE_SIZE or smaller */
        if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
                return 0;
 
-       va = virt;
-       /* max page size not to exceed MR length */
-       mask = roundup_pow_of_two(umem->length);
+       umem->iova = va = virt;
+       /* The best result is the smallest page size that results in the minimum
+        * number of required pages. Compute the largest page size that could
+        * work based on VA address bits that don't change.
+        */
+       mask = pgsz_bitmap &
+              GENMASK(BITS_PER_LONG - 1,
+                      bits_per((umem->length - 1 + virt) ^ virt));
        /* offset into first SGL */
        pgoff = umem->address & ~PAGE_MASK;
 
@@ -108,9 +119,14 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
                        mask |= va;
                pgoff = 0;
        }
-       best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
 
-       return BIT_ULL(best_pg_bit);
+       /* The mask accumulates 1's in each position where the VA and physical
+        * address differ, thus the number of trailing zeros determines the
+        * largest page size that can pass the VA through to the physical.
+        */
+       if (mask)
+               pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+       return rounddown_pow_of_two(pgsz_bitmap);
 }
 EXPORT_SYMBOL(ib_umem_find_best_pgsz);
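
The reworked ib_umem_find_best_pgsz() replaces rdma_find_pg_bit() with a pure bit trick: OR together every position where the IOVA and the DMA layout disagree, and the trailing zeros of the result bound the usable page shift. The standalone userspace program below is only an illustration (it ignores the iova seeding and first/last-entry handling of the real function) and shows the idea on a fabricated SGL:

#include <stdint.h>
#include <stdio.h>

struct seg { uint64_t dma; uint64_t len; };

static uint64_t best_pgsz(uint64_t pgsz_bitmap, uint64_t va,
                          const struct seg *sgl, int nents)
{
        uint64_t mask = 0;
        int i;

        for (i = 0; i < nents; i++) {
                mask |= sgl[i].dma ^ va;                /* start-of-segment skew */
                va += sgl[i].len;
                mask |= (sgl[i].dma + sgl[i].len) ^ va; /* end-of-segment skew */
        }

        if (mask)       /* keep only page sizes up to 1 << ctz(mask) */
                pgsz_bitmap &= (2ULL << __builtin_ctzll(mask)) - 1;
        if (!pgsz_bitmap)
                return 0;
        return 1ULL << (63 - __builtin_clzll(pgsz_bitmap));    /* largest supported */
}

int main(void)
{
        /* Two DMA-contiguous 2MB chunks, both sides 2MB aligned. */
        struct seg sgl[] = {
                { 0x12200000ULL, 0x200000ULL },
                { 0x12400000ULL, 0x200000ULL },
        };
        uint64_t bitmap = (1ULL << 12) | (1ULL << 21) | (1ULL << 30);

        printf("best page size: 0x%llx\n",      /* prints 0x200000 (2MB) */
               (unsigned long long)best_pgsz(bitmap, 0x7f0000200000ULL, sgl, 2));
        return 0;
}
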
 
@@ -157,6 +173,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
        umem->ibdev      = device;
        umem->length     = size;
        umem->address    = addr;
+       /*
+        * Drivers should call ib_umem_find_best_pgsz() to set the iova
+        * correctly.
+        */
+       umem->iova = addr;
        umem->writable   = ib_access_writable(access);
        umem->owning_mm = mm = current->mm;
        mmgrab(mm);
@@ -259,18 +280,6 @@ void ib_umem_release(struct ib_umem *umem)
 }
 EXPORT_SYMBOL(ib_umem_release);
 
-int ib_umem_page_count(struct ib_umem *umem)
-{
-       int i, n = 0;
-       struct scatterlist *sg;
-
-       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
-               n += sg_dma_len(sg) >> PAGE_SHIFT;
-
-       return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
 /*
  * Copy from the given ib_umem's pages to the given buffer.
  *
index cc6b4be..323f6cf 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
 #include <linux/interval_tree.h>
+#include <linux/hmm.h>
 #include <linux/pagemap.h>
 
 #include <rdma/ib_verbs.h>
@@ -60,7 +61,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
                size_t page_size = 1UL << umem_odp->page_shift;
                unsigned long start;
                unsigned long end;
-               size_t pages;
+               size_t ndmas, npfns;
 
                start = ALIGN_DOWN(umem_odp->umem.address, page_size);
                if (check_add_overflow(umem_odp->umem.address,
@@ -71,20 +72,21 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
                if (unlikely(end < page_size))
                        return -EOVERFLOW;
 
-               pages = (end - start) >> umem_odp->page_shift;
-               if (!pages)
+               ndmas = (end - start) >> umem_odp->page_shift;
+               if (!ndmas)
                        return -EINVAL;
 
-               umem_odp->page_list = kvcalloc(
-                       pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
-               if (!umem_odp->page_list)
+               npfns = (end - start) >> PAGE_SHIFT;
+               umem_odp->pfn_list = kvcalloc(
+                       npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+               if (!umem_odp->pfn_list)
                        return -ENOMEM;
 
                umem_odp->dma_list = kvcalloc(
-                       pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+                       ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
                if (!umem_odp->dma_list) {
                        ret = -ENOMEM;
-                       goto out_page_list;
+                       goto out_pfn_list;
                }
 
                ret = mmu_interval_notifier_insert(&umem_odp->notifier,
@@ -98,8 +100,8 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 
 out_dma_list:
        kvfree(umem_odp->dma_list);
-out_page_list:
-       kvfree(umem_odp->page_list);
+out_pfn_list:
+       kvfree(umem_odp->pfn_list);
        return ret;
 }
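
Splitting page_list into pfn_list changes the sizing: pfn_list is indexed in CPU (PAGE_SIZE) pages while dma_list stays indexed in umem pages of 1 << page_shift, so with a 2MB ODP page there are 512 pfn entries per dma entry. A throwaway userspace check of the arithmetic (all values illustrative):

#include <stdio.h>

int main(void)
{
        unsigned long start = 0x7f0000000000UL;
        unsigned long end = start + (4UL << 20);        /* a 4MB range */
        unsigned int page_shift = 21;                   /* 2MB ODP pages */
        unsigned int cpu_page_shift = 12;               /* 4K CPU pages */

        unsigned long ndmas = (end - start) >> page_shift;      /* dma_list slots */
        unsigned long npfns = (end - start) >> cpu_page_shift;  /* pfn_list slots */

        printf("ndmas=%lu npfns=%lu pfns per dma=%lu\n",
               ndmas, npfns, npfns / ndmas);            /* 2, 1024, 512 */
        return 0;
}
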
 
@@ -276,7 +278,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
                mutex_unlock(&umem_odp->umem_mutex);
                mmu_interval_notifier_remove(&umem_odp->notifier);
                kvfree(umem_odp->dma_list);
-               kvfree(umem_odp->page_list);
+               kvfree(umem_odp->pfn_list);
        }
        put_pid(umem_odp->tgid);
        kfree(umem_odp);
@@ -287,87 +289,56 @@ EXPORT_SYMBOL(ib_umem_odp_release);
  * Map for DMA and insert a single page into the on-demand paging page tables.
  *
  * @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
+ * @dma_index: index in the umem's dma_list to add the DMA address to.
  * @page: the page struct to map and add.
  * @access_mask: access permissions needed for this page.
  * @current_seq: sequence number for synchronization with invalidations.
  *               the sequence number is taken from
  *               umem_odp->notifiers_seq.
  *
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ * The function returns -EFAULT if the DMA mapping operation fails.
  *
- * The page is released via put_page even if the operation failed. For on-demand
- * pinning, the page is released whenever it isn't stored in the umem.
  */
 static int ib_umem_odp_map_dma_single_page(
                struct ib_umem_odp *umem_odp,
-               unsigned int page_index,
+               unsigned int dma_index,
                struct page *page,
-               u64 access_mask,
-               unsigned long current_seq)
+               u64 access_mask)
 {
        struct ib_device *dev = umem_odp->umem.ibdev;
-       dma_addr_t dma_addr;
-       int ret = 0;
+       dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
 
-       if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
-               ret = -EAGAIN;
-               goto out;
-       }
-       if (!(umem_odp->dma_list[page_index])) {
-               dma_addr =
-                       ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
-                                       DMA_BIDIRECTIONAL);
-               if (ib_dma_mapping_error(dev, dma_addr)) {
-                       ret = -EFAULT;
-                       goto out;
-               }
-               umem_odp->dma_list[page_index] = dma_addr | access_mask;
-               umem_odp->page_list[page_index] = page;
-               umem_odp->npages++;
-       } else if (umem_odp->page_list[page_index] == page) {
-               umem_odp->dma_list[page_index] |= access_mask;
-       } else {
+       if (*dma_addr) {
                /*
-                * This is a race here where we could have done:
-                *
-                *         CPU0                             CPU1
-                *   get_user_pages()
-                *                                       invalidate()
-                *                                       page_fault()
-                *   mutex_lock(umem_mutex)
-                *    page from GUP != page in ODP
-                *
-                * It should be prevented by the retry test above as reading
-                * the seq number should be reliable under the
-                * umem_mutex. Thus something is really not working right if
-                * things get here.
+                * If the page is already DMA mapped it means it went through
+                * a non-invalidating transition, like read-only to writable.
+                * Resync the flags.
                 */
-               WARN(true,
-                    "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
-                    umem_odp->page_list[page_index], page);
-               ret = -EAGAIN;
+               *dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
+               return 0;
        }
 
-out:
-       put_page(page);
-       return ret;
+       *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
+                                   DMA_BIDIRECTIONAL);
+       if (ib_dma_mapping_error(dev, *dma_addr)) {
+               *dma_addr = 0;
+               return -EFAULT;
+       }
+       umem_odp->npages++;
+       *dma_addr |= access_mask;
+       return 0;
 }
 
 /**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
  *
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem_odp->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * The DMA addresses of the mapped pages are updated in umem_odp->dma_list.
+ * Upon success the ODP MR will be locked to let the caller complete its device
+ * page table update.
  *
  * Returns the number of pages mapped in success, negative error code
  * for failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- * An -ENOENT error code indicates that userspace process is being terminated
- * and mm was already destroyed.
  * @umem_odp: the umem to map and pin
  * @user_virt: the address from which we need to map.
  * @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -376,21 +347,19 @@ out:
  *        the return value.
  * @access_mask: bit mask of the requested access permissions for the given
  *               range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- *               invalidations. the sequance number is read from
- *               umem_odp->notifiers_seq before calling this function
+ * @fault: whether faulting is required for the given range
  */
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
-                             u64 bcnt, u64 access_mask,
-                             unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+                                u64 bcnt, u64 access_mask, bool fault)
+                       __acquires(&umem_odp->umem_mutex)
 {
        struct task_struct *owning_process  = NULL;
        struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
-       struct page       **local_page_list = NULL;
-       u64 page_mask, off;
-       int j, k, ret = 0, start_idx, npages = 0;
-       unsigned int flags = 0, page_shift;
-       phys_addr_t p = 0;
+       int pfn_index, dma_index, ret = 0, start_idx;
+       unsigned int page_shift, hmm_order, pfn_start_idx;
+       unsigned long num_pfns, current_seq;
+       struct hmm_range range = {};
+       unsigned long timeout;
 
        if (access_mask == 0)
                return -EINVAL;
@@ -399,15 +368,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
            user_virt + bcnt > ib_umem_end(umem_odp))
                return -EFAULT;
 
-       local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
-       if (!local_page_list)
-               return -ENOMEM;
-
        page_shift = umem_odp->page_shift;
-       page_mask = ~(BIT(page_shift) - 1);
-       off = user_virt & (~page_mask);
-       user_virt = user_virt & page_mask;
-       bcnt += off; /* Charge for the first page offset as well. */
 
        /*
         * owning_process is allowed to be NULL, this means somehow the mm is
@@ -420,99 +381,104 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
                goto out_put_task;
        }
 
-       if (access_mask & ODP_WRITE_ALLOWED_BIT)
-               flags |= FOLL_WRITE;
+       range.notifier = &umem_odp->notifier;
+       range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+       range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+       pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+       num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+       if (fault) {
+               range.default_flags = HMM_PFN_REQ_FAULT;
 
-       start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
-       k = start_idx;
+               if (access_mask & ODP_WRITE_ALLOWED_BIT)
+                       range.default_flags |= HMM_PFN_REQ_WRITE;
+       }
 
-       while (bcnt > 0) {
-               const size_t gup_num_pages = min_t(size_t,
-                               ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
-                               PAGE_SIZE / sizeof(struct page *));
+       range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+       timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 
-               mmap_read_lock(owning_mm);
-               /*
-                * Note: this might result in redundent page getting. We can
-                * avoid this by checking dma_list to be 0 before calling
-                * get_user_pages. However, this make the code much more
-                * complex (and doesn't gain us much performance in most use
-                * cases).
-                */
-               npages = get_user_pages_remote(owning_mm,
-                               user_virt, gup_num_pages,
-                               flags, local_page_list, NULL, NULL);
-               mmap_read_unlock(owning_mm);
-
-               if (npages < 0) {
-                       if (npages != -EAGAIN)
-                               pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
-                       else
-                               pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
-                       break;
-               }
+retry:
+       current_seq = range.notifier_seq =
+               mmu_interval_read_begin(&umem_odp->notifier);
 
-               bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
-               mutex_lock(&umem_odp->umem_mutex);
-               for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
-                       if (user_virt & ~page_mask) {
-                               p += PAGE_SIZE;
-                               if (page_to_phys(local_page_list[j]) != p) {
-                                       ret = -EFAULT;
-                                       break;
-                               }
-                               put_page(local_page_list[j]);
-                               continue;
-                       }
+       mmap_read_lock(owning_mm);
+       ret = hmm_range_fault(&range);
+       mmap_read_unlock(owning_mm);
+       if (unlikely(ret)) {
+               if (ret == -EBUSY && !time_after(jiffies, timeout))
+                       goto retry;
+               goto out_put_mm;
+       }
 
-                       ret = ib_umem_odp_map_dma_single_page(
-                                       umem_odp, k, local_page_list[j],
-                                       access_mask, current_seq);
-                       if (ret < 0) {
-                               if (ret != -EAGAIN)
-                                       pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
-                               else
-                                       pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
-                               break;
-                       }
+       start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+       dma_index = start_idx;
 
-                       p = page_to_phys(local_page_list[j]);
-                       k++;
-               }
+       mutex_lock(&umem_odp->umem_mutex);
+       if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
                mutex_unlock(&umem_odp->umem_mutex);
+               goto retry;
+       }
 
-               if (ret < 0) {
+       for (pfn_index = 0; pfn_index < num_pfns;
+               pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
+
+               if (fault) {
                        /*
-                        * Release pages, remembering that the first page
-                        * to hit an error was already released by
-                        * ib_umem_odp_map_dma_single_page().
+                        * Since we asked for hmm_range_fault() to populate
+                        * pages it shouldn't return an error entry on success.
                         */
-                       if (npages - (j + 1) > 0)
-                               release_pages(&local_page_list[j+1],
-                                             npages - (j + 1));
+                       WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+                       WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+               } else {
+                       if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
+                               WARN_ON(umem_odp->dma_list[dma_index]);
+                               continue;
+                       }
+                       access_mask = ODP_READ_ALLOWED_BIT;
+                       if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
+                               access_mask |= ODP_WRITE_ALLOWED_BIT;
+               }
+
+               hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+               /* If a hugepage was detected and ODP wasn't set up for it, the
+                * umem page_shift will be used; an hmm_order smaller than the
+                * page_shift is an error.
+                */
+               if (hmm_order + PAGE_SHIFT < page_shift) {
+                       ret = -EINVAL;
+                       ibdev_dbg(umem_odp->umem.ibdev,
+                                 "%s: un-expected hmm_order %d, page_shift %d\n",
+                                 __func__, hmm_order, page_shift);
                        break;
                }
-       }
 
-       if (ret >= 0) {
-               if (npages < 0 && k == start_idx)
-                       ret = npages;
-               else
-                       ret = k - start_idx;
+               ret = ib_umem_odp_map_dma_single_page(
+                               umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
+                               access_mask);
+               if (ret < 0) {
+                       ibdev_dbg(umem_odp->umem.ibdev,
+                                 "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+                       break;
+               }
        }
+       /* upon success the lock stays held for the caller */
+       if (!ret)
+               ret = dma_index - start_idx;
+       else
+               mutex_unlock(&umem_odp->umem_mutex);
 
+out_put_mm:
        mmput(owning_mm);
 out_put_task:
        if (owning_process)
                put_task_struct(owning_process);
-       free_page((unsigned long)local_page_list);
        return ret;
 }
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
 
 void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
                                 u64 bound)
 {
+       dma_addr_t dma_addr;
+       dma_addr_t dma;
        int idx;
        u64 addr;
        struct ib_device *dev = umem_odp->umem.ibdev;
@@ -521,20 +487,16 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 
        virt = max_t(u64, virt, ib_umem_start(umem_odp));
        bound = min_t(u64, bound, ib_umem_end(umem_odp));
-       /* Note that during the run of this function, the
-        * notifiers_count of the MR is > 0, preventing any racing
-        * faults from completion. We might be racing with other
-        * invalidations, so we must make sure we free each page only
-        * once. */
        for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
                idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
-               if (umem_odp->page_list[idx]) {
-                       struct page *page = umem_odp->page_list[idx];
-                       dma_addr_t dma = umem_odp->dma_list[idx];
-                       dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+               dma = umem_odp->dma_list[idx];
 
-                       WARN_ON(!dma_addr);
+               /* The access flags guarantee a valid DMA address whenever the entry is non-zero */
+               if (dma) {
+                       unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+                       struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
 
+                       dma_addr = dma & ODP_DMA_ADDR_MASK;
                        ib_dma_unmap_page(dev, dma_addr,
                                          BIT(umem_odp->page_shift),
                                          DMA_BIDIRECTIONAL);
@@ -551,7 +513,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
                                 */
                                set_page_dirty(head_page);
                        }
-                       umem_odp->page_list[idx] = NULL;
                        umem_odp->dma_list[idx] = 0;
                        umem_odp->npages--;
                }
index 2fbc583..418d133 100644 (file)
@@ -218,10 +218,12 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
        if (!ucontext)
                return -ENOMEM;
 
-       ucontext->res.type = RDMA_RESTRACK_CTX;
        ucontext->device = ib_dev;
        ucontext->ufile = ufile;
        xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
+       rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+       rdma_restrack_set_name(&ucontext->res, NULL);
        attrs->context = ucontext;
        return 0;
 }
@@ -250,7 +252,7 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
        if (ret)
                goto err_uncharge;
 
-       rdma_restrack_uadd(&ucontext->res);
+       rdma_restrack_add(&ucontext->res);
 
        /*
         * Make sure that ib_uverbs_get_ucontext() sees the pointer update
@@ -313,6 +315,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 err_uobj:
        rdma_alloc_abort_uobject(uobj, attrs, false);
 err_ucontext:
+       rdma_restrack_put(&attrs->context->res);
        kfree(attrs->context);
        attrs->context = NULL;
        return ret;
@@ -439,12 +442,14 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
        pd->device  = ib_dev;
        pd->uobject = uobj;
        atomic_set(&pd->usecnt, 0);
-       pd->res.type = RDMA_RESTRACK_PD;
+
+       rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+       rdma_restrack_set_name(&pd->res, NULL);
 
        ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
        if (ret)
                goto err_alloc;
-       rdma_restrack_uadd(&pd->res);
+       rdma_restrack_add(&pd->res);
 
        uobj->object = pd;
        uobj_finalize_uobj_create(uobj, attrs);
@@ -453,6 +458,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
        return uverbs_response(attrs, &resp, sizeof(resp));
 
 err_alloc:
+       rdma_restrack_put(&pd->res);
        kfree(pd);
 err:
        uobj_alloc_abort(uobj, attrs);
@@ -742,9 +748,11 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
        mr->sig_attrs = NULL;
        mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
-       mr->res.type = RDMA_RESTRACK_MR;
        mr->iova = cmd.hca_va;
-       rdma_restrack_uadd(&mr->res);
+
+       rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+       rdma_restrack_set_name(&mr->res, NULL);
+       rdma_restrack_add(&mr->res);
 
        uobj->object = mr;
        uobj_put_obj_read(pd);
@@ -858,7 +866,7 @@ static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
 static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_alloc_mw      cmd;
-       struct ib_uverbs_alloc_mw_resp resp;
+       struct ib_uverbs_alloc_mw_resp resp = {};
        struct ib_uobject             *uobj;
        struct ib_pd                  *pd;
        struct ib_mw                  *mw;
@@ -884,15 +892,21 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
                goto err_put;
        }
 
-       mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
-       if (IS_ERR(mw)) {
-               ret = PTR_ERR(mw);
+       mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+       if (!mw) {
+               ret = -ENOMEM;
                goto err_put;
        }
 
-       mw->device  = pd->device;
-       mw->pd      = pd;
+       mw->device = ib_dev;
+       mw->pd = pd;
        mw->uobject = uobj;
+       mw->type = cmd.mw_type;
+
+       ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
+       if (ret)
+               goto err_alloc;
+
        atomic_inc(&pd->usecnt);
 
        uobj->object = mw;
@@ -903,6 +917,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
        resp.mw_handle = uobj->id;
        return uverbs_response(attrs, &resp, sizeof(resp));
 
+err_alloc:
+       kfree(mw);
 err_put:
        uobj_put_obj_read(pd);
 err_free:
@@ -994,12 +1010,14 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
        cq->event_handler = ib_uverbs_cq_event_handler;
        cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
        atomic_set(&cq->usecnt, 0);
-       cq->res.type = RDMA_RESTRACK_CQ;
+
+       rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+       rdma_restrack_set_name(&cq->res, NULL);
 
        ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
        if (ret)
                goto err_free;
-       rdma_restrack_uadd(&cq->res);
+       rdma_restrack_add(&cq->res);
 
        obj->uevent.uobject.object = cq;
        obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
@@ -1013,6 +1031,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
        return uverbs_response(attrs, &resp, sizeof(resp));
 
 err_free:
+       rdma_restrack_put(&cq->res);
        kfree(cq);
 err_file:
        if (ev_file)
@@ -1237,8 +1256,21 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
        bool has_sq = true;
        struct ib_device *ib_dev;
 
-       if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
-               return -EPERM;
+       switch (cmd->qp_type) {
+       case IB_QPT_RAW_PACKET:
+               if (!capable(CAP_NET_RAW))
+                       return -EPERM;
+               break;
+       case IB_QPT_RC:
+       case IB_QPT_UC:
+       case IB_QPT_UD:
+       case IB_QPT_XRC_INI:
+       case IB_QPT_XRC_TGT:
+       case IB_QPT_DRIVER:
+               break;
+       default:
+               return -EINVAL;
+       }
 
        obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
                                                 &ib_dev);
@@ -2985,11 +3017,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_ex_create_rwq_ind_table cmd;
        struct ib_uverbs_ex_create_rwq_ind_table_resp  resp = {};
-       struct ib_uobject                 *uobj;
+       struct ib_uobject *uobj;
        int err;
        struct ib_rwq_ind_table_init_attr init_attr = {};
        struct ib_rwq_ind_table *rwq_ind_tbl;
-       struct ib_wq    **wqs = NULL;
+       struct ib_wq **wqs = NULL;
        u32 *wqs_handles = NULL;
        struct ib_wq    *wq = NULL;
        int i, num_read_wqs;
@@ -3047,17 +3079,15 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
                goto put_wqs;
        }
 
-       init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
-       init_attr.ind_tbl = wqs;
-
-       rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
-                                                      &attrs->driver_udata);
-
-       if (IS_ERR(rwq_ind_tbl)) {
-               err = PTR_ERR(rwq_ind_tbl);
+       rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+       if (!rwq_ind_tbl) {
+               err = -ENOMEM;
                goto err_uobj;
        }
 
+       init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+       init_attr.ind_tbl = wqs;
+
        rwq_ind_tbl->ind_tbl = wqs;
        rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
        rwq_ind_tbl->uobject = uobj;
@@ -3065,6 +3095,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
        rwq_ind_tbl->device = ib_dev;
        atomic_set(&rwq_ind_tbl->usecnt, 0);
 
+       err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+                                              &attrs->driver_udata);
+       if (err)
+               goto err_create;
+
        for (i = 0; i < num_wq_handles; i++)
                rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
                                        UVERBS_LOOKUP_READ);
@@ -3076,6 +3111,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
        resp.response_length = uverbs_response_length(attrs, sizeof(resp));
        return uverbs_response(attrs, &resp, sizeof(resp));
 
+err_create:
+       kfree(rwq_ind_tbl);
 err_uobj:
        uobj_alloc_abort(uobj, attrs);
 put_wqs:
@@ -3232,8 +3269,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
                goto err_free;
        }
 
-       flow_id = qp->device->ops.create_flow(
-               qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+       flow_id = qp->device->ops.create_flow(qp, flow_attr,
+                                             &attrs->driver_udata);
 
        if (IS_ERR(flow_id)) {
                err = PTR_ERR(flow_id);
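
The hunks above repeat one conversion: the old pair of setting res.type and calling rdma_restrack_uadd() becomes rdma_restrack_new() plus rdma_restrack_set_name() before the driver op, rdma_restrack_add() once it succeeds, and rdma_restrack_put() on the error path. A condensed sketch of that lifecycle, using a CQ for illustration; the error handling mirrors the uverbs paths above and is not an additional API.

/* Sketch only: the restrack lifecycle for a core-allocated object. */
static struct ib_cq *example_create_cq(struct ib_device *dev,
				       const struct ib_cq_init_attr *attr)
{
	struct ib_cq *cq;
	int ret;

	cq = rdma_zalloc_drv_obj(dev, ib_cq);
	if (!cq)
		return ERR_PTR(-ENOMEM);
	cq->device = dev;

	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);	/* init entry        */
	rdma_restrack_set_name(&cq->res, NULL);		/* NULL as in uverbs */

	ret = dev->ops.create_cq(cq, attr, NULL);
	if (ret) {
		rdma_restrack_put(&cq->res);		/* undo _new()       */
		kfree(cq);
		return ERR_PTR(ret);
	}

	rdma_restrack_add(&cq->res);			/* now visible       */
	return cq;
}
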
index 37794d8..36ea760 100644 (file)
@@ -108,8 +108,11 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
        int ret;
 
        ret = mw->device->ops.dealloc_mw(mw);
-       if (!ret)
-               atomic_dec(&pd->usecnt);
+       if (ret)
+               return ret;
+
+       atomic_dec(&pd->usecnt);
+       kfree(mw);
        return ret;
 }
 
index 08c39cf..0658101 100644 (file)
@@ -81,12 +81,20 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
 {
        struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
        struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-       int ret;
+       u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+       int ret, i;
+
+       if (atomic_read(&rwq_ind_tbl->usecnt))
+               return -EBUSY;
 
-       ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+       ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
        if (ib_is_destroy_retryable(ret, why, uobject))
                return ret;
 
+       for (i = 0; i < table_size; i++)
+               atomic_dec(&ind_tbl[i]->usecnt);
+
+       kfree(rwq_ind_tbl);
        kfree(ind_tbl);
        return ret;
 }
@@ -122,8 +130,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
        if (ret)
                return ret;
 
-       ib_dealloc_pd_user(pd, &attrs->driver_udata);
-       return 0;
+       return ib_dealloc_pd_user(pd, &attrs->driver_udata);
 }
 
 void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
index c7e7438..b3c6c06 100644 (file)
@@ -46,7 +46,9 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
        if (ret)
                return ret;
 
-       counters->device->ops.destroy_counters(counters);
+       ret = counters->device->ops.destroy_counters(counters);
+       if (ret)
+               return ret;
        kfree(counters);
        return 0;
 }
index b1c7dac..8dabd05 100644 (file)
@@ -33,6 +33,7 @@
 #include <rdma/uverbs_std_types.h>
 #include "rdma_core.h"
 #include "uverbs.h"
+#include "restrack.h"
 
 static int uverbs_free_cq(struct ib_uobject *uobject,
                          enum rdma_remove_reason why,
@@ -123,7 +124,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
        cq->event_handler = ib_uverbs_cq_event_handler;
        cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
        atomic_set(&cq->usecnt, 0);
-       cq->res.type = RDMA_RESTRACK_CQ;
+
+       rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+       rdma_restrack_set_name(&cq->res, NULL);
 
        ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
        if (ret)
@@ -131,7 +134,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 
        obj->uevent.uobject.object = cq;
        obj->uevent.uobject.user_handle = user_handle;
-       rdma_restrack_uadd(&cq->res);
+       rdma_restrack_add(&cq->res);
        uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
 
        ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
@@ -139,6 +142,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
        return ret;
 
 err_free:
+       rdma_restrack_put(&cq->res);
        kfree(cq);
 err_event_file:
        if (obj->uevent.event_file)
index 75df209..f367d52 100644 (file)
@@ -3,11 +3,13 @@
  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
  */
 
+#include <linux/overflow.h>
 #include <rdma/uverbs_std_types.h>
 #include "rdma_core.h"
 #include "uverbs.h"
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
 
 /*
  * This ioctl method allows calling any defined write or write_ex
@@ -165,7 +167,8 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
        resp->subnet_timeout = attr->subnet_timeout;
        resp->init_type_reply = attr->init_type_reply;
        resp->active_width = attr->active_width;
-       resp->active_speed = attr->active_speed;
+       /* This ABI needs to be extended to report any speed faster than IB_SPEED_NDR */
+       resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
        resp->phys_state = attr->phys_state;
        resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
 }
@@ -265,6 +268,172 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
        return ucontext->device->ops.query_ucontext(ucontext, attrs);
 }
 
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+                                   struct ib_uverbs_gid_entry *entries,
+                                   size_t num_entries, size_t user_entry_size)
+{
+       const struct uverbs_attr *attr;
+       void __user *user_entries;
+       size_t copy_len;
+       int ret;
+       int i;
+
+       if (user_entry_size == sizeof(*entries)) {
+               ret = uverbs_copy_to(attrs,
+                                    UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+                                    entries, sizeof(*entries) * num_entries);
+               return ret;
+       }
+
+       copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+       attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+       if (IS_ERR(attr))
+               return PTR_ERR(attr);
+
+       user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+       for (i = 0; i < num_entries; i++) {
+               if (copy_to_user(user_entries, entries, copy_len))
+                       return -EFAULT;
+
+               if (user_entry_size > sizeof(*entries)) {
+                       if (clear_user(user_entries + sizeof(*entries),
+                                      user_entry_size - sizeof(*entries)))
+                               return -EFAULT;
+               }
+
+               entries++;
+               user_entries += user_entry_size;
+       }
+
+       return uverbs_output_written(attrs,
+                                    UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+       struct uverbs_attr_bundle *attrs)
+{
+       struct ib_uverbs_gid_entry *entries;
+       struct ib_ucontext *ucontext;
+       struct ib_device *ib_dev;
+       size_t user_entry_size;
+       ssize_t num_entries;
+       size_t max_entries;
+       size_t num_bytes;
+       u32 flags;
+       int ret;
+
+       ret = uverbs_get_flags32(&flags, attrs,
+                                UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+       if (ret)
+               return ret;
+
+       ret = uverbs_get_const(&user_entry_size, attrs,
+                              UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+       if (ret)
+               return ret;
+
+       max_entries = uverbs_attr_ptr_get_array_size(
+               attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+               user_entry_size);
+       if (max_entries <= 0)
+               return -EINVAL;
+
+       ucontext = ib_uverbs_get_ucontext(attrs);
+       if (IS_ERR(ucontext))
+               return PTR_ERR(ucontext);
+       ib_dev = ucontext->device;
+
+       if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes))
+               return -EINVAL;
+
+       entries = uverbs_zalloc(attrs, num_bytes);
+       if (IS_ERR(entries))
+               return PTR_ERR(entries);
+
+       num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+       if (num_entries < 0)
+               return -EINVAL;
+
+       ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+                                      user_entry_size);
+       if (ret)
+               return ret;
+
+       ret = uverbs_copy_to(attrs,
+                            UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+                            &num_entries, sizeof(num_entries));
+       return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+       struct uverbs_attr_bundle *attrs)
+{
+       struct ib_uverbs_gid_entry entry = {};
+       const struct ib_gid_attr *gid_attr;
+       struct ib_ucontext *ucontext;
+       struct ib_device *ib_dev;
+       struct net_device *ndev;
+       u32 gid_index;
+       u32 port_num;
+       u32 flags;
+       int ret;
+
+       ret = uverbs_get_flags32(&flags, attrs,
+                                UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+       if (ret)
+               return ret;
+
+       ret = uverbs_get_const(&port_num, attrs,
+                              UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+       if (ret)
+               return ret;
+
+       ret = uverbs_get_const(&gid_index, attrs,
+                              UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+       if (ret)
+               return ret;
+
+       ucontext = ib_uverbs_get_ucontext(attrs);
+       if (IS_ERR(ucontext))
+               return PTR_ERR(ucontext);
+       ib_dev = ucontext->device;
+
+       if (!rdma_is_port_valid(ib_dev, port_num))
+               return -EINVAL;
+
+       if (!rdma_ib_or_roce(ib_dev, port_num))
+               return -EOPNOTSUPP;
+
+       gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+       if (IS_ERR(gid_attr))
+               return PTR_ERR(gid_attr);
+
+       memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+       entry.gid_index = gid_attr->index;
+       entry.port_num = gid_attr->port_num;
+       entry.gid_type = gid_attr->gid_type;
+
+       rcu_read_lock();
+       ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+       if (IS_ERR(ndev)) {
+               if (PTR_ERR(ndev) != -ENODEV) {
+                       ret = PTR_ERR(ndev);
+                       rcu_read_unlock();
+                       goto out;
+               }
+       } else {
+               entry.netdev_ifindex = ndev->ifindex;
+       }
+       rcu_read_unlock();
+
+       ret = uverbs_copy_to_struct_or_zero(
+               attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+               sizeof(entry));
+out:
+       rdma_put_gid_attr(gid_attr);
+       return ret;
+}
+
 DECLARE_UVERBS_NAMED_METHOD(
        UVERBS_METHOD_GET_CONTEXT,
        UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
@@ -299,12 +468,38 @@ DECLARE_UVERBS_NAMED_METHOD(
                                   reserved),
                UA_MANDATORY));
 
+DECLARE_UVERBS_NAMED_METHOD(
+       UVERBS_METHOD_QUERY_GID_TABLE,
+       UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+                            UA_MANDATORY),
+       UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+                            UA_OPTIONAL),
+       UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+                           UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+       UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+                           UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+       UVERBS_METHOD_QUERY_GID_ENTRY,
+       UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+                            UA_MANDATORY),
+       UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+                            UA_MANDATORY),
+       UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+                            UA_MANDATORY),
+       UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+                           UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+                                              netdev_ifindex),
+                           UA_MANDATORY));
+
 DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
                              &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
                              &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
                              &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
                              &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
-                             &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT));
+                             &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+                             &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+                             &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
 
 const struct uapi_definition uverbs_def_obj_device[] = {
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
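
The new QUERY_GID_TABLE handler sizes a userspace-controlled allocation with check_mul_overflow() from the newly included linux/overflow.h before calling uverbs_zalloc(). A minimal sketch of that guard, assuming only the two helpers used above:

/* Sketch only: overflow-checked sizing of a user-driven allocation. */
static void *example_alloc_entries(struct uverbs_attr_bundle *attrs,
				   size_t max_entries, size_t entry_size)
{
	size_t num_bytes;

	if (check_mul_overflow(max_entries, entry_size, &num_bytes))
		return ERR_PTR(-EINVAL);	/* multiplication would wrap */

	/* bundle-scoped allocation, released when the method completes */
	return uverbs_zalloc(attrs, num_bytes);
}
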
index cad842e..f2e6a62 100644 (file)
@@ -16,7 +16,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
                container_of(uobject, struct ib_uwq_object, uevent.uobject);
        int ret;
 
-       ret = ib_destroy_wq(wq, &attrs->driver_udata);
+       ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
        if (ib_is_destroy_retryable(ret, why, uobject))
                return ret;
 
index 3078867..740f845 100644 (file)
@@ -272,15 +272,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
        atomic_set(&pd->usecnt, 0);
        pd->flags = flags;
 
-       pd->res.type = RDMA_RESTRACK_PD;
-       rdma_restrack_set_task(&pd->res, caller);
+       rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+       rdma_restrack_set_name(&pd->res, caller);
 
        ret = device->ops.alloc_pd(pd, NULL);
        if (ret) {
+               rdma_restrack_put(&pd->res);
                kfree(pd);
                return ERR_PTR(ret);
        }
-       rdma_restrack_kadd(&pd->res);
+       rdma_restrack_add(&pd->res);
 
        if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
                pd->local_dma_lkey = device->local_dma_lkey;
@@ -329,7 +330,7 @@ EXPORT_SYMBOL(__ib_alloc_pd);
  * exist.  The caller is responsible to synchronously destroy them and
  * guarantee no new allocations will happen.
  */
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
 {
        int ret;
 
@@ -343,9 +344,13 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
           requires the caller to guarantee we can't race here. */
        WARN_ON(atomic_read(&pd->usecnt));
 
+       ret = pd->device->ops.dealloc_pd(pd, udata);
+       if (ret)
+               return ret;
+
        rdma_restrack_del(&pd->res);
-       pd->device->ops.dealloc_pd(pd, udata);
        kfree(pd);
+       return ret;
 }
 EXPORT_SYMBOL(ib_dealloc_pd_user);
 
@@ -728,7 +733,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
                                       (struct in6_addr *)dgid);
                return 0;
        } else if (net_type == RDMA_NETWORK_IPV6 ||
-                  net_type == RDMA_NETWORK_IB) {
+                  net_type == RDMA_NETWORK_IB || net_type == RDMA_NETWORK_ROCE_V1) {
                *dgid = hdr->ibgrh.dgid;
                *sgid = hdr->ibgrh.sgid;
                return 0;
@@ -964,18 +969,22 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
 {
        const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
        struct ib_pd *pd;
+       int ret;
 
        might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
 
        pd = ah->pd;
 
-       ah->device->ops.destroy_ah(ah, flags);
+       ret = ah->device->ops.destroy_ah(ah, flags);
+       if (ret)
+               return ret;
+
        atomic_dec(&pd->usecnt);
        if (sgid_attr)
                rdma_put_gid_attr(sgid_attr);
 
        kfree(ah);
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(rdma_destroy_ah_user);
 
@@ -1060,10 +1069,14 @@ EXPORT_SYMBOL(ib_query_srq);
 
 int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
 {
+       int ret;
+
        if (atomic_read(&srq->usecnt))
                return -EBUSY;
 
-       srq->device->ops.destroy_srq(srq, udata);
+       ret = srq->device->ops.destroy_srq(srq, udata);
+       if (ret)
+               return ret;
 
        atomic_dec(&srq->pd->usecnt);
        if (srq->srq_type == IB_SRQT_XRC)
@@ -1072,7 +1085,7 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
                atomic_dec(&srq->ext.cq->usecnt);
        kfree(srq);
 
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(ib_destroy_srq_user);
 
@@ -1781,7 +1794,7 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
 }
 EXPORT_SYMBOL(ib_modify_qp_with_udata);
 
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width)
 {
        int rc;
        u32 netdev_speed;
@@ -1984,16 +1997,18 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
        cq->event_handler = event_handler;
        cq->cq_context = cq_context;
        atomic_set(&cq->usecnt, 0);
-       cq->res.type = RDMA_RESTRACK_CQ;
-       rdma_restrack_set_task(&cq->res, caller);
+
+       rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+       rdma_restrack_set_name(&cq->res, caller);
 
        ret = device->ops.create_cq(cq, cq_attr, NULL);
        if (ret) {
+               rdma_restrack_put(&cq->res);
                kfree(cq);
                return ERR_PTR(ret);
        }
 
-       rdma_restrack_kadd(&cq->res);
+       rdma_restrack_add(&cq->res);
        return cq;
 }
 EXPORT_SYMBOL(__ib_create_cq);
@@ -2011,16 +2026,21 @@ EXPORT_SYMBOL(rdma_set_cq_moderation);
 
 int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
 {
+       int ret;
+
        if (WARN_ON_ONCE(cq->shared))
                return -EOPNOTSUPP;
 
        if (atomic_read(&cq->usecnt))
                return -EBUSY;
 
+       ret = cq->device->ops.destroy_cq(cq, udata);
+       if (ret)
+               return ret;
+
        rdma_restrack_del(&cq->res);
-       cq->device->ops.destroy_cq(cq, udata);
        kfree(cq);
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(ib_destroy_cq_user);
 
@@ -2059,8 +2079,10 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        mr->pd = pd;
        mr->dm = NULL;
        atomic_inc(&pd->usecnt);
-       mr->res.type = RDMA_RESTRACK_MR;
-       rdma_restrack_kadd(&mr->res);
+
+       rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+       rdma_restrack_parent_name(&mr->res, &pd->res);
+       rdma_restrack_add(&mr->res);
 
        return mr;
 }
@@ -2139,11 +2161,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
        mr->uobject = NULL;
        atomic_inc(&pd->usecnt);
        mr->need_inval = false;
-       mr->res.type = RDMA_RESTRACK_MR;
-       rdma_restrack_kadd(&mr->res);
        mr->type = mr_type;
        mr->sig_attrs = NULL;
 
+       rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+       rdma_restrack_parent_name(&mr->res, &pd->res);
+       rdma_restrack_add(&mr->res);
 out:
        trace_mr_alloc(pd, mr_type, max_num_sg, mr);
        return mr;
@@ -2199,11 +2222,12 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
        mr->uobject = NULL;
        atomic_inc(&pd->usecnt);
        mr->need_inval = false;
-       mr->res.type = RDMA_RESTRACK_MR;
-       rdma_restrack_kadd(&mr->res);
        mr->type = IB_MR_TYPE_INTEGRITY;
        mr->sig_attrs = sig_attrs;
 
+       rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+       rdma_restrack_parent_name(&mr->res, &pd->res);
+       rdma_restrack_add(&mr->res);
 out:
        trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
        return mr;
@@ -2328,13 +2352,17 @@ EXPORT_SYMBOL(ib_alloc_xrcd_user);
  */
 int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
 {
+       int ret;
+
        if (atomic_read(&xrcd->usecnt))
                return -EBUSY;
 
        WARN_ON(!xa_empty(&xrcd->tgt_qps));
-       xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+       ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+       if (ret)
+               return ret;
        kfree(xrcd);
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(ib_dealloc_xrcd_user);
 
@@ -2378,25 +2406,28 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
 EXPORT_SYMBOL(ib_create_wq);
 
 /**
- * ib_destroy_wq - Destroys the specified user WQ.
+ * ib_destroy_wq_user - Destroys the specified user WQ.
  * @wq: The WQ to destroy.
  * @udata: Valid user data
  */
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
 {
        struct ib_cq *cq = wq->cq;
        struct ib_pd *pd = wq->pd;
+       int ret;
 
        if (atomic_read(&wq->usecnt))
                return -EBUSY;
 
-       wq->device->ops.destroy_wq(wq, udata);
+       ret = wq->device->ops.destroy_wq(wq, udata);
+       if (ret)
+               return ret;
+
        atomic_dec(&pd->usecnt);
        atomic_dec(&cq->usecnt);
-
-       return 0;
+       return ret;
 }
-EXPORT_SYMBOL(ib_destroy_wq);
+EXPORT_SYMBOL(ib_destroy_wq_user);
 
 /**
  * ib_modify_wq - Modifies the specified WQ.
@@ -2419,29 +2450,6 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 }
 EXPORT_SYMBOL(ib_modify_wq);
 
-/*
- * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
- * @wq_ind_table: The Indirection Table to destroy.
-*/
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
-{
-       int err, i;
-       u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
-       struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
-
-       if (atomic_read(&rwq_ind_table->usecnt))
-               return -EBUSY;
-
-       err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
-       if (!err) {
-               for (i = 0; i < table_size; i++)
-                       atomic_dec(&ind_tbl[i]->usecnt);
-       }
-
-       return err;
-}
-EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
                       struct ib_mr_status *mr_status)
 {
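
The verbs.c hunks above convert ib_dealloc_pd_user(), rdma_destroy_ah_user(), ib_destroy_srq_user(), ib_destroy_cq_user() and friends to propagate the driver's return value and to skip kfree()/restrack teardown when destruction fails. A hedged sketch of what a driver-side destroy callback looks like under that contract; the struct and the firmware helper are hypothetical.

/* Sketch only: a driver destroy_cq that may fail without leaking state. */
struct exdrv_cq {
	struct ib_cq ibcq;		/* embedded core object */
	u32 hw_id;
};

static int exdrv_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct exdrv_cq *cq = container_of(ibcq, struct exdrv_cq, ibcq);
	int ret;

	ret = exdrv_fw_destroy_cq(cq->hw_id);	/* hypothetical FW call */
	if (ret)
		return ret;	/* core keeps the CQ object alive */

	return 0;		/* core does rdma_restrack_del() + kfree() */
}
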
index a300588..b930ea3 100644 (file)
@@ -150,7 +150,7 @@ struct bnxt_re_dev {
 
        struct delayed_work             worker;
        u8                              cur_prio_map;
-       u8                              active_speed;
+       u16                             active_speed;
        u8                              active_width;
 
        /* FP Notification Queue (CQ & SRQ) */
index 1d7a9ca..cf3db96 100644 (file)
@@ -532,7 +532,7 @@ fail:
 }
 
 /* Protection Domains */
-void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
 {
        struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
        struct bnxt_re_dev *rdev = pd->rdev;
@@ -542,6 +542,7 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
        if (pd->qplib_pd.id)
                bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
                                      &pd->qplib_pd);
+       return 0;
 }
 
 int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -601,13 +602,14 @@ fail:
 }
 
 /* Address Handles */
-void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
 {
        struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
        struct bnxt_re_dev *rdev = ah->rdev;
 
        bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
                              !(flags & RDMA_DESTROY_AH_SLEEPABLE));
+       return 0;
 }
 
 static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
@@ -938,9 +940,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
                return PTR_ERR(umem);
 
        qp->sumem = umem;
-       qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl;
-       qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem);
-       qplib_qp->sq.sg_info.nmap = umem->nmap;
+       qplib_qp->sq.sg_info.umem = umem;
        qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
        qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
        qplib_qp->qp_handle = ureq.qp_handle;
@@ -953,9 +953,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
                if (IS_ERR(umem))
                        goto rqfail;
                qp->rumem = umem;
-               qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl;
-               qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem);
-               qplib_qp->rq.sg_info.nmap = umem->nmap;
+               qplib_qp->rq.sg_info.umem = umem;
                qplib_qp->rq.sg_info.pgsize = PAGE_SIZE;
                qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT;
        }
@@ -1568,7 +1566,7 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
 }
 
 /* Shared Receive Queues */
-void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
+int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
 {
        struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
                                               ib_srq);
@@ -1583,6 +1581,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
        atomic_dec(&rdev->srq_count);
        if (nq)
                nq->budget--;
+       return 0;
 }
 
 static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
@@ -1608,9 +1607,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
                return PTR_ERR(umem);
 
        srq->umem = umem;
-       qplib_srq->sg_info.sghead = umem->sg_head.sgl;
-       qplib_srq->sg_info.npages = ib_umem_num_pages(umem);
-       qplib_srq->sg_info.nmap = umem->nmap;
+       qplib_srq->sg_info.umem = umem;
        qplib_srq->sg_info.pgsize = PAGE_SIZE;
        qplib_srq->sg_info.pgshft = PAGE_SHIFT;
        qplib_srq->srq_handle = ureq.srq_handle;
@@ -2800,7 +2797,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
 }
 
 /* Completion Queues */
-void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
        struct bnxt_re_cq *cq;
        struct bnxt_qplib_nq *nq;
@@ -2816,6 +2813,7 @@ void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
        atomic_dec(&rdev->cq_count);
        nq->budget--;
        kfree(cq->cql);
+       return 0;
 }
 
 int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
@@ -2860,9 +2858,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                        rc = PTR_ERR(cq->umem);
                        goto fail;
                }
-               cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl;
-               cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem);
-               cq->qplib_cq.sg_info.nmap = cq->umem->nmap;
+               cq->qplib_cq.sg_info.umem = cq->umem;
                cq->qplib_cq.dpi = &uctx->dpi;
        } else {
                cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
@@ -3774,23 +3770,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
        return rc;
 }
 
-static int bnxt_re_page_size_ok(int page_shift)
-{
-       switch (page_shift) {
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M:
-       case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
 static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
                             int page_shift)
 {
@@ -3798,7 +3777,7 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
        u64 page_size =  BIT_ULL(page_shift);
        struct ib_block_iter biter;
 
-       rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size)
+       rdma_umem_for_each_dma_block(umem, &biter, page_size)
                *pbl_tbl++ = rdma_block_iter_dma_address(&biter);
 
        return pbl_tbl - pbl_tbl_orig;
@@ -3814,7 +3793,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
        struct bnxt_re_mr *mr;
        struct ib_umem *umem;
        u64 *pbl_tbl = NULL;
-       int umem_pgs, page_shift, rc;
+       unsigned long page_size;
+       int umem_pgs, rc;
 
        if (length > BNXT_RE_MAX_MR_SIZE) {
                ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
@@ -3848,42 +3828,34 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
        mr->ib_umem = umem;
 
        mr->qplib_mr.va = virt_addr;
-       umem_pgs = ib_umem_page_count(umem);
-       if (!umem_pgs) {
-               ibdev_err(&rdev->ibdev, "umem is invalid!");
-               rc = -EINVAL;
-               goto free_umem;
-       }
-       mr->qplib_mr.total_size = length;
-
-       pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
-       if (!pbl_tbl) {
-               rc = -ENOMEM;
-               goto free_umem;
-       }
-
-       page_shift = __ffs(ib_umem_find_best_pgsz(umem,
-                               BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M,
-                               virt_addr));
-
-       if (!bnxt_re_page_size_ok(page_shift)) {
+       page_size = ib_umem_find_best_pgsz(
+               umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr);
+       if (!page_size) {
                ibdev_err(&rdev->ibdev, "umem page size unsupported!");
                rc = -EFAULT;
-               goto fail;
+               goto free_umem;
        }
+       mr->qplib_mr.total_size = length;
 
-       if (page_shift == BNXT_RE_PAGE_SHIFT_4K &&
+       if (page_size == BNXT_RE_PAGE_SIZE_4K &&
            length > BNXT_RE_MAX_MR_SIZE_LOW) {
                ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu",
                          length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
                rc = -EINVAL;
-               goto fail;
+               goto free_umem;
+       }
+
+       umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
+       pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL);
+       if (!pbl_tbl) {
+               rc = -ENOMEM;
+               goto free_umem;
        }
 
        /* Map umem buf ptrs to the PBL */
-       umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
+       umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size));
        rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
-                              umem_pgs, false, 1 << page_shift);
+                              umem_pgs, false, page_size);
        if (rc) {
                ibdev_err(&rdev->ibdev, "Failed to register user MR");
                goto fail;
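
The bnxt_re and cxgb4 MR hunks converge on the same generic pattern: ib_umem_find_best_pgsz() selects the largest page size the HW bitmap allows, ib_umem_num_dma_blocks() sizes the PBL, and rdma_umem_for_each_dma_block() fills it. A compact sketch of that pattern; the pbl buffer is assumed to have been sized by the caller.

/* Sketch only: building a page buffer list from an ib_umem. */
static int example_build_pbl(struct ib_umem *umem, u64 *pbl,
			     unsigned long hw_pgsz_bitmap, u64 virt_addr)
{
	struct ib_block_iter biter;
	unsigned long page_size;
	int i = 0;

	page_size = ib_umem_find_best_pgsz(umem, hw_pgsz_bitmap, virt_addr);
	if (!page_size)
		return -EINVAL;		/* no supported page size fits */

	rdma_umem_for_each_dma_block(umem, &biter, page_size)
		pbl[i++] = rdma_block_iter_dma_address(&biter);

	return i;	/* equals ib_umem_num_dma_blocks(umem, page_size) */
}
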
index 1daeb30..9a8130b 100644 (file)
@@ -163,12 +163,12 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
 enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
                                            u8 port_num);
 int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
                      struct ib_udata *udata);
 int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
 int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
 int bnxt_re_create_srq(struct ib_srq *srq,
                       struct ib_srq_init_attr *srq_init_attr,
                       struct ib_udata *udata);
@@ -176,7 +176,7 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
                       enum ib_srq_attr_mask srq_attr_mask,
                       struct ib_udata *udata);
 int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
 int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
                          const struct ib_recv_wr **bad_recv_wr);
 struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
@@ -193,7 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
                      const struct ib_recv_wr **bad_recv_wr);
 int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                      struct ib_udata *udata);
-void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
 int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
 struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
index f78da54..995d463 100644 (file)
@@ -295,9 +295,9 @@ static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events)
        }
 }
 
-static void bnxt_qplib_service_nq(unsigned long data)
+static void bnxt_qplib_service_nq(struct tasklet_struct *t)
 {
-       struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
+       struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet);
        struct bnxt_qplib_hwq *hwq = &nq->hwq;
        int num_srqne_processed = 0;
        int num_cqne_processed = 0;
@@ -448,8 +448,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
 
        nq->msix_vec = msix_vector;
        if (need_init)
-               tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq,
-                            (unsigned long)nq);
+               tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq);
        else
                tasklet_enable(&nq->nq_tasklet);
 
index f7736e3..441eb42 100644 (file)
@@ -50,7 +50,7 @@
 #include "qplib_sp.h"
 #include "qplib_fp.h"
 
-static void bnxt_qplib_service_creq(unsigned long data);
+static void bnxt_qplib_service_creq(struct tasklet_struct *t);
 
 /* Hardware communication channel */
 static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
@@ -79,7 +79,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
                goto done;
        do {
                mdelay(1); /* 1m sec */
-               bnxt_qplib_service_creq((unsigned long)rcfw);
+               bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
        } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count);
 done:
        return count ? 0 : -ETIMEDOUT;
@@ -370,9 +370,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 }
 
 /* SP - CREQ Completion handlers */
-static void bnxt_qplib_service_creq(unsigned long data)
+static void bnxt_qplib_service_creq(struct tasklet_struct *t)
 {
-       struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
+       struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
        struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
        u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
        struct bnxt_qplib_hwq *hwq = &creq->hwq;
@@ -687,8 +687,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
 
        creq->msix_vec = msix_vector;
        if (need_init)
-               tasklet_init(&creq->creq_tasklet,
-                            bnxt_qplib_service_creq, (unsigned long)rcfw);
+               tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
        else
                tasklet_enable(&creq->creq_tasklet);
        rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
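
Both tasklets in this driver move from tasklet_init() with an (unsigned long) cookie to tasklet_setup() plus from_tasklet(), which is a container_of() wrapper over the tasklet_struct member. A self-contained sketch of the converted pattern:

#include <linux/interrupt.h>

struct example_queue {
	struct tasklet_struct tasklet;
	int pending;
};

/* Callback now receives the tasklet_struct instead of an opaque cookie. */
static void example_service(struct tasklet_struct *t)
{
	struct example_queue *q = from_tasklet(q, t, tasklet);

	q->pending = 0;			/* drain events for this queue */
}

static void example_queue_init(struct example_queue *q)
{
	tasklet_setup(&q->tasklet, example_service);
}
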
index 7efa6e5..fa78783 100644 (file)
@@ -45,6 +45,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_sp.h"
@@ -87,12 +90,11 @@ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
 static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl,
                                           struct bnxt_qplib_sg_info *sginfo)
 {
-       struct scatterlist *sghead = sginfo->sghead;
-       struct sg_dma_page_iter sg_iter;
+       struct ib_block_iter biter;
        int i = 0;
 
-       for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) {
-               pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
+       rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) {
+               pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter);
                pbl->pg_arr[i] = NULL;
                pbl->pg_count++;
                i++;
@@ -104,15 +106,16 @@ static int __alloc_pbl(struct bnxt_qplib_res *res,
                       struct bnxt_qplib_sg_info *sginfo)
 {
        struct pci_dev *pdev = res->pdev;
-       struct scatterlist *sghead;
        bool is_umem = false;
        u32 pages;
        int i;
 
        if (sginfo->nopte)
                return 0;
-       pages = sginfo->npages;
-       sghead = sginfo->sghead;
+       if (sginfo->umem)
+               pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize);
+       else
+               pages = sginfo->npages;
        /* page ptr arrays */
        pbl->pg_arr = vmalloc(pages * sizeof(void *));
        if (!pbl->pg_arr)
@@ -127,7 +130,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res,
        pbl->pg_count = 0;
        pbl->pg_size = sginfo->pgsize;
 
-       if (!sghead) {
+       if (!sginfo->umem) {
                for (i = 0; i < pages; i++) {
                        pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
                                                            pbl->pg_size,
@@ -183,14 +186,12 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
        struct bnxt_qplib_sg_info sginfo = {};
        u32 depth, stride, npbl, npde;
        dma_addr_t *src_phys_ptr, **dst_virt_ptr;
-       struct scatterlist *sghead = NULL;
        struct bnxt_qplib_res *res;
        struct pci_dev *pdev;
        int i, rc, lvl;
 
        res = hwq_attr->res;
        pdev = res->pdev;
-       sghead = hwq_attr->sginfo->sghead;
        pg_size = hwq_attr->sginfo->pgsize;
        hwq->level = PBL_LVL_MAX;
 
@@ -204,7 +205,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
                        aux_pages++;
        }
 
-       if (!sghead) {
+       if (!hwq_attr->sginfo->umem) {
                hwq->is_user = false;
                npages = (depth * stride) / pg_size + aux_pages;
                if ((depth * stride) % pg_size)
@@ -213,11 +214,14 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
                        return -EINVAL;
                hwq_attr->sginfo->npages = npages;
        } else {
+               unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
+                       hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
+
                hwq->is_user = true;
-               npages = hwq_attr->sginfo->npages;
+               npages = sginfo_num_pages;
                npages = (npages * PAGE_SIZE) /
                          BIT_ULL(hwq_attr->sginfo->pgshft);
-               if ((hwq_attr->sginfo->npages * PAGE_SIZE) %
+               if ((sginfo_num_pages * PAGE_SIZE) %
                     BIT_ULL(hwq_attr->sginfo->pgshft))
                        if (!npages)
                                npages++;
index 9da470d..7a1ab38 100644 (file)
@@ -126,8 +126,7 @@ struct bnxt_qplib_pbl {
 };
 
 struct bnxt_qplib_sg_info {
-       struct scatterlist              *sghead;
-       u32                             nmap;
+       struct ib_umem                  *umem;
        u32                             npages;
        u32                             pgshft;
        u32                             pgsize;
index 1f288c7..8769e7a 100644 (file)
@@ -77,9 +77,9 @@ static int enable_ecn;
 module_param(enable_ecn, int, 0644);
 MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
 
-static int dack_mode = 1;
+static int dack_mode;
 module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
 
 uint c4iw_max_read_depth = 32;
 module_param(c4iw_max_read_depth, int, 0644);
index 352b8af..28349ed 100644 (file)
@@ -967,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
        return !err || err == -ENODATA ? npolled : err;
 }
 
-void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
        struct c4iw_cq *chp;
        struct c4iw_ucontext *ucontext;
@@ -985,6 +985,7 @@ void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
                   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
                   chp->destroy_skb, chp->wr_waitp);
        c4iw_put_wr_wait(chp->wr_waitp);
+       return 0;
 }
 
 int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
index 2b2b009..a278994 100644 (file)
@@ -985,21 +985,20 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                   unsigned int *sg_offset);
 int c4iw_dealloc_mw(struct ib_mw *mw);
 void c4iw_dealloc(struct uld_ctx *ctx);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-                           struct ib_udata *udata);
+int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
                                           u64 length, u64 virt, int acc,
                                           struct ib_udata *udata);
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
 int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
-void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                   struct ib_udata *udata);
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
                    enum ib_srq_attr_mask srq_attr_mask,
                    struct ib_udata *udata);
-void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
 int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
                    struct ib_udata *udata);
 int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
index 73936c3..42234df 100644 (file)
@@ -510,7 +510,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        __be64 *pages;
        int shift, n, i;
        int err = -ENOMEM;
-       struct sg_dma_page_iter sg_iter;
+       struct ib_block_iter biter;
        struct c4iw_dev *rhp;
        struct c4iw_pd *php;
        struct c4iw_mr *mhp;
@@ -548,7 +548,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        shift = PAGE_SHIFT;
 
-       n = ib_umem_num_pages(mhp->umem);
+       n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
        err = alloc_pbl(mhp, n);
        if (err)
                goto err_umem_release;
@@ -561,8 +561,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        i = n = 0;
 
-       for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
-               pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
+       rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) {
+               pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));
                if (i == PAGE_SIZE / sizeof(*pages)) {
                        err = write_pbl(&mhp->rhp->rdev, pages,
                                        mhp->attr.pbl_addr + (n << 3), i,
@@ -611,30 +611,23 @@ err_free_mhp:
        return ERR_PTR(err);
 }
 
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-                           struct ib_udata *udata)
+int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 {
+       struct c4iw_mw *mhp = to_c4iw_mw(ibmw);
        struct c4iw_dev *rhp;
        struct c4iw_pd *php;
-       struct c4iw_mw *mhp;
        u32 mmid;
        u32 stag = 0;
        int ret;
 
-       if (type != IB_MW_TYPE_1)
-               return ERR_PTR(-EINVAL);
+       if (ibmw->type != IB_MW_TYPE_1)
+               return -EINVAL;
 
-       php = to_c4iw_pd(pd);
+       php = to_c4iw_pd(ibmw->pd);
        rhp = php->rhp;
-       mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-       if (!mhp)
-               return ERR_PTR(-ENOMEM);
-
        mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
-       if (!mhp->wr_waitp) {
-               ret = -ENOMEM;
-               goto free_mhp;
-       }
+       if (!mhp->wr_waitp)
+               return -ENOMEM;
 
        mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
        if (!mhp->dereg_skb) {
@@ -645,18 +638,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
        ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp);
        if (ret)
                goto free_skb;
+
        mhp->rhp = rhp;
        mhp->attr.pdid = php->pdid;
        mhp->attr.type = FW_RI_STAG_MW;
        mhp->attr.stag = stag;
        mmid = (stag) >> 8;
-       mhp->ibmw.rkey = stag;
+       ibmw->rkey = stag;
        if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
                ret = -ENOMEM;
                goto dealloc_win;
        }
        pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag);
-       return &(mhp->ibmw);
+       return 0;
 
 dealloc_win:
        deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
@@ -665,9 +659,7 @@ free_skb:
        kfree_skb(mhp->dereg_skb);
 free_wr_wait:
        c4iw_put_wr_wait(mhp->wr_waitp);
-free_mhp:
-       kfree(mhp);
-       return ERR_PTR(ret);
+       return ret;
 }
 
 int c4iw_dealloc_mw(struct ib_mw *mw)
@@ -684,8 +676,6 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
                          mhp->wr_waitp);
        kfree_skb(mhp->dereg_skb);
        c4iw_put_wr_wait(mhp->wr_waitp);
-       pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
-       kfree(mhp);
        return 0;
 }
 
index 6c579d2..4b76f2f 100644 (file)
@@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
        return ret;
 }
 
-static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        struct c4iw_dev *rhp;
        struct c4iw_pd *php;
@@ -202,6 +202,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
        mutex_lock(&rhp->rdev.stats.lock);
        rhp->rdev.stats.pd.cur--;
        mutex_unlock(&rhp->rdev.stats.lock);
+       return 0;
 }
 
 static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
@@ -497,8 +498,10 @@ static const struct ib_device_ops c4iw_dev_ops = {
        .query_qp = c4iw_ib_query_qp,
        .reg_user_mr = c4iw_reg_user_mr,
        .req_notify_cq = c4iw_arm_cq,
-       INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+
        INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
+       INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw),
+       INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
 };
index cbddb20..f20379e 100644 (file)
@@ -2797,7 +2797,7 @@ err_free_wr_wait:
        return ret;
 }
 
-void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
        struct c4iw_dev *rhp;
        struct c4iw_srq *srq;
@@ -2813,4 +2813,5 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
                       srq->wr_waitp);
        c4iw_free_srq_idx(&rhp->rdev, srq->idx);
        c4iw_put_wr_wait(srq->wr_waitp);
+       return 0;
 }
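
Several hunks in this merge convert destroy/dealloc callbacks from void to int (deallocate_pd, destroy_srq, destroy_cq, destroy_ah and so on) so the core can propagate teardown failures. A minimal sketch of the converted shape, with foo_* as a hypothetical name; callbacks whose teardown cannot fail simply report success:

static int foo_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
	/* release driver-side SRQ resources exactly as before */
	return 0;
}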
index 1889dd1..e5d9712 100644 (file)
@@ -33,7 +33,8 @@ struct efa_irq {
        char name[EFA_IRQNAME_SIZE];
 };
 
-struct efa_sw_stats {
+/* Don't use anything other than atomic64 */
+struct efa_stats {
        atomic64_t alloc_pd_err;
        atomic64_t create_qp_err;
        atomic64_t create_cq_err;
@@ -41,11 +42,6 @@ struct efa_sw_stats {
        atomic64_t alloc_ucontext_err;
        atomic64_t create_ah_err;
        atomic64_t mmap_err;
-};
-
-/* Don't use anything other than atomic64 */
-struct efa_stats {
-       struct efa_sw_stats sw_stats;
        atomic64_t keep_alive_rcvd;
 };
 
@@ -134,12 +130,12 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
 int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                   u16 *pkey);
 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
 struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata);
-void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                  struct ib_udata *udata);
 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
@@ -156,7 +152,7 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 int efa_create_ah(struct ib_ah *ibah,
                  struct rdma_ah_init_attr *init_attr,
                  struct ib_udata *udata);
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags);
 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                  int qp_attr_mask, struct ib_udata *udata);
 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
index 5484b08..b199e4a 100644 (file)
@@ -61,6 +61,8 @@ enum efa_admin_qp_state {
 
 enum efa_admin_get_stats_type {
        EFA_ADMIN_GET_STATS_TYPE_BASIC              = 0,
+       EFA_ADMIN_GET_STATS_TYPE_MESSAGES           = 1,
+       EFA_ADMIN_GET_STATS_TYPE_RDMA_READ          = 2,
 };
 
 enum efa_admin_get_stats_scope {
@@ -68,14 +70,6 @@ enum efa_admin_get_stats_scope {
        EFA_ADMIN_GET_STATS_SCOPE_QUEUE             = 1,
 };
 
-enum efa_admin_modify_qp_mask_bits {
-       EFA_ADMIN_QP_STATE_BIT                      = 0,
-       EFA_ADMIN_CUR_QP_STATE_BIT                  = 1,
-       EFA_ADMIN_QKEY_BIT                          = 2,
-       EFA_ADMIN_SQ_PSN_BIT                        = 3,
-       EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT       = 4,
-};
-
 /*
  * QP allocation sizes, converted by fabric QueuePair (QP) create command
  * from QP capabilities.
@@ -199,8 +193,14 @@ struct efa_admin_modify_qp_cmd {
        struct efa_admin_aq_common_desc aq_common_desc;
 
        /*
-        * Mask indicating which fields should be updated see enum
-        * efa_admin_modify_qp_mask_bits
+        * Mask indicating which fields should be updated
+        * 0 : qp_state
+        * 1 : cur_qp_state
+        * 2 : qkey
+        * 3 : sq_psn
+        * 4 : sq_drained_async_notify
+        * 5 : rnr_retry
+        * 31:6 : reserved
         */
        u32 modify_mask;
 
@@ -222,8 +222,8 @@ struct efa_admin_modify_qp_cmd {
        /* Enable async notification when SQ is drained */
        u8 sq_drained_async_notify;
 
-       /* MBZ */
-       u8 reserved1;
+       /* Number of RNR retries (valid only for SRD QPs) */
+       u8 rnr_retry;
 
        /* MBZ */
        u16 reserved2;
@@ -258,8 +258,8 @@ struct efa_admin_query_qp_resp {
        /* Indicates that draining is in progress */
        u8 sq_draining;
 
-       /* MBZ */
-       u8 reserved1;
+       /* Number of RNR retries (valid only for SRD QPs) */
+       u8 rnr_retry;
 
        /* MBZ */
        u16 reserved2;
@@ -530,10 +530,36 @@ struct efa_admin_basic_stats {
        u64 rx_drops;
 };
 
+struct efa_admin_messages_stats {
+       u64 send_bytes;
+
+       u64 send_wrs;
+
+       u64 recv_bytes;
+
+       u64 recv_wrs;
+};
+
+struct efa_admin_rdma_read_stats {
+       u64 read_wrs;
+
+       u64 read_bytes;
+
+       u64 read_wr_err;
+
+       u64 read_resp_bytes;
+};
+
 struct efa_admin_acq_get_stats_resp {
        struct efa_admin_acq_common_desc acq_common_desc;
 
-       struct efa_admin_basic_stats basic_stats;
+       union {
+               struct efa_admin_basic_stats basic_stats;
+
+               struct efa_admin_messages_stats messages_stats;
+
+               struct efa_admin_rdma_read_stats rdma_read_stats;
+       } u;
 };
 
 struct efa_admin_get_set_feature_common_desc {
@@ -576,7 +602,9 @@ struct efa_admin_feature_device_attr_desc {
        /*
         * 0 : rdma_read - If set, RDMA Read is supported on
         *    TX queues
-        * 31:1 : reserved - MBZ
+        * 1 : rnr_retry - If set, RNR retry is supported on
+        *    modify QP command
+        * 31:2 : reserved - MBZ
         */
        u32 device_caps;
 
@@ -862,6 +890,14 @@ struct efa_admin_host_info {
 #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK                BIT(0)
 #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK                BIT(1)
 
+/* modify_qp_cmd */
+#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK               BIT(0)
+#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK           BIT(1)
+#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK                   BIT(2)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK                 BIT(3)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4)
+#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK              BIT(5)
+
 /* reg_mr_cmd */
 #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK      GENMASK(4, 0)
 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK      BIT(7)
@@ -878,6 +914,7 @@ struct efa_admin_host_info {
 
 /* feature_device_attr_desc */
 #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK   BIT(0)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK   BIT(1)
 
 /* host_info */
 #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK         GENMASK(7, 0)
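
The per-field mask defines above replace the old efa_admin_modify_qp_mask_bits enum; callers set them through the driver's EFA_SET() field helper, as the efa_verbs.c conversion further below shows. A minimal sketch with a hypothetical example_* name:

static void example_request_rnr_retry(struct efa_com_modify_qp_params *params,
				      u8 rnr_retry)
{
	/* mark the qp_state and rnr_retry fields of the command as valid */
	EFA_SET(&params->modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, 1);
	EFA_SET(&params->modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, 1);
	params->rnr_retry = rnr_retry;
}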
index 6ac2362..f752ef6 100644 (file)
@@ -76,6 +76,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
        cmd.qkey = params->qkey;
        cmd.sq_psn = params->sq_psn;
        cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+       cmd.rnr_retry = params->rnr_retry;
 
        err = efa_com_cmd_exec(aq,
                               (struct efa_admin_aq_entry *)&cmd,
@@ -121,6 +122,7 @@ int efa_com_query_qp(struct efa_com_dev *edev,
        result->qkey = resp.qkey;
        result->sq_draining = resp.sq_draining;
        result->sq_psn = resp.sq_psn;
+       result->rnr_retry = resp.rnr_retry;
 
        return 0;
 }
@@ -750,11 +752,27 @@ int efa_com_get_stats(struct efa_com_dev *edev,
                return err;
        }
 
-       result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes;
-       result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts;
-       result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes;
-       result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts;
-       result->basic_stats.rx_drops = resp.basic_stats.rx_drops;
+       switch (cmd.type) {
+       case EFA_ADMIN_GET_STATS_TYPE_BASIC:
+               result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes;
+               result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts;
+               result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes;
+               result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts;
+               result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops;
+               break;
+       case EFA_ADMIN_GET_STATS_TYPE_MESSAGES:
+               result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes;
+               result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs;
+               result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes;
+               result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs;
+               break;
+       case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ:
+               result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs;
+               result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes;
+               result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err;
+               result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes;
+               break;
+       }
 
        return 0;
 }
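
With the response now a union, the admin command type selects which member is valid, as the switch above shows. A minimal caller sketch mirroring efa_get_hw_stats() further below; example_* is a hypothetical name:

static u64 example_read_send_bytes(struct efa_com_dev *edev)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	int err;

	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
	params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;

	err = efa_com_get_stats(edev, &params, &result);
	if (err)
		return 0;

	/* only the member matching params.type is valid */
	return result.messages_stats.send_bytes;
}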
index 190bac2..eea4ebf 100644 (file)
@@ -47,6 +47,7 @@ struct efa_com_modify_qp_params {
        u32 qkey;
        u32 sq_psn;
        u8 sq_drained_async_notify;
+       u8 rnr_retry;
 };
 
 struct efa_com_query_qp_params {
@@ -58,6 +59,7 @@ struct efa_com_query_qp_result {
        u32 qkey;
        u32 sq_draining;
        u32 sq_psn;
+       u8 rnr_retry;
 };
 
 struct efa_com_destroy_qp_params {
@@ -238,8 +240,24 @@ struct efa_com_basic_stats {
        u64 rx_drops;
 };
 
+struct efa_com_messages_stats {
+       u64 send_bytes;
+       u64 send_wrs;
+       u64 recv_bytes;
+       u64 recv_wrs;
+};
+
+struct efa_com_rdma_read_stats {
+       u64 read_wrs;
+       u64 read_bytes;
+       u64 read_wr_err;
+       u64 read_resp_bytes;
+};
+
 union efa_com_get_stats_result {
        struct efa_com_basic_stats basic_stats;
+       struct efa_com_messages_stats messages_stats;
+       struct efa_com_rdma_read_stats rdma_read_stats;
 };
 
 void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
index 9e201f1..191e084 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/vmalloc.h>
+#include <linux/log2.h>
 
 #include <rdma/ib_addr.h>
 #include <rdma/ib_umem.h>
@@ -35,6 +36,14 @@ struct efa_user_mmap_entry {
        op(EFA_RX_BYTES, "rx_bytes") \
        op(EFA_RX_PKTS, "rx_pkts") \
        op(EFA_RX_DROPS, "rx_drops") \
+       op(EFA_SEND_BYTES, "send_bytes") \
+       op(EFA_SEND_WRS, "send_wrs") \
+       op(EFA_RECV_BYTES, "recv_bytes") \
+       op(EFA_RECV_WRS, "recv_wrs") \
+       op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
+       op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
+       op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
+       op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
        op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
        op(EFA_COMPLETED_CMDS, "completed_cmds") \
        op(EFA_CMDS_ERR, "cmds_err") \
@@ -142,10 +151,9 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry)
        return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
 }
 
-static inline bool is_rdma_read_cap(struct efa_dev *dev)
-{
-       return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
-}
+#define EFA_DEV_CAP(dev, cap) \
+       ((dev)->dev_attr.device_caps & \
+        EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
 
 #define is_reserved_cleared(reserved) \
        !memchr_inv(reserved, 0, sizeof(reserved))
@@ -221,9 +229,12 @@ int efa_query_device(struct ib_device *ibdev,
                resp.max_rq_wr = dev_attr->max_rq_depth;
                resp.max_rdma_size = dev_attr->max_rdma_size;
 
-               if (is_rdma_read_cap(dev))
+               if (EFA_DEV_CAP(dev, RDMA_READ))
                        resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
 
+               if (EFA_DEV_CAP(dev, RNR_RETRY))
+                       resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
+
                err = ib_copy_to_udata(udata, &resp,
                                       min(sizeof(resp), udata->outlen));
                if (err) {
@@ -269,7 +280,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
 #define EFA_QUERY_QP_SUPP_MASK \
        (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
-        IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+        IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
 
        if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
                ibdev_dbg(&dev->ibdev,
@@ -291,6 +302,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        qp_attr->sq_psn = result.sq_psn;
        qp_attr->sq_draining = result.sq_draining;
        qp_attr->port_num = 1;
+       qp_attr->rnr_retry = result.rnr_retry;
 
        qp_attr->cap.max_send_wr = qp->max_send_wr;
        qp_attr->cap.max_recv_wr = qp->max_recv_wr;
@@ -376,17 +388,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 err_dealloc_pd:
        efa_pd_dealloc(dev, result.pdn);
 err_out:
-       atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+       atomic64_inc(&dev->stats.alloc_pd_err);
        return err;
 }
 
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct efa_dev *dev = to_edev(ibpd->device);
        struct efa_pd *pd = to_epd(ibpd);
 
        ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
        efa_pd_dealloc(dev, pd->pdn);
+       return 0;
 }
 
 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
@@ -737,18 +750,130 @@ err_free_mapped:
 err_free_qp:
        kfree(qp);
 err_out:
-       atomic64_inc(&dev->stats.sw_stats.create_qp_err);
+       atomic64_inc(&dev->stats.create_qp_err);
        return ERR_PTR(err);
 }
 
+static const struct {
+       int                     valid;
+       enum ib_qp_attr_mask    req_param;
+       enum ib_qp_attr_mask    opt_param;
+} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+       [IB_QPS_RESET] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_INIT]  = {
+                       .valid = 1,
+                       .req_param = IB_QP_PKEY_INDEX |
+                                    IB_QP_PORT |
+                                    IB_QP_QKEY,
+               },
+       },
+       [IB_QPS_INIT] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_ERR]   = { .valid = 1 },
+               [IB_QPS_INIT]  = {
+                       .valid = 1,
+                       .opt_param = IB_QP_PKEY_INDEX |
+                                    IB_QP_PORT |
+                                    IB_QP_QKEY,
+               },
+               [IB_QPS_RTR]   = {
+                       .valid = 1,
+                       .opt_param = IB_QP_PKEY_INDEX |
+                                    IB_QP_QKEY,
+               },
+       },
+       [IB_QPS_RTR] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_ERR]   = { .valid = 1 },
+               [IB_QPS_RTS]   = {
+                       .valid = 1,
+                       .req_param = IB_QP_SQ_PSN,
+                       .opt_param = IB_QP_CUR_STATE |
+                                    IB_QP_QKEY |
+                                    IB_QP_RNR_RETRY,
+
+               }
+       },
+       [IB_QPS_RTS] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_ERR]   = { .valid = 1 },
+               [IB_QPS_RTS]   = {
+                       .valid = 1,
+                       .opt_param = IB_QP_CUR_STATE |
+                                    IB_QP_QKEY,
+               },
+               [IB_QPS_SQD] = {
+                       .valid = 1,
+                       .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
+               },
+       },
+       [IB_QPS_SQD] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_ERR]   = { .valid = 1 },
+               [IB_QPS_RTS]   = {
+                       .valid = 1,
+                       .opt_param = IB_QP_CUR_STATE |
+                                    IB_QP_QKEY,
+               },
+               [IB_QPS_SQD] = {
+                       .valid = 1,
+                       .opt_param = IB_QP_PKEY_INDEX |
+                                    IB_QP_QKEY,
+               }
+       },
+       [IB_QPS_SQE] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_ERR]   = { .valid = 1 },
+               [IB_QPS_RTS]   = {
+                       .valid = 1,
+                       .opt_param = IB_QP_CUR_STATE |
+                                    IB_QP_QKEY,
+               }
+       },
+       [IB_QPS_ERR] = {
+               [IB_QPS_RESET] = { .valid = 1 },
+               [IB_QPS_ERR]   = { .valid = 1 },
+       }
+};
+
+static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
+                                   enum ib_qp_state next_state,
+                                   enum ib_qp_attr_mask mask)
+{
+       enum ib_qp_attr_mask req_param, opt_param;
+
+       if (mask & IB_QP_CUR_STATE  &&
+           cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+           cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+               return false;
+
+       if (!srd_qp_state_table[cur_state][next_state].valid)
+               return false;
+
+       req_param = srd_qp_state_table[cur_state][next_state].req_param;
+       opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
+
+       if ((mask & req_param) != req_param)
+               return false;
+
+       if (mask & ~(req_param | opt_param | IB_QP_STATE))
+               return false;
+
+       return true;
+}
+
 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
                                  struct ib_qp_attr *qp_attr, int qp_attr_mask,
                                  enum ib_qp_state cur_state,
                                  enum ib_qp_state new_state)
 {
+       int err;
+
 #define EFA_MODIFY_QP_SUPP_MASK \
        (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
-        IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+        IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
+        IB_QP_RNR_RETRY)
 
        if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
                ibdev_dbg(&dev->ibdev,
@@ -757,8 +882,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
                return -EOPNOTSUPP;
        }
 
-       if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
-                               qp_attr_mask)) {
+       if (qp->ibqp.qp_type == IB_QPT_DRIVER)
+               err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
+                                              qp_attr_mask);
+       else
+               err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+                                         qp_attr_mask);
+
+       if (err) {
                ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
                return -EINVAL;
        }
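
srd_qp_state_table and efa_modify_srd_qp_is_ok() above implement a per-transition attribute check for SRD (IB_QPT_DRIVER) QPs, mirroring what ib_modify_qp_is_ok() does for UD. A short worked sketch of two lookups; example_* is hypothetical:

static bool example_srd_transitions(void)
{
	/* valid: RESET->INIT with all required attributes present */
	bool to_init = efa_modify_srd_qp_is_ok(IB_QPS_RESET, IB_QPS_INIT,
					       IB_QP_STATE | IB_QP_PKEY_INDEX |
					       IB_QP_PORT | IB_QP_QKEY);
	/* invalid: RTR->RTS without the required IB_QP_SQ_PSN */
	bool to_rts = efa_modify_srd_qp_is_ok(IB_QPS_RTR, IB_QPS_RTS,
					      IB_QP_STATE | IB_QP_RNR_RETRY);

	return to_init && !to_rts;	/* both checks hold for this table */
}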
@@ -805,28 +936,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        params.qp_handle = qp->qp_handle;
 
        if (qp_attr_mask & IB_QP_STATE) {
-               params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
-                                     BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
+               EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
+                       1);
+               EFA_SET(&params.modify_mask,
+                       EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
                params.cur_qp_state = qp_attr->cur_qp_state;
                params.qp_state = qp_attr->qp_state;
        }
 
        if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
-               params.modify_mask |=
-                       BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
+               EFA_SET(&params.modify_mask,
+                       EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
                params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
        }
 
        if (qp_attr_mask & IB_QP_QKEY) {
-               params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
+               EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
                params.qkey = qp_attr->qkey;
        }
 
        if (qp_attr_mask & IB_QP_SQ_PSN) {
-               params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
+               EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
                params.sq_psn = qp_attr->sq_psn;
        }
 
+       if (qp_attr_mask & IB_QP_RNR_RETRY) {
+               EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
+                       1);
+               params.rnr_retry = qp_attr->rnr_retry;
+       }
+
        err = efa_com_modify_qp(&dev->edev, &params);
        if (err)
                return err;
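
On the verbs side, the new attribute arrives through the standard ib_modify_qp() mask. A hypothetical consumer sketch, not part of this patch, moving an SRD QP to RTS with RNR retries enabled:

static int example_move_to_rts(struct ib_qp *qp)
{
	struct ib_qp_attr attr = {
		.qp_state = IB_QPS_RTS,
		.sq_psn = 0,
		.rnr_retry = 3,	/* retry up to 3 times on receiver-not-ready */
	};

	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RNR_RETRY);
}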
@@ -843,7 +982,7 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
        return efa_com_destroy_cq(&dev->edev, &params);
 }
 
-void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
        struct efa_dev *dev = to_edev(ibcq->device);
        struct efa_cq *cq = to_ecq(ibcq);
@@ -856,6 +995,7 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
        efa_destroy_cq_idx(dev, cq->cq_idx);
        efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
                        DMA_FROM_DEVICE);
+       return 0;
 }
 
 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
@@ -996,7 +1136,7 @@ err_free_mapped:
                        DMA_FROM_DEVICE);
 
 err_out:
-       atomic64_inc(&dev->stats.sw_stats.create_cq_err);
+       atomic64_inc(&dev->stats.create_cq_err);
        return err;
 }
 
@@ -1013,8 +1153,7 @@ static int umem_to_page_list(struct efa_dev *dev,
        ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
                  hp_cnt, pages_in_hp);
 
-       rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
-                           BIT(hp_shift))
+       rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
                page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
 
        return 0;
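
rdma_umem_for_each_dma_block() replaces the open-coded rdma_for_each_block() call on umem->sg_head.sgl/umem->nmap. A minimal sketch of the helper; example_* is a hypothetical name:

static void example_collect_blocks(struct ib_umem *umem, u64 *pages,
				   unsigned long pg_sz)
{
	struct ib_block_iter biter;
	unsigned int i = 0;

	/* walk the umem in pg_sz-aligned DMA blocks */
	rdma_umem_for_each_dma_block(umem, &biter, pg_sz)
		pages[i++] = rdma_block_iter_dma_address(&biter);
}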
@@ -1026,7 +1165,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
        struct page *pg;
        int i;
 
-       sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
+       sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
        if (!sglist)
                return NULL;
        sg_init_table(sglist, page_cnt);
@@ -1370,7 +1509,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 
        supp_access_flags =
                IB_ACCESS_LOCAL_WRITE |
-               (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
+               (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0);
 
        access_flags &= ~IB_ACCESS_OPTIONAL;
        if (access_flags & ~supp_access_flags) {
@@ -1410,9 +1549,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
                goto err_unmap;
        }
 
-       params.page_shift = __ffs(pg_sz);
-       params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
-                                      pg_sz);
+       params.page_shift = order_base_2(pg_sz);
+       params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
 
        ibdev_dbg(&dev->ibdev,
                  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
@@ -1451,7 +1589,7 @@ err_unmap:
 err_free:
        kfree(mr);
 err_out:
-       atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
+       atomic64_inc(&dev->stats.reg_mr_err);
        return ERR_PTR(err);
 }
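
The page accounting above now derives page_shift with order_base_2() and counts aligned DMA blocks with ib_umem_num_dma_blocks(). As a worked case, assuming a 2 MiB best page size: an MR at iova 0x201000 with length 5 MiB spans the 2 MiB blocks [0x200000, 0x800000), so page_num is 3 where a 4 KiB page size would have needed 1280 entries. A minimal sketch with a hypothetical example_* name:

static void example_fill_page_params(struct ib_umem *umem,
				     struct efa_com_reg_mr_params *params,
				     unsigned long pg_sz)
{
	params->page_shift = order_base_2(pg_sz);	/* e.g. 21 for SZ_2M */
	params->page_num = ib_umem_num_dma_blocks(umem, pg_sz);
}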
 
@@ -1569,19 +1707,17 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
        resp.max_tx_batch = dev->dev_attr.max_tx_batch;
        resp.min_sq_wr = dev->dev_attr.min_sq_depth;
 
-       if (udata && udata->outlen) {
-               err = ib_copy_to_udata(udata, &resp,
-                                      min(sizeof(resp), udata->outlen));
-               if (err)
-                       goto err_dealloc_uar;
-       }
+       err = ib_copy_to_udata(udata, &resp,
+                              min(sizeof(resp), udata->outlen));
+       if (err)
+               goto err_dealloc_uar;
 
        return 0;
 
 err_dealloc_uar:
        efa_dealloc_uar(dev, result.uarn);
 err_out:
-       atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+       atomic64_inc(&dev->stats.alloc_ucontext_err);
        return err;
 }
 
@@ -1614,7 +1750,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
                ibdev_dbg(&dev->ibdev,
                          "pgoff[%#lx] does not have valid entry\n",
                          vma->vm_pgoff);
-               atomic64_inc(&dev->stats.sw_stats.mmap_err);
+               atomic64_inc(&dev->stats.mmap_err);
                return -EINVAL;
        }
        entry = to_emmap(rdma_entry);
@@ -1656,7 +1792,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
                        "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
                        entry->address, rdma_entry->npages * PAGE_SIZE,
                        entry->mmap_flag, err);
-               atomic64_inc(&dev->stats.sw_stats.mmap_err);
+               atomic64_inc(&dev->stats.mmap_err);
        }
 
        rdma_user_mmap_entry_put(rdma_entry);
@@ -1741,11 +1877,11 @@ int efa_create_ah(struct ib_ah *ibah,
 err_destroy_ah:
        efa_ah_destroy(dev, ah);
 err_out:
-       atomic64_inc(&dev->stats.sw_stats.create_ah_err);
+       atomic64_inc(&dev->stats.create_ah_err);
        return err;
 }
 
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
 {
        struct efa_dev *dev = to_edev(ibah->pd->device);
        struct efa_ah *ah = to_eah(ibah);
@@ -1755,10 +1891,11 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
        if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
                ibdev_dbg(&dev->ibdev,
                          "Destroy address handle is not supported in atomic context\n");
-               return;
+               return -EOPNOTSUPP;
        }
 
        efa_ah_destroy(dev, ah);
+       return 0;
 }
 
 struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
@@ -1774,13 +1911,15 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
        struct efa_com_get_stats_params params = {};
        union efa_com_get_stats_result result;
        struct efa_dev *dev = to_edev(ibdev);
+       struct efa_com_rdma_read_stats *rrs;
+       struct efa_com_messages_stats *ms;
        struct efa_com_basic_stats *bs;
        struct efa_com_stats_admin *as;
        struct efa_stats *s;
        int err;
 
-       params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
        params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+       params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
 
        err = efa_com_get_stats(&dev->edev, &params, &result);
        if (err)
@@ -1793,6 +1932,28 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
        stats->value[EFA_RX_PKTS] = bs->rx_pkts;
        stats->value[EFA_RX_DROPS] = bs->rx_drops;
 
+       params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
+       err = efa_com_get_stats(&dev->edev, &params, &result);
+       if (err)
+               return err;
+
+       ms = &result.messages_stats;
+       stats->value[EFA_SEND_BYTES] = ms->send_bytes;
+       stats->value[EFA_SEND_WRS] = ms->send_wrs;
+       stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
+       stats->value[EFA_RECV_WRS] = ms->recv_wrs;
+
+       params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
+       err = efa_com_get_stats(&dev->edev, &params, &result);
+       if (err)
+               return err;
+
+       rrs = &result.rdma_read_stats;
+       stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
+       stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
+       stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
+       stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
+
        as = &dev->edev.aq.stats;
        stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
        stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
@@ -1801,13 +1962,14 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
 
        s = &dev->stats;
        stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
-       stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
-       stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
-       stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err);
-       stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
-       stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
-       stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
-       stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err);
+       stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
+       stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
+       stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
+       stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
+       stats->value[EFA_ALLOC_UCONTEXT_ERR] =
+               atomic64_read(&s->alloc_ucontext_err);
+       stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
+       stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
 
        return ARRAY_SIZE(efa_stats_names);
 }
index 04575c9..a307d4c 100644 (file)
@@ -232,11 +232,11 @@ static const struct sdma_set_state_action sdma_action_table[] = {
 static void sdma_complete(struct kref *);
 static void sdma_finalput(struct sdma_state *);
 static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
 static void sdma_put(struct sdma_state *);
 static void sdma_set_state(struct sdma_engine *, enum sdma_states);
 static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
 static void sdma_sendctrl(struct sdma_engine *, unsigned);
 static void init_sdma_regs(struct sdma_engine *, u32, uint);
 static void sdma_process_event(
@@ -545,9 +545,10 @@ static void sdma_err_progress_check(struct timer_list *t)
        schedule_work(&sde->err_halt_worker);
 }
 
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
 {
-       struct sdma_engine *sde = (struct sdma_engine *)opaque;
+       struct sdma_engine *sde = from_tasklet(sde, t,
+                                              sdma_hw_clean_up_task);
        u64 statuscsr;
 
        while (1) {
@@ -604,9 +605,9 @@ static void sdma_flush_descq(struct sdma_engine *sde)
                sdma_desc_avail(sde, sdma_descq_freecnt(sde));
 }
 
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
 {
-       struct sdma_engine *sde = (struct sdma_engine *)opaque;
+       struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
        unsigned long flags;
 
        spin_lock_irqsave(&sde->tail_lock, flags);
@@ -1454,11 +1455,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
                sde->tail_csr =
                        get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
 
-               tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
-                            (unsigned long)sde);
-
-               tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
-                            (unsigned long)sde);
+               tasklet_setup(&sde->sdma_hw_clean_up_task,
+                             sdma_hw_clean_up_task);
+               tasklet_setup(&sde->sdma_sw_clean_up_task,
+                             sdma_sw_clean_up_task);
                INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
                INIT_WORK(&sde->flush_worker, sdma_field_flush);
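
The hunks above follow the generic tasklet API conversion: tasklet_setup() registers a callback that receives the tasklet pointer, and from_tasklet() (a container_of() wrapper) recovers the embedding structure. A minimal sketch with hypothetical foo_* names:

struct foo_engine {
	struct tasklet_struct cleanup_task;
};

static void foo_cleanup_task(struct tasklet_struct *t)
{
	struct foo_engine *eng = from_tasklet(eng, t, cleanup_task);

	/* ... operate on eng ... */
}

static void foo_init(struct foo_engine *eng)
{
	tasklet_setup(&eng->cleanup_task, foo_cleanup_task);
}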
 
index 3086563..3591923 100644 (file)
@@ -1424,7 +1424,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
        props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
        props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
        /* see rate_show() in ib core/sysfs.c */
-       props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+       props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
        props->max_vl_num = ppd->vls_supported;
 
        /* Once we are a "first class" citizen and have added the OPA MTUs to
index 5b2f931..75b06db 100644 (file)
 #define HNS_ROCE_VLAN_SL_BIT_MASK      7
 #define HNS_ROCE_VLAN_SL_SHIFT         13
 
+static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
+{
+       u32 fl = ah_attr->grh.flow_label;
+       u16 sport;
+
+       if (!fl)
+               sport = get_random_u32() %
+                       (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 -
+                        IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) +
+                       IB_ROCE_UDP_ENCAP_VALID_PORT_MIN;
+       else
+               sport = rdma_flow_label_to_udp_sport(fl);
+
+       return sport;
+}
+
 int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
                       struct ib_udata *udata)
 {
@@ -79,6 +95,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 
        memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
        ah->av.sl = rdma_ah_get_sl(ah_attr);
+       ah->av.flowlabel = grh->flow_label;
+       ah->av.udp_sport = get_ah_udp_sport(ah_attr);
 
        return 0;
 }
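
The helper above derives the RoCEv2 UDP source port for a new AH: a non-zero GRH flow label is folded deterministically into the valid source-port range by rdma_flow_label_to_udp_sport(), while a zero flow label falls back to a random port in that range. A minimal usage sketch; example_* is a hypothetical name:

static u16 example_sport_from_flow_label(u32 flow_label)
{
	/* a given non-zero flow label always maps to the same UDP source
	 * port, so traffic for one flow stays on one ECMP path */
	return rdma_flow_label_to_udp_sport(flow_label);
}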
@@ -98,8 +116,3 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
 
        return 0;
 }
-
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
-{
-       return;
-}
index a522cb2..a6b23de 100644 (file)
@@ -268,8 +268,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
        }
 
        /* convert system page cnt to hw page cnt */
-       rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
-                           1 << page_shift) {
+       rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
                addr = rdma_block_iter_dma_address(&biter);
                if (idx >= start) {
                        bufs[total++] = addr;
index e87d616..809b22a 100644 (file)
@@ -150,7 +150,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
        int err;
 
        buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
-       buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+       buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
        buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
        buf_attr.region_count = 1;
        buf_attr.fixed_page = true;
@@ -224,6 +224,21 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
        }
 }
 
+static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+                        struct hns_roce_ib_create_cq *ucmd)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+       if (udata) {
+               if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
+                       hr_cq->cqe_size = ucmd->cqe_size;
+               else
+                       hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+       } else {
+               hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+       }
+}
+
 int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
                       struct ib_udata *udata)
 {
@@ -258,7 +273,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
        INIT_LIST_HEAD(&hr_cq->rq_list);
 
        if (udata) {
-               ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+               ret = ib_copy_from_udata(&ucmd, udata,
+                                        min(sizeof(ucmd), udata->inlen));
                if (ret) {
                        ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n",
                                  ret);
@@ -266,6 +282,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
                }
        }
 
+       set_cqe_size(hr_cq, udata, &ucmd);
+
        ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
        if (ret) {
                ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret);
@@ -287,7 +305,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
        /*
         * For the QP created by kernel space, tptr value should be initialized
         * to zero; For the QP created by user space, it will cause synchronous
-        * problems if tptr is set to zero here, so we initialze it in user
+        * problems if tptr is set to zero here, so we initialize it in user
         * space.
         */
        if (!udata && hr_cq->tptr_addr)
@@ -311,7 +329,7 @@ err_cq_buf:
        return ret;
 }
 
-void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
        struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
@@ -322,6 +340,7 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
        free_cq_buf(hr_dev, hr_cq);
        free_cq_db(hr_dev, hr_cq, udata);
        free_cqc(hr_dev, hr_cq);
+       return 0;
 }
 
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
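
set_cqe_size() and the min()-bounded copy in the create_cq changes above use the usual user-ABI growth pattern: copy at most udata->inlen bytes, then treat the new cqe_size field as present only when the user buffer is large enough to contain it. A minimal sketch; example_* is a hypothetical name:

static int example_parse_create_cq_ucmd(struct ib_udata *udata,
					struct hns_roce_ib_create_cq *ucmd,
					u32 *cqe_size)
{
	int ret;

	ret = ib_copy_from_udata(ucmd, udata,
				 min(sizeof(*ucmd), udata->inlen));
	if (ret)
		return ret;

	if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
		*cqe_size = ucmd->cqe_size;
	else
		*cqe_size = HNS_ROCE_V2_CQE_SIZE;	/* old-ABI default */

	return 0;
}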
index 6edcbdc..6d2acff 100644 (file)
@@ -37,8 +37,8 @@
 
 #define DRV_NAME "hns_roce"
 
-/* hip08 is a pci device */
 #define PCI_REVISION_ID_HIP08                  0x21
+#define PCI_REVISION_ID_HIP09                  0x30
 
 #define HNS_ROCE_HW_VER1       ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
 
@@ -57,7 +57,6 @@
 /* Hardware specification only for v1 engine */
 #define HNS_ROCE_MAX_INNER_MTPT_NUM            0x7
 #define HNS_ROCE_MAX_MTPT_PBL_NUM              0x100000
-#define HNS_ROCE_MAX_SGE_NUM                   2
 
 #define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS       20
 #define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT  \
 #define HNS_ROCE_CEQ                           0
 #define HNS_ROCE_AEQ                           1
 
-#define HNS_ROCE_CEQ_ENTRY_SIZE                        0x4
-#define HNS_ROCE_AEQ_ENTRY_SIZE                        0x10
+#define HNS_ROCE_CEQE_SIZE 0x4
+#define HNS_ROCE_AEQE_SIZE 0x10
 
-#define HNS_ROCE_SL_SHIFT                      28
-#define HNS_ROCE_TCLASS_SHIFT                  20
-#define HNS_ROCE_FLOW_LABEL_MASK               0xfffff
+#define HNS_ROCE_V3_EQE_SIZE 0x40
+
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
+#define HNS_ROCE_V2_QPC_SZ 256
+#define HNS_ROCE_V3_QPC_SZ 512
 
 #define HNS_ROCE_MAX_PORTS                     6
-#define HNS_ROCE_MAX_GID_NUM                   16
 #define HNS_ROCE_GID_SIZE                      16
 #define HNS_ROCE_SGE_SIZE                      16
 
 #define PAGES_SHIFT_24                         24
 #define PAGES_SHIFT_32                         32
 
-#define HNS_ROCE_PCI_BAR_NUM                   2
-
 #define HNS_ROCE_IDX_QUE_ENTRY_SZ              4
 #define SRQ_DB_REG                             0x230
 
@@ -467,6 +467,7 @@ struct hns_roce_cq {
        void __iomem                    *cq_db_l;
        u16                             *tptr_addr;
        int                             arm_sn;
+       int                             cqe_size;
        unsigned long                   cqn;
        u32                             vector;
        atomic_t                        refcount;
@@ -535,17 +536,18 @@ struct hns_roce_raq_table {
 };
 
 struct hns_roce_av {
-       u8          port;
-       u8          gid_index;
-       u8          stat_rate;
-       u8          hop_limit;
-       u32         flowlabel;
-       u8          sl;
-       u8          tclass;
-       u8          dgid[HNS_ROCE_GID_SIZE];
-       u8          mac[ETH_ALEN];
-       u16         vlan_id;
-       bool        vlan_en;
+       u8 port;
+       u8 gid_index;
+       u8 stat_rate;
+       u8 hop_limit;
+       u32 flowlabel;
+       u16 udp_sport;
+       u8 sl;
+       u8 tclass;
+       u8 dgid[HNS_ROCE_GID_SIZE];
+       u8 mac[ETH_ALEN];
+       u16 vlan_id;
+       bool vlan_en;
 };
 
 struct hns_roce_ah {
@@ -655,6 +657,8 @@ struct hns_roce_qp {
 
        struct hns_roce_sge     sge;
        u32                     next_sge;
+       enum ib_mtu             path_mtu;
+       u32                     max_inline_data;
 
        /* 0: flush needed, 1: unneeded */
        unsigned long           flush_flag;
@@ -678,7 +682,8 @@ enum {
 };
 
 struct hns_roce_ceqe {
-       __le32                  comp;
+       __le32  comp;
+       __le32  rsv[15];
 };
 
 struct hns_roce_aeqe {
@@ -715,6 +720,7 @@ struct hns_roce_aeqe {
                        u8      rsv0;
                } __packed cmd;
         } event;
+       __le32 rsv[12];
 };
 
 struct hns_roce_eq {
@@ -791,15 +797,15 @@ struct hns_roce_caps {
        int             num_pds;
        int             reserved_pds;
        u32             mtt_entry_sz;
-       u32             cq_entry_sz;
+       u32             cqe_sz;
        u32             page_size_cap;
        u32             reserved_lkey;
        int             mtpt_entry_sz;
-       int             qpc_entry_sz;
+       int             qpc_sz;
        int             irrl_entry_sz;
        int             trrl_entry_sz;
        int             cqc_entry_sz;
-       int             sccc_entry_sz;
+       int             sccc_sz;
        int             qpc_timer_entry_sz;
        int             cqc_timer_entry_sz;
        int             srqc_entry_sz;
@@ -809,6 +815,8 @@ struct hns_roce_caps {
        u32             pbl_hop_num;
        int             aeqe_depth;
        int             ceqe_depth;
+       u32             aeqe_size;
+       u32             ceqe_size;
        enum ib_mtu     max_mtu;
        u32             qpc_bt_num;
        u32             qpc_timer_bt_num;
@@ -930,7 +938,7 @@ struct hns_roce_hw {
        int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
        int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
                        struct ib_udata *udata);
-       void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
+       int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
        int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
        int (*init_eq)(struct hns_roce_dev *hr_dev);
        void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
@@ -1178,10 +1186,13 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
 int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
                       struct ib_udata *udata);
 int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+       return 0;
+}
 
 int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 
 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -1200,8 +1211,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
                            unsigned long mpt_index);
 unsigned long key_to_hw_index(u32 key);
 
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
-                               struct ib_udata *udata);
+int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int hns_roce_dealloc_mw(struct ib_mw *ibmw);
 
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
@@ -1220,7 +1230,7 @@ int hns_roce_create_srq(struct ib_srq *srq,
 int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
                        enum ib_srq_attr_mask srq_attr_mask,
                        struct ib_udata *udata);
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 
 struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
                                 struct ib_qp_init_attr *init_attr,
@@ -1247,7 +1257,7 @@ int to_hr_qp_type(int qp_type);
 int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
                       struct ib_udata *udata);
 
-void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 int hns_roce_db_map_user(struct hns_roce_ucontext *context,
                         struct ib_udata *udata, unsigned long virt,
                         struct hns_roce_db *db);
index c8db6f8..7487cf3 100644 (file)
@@ -338,8 +338,8 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
        void __iomem *bt_cmd;
        __le32 bt_cmd_val[2];
        __le32 bt_cmd_h = 0;
-       __le32 bt_cmd_l = 0;
-       u64 bt_ba = 0;
+       __le32 bt_cmd_l;
+       u64 bt_ba;
        int ret = 0;
 
        /* Find the HEM(Hardware Entry Memory) entry */
@@ -1027,7 +1027,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
        if (hr_dev->caps.cqc_timer_entry_sz)
                hns_roce_cleanup_hem_table(hr_dev,
                                           &hr_dev->cqc_timer_table);
-       if (hr_dev->caps.sccc_entry_sz)
+       if (hr_dev->caps.sccc_sz)
                hns_roce_cleanup_hem_table(hr_dev,
                                           &hr_dev->qp_table.sccc_table);
        if (hr_dev->caps.trrl_entry_sz)
@@ -1404,7 +1404,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
 {
        const struct hns_roce_buf_region *r;
        int ofs, end;
-       int ret = 0;
+       int ret;
        int unit;
        int i;
 
index aeb3a6f..5f4d8a3 100644 (file)
@@ -70,15 +70,15 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
        struct hns_roce_qp *qp = to_hr_qp(ibqp);
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_sq_db sq_db = {};
-       int ps_opcode = 0, i = 0;
+       int ps_opcode, i;
        unsigned long flags = 0;
        void *wqe = NULL;
        __le32 doorbell[2];
-       u32 wqe_idx = 0;
-       int nreq = 0;
        int ret = 0;
-       u8 *smac;
        int loopback;
+       u32 wqe_idx;
+       int nreq;
+       u8 *smac;
 
        if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
                ibqp->qp_type != IB_QPT_RC)) {
@@ -271,7 +271,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
                                ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
                                break;
                        case IB_WR_LOCAL_INV:
-                               break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                        case IB_WR_LSO:
@@ -888,7 +887,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
        u32 odb_ext_mod;
        u32 sdb_evt_mod;
        u32 odb_evt_mod;
-       int ret = 0;
+       int ret;
 
        memset(db, 0, sizeof(*db));
 
@@ -1148,8 +1147,8 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
        struct hns_roce_v1_priv *priv = hr_dev->priv;
        struct hns_roce_raq_table *raq = &priv->raq_table;
        struct device *dev = &hr_dev->pdev->dev;
-       int raq_shift = 0;
        dma_addr_t addr;
+       int raq_shift;
        __le32 tmp;
        u32 val;
        int ret;
@@ -1360,7 +1359,7 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
        struct hns_roce_v1_priv *priv = hr_dev->priv;
        struct hns_roce_free_mr *free_mr = &priv->free_mr;
        struct device *dev = &hr_dev->pdev->dev;
-       int ret = 0;
+       int ret;
 
        free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
        if (!free_mr->free_mr_wq) {
@@ -1440,8 +1439,8 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
 
 static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 {
-       int i = 0;
        struct hns_roce_caps *caps = &hr_dev->caps;
+       int i;
 
        hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
        hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
@@ -1471,12 +1470,12 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
        caps->max_qp_dest_rdma  = HNS_ROCE_V1_MAX_QP_DEST_RDMA;
        caps->max_sq_desc_sz    = HNS_ROCE_V1_MAX_SQ_DESC_SZ;
        caps->max_rq_desc_sz    = HNS_ROCE_V1_MAX_RQ_DESC_SZ;
-       caps->qpc_entry_sz      = HNS_ROCE_V1_QPC_ENTRY_SIZE;
+       caps->qpc_sz            = HNS_ROCE_V1_QPC_SIZE;
        caps->irrl_entry_sz     = HNS_ROCE_V1_IRRL_ENTRY_SIZE;
        caps->cqc_entry_sz      = HNS_ROCE_V1_CQC_ENTRY_SIZE;
        caps->mtpt_entry_sz     = HNS_ROCE_V1_MTPT_ENTRY_SIZE;
        caps->mtt_entry_sz      = HNS_ROCE_V1_MTT_ENTRY_SIZE;
-       caps->cq_entry_sz       = HNS_ROCE_V1_CQE_ENTRY_SIZE;
+       caps->cqe_sz            = HNS_ROCE_V1_CQE_SIZE;
        caps->page_size_cap     = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
        caps->reserved_lkey     = 0;
        caps->reserved_pds      = 0;
@@ -1643,7 +1642,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
                                unsigned long timeout)
 {
        u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG;
-       unsigned long end = 0;
+       unsigned long end;
        u32 status = 0;
 
        end = msecs_to_jiffies(timeout) + jiffies;
@@ -1671,7 +1670,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
 {
        unsigned long flags;
        u32 *p = NULL;
-       u8 gid_idx = 0;
+       u8 gid_idx;
 
        gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
 
@@ -1897,8 +1896,7 @@ static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf,
 
 static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
 {
-       return hns_roce_buf_offset(hr_cq->mtr.kmem,
-                                  n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+       return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE);
 }
 
 static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
@@ -2445,7 +2443,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
 
        struct hns_roce_cmd_mailbox *mailbox;
        struct device *dev = &hr_dev->pdev->dev;
-       int ret = 0;
+       int ret;
 
        if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
            new_state >= HNS_ROCE_QP_NUM_STATE ||
@@ -3394,7 +3392,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_qp_context *context;
-       int tmp_qp_state = 0;
+       int tmp_qp_state;
        int ret = 0;
        int state;
 
@@ -3572,7 +3570,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
        return 0;
 }
 
-static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
        struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
@@ -3603,6 +3601,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
                }
                wait_time++;
        }
+       return 0;
 }
 
 static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
@@ -3775,8 +3774,7 @@ static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev,
 
 static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
 {
-       unsigned long off = (entry & (eq->entries - 1)) *
-                            HNS_ROCE_AEQ_ENTRY_SIZE;
+       unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE;
 
        return (struct hns_roce_aeqe *)((u8 *)
                (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
@@ -3881,8 +3879,7 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
 
 static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
 {
-       unsigned long off = (entry & (eq->entries - 1)) *
-                            HNS_ROCE_CEQ_ENTRY_SIZE;
+       unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE;
 
        return (struct hns_roce_ceqe *)((u8 *)
                        (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
@@ -3934,7 +3931,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr)
 {
        struct hns_roce_eq  *eq  = eq_ptr;
        struct hns_roce_dev *hr_dev = eq->hr_dev;
-       int int_work = 0;
+       int int_work;
 
        if (eq->type_flag == HNS_ROCE_CEQ)
                /* CEQ irq routine, CEQ is pulse irq, not clear */
@@ -4132,9 +4129,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
        void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
        struct device *dev = &hr_dev->pdev->dev;
        dma_addr_t tmp_dma_addr;
-       u32 eqconsindx_val = 0;
        u32 eqcuridx_val = 0;
-       u32 eqshift_val = 0;
+       u32 eqconsindx_val;
+       u32 eqshift_val;
        __le32 tmp2 = 0;
        __le32 tmp1 = 0;
        __le32 tmp = 0;
@@ -4253,7 +4250,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
                                       CEQ_REG_OFFSET * i;
                        eq->entries = hr_dev->caps.ceqe_depth;
                        eq->log_entries = ilog2(eq->entries);
-                       eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+                       eq->eqe_size = HNS_ROCE_CEQE_SIZE;
                } else {
                        /* AEQ */
                        eq_table->eqc_base[i] = hr_dev->reg_base +
@@ -4263,7 +4260,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
                                       ROCEE_CAEP_AEQE_CONS_IDX_REG;
                        eq->entries = hr_dev->caps.aeqe_depth;
                        eq->log_entries = ilog2(eq->entries);
-                       eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+                       eq->eqe_size = HNS_ROCE_AEQE_SIZE;
                }
        }
 
index 52307b2..ffd0156 100644 (file)
 #define HNS_ROCE_V1_COMP_EQE_NUM                       0x8000
 #define HNS_ROCE_V1_ASYNC_EQE_NUM                      0x400
 
-#define HNS_ROCE_V1_QPC_ENTRY_SIZE                     256
+#define HNS_ROCE_V1_QPC_SIZE                           256
 #define HNS_ROCE_V1_IRRL_ENTRY_SIZE                    8
 #define HNS_ROCE_V1_CQC_ENTRY_SIZE                     64
 #define HNS_ROCE_V1_MTPT_ENTRY_SIZE                    64
 #define HNS_ROCE_V1_MTT_ENTRY_SIZE                     64
 
-#define HNS_ROCE_V1_CQE_ENTRY_SIZE                     32
+#define HNS_ROCE_V1_CQE_SIZE                           32
 #define HNS_ROCE_V1_PAGE_SIZE_SUPPORT                  0xFFFFF000
 
 #define HNS_ROCE_V1_TABLE_CHUNK_SIZE                   (1 << 17)
index 4cda95e..6d30850 100644 (file)
@@ -153,6 +153,67 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
                       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
 }
 
+static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+                                const struct ib_send_wr *wr,
+                                unsigned int *sge_idx, u32 msg_len)
+{
+       struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+       unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg);
+       unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len;
+       unsigned int left_len_in_pg;
+       unsigned int idx = *sge_idx;
+       unsigned int i = 0;
+       unsigned int len;
+       void *addr;
+       void *dseg;
+
+       if (msg_len > ext_sge_sz) {
+               ibdev_err(ibdev,
+                         "no enough extended sge space for inline data.\n");
+               return -EINVAL;
+       }
+
+       dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+       left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg;
+       len = wr->sg_list[0].length;
+       addr = (void *)(unsigned long)(wr->sg_list[0].addr);
+
+       /* When copying data to the extended sge space, the length left in the
+        * current page may not be enough for the user's sge, so the data
+        * should be split into several parts: one in the first page and the
+        * others in the subsequent pages.
+        */
+       while (1) {
+               if (len <= left_len_in_pg) {
+                       memcpy(dseg, addr, len);
+
+                       idx += len / dseg_len;
+
+                       i++;
+                       if (i >= wr->num_sge)
+                               break;
+
+                       left_len_in_pg -= len;
+                       len = wr->sg_list[i].length;
+                       addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+                       dseg += len;
+               } else {
+                       memcpy(dseg, addr, left_len_in_pg);
+
+                       len -= left_len_in_pg;
+                       addr += left_len_in_pg;
+                       idx += left_len_in_pg / dseg_len;
+                       dseg = hns_roce_get_extend_sge(qp,
+                                               idx & (qp->sge.sge_cnt - 1));
+                       left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
+               }
+       }
+
+       *sge_idx = idx;
+
+       return 0;
+}
+
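
For illustration, a minimal standalone sketch of the page-split copy the loop above performs (the helper name, the callback and the fixed page size are assumptions for the sketch, not driver code; kernel headers such as linux/string.h are assumed):

/* Illustrative sketch: copy 'len' bytes into a destination that is only
 * addressable one fixed-size page at a time, splitting the copy at every
 * page boundary. 'get_page' is a hypothetical callback that returns the
 * mapping of page 'idx', standing in for hns_roce_get_extend_sge().
 */
static void copy_across_pages(const void *src, size_t len, size_t page_size,
			      void *(*get_page)(unsigned int idx))
{
	unsigned int idx = 0;
	void *dst = get_page(idx);
	size_t left_in_pg = page_size;

	while (len) {
		size_t chunk = len < left_in_pg ? len : left_in_pg;

		memcpy(dst, src, chunk);
		src += chunk;
		dst += chunk;
		len -= chunk;
		left_in_pg -= chunk;

		if (!left_in_pg) {	/* page filled, move to the next one */
			dst = get_page(++idx);
			left_in_pg = page_size;
		}
	}
}

The driver's version additionally walks wr->sg_list[] as the source and advances the extended SGE index by the number of data-segment-sized slots each chunk covers (len / dseg_len in the code above).
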
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
                           unsigned int *sge_ind, unsigned int valid_num_sge)
 {
@@ -177,73 +238,115 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
        *sge_ind = idx;
 }
 
+static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+       int mtu = ib_mtu_enum_to_int(qp->path_mtu);
+
+       if (len > qp->max_inline_data || len > mtu) {
+               ibdev_err(&hr_dev->ib_dev,
+                         "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
+                         len, qp->max_inline_data, mtu);
+               return false;
+       }
+
+       return true;
+}
+
+static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+                     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+                     unsigned int *sge_idx)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+       u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len);
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       unsigned int curr_idx = *sge_idx;
+       void *dseg = rc_sq_wqe;
+       unsigned int i;
+       int ret;
+
+       if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
+               ibdev_err(ibdev, "invalid inline parameters!\n");
+               return -EINVAL;
+       }
+
+       if (!check_inl_data_len(qp, msg_len))
+               return -EINVAL;
+
+       dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1);
+
+       if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+               roce_set_bit(rc_sq_wqe->byte_20,
+                            V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
+
+               for (i = 0; i < wr->num_sge; i++) {
+                       memcpy(dseg, ((void *)wr->sg_list[i].addr),
+                              wr->sg_list[i].length);
+                       dseg += wr->sg_list[i].length;
+               }
+       } else {
+               roce_set_bit(rc_sq_wqe->byte_20,
+                            V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
+
+               ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+               if (ret)
+                       return ret;
+
+               roce_set_field(rc_sq_wqe->byte_16,
+                              V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+                              V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+                              curr_idx - *sge_idx);
+       }
+
+       *sge_idx = curr_idx;
+
+       return 0;
+}
+
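
To make the two inline paths above concrete: with HNS_ROCE_V2_MAX_RC_INL_INN_SZ defined as 32 in the header change further down, an inline payload of up to 32 bytes is memcpy'd straight after the RC WQE with INL_TYPE left at 0, while anything larger (still bounded by qp->max_inline_data and the path MTU in check_inl_data_len()) sets INL_TYPE to 1 and is spilled into the extended SGE area by fill_ext_sge_inl_data(), which reports the number of SGE slots consumed through the SGE_NUM field (curr_idx - *sge_idx).
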
 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                             struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
                             unsigned int *sge_ind,
                             unsigned int valid_num_sge)
 {
-       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_v2_wqe_data_seg *dseg =
                (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
-       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_qp *qp = to_hr_qp(ibqp);
-       void *wqe = dseg;
        int j = 0;
        int i;
 
-       if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
-               if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) >
-                            hr_dev->caps.max_sq_inline)) {
-                       ibdev_err(ibdev, "inline len(1-%d)=%d, illegal",
-                                 rc_sq_wqe->msg_len,
-                                 hr_dev->caps.max_sq_inline);
-                       return -EINVAL;
-               }
+       roce_set_field(rc_sq_wqe->byte_20,
+                      V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+                      V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+                      (*sge_ind) & (qp->sge.sge_cnt - 1));
 
-               if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
-                       ibdev_err(ibdev, "Not support inline data!\n");
-                       return -EINVAL;
-               }
+       if (wr->send_flags & IB_SEND_INLINE)
+               return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
 
+       if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
                for (i = 0; i < wr->num_sge; i++) {
-                       memcpy(wqe, ((void *)wr->sg_list[i].addr),
-                              wr->sg_list[i].length);
-                       wqe += wr->sg_list[i].length;
+                       if (likely(wr->sg_list[i].length)) {
+                               set_data_seg_v2(dseg, wr->sg_list + i);
+                               dseg++;
+                       }
                }
-
-               roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
-                            1);
        } else {
-               if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
-                       for (i = 0; i < wr->num_sge; i++) {
-                               if (likely(wr->sg_list[i].length)) {
-                                       set_data_seg_v2(dseg, wr->sg_list + i);
-                                       dseg++;
-                               }
+               for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) {
+                       if (likely(wr->sg_list[i].length)) {
+                               set_data_seg_v2(dseg, wr->sg_list + i);
+                               dseg++;
+                               j++;
                        }
-               } else {
-                       roce_set_field(rc_sq_wqe->byte_20,
-                                    V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
-                                    V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-                                    (*sge_ind) & (qp->sge.sge_cnt - 1));
-
-                       for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE;
-                            i++) {
-                               if (likely(wr->sg_list[i].length)) {
-                                       set_data_seg_v2(dseg, wr->sg_list + i);
-                                       dseg++;
-                                       j++;
-                               }
-                       }
-
-                       set_extend_sge(qp, wr, sge_ind, valid_num_sge);
                }
 
-               roce_set_field(rc_sq_wqe->byte_16,
-                              V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
-                              V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+               set_extend_sge(qp, wr, sge_ind, valid_num_sge);
        }
 
+       roce_set_field(rc_sq_wqe->byte_16,
+                      V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+                      V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+
        return 0;
 }
 
@@ -292,6 +395,33 @@ static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr,
        return valid_num;
 }
 
+static __le32 get_immtdata(const struct ib_send_wr *wr)
+{
+       switch (wr->opcode) {
+       case IB_WR_SEND_WITH_IMM:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               return cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+       default:
+               return 0;
+       }
+}
+
+static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+                        const struct ib_send_wr *wr)
+{
+       u32 ib_op = wr->opcode;
+
+       if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM)
+               return -EINVAL;
+
+       ud_sq_wqe->immtdata = get_immtdata(wr);
+
+       roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+                      V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+
+       return 0;
+}
+
 static inline int set_ud_wqe(struct hns_roce_qp *qp,
                             const struct ib_send_wr *wr,
                             void *wqe, unsigned int *sge_idx,
@@ -305,10 +435,15 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
        u32 msg_len = 0;
        bool loopback;
        u8 *smac;
+       int ret;
 
        valid_num_sge = calc_wr_sge_num(wr, &msg_len);
        memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
 
+       ret = set_ud_opcode(ud_sq_wqe, wr);
+       if (WARN_ON(ret))
+               return ret;
+
        roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
                       V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
        roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
@@ -329,23 +464,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
        roce_set_bit(ud_sq_wqe->byte_40,
                     V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
 
-       roce_set_field(ud_sq_wqe->byte_4,
-                      V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
-                      V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
-                      HNS_ROCE_V2_WQE_OP_SEND);
-
        ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
 
-       switch (wr->opcode) {
-       case IB_WR_SEND_WITH_IMM:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
-               break;
-       default:
-               ud_sq_wqe->immtdata = 0;
-               break;
-       }
-
        /* Set sig attr */
        roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
                     (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
@@ -369,7 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
                       curr_idx & (qp->sge.sge_cnt - 1));
 
        roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
-                      V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
+                      V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
        ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
                          qp->qkey : ud_wr(wr)->remote_qkey);
        roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
@@ -402,6 +522,46 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
        return 0;
 }
 
+static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+                        const struct ib_send_wr *wr)
+{
+       u32 ib_op = wr->opcode;
+
+       rc_sq_wqe->immtdata = get_immtdata(wr);
+
+       switch (ib_op) {
+       case IB_WR_RDMA_READ:
+       case IB_WR_RDMA_WRITE:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+               rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+               break;
+       case IB_WR_SEND:
+       case IB_WR_SEND_WITH_IMM:
+               break;
+       case IB_WR_ATOMIC_CMP_AND_SWP:
+       case IB_WR_ATOMIC_FETCH_AND_ADD:
+               rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+               rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+               break;
+       case IB_WR_REG_MR:
+               set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+               break;
+       case IB_WR_LOCAL_INV:
+               roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+               fallthrough;
+       case IB_WR_SEND_WITH_INV:
+               rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+                      V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+
+       return 0;
+}
 static inline int set_rc_wqe(struct hns_roce_qp *qp,
                             const struct ib_send_wr *wr,
                             void *wqe, unsigned int *sge_idx,
@@ -411,25 +571,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
        unsigned int curr_idx = *sge_idx;
        unsigned int valid_num_sge;
        u32 msg_len = 0;
-       int ret = 0;
+       int ret;
 
        valid_num_sge = calc_wr_sge_num(wr, &msg_len);
        memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
 
        rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
 
-       switch (wr->opcode) {
-       case IB_WR_SEND_WITH_IMM:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
-               break;
-       case IB_WR_SEND_WITH_INV:
-               rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
-               break;
-       default:
-               rc_sq_wqe->immtdata = 0;
-               break;
-       }
+       ret = set_rc_opcode(rc_sq_wqe, wr);
+       if (WARN_ON(ret))
+               return ret;
 
        roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
                     (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
@@ -443,33 +594,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
        roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
                     owner_bit);
 
-       switch (wr->opcode) {
-       case IB_WR_RDMA_READ:
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
-               rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
-               break;
-       case IB_WR_LOCAL_INV:
-               roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
-               rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
-               break;
-       case IB_WR_REG_MR:
-               set_frmr_seg(rc_sq_wqe, reg_wr(wr));
-               break;
-       case IB_WR_ATOMIC_CMP_AND_SWP:
-       case IB_WR_ATOMIC_FETCH_AND_ADD:
-               rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
-               rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
-               break;
-       default:
-               break;
-       }
-
-       roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-                      V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-                      to_hr_opcode(wr->opcode));
-
        if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
            wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
                set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
@@ -1682,7 +1806,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->max_sq_desc_sz    = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
        caps->max_rq_desc_sz    = HNS_ROCE_V2_MAX_RQ_DESC_SZ;
        caps->max_srq_desc_sz   = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
-       caps->qpc_entry_sz      = HNS_ROCE_V2_QPC_ENTRY_SZ;
+       caps->qpc_sz            = HNS_ROCE_V2_QPC_SZ;
        caps->irrl_entry_sz     = HNS_ROCE_V2_IRRL_ENTRY_SZ;
        caps->trrl_entry_sz     = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
        caps->cqc_entry_sz      = HNS_ROCE_V2_CQC_ENTRY_SZ;
@@ -1690,7 +1814,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->mtpt_entry_sz     = HNS_ROCE_V2_MTPT_ENTRY_SZ;
        caps->mtt_entry_sz      = HNS_ROCE_V2_MTT_ENTRY_SZ;
        caps->idx_entry_sz      = HNS_ROCE_V2_IDX_ENTRY_SZ;
-       caps->cq_entry_sz       = HNS_ROCE_V2_CQE_ENTRY_SIZE;
+       caps->cqe_sz            = HNS_ROCE_V2_CQE_SIZE;
        caps->page_size_cap     = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
        caps->reserved_lkey     = 0;
        caps->reserved_pds      = 0;
@@ -1739,6 +1863,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->gid_table_len[0]  = HNS_ROCE_V2_GID_INDEX_NUM;
        caps->ceqe_depth        = HNS_ROCE_V2_COMP_EQE_NUM;
        caps->aeqe_depth        = HNS_ROCE_V2_ASYNC_EQE_NUM;
+       caps->aeqe_size         = HNS_ROCE_AEQE_SIZE;
+       caps->ceqe_size         = HNS_ROCE_CEQE_SIZE;
        caps->local_ca_ack_delay = 0;
        caps->max_mtu = IB_MTU_4096;
 
@@ -1760,19 +1886,26 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
        caps->cqc_timer_buf_pg_sz = 0;
        caps->cqc_timer_hop_num   = HNS_ROCE_HOP_NUM_0;
 
-       caps->sccc_entry_sz       = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+       caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ;
        caps->sccc_ba_pg_sz       = 0;
        caps->sccc_buf_pg_sz      = 0;
        caps->sccc_hop_num        = HNS_ROCE_SCCC_HOP_NUM;
+
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+               caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+               caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+               caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+               caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+       }
 }
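
The revision check above is what actually moves HIP09 hardware onto the larger context formats: the EQE, CQE and QPC sizes default to the HIP08 values and are only bumped to the V3 sizes when the PCI revision reports HIP09, while the 64-byte SCCC (HNS_ROCE_V3_SCCC_SZ) is switched in the hns_roce_query_pf_caps() path further down.
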
 
 static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
                       int *buf_page_size, int *bt_page_size, u32 hem_type)
 {
        u64 obj_per_chunk;
-       int bt_chunk_size = 1 << PAGE_SHIFT;
-       int buf_chunk_size = 1 << PAGE_SHIFT;
-       int obj_per_chunk_default = buf_chunk_size / obj_size;
+       u64 bt_chunk_size = PAGE_SIZE;
+       u64 buf_chunk_size = PAGE_SIZE;
+       u64 obj_per_chunk_default = buf_chunk_size / obj_size;
 
        *buf_page_size = 0;
        *bt_page_size = 0;
@@ -1855,7 +1988,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
        caps->max_sq_desc_sz         = resp_a->max_sq_desc_sz;
        caps->max_rq_desc_sz         = resp_a->max_rq_desc_sz;
        caps->max_srq_desc_sz        = resp_a->max_srq_desc_sz;
-       caps->cq_entry_sz            = resp_a->cq_entry_sz;
+       caps->cqe_sz                 = HNS_ROCE_V2_CQE_SIZE;
 
        caps->mtpt_entry_sz          = resp_b->mtpt_entry_sz;
        caps->irrl_entry_sz          = resp_b->irrl_entry_sz;
@@ -1863,9 +1996,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
        caps->cqc_entry_sz           = resp_b->cqc_entry_sz;
        caps->srqc_entry_sz          = resp_b->srqc_entry_sz;
        caps->idx_entry_sz           = resp_b->idx_entry_sz;
-       caps->sccc_entry_sz          = resp_b->scc_ctx_entry_sz;
+       caps->sccc_sz                = resp_b->sccc_sz;
        caps->max_mtu                = resp_b->max_mtu;
-       caps->qpc_entry_sz           = le16_to_cpu(resp_b->qpc_entry_sz);
+       caps->qpc_sz                 = HNS_ROCE_V2_QPC_SZ;
        caps->min_cqes               = resp_b->min_cqes;
        caps->min_wqes               = resp_b->min_wqes;
        caps->page_size_cap          = le32_to_cpu(resp_b->page_size_cap);
@@ -1958,6 +2091,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
        caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
        caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
        caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
+       caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
+       caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
        caps->mtt_ba_pg_sz = 0;
        caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
        caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
@@ -1981,7 +2116,15 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
                                          V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
                                          V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
 
-       calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num,
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+               caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+               caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+               caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+               caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+               caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+       }
+
+       calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
                   caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
                   HEM_TYPE_QPC);
        calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
@@ -1998,7 +2141,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
        caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
        caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
 
-       calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+       calc_pg_sz(caps->num_qps, caps->sccc_sz,
                   caps->sccc_hop_num, caps->sccc_bt_num,
                   &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
                   HEM_TYPE_SCCC);
@@ -2018,6 +2161,56 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
        return 0;
 }
 
+static int hns_roce_config_qpc_size(struct hns_roce_dev *hr_dev)
+{
+       struct hns_roce_cmq_desc desc;
+       struct hns_roce_cfg_entry_size *cfg_size =
+                                 (struct hns_roce_cfg_entry_size *)desc.data;
+
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+                                     false);
+
+       cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_QPC_SIZE);
+       cfg_size->size = cpu_to_le32(hr_dev->caps.qpc_sz);
+
+       return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_sccc_size(struct hns_roce_dev *hr_dev)
+{
+       struct hns_roce_cmq_desc desc;
+       struct hns_roce_cfg_entry_size *cfg_size =
+                                 (struct hns_roce_cfg_entry_size *)desc.data;
+
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+                                     false);
+
+       cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_SCCC_SIZE);
+       cfg_size->size = cpu_to_le32(hr_dev->caps.sccc_sz);
+
+       return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
+{
+       int ret;
+
+       if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+               return 0;
+
+       ret = hns_roce_config_qpc_size(hr_dev);
+       if (ret) {
+               dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret);
+               return ret;
+       }
+
+       ret = hns_roce_config_sccc_size(hr_dev);
+       if (ret)
+               dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret);
+
+       return ret;
+}
+
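
hns_roce_config_entry_size() is deliberately a no-op before HIP09; on HIP09 it sends the new HNS_ROCE_OPC_CFG_ENTRY_SIZE (0x8409) command twice, once with HNS_ROCE_CFG_QPC_SIZE and once with HNS_ROCE_CFG_SCCC_SIZE, so that the firmware learns the enlarged context sizes chosen in set_default_caps()/hns_roce_query_pf_caps(); it is wired into hns_roce_v2_profile() right after the BT configuration, as the next hunk shows.
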
 static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_caps *caps = &hr_dev->caps;
@@ -2090,9 +2283,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        }
 
        ret = hns_roce_v2_set_bt(hr_dev);
-       if (ret)
-               dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
-                       ret);
+       if (ret) {
+               dev_err(hr_dev->dev,
+                       "Configure bt attribute fail, ret = %d.\n", ret);
+               return ret;
+       }
+
+       /* Configure the size of QPC, SCCC, etc. */
+       ret = hns_roce_config_entry_size(hr_dev);
 
        return ret;
 }
@@ -2757,8 +2955,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 
 static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 {
-       return hns_roce_buf_offset(hr_cq->mtr.kmem,
-                                  n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+       return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
 }
 
 static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
@@ -2858,6 +3055,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
        roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
                       V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
 
+       roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M,
+                      V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
+                      HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
+
        cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
 
        roce_set_field(cq_context->byte_16_hop_addr,
@@ -3025,7 +3226,8 @@ out:
 }
 
 static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
-                          struct hns_roce_v2_cqe *cqe, struct ib_wc *wc)
+                          struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+                          struct ib_wc *wc)
 {
        static const struct {
                u32 cqe_status;
@@ -3066,7 +3268,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
 
        ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
        print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
-                      sizeof(*cqe), false);
+                      cq->cqe_size, false);
 
        /*
         * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
@@ -3163,7 +3365,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                ++wq->tail;
        }
 
-       get_cqe_status(hr_dev, *cur_qp, cqe, wc);
+       get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc);
        if (unlikely(wc->status != IB_WC_SUCCESS))
                return 0;
 
@@ -3514,16 +3716,21 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
 
 static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
                                 struct hns_roce_v2_qp_context *context,
+                                struct hns_roce_v2_qp_context *qpc_mask,
                                 struct hns_roce_qp *hr_qp)
 {
        struct hns_roce_cmd_mailbox *mailbox;
+       int qpc_size;
        int ret;
 
        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
 
-       memcpy(mailbox->buf, context, sizeof(*context) * 2);
+       /* The QPC size of HIP08 is only 256B, which is half that of HIP09 */
+       qpc_size = hr_dev->caps.qpc_sz;
+       memcpy(mailbox->buf, context, qpc_size);
+       memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
 
        ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
                                HNS_ROCE_CMD_MODIFY_QPC,
@@ -3641,9 +3848,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
                             V2_QPC_BYTE_76_SRQ_EN_S, 1);
        }
 
-       roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
-                      V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4);
-
        roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
 
        hr_qp->access_flags = attr->qp_access_flags;
@@ -3954,6 +4158,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        dma_addr_t trrl_ba;
        dma_addr_t irrl_ba;
        enum ib_mtu mtu;
+       u8 lp_pktn_ini;
        u8 port_num;
        u64 *mtts;
        u8 *dmac;
@@ -4052,6 +4257,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                       V2_QPC_BYTE_52_DMAC_S, 0);
 
        mtu = get_mtu(ibqp, attr);
+       hr_qp->path_mtu = mtu;
 
        if (attr_mask & IB_QP_PATH_MTU) {
                roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
@@ -4061,13 +4267,21 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        }
 
 #define MAX_LP_MSG_LEN 65536
-       /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */
+       /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
+       lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu));
+
        roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
-                      V2_QPC_BYTE_56_LP_PKTN_INI_S,
-                      ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)));
+                      V2_QPC_BYTE_56_LP_PKTN_INI_S, lp_pktn_ini);
        roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
                       V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
 
+       /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */
+       roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
+                      V2_QPC_BYTE_172_ACK_REQ_FREQ_S, lp_pktn_ini);
+       roce_set_field(qpc_mask->byte_172_sq_psn,
+                      V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
+                      V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0);
+
        roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
                     V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
        roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
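
A quick sanity check of the LP_PKTN_INI math above: with a 4096-byte path MTU, lp_pktn_ini = ilog2(65536 / 4096) = ilog2(16) = 4, so MTU * 2^LP_PKTN_INI = 4096 * 16 = 64KB, which is exactly the MAX_LP_MSG_LEN bound, and ACK_REQ_FREQ is then programmed to the same value 4 to satisfy the "larger than or equal to LP_PKTN_INI" rule noted in the comment.
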
@@ -4164,6 +4378,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
        return 0;
 }
 
+static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+       if (!fl)
+               fl = rdma_calc_flow_label(lqpn, rqpn);
+
+       return rdma_flow_label_to_udp_sport(fl);
+}
+
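
rdma_calc_flow_label() and rdma_flow_label_to_udp_sport() are IB core helpers used as-is here; the sketch below only illustrates the general idea of folding a 20-bit flow label into the RoCEv2 UDP source-port range, and the 0xc000 base and the exact fold are assumptions for the sketch rather than the kernel implementation:

/* Illustrative sketch: fold a 20-bit flow label into 14 bits of entropy and
 * offset it into the dynamic UDP port range (assumed to start at 0xc000),
 * so that different QP pairs tend to hash onto different network paths.
 */
static u16 example_flow_label_to_udp_sport(u32 fl)
{
	u32 folded = (fl ^ (fl >> 14)) & 0x3fff;

	return (u16)(0xc000 | folded);
}
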
 static int hns_roce_v2_set_path(struct ib_qp *ibqp,
                                const struct ib_qp_attr *attr,
                                int attr_mask,
@@ -4227,7 +4449,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
 
        roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
                       V2_QPC_BYTE_52_UDPSPN_S,
-                      is_udp ? 0x12b7 : 0);
+                      is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num,
+                                             attr->dest_qp_num) : 0);
 
        roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
                       V2_QPC_BYTE_52_UDPSPN_S, 0);
@@ -4259,11 +4482,19 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
                       V2_QPC_BYTE_28_FL_S, 0);
        memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
        memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+
+       hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+       if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) {
+               ibdev_err(ibdev,
+                         "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n",
+                         hr_qp->sl, MAX_SERVICE_LEVEL);
+               return -EINVAL;
+       }
+
        roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-                      V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
+                      V2_QPC_BYTE_28_SL_S, hr_qp->sl);
        roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
                       V2_QPC_BYTE_28_SL_S, 0);
-       hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
 
        return 0;
 }
@@ -4309,7 +4540,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
        }
 
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
-               memset(qpc_mask, 0, sizeof(*qpc_mask));
+               memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
                modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
                                        qpc_mask);
        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -4532,8 +4763,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
         * we should set all bits of the relevant fields in context mask to
         * 0 at the same time, else set them to 0x1.
         */
-       memset(context, 0, sizeof(*context));
-       memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+       memset(context, 0, hr_dev->caps.qpc_sz);
+       memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
+
        ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
                                         new_state, context, qpc_mask);
        if (ret)
@@ -4583,7 +4815,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                       V2_QPC_BYTE_60_QP_ST_S, 0);
 
        /* SW pass context to HW */
-       ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp);
+       ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
        if (ret) {
                ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret);
                goto out;
@@ -4646,7 +4878,7 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
        if (ret)
                goto out;
 
-       memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
+       memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz);
 
 out:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -4759,7 +4991,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
                                            V2_QPC_BYTE_212_RETRY_CNT_M,
                                            V2_QPC_BYTE_212_RETRY_CNT_S);
-       qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer);
+       qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack,
+                                           V2_QPC_BYTE_244_RNR_CNT_M,
+                                           V2_QPC_BYTE_244_RNR_CNT_S);
 
 done:
        qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -4775,6 +5009,7 @@ done:
        }
 
        qp_init_attr->cap = qp_attr->cap;
+       qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits;
 
 out:
        mutex_unlock(&hr_qp->mutex);
@@ -5004,6 +5239,10 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
        struct hns_roce_cmd_mailbox *mailbox;
        int ret;
 
+       /* Resizing SRQs is not supported yet */
+       if (srq_attr_mask & IB_SRQ_MAX_WR)
+               return -EINVAL;
+
        if (srq_attr_mask & IB_SRQ_LIMIT) {
                if (srq_attr->srq_limit >= srq->wqe_cnt)
                        return -EINVAL;
@@ -5233,7 +5472,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
 
        aeqe = hns_roce_buf_offset(eq->mtr.kmem,
                                   (eq->cons_index & (eq->entries - 1)) *
-                                  HNS_ROCE_AEQ_ENTRY_SIZE);
+                                  eq->eqe_size);
 
        return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
                !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
@@ -5333,7 +5572,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
 
        ceqe = hns_roce_buf_offset(eq->mtr.kmem,
                                   (eq->cons_index & (eq->entries - 1)) *
-                                  HNS_ROCE_CEQ_ENTRY_SIZE);
+                                  eq->eqe_size);
+
        return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
                (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
 }
@@ -5374,7 +5614,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
 {
        struct hns_roce_eq *eq = eq_ptr;
        struct hns_roce_dev *hr_dev = eq->hr_dev;
-       int int_work = 0;
+       int int_work;
 
        if (eq->type_flag == HNS_ROCE_CEQ)
                /* Completion event interrupt */
@@ -5609,14 +5849,16 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
        roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M,
                       HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX);
 
-       /* set nex_eqe_ba[43:12] */
-       roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
+       roce_set_field(eqc->byte_40, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
                       HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12);
 
-       /* set nex_eqe_ba[63:44] */
-       roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
+       roce_set_field(eqc->byte_44, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
                       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44);
 
+       roce_set_field(eqc->byte_44, HNS_ROCE_EQC_EQE_SIZE_M,
+                      HNS_ROCE_EQC_EQE_SIZE_S,
+                      eq->eqe_size == HNS_ROCE_V3_EQE_SIZE ? 1 : 0);
+
        return 0;
 }
 
@@ -5807,7 +6049,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
                        eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
                        eq->type_flag = HNS_ROCE_CEQ;
                        eq->entries = hr_dev->caps.ceqe_depth;
-                       eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+                       eq->eqe_size = hr_dev->caps.ceqe_size;
                        eq->irq = hr_dev->irq[i + other_num + aeq_num];
                        eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
                        eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
@@ -5816,7 +6058,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
                        eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
                        eq->type_flag = HNS_ROCE_AEQ;
                        eq->entries = hr_dev->caps.aeqe_depth;
-                       eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+                       eq->eqe_size = hr_dev->caps.aeqe_size;
                        eq->irq = hr_dev->irq[i - comp_num + other_num];
                        eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
                        eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
index ac29be4..29c9dd4 100644 (file)
@@ -60,6 +60,7 @@
 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM             64
 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM         0x200000
 #define HNS_ROCE_V2_MAX_SQ_INLINE              0x20
+#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ          32
 #define HNS_ROCE_V2_UAR_NUM                    256
 #define HNS_ROCE_V2_PHY_UAR_NUM                        1
 #define HNS_ROCE_V2_MAX_IRQ_NUM                        65
@@ -77,7 +78,6 @@
 #define HNS_ROCE_V2_MAX_SQ_DESC_SZ             64
 #define HNS_ROCE_V2_MAX_RQ_DESC_SZ             16
 #define HNS_ROCE_V2_MAX_SRQ_DESC_SZ            64
-#define HNS_ROCE_V2_QPC_ENTRY_SZ               256
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ              64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ              48
 #define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ   100
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ              64
 #define HNS_ROCE_V2_MTT_ENTRY_SZ               64
 #define HNS_ROCE_V2_IDX_ENTRY_SZ               4
-#define HNS_ROCE_V2_CQE_ENTRY_SIZE             32
-#define HNS_ROCE_V2_SCCC_ENTRY_SZ              32
+
+#define HNS_ROCE_V2_SCCC_SZ                    32
+#define HNS_ROCE_V3_SCCC_SZ                    64
+
 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ         PAGE_SIZE
 #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ         PAGE_SIZE
 #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED                0xFFFFF000
@@ -229,6 +231,7 @@ enum hns_roce_opcode_type {
        HNS_ROCE_OPC_CFG_TMOUT_LLM                      = 0x8404,
        HNS_ROCE_OPC_QUERY_PF_TIMER_RES                 = 0x8406,
        HNS_ROCE_OPC_QUERY_PF_CAPS_NUM                  = 0x8408,
+       HNS_ROCE_OPC_CFG_ENTRY_SIZE                     = 0x8409,
        HNS_ROCE_OPC_CFG_SGID_TB                        = 0x8500,
        HNS_ROCE_OPC_CFG_SMAC_TB                        = 0x8501,
        HNS_ROCE_OPC_POST_MB                            = 0x8504,
@@ -309,6 +312,9 @@ struct hns_roce_v2_cq_context {
 #define        V2_CQC_BYTE_8_CQN_S 0
 #define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
 
+#define V2_CQC_BYTE_8_CQE_SIZE_S 27
+#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27)
+
 #define        V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
 #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)
 
@@ -512,6 +518,7 @@ struct hns_roce_v2_qp_context {
        __le32  byte_248_ack_psn;
        __le32  byte_252_err_txcqn;
        __le32  byte_256_sqflush_rqcqe;
+       __le32  ext[64];
 };
 
 #define        V2_QPC_BYTE_4_TST_S 0
@@ -896,6 +903,7 @@ struct hns_roce_v2_cqe {
        u8      smac[4];
        __le32  byte_28;
        __le32  byte_32;
+       __le32  rsv[8];
 };
 
 #define        V2_CQE_BYTE_4_OPCODE_S 0
@@ -1187,6 +1195,8 @@ struct hns_roce_v2_rc_send_wqe {
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
 
+#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
+
 struct hns_roce_wqe_frmr_seg {
        __le32  pbl_size;
        __le32  mode_buf_pg_sz;
@@ -1537,6 +1547,18 @@ struct hns_roce_cfg_sgid_tb {
        __le32  vf_sgid_h;
        __le32  vf_sgid_type_rsv;
 };
+
+enum {
+       HNS_ROCE_CFG_QPC_SIZE = BIT(0),
+       HNS_ROCE_CFG_SCCC_SIZE = BIT(1),
+};
+
+struct hns_roce_cfg_entry_size {
+       __le32  type;
+       __le32  rsv[4];
+       __le32  size;
+};
+
 #define CFG_SGID_TB_TABLE_IDX_S 0
 #define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
 
@@ -1571,7 +1593,7 @@ struct hns_roce_query_pf_caps_a {
        u8 max_sq_desc_sz;
        u8 max_rq_desc_sz;
        u8 max_srq_desc_sz;
-       u8 cq_entry_sz;
+       u8 cqe_sz;
 };
 
 struct hns_roce_query_pf_caps_b {
@@ -1581,9 +1603,9 @@ struct hns_roce_query_pf_caps_b {
        u8 cqc_entry_sz;
        u8 srqc_entry_sz;
        u8 idx_entry_sz;
-       u8 scc_ctx_entry_sz;
+       u8 sccc_sz;
        u8 max_mtu;
-       __le16 qpc_entry_sz;
+       __le16 qpc_sz;
        __le16 qpc_timer_entry_sz;
        __le16 cqc_timer_entry_sz;
        u8 min_cqes;
@@ -1777,8 +1799,8 @@ struct hns_roce_eq_context {
        __le32  byte_28;
        __le32  byte_32;
        __le32  byte_36;
-       __le32  nxt_eqe_ba0;
-       __le32  nxt_eqe_ba1;
+       __le32  byte_40;
+       __le32  byte_44;
        __le32  rsv[5];
 };
 
@@ -1920,6 +1942,9 @@ struct hns_roce_eq_context {
 #define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0
 #define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0)
 
+#define HNS_ROCE_EQC_EQE_SIZE_S 20
+#define HNS_ROCE_EQC_EQE_SIZE_M GENMASK(21, 20)
+
 #define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
 #define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
 
@@ -1941,6 +1966,8 @@ struct hns_roce_eq_context {
 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
 
+#define MAX_SERVICE_LEVEL 0x7
+
 struct hns_roce_wqe_atomic_seg {
        __le64          fetchadd_swap_data;
        __le64          cmp_data;
index 5907cfd..467c829 100644 (file)
@@ -141,8 +141,8 @@ static int hns_roce_netdev_event(struct notifier_block *self,
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct hns_roce_ib_iboe *iboe = NULL;
        struct hns_roce_dev *hr_dev = NULL;
-       u8 port = 0;
-       int ret = 0;
+       int ret;
+       u8 port;
 
        hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
        iboe = &hr_dev->iboe;
@@ -323,6 +323,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
                mutex_init(&context->page_mutex);
        }
 
+       resp.cqe_size = hr_dev->caps.cqe_sz;
+
        ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
        if (ret)
                goto error_fail_copy_to_udata;
@@ -454,6 +456,8 @@ static const struct ib_device_ops hns_roce_dev_mr_ops = {
 static const struct ib_device_ops hns_roce_dev_mw_ops = {
        .alloc_mw = hns_roce_alloc_mw,
        .dealloc_mw = hns_roce_dealloc_mw,
+
+       INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw),
 };
 
 static const struct ib_device_ops hns_roce_dev_frmr_ops = {
@@ -587,7 +591,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
        }
 
        ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
-                                     HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz,
+                                     HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
                                      hr_dev->caps.num_qps, 1);
        if (ret) {
                dev_err(dev, "Failed to init QP context memory, aborting.\n");
@@ -638,11 +642,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
                }
        }
 
-       if (hr_dev->caps.sccc_entry_sz) {
+       if (hr_dev->caps.sccc_sz) {
                ret = hns_roce_init_hem_table(hr_dev,
                                              &hr_dev->qp_table.sccc_table,
                                              HEM_TYPE_SCCC,
-                                             hr_dev->caps.sccc_entry_sz,
+                                             hr_dev->caps.sccc_sz,
                                              hr_dev->caps.num_qps, 1);
                if (ret) {
                        dev_err(dev,
@@ -682,7 +686,7 @@ err_unmap_qpc_timer:
                hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
 
 err_unmap_ctx:
-       if (hr_dev->caps.sccc_entry_sz)
+       if (hr_dev->caps.sccc_sz)
                hns_roce_cleanup_hem_table(hr_dev,
                                           &hr_dev->qp_table.sccc_table);
 err_unmap_srq:
index e5df388..7f81a69 100644 (file)
@@ -589,28 +589,22 @@ err_table:
        return ret;
 }
 
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
-                               struct ib_udata *udata)
+int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 {
-       struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
-       struct hns_roce_mw *mw;
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+       struct hns_roce_mw *mw = to_hr_mw(ibmw);
        unsigned long index = 0;
        int ret;
 
-       mw = kmalloc(sizeof(*mw), GFP_KERNEL);
-       if (!mw)
-               return ERR_PTR(-ENOMEM);
-
        /* Allocate a key for mw from bitmap */
        ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
        if (ret)
-               goto err_bitmap;
+               return ret;
 
        mw->rkey = hw_index_to_key(index);
 
-       mw->ibmw.rkey = mw->rkey;
-       mw->ibmw.type = type;
-       mw->pdn = to_hr_pd(ib_pd)->pdn;
+       ibmw->rkey = mw->rkey;
+       mw->pdn = to_hr_pd(ibmw->pd)->pdn;
        mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
        mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
        mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
@@ -619,15 +613,11 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
        if (ret)
                goto err_mw;
 
-       return &mw->ibmw;
+       return 0;
 
 err_mw:
        hns_roce_mw_free(hr_dev, mw);
-
-err_bitmap:
-       kfree(mw);
-
-       return ERR_PTR(ret);
+       return ret;
 }
 
 int hns_roce_dealloc_mw(struct ib_mw *ibmw)
@@ -636,8 +626,6 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw)
        struct hns_roce_mw *mw = to_hr_mw(ibmw);
 
        hns_roce_mw_free(hr_dev, mw);
-       kfree(mw);
-
        return 0;
 }
 
@@ -707,19 +695,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
        return size;
 }
 
-static inline int mtr_umem_page_count(struct ib_umem *umem,
-                                     unsigned int page_shift)
-{
-       int count = ib_umem_page_count(umem);
-
-       if (page_shift >= PAGE_SHIFT)
-               count >>= page_shift - PAGE_SHIFT;
-       else
-               count <<= PAGE_SHIFT - page_shift;
-
-       return count;
-}
-
 static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
                                          unsigned int page_shift)
 {
@@ -767,13 +742,11 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
                          struct ib_udata *udata, unsigned long user_addr)
 {
        struct ib_device *ibdev = &hr_dev->ib_dev;
-       unsigned int max_pg_shift = buf_attr->page_shift;
-       unsigned int best_pg_shift = 0;
+       unsigned int best_pg_shift;
        int all_pg_count = 0;
        size_t direct_size;
        size_t total_size;
-       unsigned long tmp;
-       int ret = 0;
+       int ret;
 
        total_size = mtr_bufs_size(buf_attr);
        if (total_size < 1) {
@@ -782,6 +755,9 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
        }
 
        if (udata) {
+               unsigned long pgsz_bitmap;
+               unsigned long page_size;
+
                mtr->kmem = NULL;
                mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
                                        buf_attr->user_access);
@@ -790,15 +766,17 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
                                  PTR_ERR(mtr->umem));
                        return -ENOMEM;
                }
-               if (buf_attr->fixed_page) {
-                       best_pg_shift = max_pg_shift;
-               } else {
-                       tmp = GENMASK(max_pg_shift, 0);
-                       ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr);
-                       best_pg_shift = (ret <= PAGE_SIZE) ?
-                                       PAGE_SHIFT : ilog2(ret);
-               }
-               all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift);
+               if (buf_attr->fixed_page)
+                       pgsz_bitmap = 1 << buf_attr->page_shift;
+               else
+                       pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
+
+               page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
+                                                  user_addr);
+               if (!page_size)
+                       return -EINVAL;
+               best_pg_shift = order_base_2(page_size);
+               all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
                ret = 0;
        } else {
                mtr->umem = NULL;
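
The userspace branch above now leans on the generic umem helpers used elsewhere in this series instead of open-coded page math. A hedged sketch of the usual calling pattern (standalone, with the hns-specific wiring omitted; assumes rdma/ib_umem.h and rdma/ib_verbs.h):

/* Sketch: pick the best supported page size for a user buffer, then count
 * and walk its DMA blocks at that size.
 */
static int example_map_umem(struct ib_umem *umem, unsigned long pgsz_bitmap,
			    u64 iova)
{
	struct ib_block_iter biter;
	unsigned long page_size;
	int nblocks;

	page_size = ib_umem_find_best_pgsz(umem, pgsz_bitmap, iova);
	if (!page_size)
		return -EINVAL;	/* no supported page size fits this buffer */

	nblocks = ib_umem_num_dma_blocks(umem, page_size);

	rdma_umem_for_each_dma_block(umem, &biter, page_size) {
		dma_addr_t addr = rdma_block_iter_dma_address(&biter);

		/* program 'addr' into the hardware translation table here */
		(void)addr;
	}

	return nblocks;
}
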
@@ -808,16 +786,15 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
                        return -ENOMEM;
                }
                direct_size = mtr_kmem_direct_size(is_direct, total_size,
-                                                  max_pg_shift);
+                                                  buf_attr->page_shift);
                ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
-                                        mtr->kmem, max_pg_shift);
+                                        mtr->kmem, buf_attr->page_shift);
                if (ret) {
                        ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
                        goto err_alloc_mem;
-               } else {
-                       best_pg_shift = max_pg_shift;
-                       all_pg_count = mtr->kmem->npages;
                }
+               best_pg_shift = buf_attr->page_shift;
+               all_pg_count = mtr->kmem->npages;
        }
 
        /* must bigger than minimum hardware page shift */
@@ -967,7 +944,7 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
                            unsigned int *buf_page_shift)
 {
        struct hns_roce_buf_region *r;
-       unsigned int page_shift = 0;
+       unsigned int page_shift;
        int page_cnt = 0;
        size_t buf_size;
        int region_cnt;
index b10c50b..98f6949 100644 (file)
@@ -82,9 +82,10 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return 0;
 }
 
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
+       return 0;
 }
 
 int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
index c063c45..6c081dd 100644 (file)
@@ -41,8 +41,6 @@
 #include "hns_roce_hem.h"
 #include <rdma/hns-abi.h>
 
-#define SQP_NUM                                (2 * HNS_ROCE_MAX_PORTS)
-
 static void flush_work_handle(struct work_struct *work)
 {
        struct hns_roce_work *flush_work = container_of(work,
@@ -288,7 +286,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
                }
        }
 
-       if (hr_dev->caps.sccc_entry_sz) {
+       if (hr_dev->caps.sccc_sz) {
                /* Alloc memory for SCC CTX */
                ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
                                         hr_qp->qpn);
@@ -551,10 +549,9 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
        int ret;
 
        if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
-           cap->max_send_sge > hr_dev->caps.max_sq_sg ||
-           cap->max_inline_data > hr_dev->caps.max_sq_inline) {
+           cap->max_send_sge > hr_dev->caps.max_sq_sg) {
                ibdev_err(ibdev,
-                         "failed to check SQ WR, SGE or inline num, ret = %d.\n",
+                         "failed to check SQ WR or SGE num, ret = %d.\n",
                          -EINVAL);
                return -EINVAL;
        }
@@ -577,9 +574,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
        cap->max_send_wr = cnt;
        cap->max_send_sge = hr_qp->sq.max_gs;
 
-       /* We don't support inline sends for kernel QPs (yet) */
-       cap->max_inline_data = 0;
-
        return 0;
 }
 
@@ -847,6 +841,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 
        hr_qp->ibqp.qp_type = init_attr->qp_type;
 
+       if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
+               init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
+
+       hr_qp->max_inline_data = init_attr->cap.max_inline_data;
+
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
        else
@@ -1014,53 +1013,32 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
        int ret;
 
        switch (init_attr->qp_type) {
-       case IB_QPT_RC: {
-               hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
-               if (!hr_qp)
-                       return ERR_PTR(-ENOMEM);
-
-               ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
-                                               hr_qp);
-               if (ret) {
-                       ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n",
-                                 hr_qp->qpn, ret);
-                       kfree(hr_qp);
-                       return ERR_PTR(ret);
-               }
-
+       case IB_QPT_RC:
+       case IB_QPT_GSI:
                break;
+       default:
+               ibdev_err(ibdev, "not support QP type %d\n",
+                         init_attr->qp_type);
+               return ERR_PTR(-EOPNOTSUPP);
        }
-       case IB_QPT_GSI: {
-               /* Userspace is not allowed to create special QPs: */
-               if (udata) {
-                       ibdev_err(ibdev, "not support usr space GSI\n");
-                       return ERR_PTR(-EINVAL);
-               }
 
-               hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
-               if (!hr_qp)
-                       return ERR_PTR(-ENOMEM);
+       hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+       if (!hr_qp)
+               return ERR_PTR(-ENOMEM);
 
+       if (init_attr->qp_type == IB_QPT_GSI) {
                hr_qp->port = init_attr->port_num - 1;
                hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
-
-               ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
-                                               hr_qp);
-               if (ret) {
-                       ibdev_err(ibdev, "Create GSI QP failed!\n");
-                       kfree(hr_qp);
-                       return ERR_PTR(ret);
-               }
-
-               break;
-       }
-       default:{
-               ibdev_err(ibdev, "not support QP type %d\n",
-                         init_attr->qp_type);
-               return ERR_PTR(-EOPNOTSUPP);
-       }
        }
 
+       ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
+       if (ret) {
+               ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n",
+                         init_attr->qp_type, ret);
+               kfree(hr_qp);
+               return ERR_PTR(ret);
+       }
        return &hr_qp->ibqp;
 }
 
@@ -1161,8 +1139,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
        mutex_lock(&hr_qp->mutex);
 
-       cur_state = attr_mask & IB_QP_CUR_STATE ?
-                   attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
+       if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
+               goto out;
+
+       cur_state = hr_qp->state;
        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
        if (ibqp->uobject &&
index b9e2dbd..8caf74e 100644 (file)
@@ -285,7 +285,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        struct hns_roce_srq *srq = to_hr_srq(ib_srq);
        struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_ib_create_srq ucmd = {};
-       int ret = 0;
+       int ret;
        u32 cqn;
 
        /* Check the actual SRQ wqe and SRQ sge num */
@@ -363,7 +363,7 @@ err_buf_alloc:
        return ret;
 }
 
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
        struct hns_roce_srq *srq = to_hr_srq(ibsrq);
@@ -372,6 +372,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
        free_srq_idx(hr_dev, srq);
        free_srq_wrid(srq);
        free_srq_buf(hr_dev, srq);
+       return 0;
 }
 
 int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
index 25747b8..832b80d 100644 (file)
@@ -409,8 +409,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
 }
 
 /* i40iw.c */
-void i40iw_add_ref(struct ib_qp *);
-void i40iw_rem_ref(struct ib_qp *);
+void i40iw_qp_add_ref(struct ib_qp *ibqp);
+void i40iw_qp_rem_ref(struct ib_qp *ibqp);
 struct ib_qp *i40iw_get_qp(struct ib_device *, int);
 
 void i40iw_flush_wqes(struct i40iw_device *iwdev,
@@ -554,9 +554,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
                                          bool wait);
 void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
 void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
-                            struct i40iw_qp *iwqp,
-                            u32 qp_num);
+void i40iw_free_qp_resources(struct i40iw_qp *iwqp);
+
 enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
                                             struct i40iw_dma_mem *memptr,
                                             u32 size, u32 mask);
index a3b9580..3053c34 100644 (file)
@@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
        iwqp = cm_node->iwqp;
        if (iwqp) {
                iwqp->cm_node = NULL;
-               i40iw_rem_ref(&iwqp->ibqp);
+               i40iw_qp_rem_ref(&iwqp->ibqp);
                cm_node->iwqp = NULL;
        } else if (cm_node->qhash_set) {
                i40iw_get_addr_info(cm_node, &nfo);
@@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp)
                kfree(work);
                return;
        }
-       i40iw_add_ref(&iwqp->ibqp);
+       i40iw_qp_add_ref(&iwqp->ibqp);
        spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
 
        work->iwqp = iwqp;
@@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work)
 
        kfree(dwork);
        i40iw_cm_disconn_true(iwqp);
-       i40iw_rem_ref(&iwqp->ibqp);
+       i40iw_qp_rem_ref(&iwqp->ibqp);
 }
 
 /**
@@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        cm_node->lsmm_size = accept.size + conn_param->private_data_len;
        i40iw_cm_init_tsa_conn(iwqp, cm_node);
        cm_id->add_ref(cm_id);
-       i40iw_add_ref(&iwqp->ibqp);
+       i40iw_qp_add_ref(&iwqp->ibqp);
 
        attr.qp_state = IB_QPS_RTS;
        cm_node->qhash_set = false;
@@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        iwqp->cm_node = cm_node;
        cm_node->iwqp = iwqp;
        iwqp->cm_id = cm_id;
-       i40iw_add_ref(&iwqp->ibqp);
+       i40iw_qp_add_ref(&iwqp->ibqp);
 
        if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
                cm_node->state = I40IW_CM_STATE_SYN_SENT;
index e108563..56fdc16 100644 (file)
@@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
                                            __func__, info->qp_cq_id);
                                continue;
                        }
-                       i40iw_add_ref(&iwqp->ibqp);
+                       i40iw_qp_add_ref(&iwqp->ibqp);
                        spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
                        qp = &iwqp->sc_qp;
                        spin_lock_irqsave(&iwqp->lock, flags);
@@ -426,7 +426,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
                        break;
                }
                if (info->qp)
-                       i40iw_rem_ref(&iwqp->ibqp);
+                       i40iw_qp_rem_ref(&iwqp->ibqp);
        } while (1);
 
        if (aeqcnt)
index 58a4331..2408b27 100644 (file)
@@ -192,9 +192,9 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
  * i40iw_dpc - tasklet for aeq and ceq 0
  * @data: iwarp device
  */
-static void i40iw_dpc(unsigned long data)
+static void i40iw_dpc(struct tasklet_struct *t)
 {
-       struct i40iw_device *iwdev = (struct i40iw_device *)data;
+       struct i40iw_device *iwdev = from_tasklet(iwdev, t, dpc_tasklet);
 
        if (iwdev->msix_shared)
                i40iw_process_ceq(iwdev, iwdev->ceqlist);
@@ -206,9 +206,9 @@ static void i40iw_dpc(unsigned long data)
  * i40iw_ceq_dpc - dpc handler for CEQ
  * @data: data points to CEQ
  */
-static void i40iw_ceq_dpc(unsigned long data)
+static void i40iw_ceq_dpc(struct tasklet_struct *t)
 {
-       struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+       struct i40iw_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet);
        struct i40iw_device *iwdev = iwceq->iwdev;
 
        i40iw_process_ceq(iwdev, iwceq);
@@ -689,10 +689,10 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
        enum i40iw_status_code status;
 
        if (iwdev->msix_shared && !ceq_id) {
-               tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+               tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc);
                status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
        } else {
-               tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
+               tasklet_setup(&iwceq->dpc_tasklet, i40iw_ceq_dpc);
                status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
        }
 
@@ -841,7 +841,7 @@ static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iw
        u32 ret = 0;
 
        if (!iwdev->msix_shared) {
-               tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+               tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc);
                ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
        }
        if (ret) {
@@ -1573,7 +1573,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
        status = i40iw_save_msix_info(iwdev, ldev);
        if (status)
                return status;
-       iwdev->hw.dev_context = (void *)ldev->pcidev;
+       iwdev->hw.pcidev = ldev->pcidev;
        iwdev->hw.hw_addr = ldev->hw_addr;
        status = i40iw_allocate_dma_mem(&iwdev->hw,
                                        &iwdev->obj_mem, 8192, 4096);
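
The tasklet changes above follow the tree-wide tasklet_setup()/from_tasklet() conversion: the callback now receives the tasklet pointer and derives its container, instead of casting an unsigned long cookie back to the device. A minimal sketch of the pattern, using hypothetical demo_* names rather than the driver's own types:

    #include <linux/interrupt.h>
    #include <linux/printk.h>

    struct demo_dev {
            struct tasklet_struct dpc_tasklet;
            /* ...other per-device state... */
    };

    /* The callback gets the tasklet itself; from_tasklet() is a
     * container_of() wrapper that recovers the owning structure. */
    static void demo_dpc(struct tasklet_struct *t)
    {
            struct demo_dev *dev = from_tasklet(dev, t, dpc_tasklet);

            pr_debug("processing completions for %p\n", dev);
    }

    static void demo_setup(struct demo_dev *dev)
    {
            /* Replaces tasklet_init(&dev->dpc_tasklet, fn, (unsigned long)dev). */
            tasklet_setup(&dev->dpc_tasklet, demo_dpc);
    }

tasklet_setup() stores only the callback, so the (unsigned long) cookie and the explicit cast in the handler disappear.
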
index 540aab5..5f97643 100644 (file)
@@ -167,7 +167,7 @@ static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
  */
 static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
 {
-       struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+       struct pci_dev *pcidev = hw->pcidev;
        int i;
 
        if (!chunk->pg_cnt)
@@ -193,7 +193,7 @@ static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
                                                    struct i40iw_chunk *chunk,
                                                    int pg_cnt)
 {
-       struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+       struct pci_dev *pcidev = hw->pcidev;
        struct page *page;
        u8 *addr;
        u32 size;
index 54c323c..c3babf3 100644 (file)
@@ -73,6 +73,7 @@ struct i40iw_pd_ops;
 struct i40iw_priv_qp_ops;
 struct i40iw_priv_cq_ops;
 struct i40iw_hmc_ops;
+struct pci_dev;
 
 enum i40iw_page_size {
        I40IW_PAGE_SIZE_4K,
@@ -261,7 +262,7 @@ struct i40iw_vsi_pestat {
 
 struct i40iw_hw {
        u8 __iomem *hw_addr;
-       void *dev_context;
+       struct pci_dev *pcidev;
        struct i40iw_hmc_info hmc;
 };
 
index e07fb37..644f8c6 100644 (file)
@@ -477,25 +477,6 @@ void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev)
        }
 }
 
-/**
- * i40iw_free_qp - callback after destroy cqp completes
- * @cqp_request: cqp request for destroy qp
- * @num: not used
- */
-static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
-{
-       struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
-       struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
-       struct i40iw_device *iwdev;
-       u32 qp_num = iwqp->ibqp.qp_num;
-
-       iwdev = iwqp->iwdev;
-
-       i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
-       i40iw_free_qp_resources(iwdev, iwqp, qp_num);
-       i40iw_rem_devusecount(iwdev);
-}
-
 /**
  * i40iw_wait_event - wait for completion
  * @iwdev: iwarp device
@@ -616,26 +597,23 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
 }
 
 /**
- * i40iw_add_ref - add refcount for qp
+ * i40iw_qp_add_ref - add refcount for qp
  * @ibqp: iwarp qp
  */
-void i40iw_add_ref(struct ib_qp *ibqp)
+void i40iw_qp_add_ref(struct ib_qp *ibqp)
 {
        struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
 
-       atomic_inc(&iwqp->refcount);
+       refcount_inc(&iwqp->refcount);
 }
 
 /**
- * i40iw_rem_ref - rem refcount for qp and free if 0
+ * i40iw_qp_rem_ref - rem refcount for qp and free if 0
  * @ibqp: iwarp qp
  */
-void i40iw_rem_ref(struct ib_qp *ibqp)
+void i40iw_qp_rem_ref(struct ib_qp *ibqp)
 {
        struct i40iw_qp *iwqp;
-       enum i40iw_status_code status;
-       struct i40iw_cqp_request *cqp_request;
-       struct cqp_commands_info *cqp_info;
        struct i40iw_device *iwdev;
        u32 qp_num;
        unsigned long flags;
@@ -643,7 +621,7 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
        iwqp = to_iwqp(ibqp);
        iwdev = iwqp->iwdev;
        spin_lock_irqsave(&iwdev->qptable_lock, flags);
-       if (!atomic_dec_and_test(&iwqp->refcount)) {
+       if (!refcount_dec_and_test(&iwqp->refcount)) {
                spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
                return;
        }
@@ -651,25 +629,8 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
        qp_num = iwqp->ibqp.qp_num;
        iwdev->qp_table[qp_num] = NULL;
        spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
-       cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
-       if (!cqp_request)
-               return;
-
-       cqp_request->callback_fcn = i40iw_free_qp;
-       cqp_request->param = (void *)&iwqp->sc_qp;
-       cqp_info = &cqp_request->info;
-       cqp_info->cqp_cmd = OP_QP_DESTROY;
-       cqp_info->post_sq = 1;
-       cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
-       cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
-       cqp_info->in.u.qp_destroy.remove_hash_idx = true;
-       status = i40iw_handle_cqp_op(iwdev, cqp_request);
-       if (!status)
-               return;
+       complete(&iwqp->free_qp);
 
-       i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
-       i40iw_free_qp_resources(iwdev, iwqp, qp_num);
-       i40iw_rem_devusecount(iwdev);
 }
 
 /**
@@ -751,7 +712,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
                                              u64 size,
                                              u32 alignment)
 {
-       struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+       struct pci_dev *pcidev = hw->pcidev;
 
        if (!mem)
                return I40IW_ERR_PARAM;
@@ -770,7 +731,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
  */
 void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
 {
-       struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+       struct pci_dev *pcidev = hw->pcidev;
 
        if (!mem || !mem->va)
                return;
@@ -936,7 +897,7 @@ static void i40iw_terminate_timeout(struct timer_list *t)
        struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
 
        i40iw_terminate_done(qp, 1);
-       i40iw_rem_ref(&iwqp->ibqp);
+       i40iw_qp_rem_ref(&iwqp->ibqp);
 }
 
 /**
@@ -948,7 +909,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
        struct i40iw_qp *iwqp;
 
        iwqp = (struct i40iw_qp *)qp->back_qp;
-       i40iw_add_ref(&iwqp->ibqp);
+       i40iw_qp_add_ref(&iwqp->ibqp);
        timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0);
        iwqp->terminate_timer.expires = jiffies + HZ;
        add_timer(&iwqp->terminate_timer);
@@ -964,7 +925,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
 
        iwqp = (struct i40iw_qp *)qp->back_qp;
        if (del_timer(&iwqp->terminate_timer))
-               i40iw_rem_ref(&iwqp->ibqp);
+               i40iw_qp_rem_ref(&iwqp->ibqp);
 }
 
 /**
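
The reference-counting rework above replaces the open-coded atomic_t and the asynchronous CQP destroy callback with a refcount_t plus a completion: the final i40iw_qp_rem_ref() signals free_qp, and the destroy path waits for it before tearing the QP down and freeing it. A minimal sketch of that pattern, with hypothetical demo_* names (not the driver's own structures):

    #include <linux/refcount.h>
    #include <linux/completion.h>
    #include <linux/slab.h>

    struct demo_qp {
            refcount_t refcount;
            struct completion free_qp;
            /* ...queue state... */
    };

    static struct demo_qp *demo_create_qp(void)
    {
            struct demo_qp *qp = kzalloc(sizeof(*qp), GFP_KERNEL);

            if (!qp)
                    return NULL;
            refcount_set(&qp->refcount, 1);         /* creation reference */
            init_completion(&qp->free_qp);
            return qp;
    }

    static void demo_qp_rem_ref(struct demo_qp *qp)
    {
            /* Last reference gone: wake the destroyer instead of freeing
             * here, which may run from timer or event-handler context. */
            if (refcount_dec_and_test(&qp->refcount))
                    complete(&qp->free_qp);
    }

    static void demo_destroy_qp(struct demo_qp *qp)
    {
            demo_qp_rem_ref(qp);                    /* drop creation reference */
            wait_for_completion(&qp->free_qp);      /* wait out async users */
            /* ...tear down hardware resources here... */
            kfree(qp);
    }

Freeing only after wait_for_completion() keeps timers and CM work that still hold a reference from touching a freed QP.
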
index b513393..747b4de 100644 (file)
@@ -328,12 +328,13 @@ error:
  * @ibpd: ptr of pd to be deallocated
  * @udata: user data or null for kernel object
  */
-static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct i40iw_pd *iwpd = to_iwpd(ibpd);
        struct i40iw_device *iwdev = to_iwdev(ibpd->device);
 
        i40iw_rem_pdusecount(iwpd, iwdev);
+       return 0;
 }
 
 /**
@@ -363,11 +364,11 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
  * @iwqp: qp ptr (user or kernel)
  * @qp_num: qp number assigned
  */
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
-                            struct i40iw_qp *iwqp,
-                            u32 qp_num)
+void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
 {
        struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
+       struct i40iw_device *iwdev = iwqp->iwdev;
+       u32 qp_num = iwqp->ibqp.qp_num;
 
        i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
        i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
@@ -379,7 +380,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
        i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
        kfree(iwqp->kqp.wrid_mem);
        iwqp->kqp.wrid_mem = NULL;
-       kfree(iwqp->allocated_buffer);
+       kfree(iwqp);
 }
 
 /**
@@ -401,6 +402,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
 static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
        struct i40iw_qp *iwqp = to_iwqp(ibqp);
+       struct ib_qp_attr attr;
+       struct i40iw_device *iwdev = iwqp->iwdev;
+
+       memset(&attr, 0, sizeof(attr));
 
        iwqp->destroyed = 1;
 
@@ -415,7 +420,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
                }
        }
 
-       i40iw_rem_ref(&iwqp->ibqp);
+       attr.qp_state = IB_QPS_ERR;
+       i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+       i40iw_qp_rem_ref(&iwqp->ibqp);
+       wait_for_completion(&iwqp->free_qp);
+       i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp);
+       i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+       i40iw_free_qp_resources(iwqp);
+       i40iw_rem_devusecount(iwdev);
+
        return 0;
 }
 
@@ -524,7 +537,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
        struct i40iw_create_qp_req req;
        struct i40iw_create_qp_resp uresp;
        u32 qp_num = 0;
-       void *mem;
        enum i40iw_status_code ret;
        int err_code;
        int sq_size;
@@ -566,16 +578,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
        init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
        init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
 
-       mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
-       if (!mem)
+       iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+       if (!iwqp)
                return ERR_PTR(-ENOMEM);
 
-       iwqp = (struct i40iw_qp *)mem;
-       iwqp->allocated_buffer = mem;
        qp = &iwqp->sc_qp;
        qp->back_qp = (void *)iwqp;
        qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
 
+       iwqp->iwdev = iwdev;
        iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
 
        if (i40iw_allocate_dma_mem(dev->hw,
@@ -600,7 +611,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
                goto error;
        }
 
-       iwqp->iwdev = iwdev;
        iwqp->iwpd = iwpd;
        iwqp->ibqp.qp_num = qp_num;
        qp = &iwqp->sc_qp;
@@ -714,7 +724,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
                goto error;
        }
 
-       i40iw_add_ref(&iwqp->ibqp);
+       refcount_set(&iwqp->refcount, 1);
        spin_lock_init(&iwqp->lock);
        iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
        iwdev->qp_table[qp_num] = iwqp;
@@ -736,10 +746,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
        }
        init_completion(&iwqp->sq_drained);
        init_completion(&iwqp->rq_drained);
+       init_completion(&iwqp->free_qp);
 
        return &iwqp->ibqp;
 error:
-       i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+       i40iw_free_qp_resources(iwqp);
        return ERR_PTR(err_code);
 }
 
@@ -1052,7 +1063,7 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
  * @ib_cq: cq pointer
  * @udata: user data or NULL for kernel object
  */
-static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
        struct i40iw_cq *iwcq;
        struct i40iw_device *iwdev;
@@ -1064,6 +1075,7 @@ static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
        i40iw_cq_wq_destroy(iwdev, cq);
        cq_free_resources(iwdev, iwcq);
        i40iw_rem_devusecount(iwdev);
+       return 0;
 }
 
 /**
@@ -1320,8 +1332,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
        if (iwmr->type == IW_MEMREG_TYPE_QP)
                iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
 
-       rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap,
-                           iwmr->page_size) {
+       rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
                *pbl = rdma_block_iter_dma_address(&biter);
                pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
        }
@@ -1744,15 +1755,12 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
        struct i40iw_mr *iwmr;
        struct ib_umem *region;
        struct i40iw_mem_reg_req req;
-       u64 pbl_depth = 0;
        u32 stag = 0;
        u16 access;
-       u64 region_length;
        bool use_pbles = false;
        unsigned long flags;
        int err = -ENOSYS;
        int ret;
-       int pg_shift;
 
        if (!udata)
                return ERR_PTR(-EOPNOTSUPP);
@@ -1787,18 +1795,13 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
        if (req.reg_type == IW_MEMREG_TYPE_MEM)
                iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M,
                                                         virt);
-
-       region_length = region->length + (start & (iwmr->page_size - 1));
-       pg_shift = ffs(iwmr->page_size) - 1;
-       pbl_depth = region_length >> pg_shift;
-       pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
        iwmr->length = region->length;
 
        iwpbl->user_base = virt;
        palloc = &iwpbl->pble_alloc;
 
        iwmr->type = req.reg_type;
-       iwmr->page_cnt = (u32)pbl_depth;
+       iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
 
        switch (req.reg_type) {
        case IW_MEMREG_TYPE_QP:
@@ -2636,13 +2639,13 @@ static const struct ib_device_ops i40iw_dev_ops = {
        .get_hw_stats = i40iw_get_hw_stats,
        .get_port_immutable = i40iw_port_immutable,
        .iw_accept = i40iw_accept,
-       .iw_add_ref = i40iw_add_ref,
+       .iw_add_ref = i40iw_qp_add_ref,
        .iw_connect = i40iw_connect,
        .iw_create_listen = i40iw_create_listen,
        .iw_destroy_listen = i40iw_destroy_listen,
        .iw_get_qp = i40iw_get_qp,
        .iw_reject = i40iw_reject,
-       .iw_rem_ref = i40iw_rem_ref,
+       .iw_rem_ref = i40iw_qp_rem_ref,
        .map_mr_sg = i40iw_map_mr_sg,
        .mmap = i40iw_mmap,
        .modify_qp = i40iw_modify_qp,
@@ -2668,7 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
 {
        struct i40iw_ib_device *iwibdev;
        struct net_device *netdev = iwdev->netdev;
-       struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
+       struct pci_dev *pcidev = iwdev->hw.pcidev;
 
        iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
        if (!iwibdev) {
index 331bc21..bab71f3 100644 (file)
@@ -139,7 +139,7 @@ struct i40iw_qp {
        struct i40iw_qp_host_ctx_info ctx_info;
        struct i40iwarp_offload_info iwarp_info;
        void *allocated_buffer;
-       atomic_t refcount;
+       refcount_t refcount;
        struct iw_cm_id *cm_id;
        void *cm_node;
        struct ib_mr *lsmm_mr;
@@ -174,5 +174,6 @@ struct i40iw_qp {
        struct i40iw_dma_mem ietf_mem;
        struct completion sq_drained;
        struct completion rq_drained;
+       struct completion free_qp;
 };
 #endif
index 5f8f8d5..7321d6a 100644 (file)
@@ -232,8 +232,3 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
 
        return 0;
 }
-
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
-       return;
-}
index b591861..4aff1c8 100644 (file)
@@ -54,11 +54,20 @@ struct id_map_entry {
        struct delayed_work timeout;
 };
 
+struct rej_tmout_entry {
+       int slave;
+       u32 rem_pv_cm_id;
+       struct delayed_work timeout;
+       struct xarray *xa_rej_tmout;
+};
+
 struct cm_generic_msg {
        struct ib_mad_hdr hdr;
 
        __be32 local_comm_id;
        __be32 remote_comm_id;
+       unsigned char unused[2];
+       __be16 rej_reason;
 };
 
 struct cm_sidr_generic_msg {
@@ -280,11 +289,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
        if (!sriov->is_going_down && !id->scheduled_delete) {
                id->scheduled_delete = 1;
                schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+       } else if (id->scheduled_delete) {
+               /* Adjust timeout if already scheduled */
+               mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
        }
        spin_unlock_irqrestore(&sriov->going_down_lock, flags);
        spin_unlock(&sriov->id_map_lock);
 }
 
+#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason)
 int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
                struct ib_mad *mad)
 {
@@ -293,8 +306,10 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
        int pv_cm_id = -1;
 
        if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
-                       mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
-                       mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+           mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+           mad->mad_hdr.attr_id == CM_MRA_ATTR_ID ||
+           mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
+           (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) {
                sl_cm_id = get_local_comm_id(mad);
                id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
                if (id)
@@ -314,8 +329,8 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
        }
 
        if (!id) {
-               pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
-                        slave_id, sl_cm_id);
+               pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n",
+                        slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
                return -EINVAL;
        }
 
@@ -327,11 +342,94 @@ cont:
        return 0;
 }
 
+static void rej_tmout_timeout(struct work_struct *work)
+{
+       struct delayed_work *delay = to_delayed_work(work);
+       struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
+       struct rej_tmout_entry *deleted;
+
+       deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
+
+       if (deleted != item)
+               pr_debug("deleted(%p) != item(%p)\n", deleted, item);
+
+       kfree(item);
+}
+
+static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
+{
+       struct rej_tmout_entry *item;
+       struct rej_tmout_entry *old;
+       int ret = 0;
+
+       xa_lock(&sriov->xa_rej_tmout);
+       item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+       if (item) {
+               if (xa_err(item))
+                       ret = xa_err(item);
+               else
+                       /* If a retry, adjust delayed work */
+                       mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+               goto err_or_exists;
+       }
+       xa_unlock(&sriov->xa_rej_tmout);
+
+       item = kmalloc(sizeof(*item), GFP_KERNEL);
+       if (!item)
+               return -ENOMEM;
+
+       INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
+       item->slave = slave;
+       item->rem_pv_cm_id = rem_pv_cm_id;
+       item->xa_rej_tmout = &sriov->xa_rej_tmout;
+
+       old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL);
+       if (old) {
+               pr_debug(
+                       "Non-null old entry (%p) or error (%d) when inserting\n",
+                       old, xa_err(old));
+               kfree(item);
+               return xa_err(old);
+       }
+
+       schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+
+       return 0;
+
+err_or_exists:
+       xa_unlock(&sriov->xa_rej_tmout);
+       return ret;
+}
+
+static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
+{
+       struct rej_tmout_entry *item;
+       int slave;
+
+       xa_lock(&sriov->xa_rej_tmout);
+       item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+       if (!item || xa_err(item)) {
+               pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
+                        rem_pv_cm_id, xa_err(item));
+               slave = !item ? -ENOENT : xa_err(item);
+       } else {
+               slave = item->slave;
+       }
+       xa_unlock(&sriov->xa_rej_tmout);
+
+       return slave;
+}
+
 int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
                             struct ib_mad *mad)
 {
+       struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+       u32 rem_pv_cm_id = get_local_comm_id(mad);
        u32 pv_cm_id;
        struct id_map_entry *id;
+       int sts;
 
        if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
            mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
@@ -347,6 +445,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
                                     be64_to_cpu(gid.global.interface_id));
                        return -ENOENT;
                }
+
+               sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave);
+               if (sts)
+                       /* Even if this fails, we pass on the REQ to the slave */
+                       pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n",
+                                rem_pv_cm_id, *slave, sts);
+
                return 0;
        }
 
@@ -354,7 +459,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
        id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
 
        if (!id) {
-               pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+               if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
+                   REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) {
+                       *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id);
+
+                       return (*slave < 0) ? *slave : 0;
+               }
+               pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n",
+                        pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
                return -ENOENT;
        }
 
@@ -375,6 +487,34 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
        INIT_LIST_HEAD(&dev->sriov.cm_list);
        dev->sriov.sl_id_map = RB_ROOT;
        xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
+       xa_init(&dev->sriov.xa_rej_tmout);
+}
+
+static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
+{
+       struct rej_tmout_entry *item;
+       bool flush_needed = false;
+       unsigned long id;
+       int cnt = 0;
+
+       xa_lock(&sriov->xa_rej_tmout);
+       xa_for_each(&sriov->xa_rej_tmout, id, item) {
+               if (slave < 0 || slave == item->slave) {
+                       mod_delayed_work(system_wq, &item->timeout, 0);
+                       flush_needed = true;
+                       ++cnt;
+               }
+       }
+       xa_unlock(&sriov->xa_rej_tmout);
+
+       if (flush_needed) {
+               flush_scheduled_work();
+               pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
+                        cnt, slave);
+       }
+
+       if (slave < 0)
+               WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
 }
 
 /* slave = -1 ==> all slaves */
@@ -444,4 +584,6 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
                list_del(&map->list);
                kfree(map);
        }
+
+       rej_tmout_xa_cleanup(sriov, slave);
 }
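
The REJ-timeout handling above keys rej_tmout_entry objects by the remote pv_cm_id in an xarray and lets a delayed work item remove and free each entry when it expires. A minimal sketch of that insert/expire pattern, with hypothetical demo_* names (not the mlx4 code itself):

    #include <linux/xarray.h>
    #include <linux/workqueue.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct demo_tmout {
            u32 key;
            struct xarray *xa;
            struct delayed_work timeout;
    };

    static void demo_tmout_expire(struct work_struct *work)
    {
            struct demo_tmout *e = container_of(to_delayed_work(work),
                                                struct demo_tmout, timeout);

            /* Drop our slot only if we are still the stored entry. */
            xa_cmpxchg(e->xa, e->key, e, NULL, 0);
            kfree(e);
    }

    static int demo_tmout_track(struct xarray *xa, u32 key, unsigned long delay)
    {
            struct demo_tmout *e, *old;

            e = kmalloc(sizeof(*e), GFP_KERNEL);
            if (!e)
                    return -ENOMEM;

            e->key = key;
            e->xa = xa;
            INIT_DELAYED_WORK(&e->timeout, demo_tmout_expire);

            /* Insert only into an empty slot; on a retry the caller would
             * extend the existing entry with mod_delayed_work() instead. */
            old = xa_cmpxchg(xa, key, NULL, e, GFP_KERNEL);
            if (old) {
                    kfree(e);
                    return xa_err(old);
            }

            schedule_delayed_work(&e->timeout, delay);
            return 0;
    }

Using xa_cmpxchg() for both insertion and removal keeps the slot consistent even when the entry is concurrently replaced or already gone.
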
index 8a34369..e9b5a4d 100644 (file)
@@ -149,7 +149,6 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
        if (IS_ERR(*umem))
                return PTR_ERR(*umem);
 
-       n = ib_umem_page_count(*umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
        err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
 
@@ -475,7 +474,7 @@ out:
        return err;
 }
 
-void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 {
        struct mlx4_ib_dev *dev = to_mdev(cq->device);
        struct mlx4_ib_cq *mcq = to_mcq(cq);
@@ -495,6 +494,7 @@ void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
                mlx4_db_free(dev->dev, &mcq->db);
        }
        ib_umem_release(mcq->umem);
+       return 0;
 }
 
 static void dump_cqe(void *cqe)
index abe6870..8bd1647 100644 (file)
@@ -500,6 +500,13 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
                                         sgid, dgid);
 }
 
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+       int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+       return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
                          enum ib_qp_type dest_qpt, struct ib_wc *wc,
                          struct ib_grh *grh, struct ib_mad *mad)
@@ -520,8 +527,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
        u16 cached_pkey;
        u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 
-       if (dest_qpt > IB_QPT_GSI)
+       if (dest_qpt > IB_QPT_GSI) {
+               pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt);
                return -EINVAL;
+       }
 
        tun_ctx = dev->sriov.demux[port-1].tun[slave];
 
@@ -538,12 +547,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
        if (dest_qpt) {
                u16 pkey_ix;
                ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
-               if (ret)
+               if (ret) {
+                       pr_debug("unable to get %s cached pkey for index %d, ret %d\n",
+                                is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+                                wc->pkey_index, ret);
                        return -EINVAL;
+               }
 
                ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
-               if (ret)
+               if (ret) {
+                       pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n",
+                                is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+                                cached_pkey, ret);
                        return -EINVAL;
+               }
                tun_pkey_ix = pkey_ix;
        } else
                tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
@@ -715,7 +732,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
 
                err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
                if (err)
-                       pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+                       pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+                                is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
                                 slave, err);
                return 0;
        }
@@ -794,7 +812,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
 
        err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
        if (err)
-               pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+               pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+                        is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
                         slave, err);
        return 0;
 }
@@ -807,27 +826,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
        int err;
        struct ib_port_attr pattr;
 
-       if (in_wc && in_wc->qp) {
-               pr_debug("received MAD: port:%d slid:%d sqpn:%d "
-                        "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
-                        port_num,
-                        in_wc->slid, in_wc->src_qp,
-                        in_wc->dlid_path_bits,
-                        in_wc->qp->qp_num,
-                        in_wc->wc_flags,
-                        be64_to_cpu(in_mad->mad_hdr.tid),
-                        in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
-                        be16_to_cpu(in_mad->mad_hdr.attr_id));
-               if (in_wc->wc_flags & IB_WC_GRH) {
-                       pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
-                                be64_to_cpu(in_grh->sgid.global.subnet_prefix),
-                                be64_to_cpu(in_grh->sgid.global.interface_id));
-                       pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
-                                be64_to_cpu(in_grh->dgid.global.subnet_prefix),
-                                be64_to_cpu(in_grh->dgid.global.interface_id));
-               }
-       }
-
        slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
 
        if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -1299,6 +1297,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
        spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
 }
 
+static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg)
+{
+       unsigned long flags;
+       struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+       struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+
+       spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+       if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+               queue_work(ctx->wi_wq, &ctx->work);
+       spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
 static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
                                  struct mlx4_ib_demux_pv_qp *tun_qp,
                                  int index)
@@ -1341,14 +1351,6 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
        return ret;
 }
 
-static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
-{
-       int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
-
-       return (qpn >= proxy_start && qpn <= proxy_start + 1);
-}
-
-
 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
                         enum ib_qp_type dest_qpt, u16 pkey_index,
                         u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
@@ -1401,10 +1403,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
        spin_lock(&sqp->tx_lock);
        if (sqp->tx_ix_head - sqp->tx_ix_tail >=
-           (MLX4_NUM_TUNNEL_BUFS - 1))
+           (MLX4_NUM_WIRE_BUFS - 1))
                ret = -EAGAIN;
        else
-               wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+               wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
        spin_unlock(&sqp->tx_lock);
        if (ret)
                goto out;
@@ -1484,6 +1486,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
        u16 vlan_id;
        u8 qos;
        u8 *dmac;
+       int sts;
 
        /* Get slave that sent this packet */
        if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1580,13 +1583,17 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
                                        &vlan_id, &qos))
                rdma_ah_set_sl(&ah_attr, qos);
 
-       mlx4_ib_send_to_wire(dev, slave, ctx->port,
-                            is_proxy_qp0(dev, wc->src_qp, slave) ?
-                            IB_QPT_SMI : IB_QPT_GSI,
-                            be16_to_cpu(tunnel->hdr.pkey_index),
-                            be32_to_cpu(tunnel->hdr.remote_qpn),
-                            be32_to_cpu(tunnel->hdr.qkey),
-                            &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+       sts = mlx4_ib_send_to_wire(dev, slave, ctx->port,
+                                  is_proxy_qp0(dev, wc->src_qp, slave) ?
+                                  IB_QPT_SMI : IB_QPT_GSI,
+                                  be16_to_cpu(tunnel->hdr.pkey_index),
+                                  be32_to_cpu(tunnel->hdr.remote_qpn),
+                                  be32_to_cpu(tunnel->hdr.qkey),
+                                  &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+       if (sts)
+               pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n",
+                        is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+                        slave, sts);
 }
 
 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1595,19 +1602,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
        int i;
        struct mlx4_ib_demux_pv_qp *tun_qp;
        int rx_buf_size, tx_buf_size;
+       const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (qp_type > IB_QPT_GSI)
                return -EINVAL;
 
        tun_qp = &ctx->qp[qp_type];
 
-       tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+       tun_qp->ring = kcalloc(nmbr_bufs,
                               sizeof(struct mlx4_ib_buf),
                               GFP_KERNEL);
        if (!tun_qp->ring)
                return -ENOMEM;
 
-       tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+       tun_qp->tx_ring = kcalloc(nmbr_bufs,
                                  sizeof (struct mlx4_ib_tun_tx_buf),
                                  GFP_KERNEL);
        if (!tun_qp->tx_ring) {
@@ -1624,7 +1632,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
                if (!tun_qp->ring[i].addr)
                        goto err;
@@ -1638,7 +1646,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                }
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                tun_qp->tx_ring[i].buf.addr =
                        kmalloc(tx_buf_size, GFP_KERNEL);
                if (!tun_qp->tx_ring[i].buf.addr)
@@ -1669,7 +1677,7 @@ tx_err:
                                    tx_buf_size, DMA_TO_DEVICE);
                kfree(tun_qp->tx_ring[i].buf.addr);
        }
-       i = MLX4_NUM_TUNNEL_BUFS;
+       i = nmbr_bufs;
 err:
        while (i > 0) {
                --i;
@@ -1690,6 +1698,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
        int i;
        struct mlx4_ib_demux_pv_qp *tun_qp;
        int rx_buf_size, tx_buf_size;
+       const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (qp_type > IB_QPT_GSI)
                return;
@@ -1704,13 +1713,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
        }
 
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
                                    rx_buf_size, DMA_FROM_DEVICE);
                kfree(tun_qp->ring[i].addr);
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
                                    tx_buf_size, DMA_TO_DEVICE);
                kfree(tun_qp->tx_ring[i].buf.addr);
@@ -1744,9 +1753,6 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
                                               "buf:%lld\n", wc.wr_id);
                                break;
                        case IB_WC_SEND:
-                               pr_debug("received tunnel send completion:"
-                                        "wrid=0x%llx, status=0x%x\n",
-                                        wc.wr_id, wc.status);
                                rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
                                tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
@@ -1793,6 +1799,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
        struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
        struct ib_qp_attr attr;
        int qp_attr_mask_INIT;
+       const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (qp_type > IB_QPT_GSI)
                return -EINVAL;
@@ -1803,8 +1810,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
        qp_init_attr.init_attr.send_cq = ctx->cq;
        qp_init_attr.init_attr.recv_cq = ctx->cq;
        qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
-       qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+       qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
+       qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
        qp_init_attr.init_attr.cap.max_send_sge = 1;
        qp_init_attr.init_attr.cap.max_recv_sge = 1;
        if (create_tun) {
@@ -1866,7 +1873,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
                goto err_qp;
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
                if (ret) {
                        pr_err(" mlx4_ib_post_pv_buf error"
@@ -1902,8 +1909,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                kfree(sqp->tx_ring[wc.wr_id &
-                                     (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
-                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                     (MLX4_NUM_WIRE_BUFS - 1)].ah);
+                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
                                        = NULL;
                                spin_lock(&sqp->tx_lock);
                                sqp->tx_ix_tail++;
@@ -1912,13 +1919,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                        case IB_WC_RECV:
                                mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
                                                (sqp->ring[wc.wr_id &
-                                               (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+                                               (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
                                grh = &(((struct mlx4_mad_rcv_buf *)
                                                (sqp->ring[wc.wr_id &
-                                               (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+                                               (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
                                mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
                                if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
-                                                          (MLX4_NUM_TUNNEL_BUFS - 1)))
+                                                          (MLX4_NUM_WIRE_BUFS - 1)))
                                        pr_err("Failed reposting SQP "
                                               "buf:%lld\n", wc.wr_id);
                                break;
@@ -1931,8 +1938,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                                 ctx->slave, wc.status, wc.wr_id);
                        if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
                                kfree(sqp->tx_ring[wc.wr_id &
-                                     (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
-                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                     (MLX4_NUM_WIRE_BUFS - 1)].ah);
+                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
                                        = NULL;
                                spin_lock(&sqp->tx_lock);
                                sqp->tx_ix_tail++;
@@ -1972,6 +1979,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
 {
        int ret, cq_size;
        struct ib_cq_init_attr cq_attr = {};
+       const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (ctx->state != DEMUX_PV_STATE_DOWN)
                return -EEXIST;
@@ -1996,12 +2004,13 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
                goto err_out_qp0;
        }
 
-       cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+       cq_size = 2 * nmbr_bufs;
        if (ctx->has_smi)
                cq_size *= 2;
 
        cq_attr.cqe = cq_size;
-       ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+       ctx->cq = ib_create_cq(ctx->ib_dev,
+                              create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler,
                               NULL, ctx, &cq_attr);
        if (IS_ERR(ctx->cq)) {
                ret = PTR_ERR(ctx->cq);
@@ -2038,6 +2047,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
                INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
 
        ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+       ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq;
 
        ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
        if (ret) {
@@ -2181,7 +2191,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
                goto err_mcg;
        }
 
-       snprintf(name, sizeof name, "mlx4_ibt%d", port);
+       snprintf(name, sizeof(name), "mlx4_ibt%d", port);
        ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
        if (!ctx->wq) {
                pr_err("Failed to create tunnelling WQ for port %d\n", port);
@@ -2189,7 +2199,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
                goto err_wq;
        }
 
-       snprintf(name, sizeof name, "mlx4_ibud%d", port);
+       snprintf(name, sizeof(name), "mlx4_ibwi%d", port);
+       ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+       if (!ctx->wi_wq) {
+               pr_err("Failed to create wire WQ for port %d\n", port);
+               ret = -ENOMEM;
+               goto err_wiwq;
+       }
+
+       snprintf(name, sizeof(name), "mlx4_ibud%d", port);
        ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
        if (!ctx->ud_wq) {
                pr_err("Failed to create up/down WQ for port %d\n", port);
@@ -2200,6 +2218,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
        return 0;
 
 err_udwq:
+       destroy_workqueue(ctx->wi_wq);
+       ctx->wi_wq = NULL;
+
+err_wiwq:
        destroy_workqueue(ctx->wq);
        ctx->wq = NULL;
 
@@ -2247,12 +2269,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
                                ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
                }
                flush_workqueue(ctx->wq);
+               flush_workqueue(ctx->wi_wq);
                for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
                        destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
                        free_pv_object(dev, i, ctx->port);
                }
                kfree(ctx->tun);
                destroy_workqueue(ctx->ud_wq);
+               destroy_workqueue(ctx->wi_wq);
                destroy_workqueue(ctx->wq);
        }
 }
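
The demux changes above add a dedicated ordered wire workqueue (wi_wq) per port alongside the existing tunnel and up/down workqueues, with the usual unwind on allocation failure. A minimal sketch of that allocation pattern, with hypothetical demo_* names (not the mlx4 code itself):

    #include <linux/workqueue.h>

    struct demo_port_ctx {
            struct workqueue_struct *wq;     /* tunnel work  */
            struct workqueue_struct *wi_wq;  /* wire work    */
            struct workqueue_struct *ud_wq;  /* up/down work */
    };

    static int demo_alloc_port_wqs(struct demo_port_ctx *ctx, int port)
    {
            ctx->wq = alloc_ordered_workqueue("demo_t%d", WQ_MEM_RECLAIM, port);
            if (!ctx->wq)
                    return -ENOMEM;

            ctx->wi_wq = alloc_ordered_workqueue("demo_wi%d", WQ_MEM_RECLAIM, port);
            if (!ctx->wi_wq)
                    goto err_wiwq;

            ctx->ud_wq = alloc_ordered_workqueue("demo_ud%d", WQ_MEM_RECLAIM, port);
            if (!ctx->ud_wq)
                    goto err_udwq;

            return 0;

    err_udwq:
            destroy_workqueue(ctx->wi_wq);
            ctx->wi_wq = NULL;
    err_wiwq:
            destroy_workqueue(ctx->wq);
            ctx->wq = NULL;
            return -ENOMEM;
    }

An ordered workqueue preserves per-port processing order while still letting the CQ completion handler simply queue_work() onto it.
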
index bd4f975..753c704 100644 (file)
@@ -1215,9 +1215,10 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return 0;
 }
 
-static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
+       return 0;
 }
 
 static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
@@ -1256,11 +1257,12 @@ err2:
        return err;
 }
 
-static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
 {
        ib_destroy_cq(to_mxrcd(xrcd)->cq);
        ib_dealloc_pd(to_mxrcd(xrcd)->pd);
        mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+       return 0;
 }
 
 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
@@ -1533,23 +1535,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
        struct mlx4_net_trans_rule_hw_ctrl *ctrl;
        int default_flow;
 
-       static const u16 __mlx4_domain[] = {
-               [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
-               [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
-               [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
-               [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
-       };
-
        if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
                pr_err("Invalid priority value %d\n", flow_attr->priority);
                return -EINVAL;
        }
 
-       if (domain >= IB_FLOW_DOMAIN_NUM) {
-               pr_err("Invalid domain value %d\n", domain);
-               return -EINVAL;
-       }
-
        if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
                return -EINVAL;
 
@@ -1558,8 +1548,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
                return PTR_ERR(mailbox);
        ctrl = mailbox->buf;
 
-       ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
-                                flow_attr->priority);
+       ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
        ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
        ctrl->port = flow_attr->port;
        ctrl->qpn = cpu_to_be32(qp->qp_num);
@@ -1701,8 +1690,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
 }
 
 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
-                                   struct ib_flow_attr *flow_attr,
-                                   int domain, struct ib_udata *udata)
+                                          struct ib_flow_attr *flow_attr,
+                                          struct ib_udata *udata)
 {
        int err = 0, i = 0, j = 0;
        struct mlx4_ib_flow *mflow;
@@ -1768,8 +1757,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
        }
 
        while (i < ARRAY_SIZE(type) && type[i]) {
-               err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
-                                           &mflow->reg_id[i].id);
+               err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
+                                           type[i], &mflow->reg_id[i].id);
                if (err)
                        goto err_create_flow;
                if (is_bonded) {
@@ -1778,7 +1767,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                         */
                        flow_attr->port = 2;
                        err = __mlx4_ib_create_flow(qp, flow_attr,
-                                                   domain, type[j],
+                                                   MLX4_DOMAIN_UVERBS, type[j],
                                                    &mflow->reg_id[j].mirror);
                        flow_attr->port = 1;
                        if (err)
@@ -2589,11 +2578,16 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
        .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
        .destroy_wq = mlx4_ib_destroy_wq,
        .modify_wq = mlx4_ib_modify_wq,
+
+       INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
+                          ib_rwq_ind_tbl),
 };
 
 static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
        .alloc_mw = mlx4_ib_alloc_mw,
        .dealloc_mw = mlx4_ib_dealloc_mw,
+
+       INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
 };
 
 static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
@@ -2989,10 +2983,8 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
                /* Add an empty rule for IB L2 */
                memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
 
-               err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
-                                           IB_FLOW_DOMAIN_NIC,
-                                           MLX4_FS_REGULAR,
-                                           &mqp->reg_id);
+               err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
+                                           MLX4_FS_REGULAR, &mqp->reg_id);
        } else {
                err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
        }
index 38e87a7..58df064 100644 (file)
@@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags {
 };
 
 enum {
-       MLX4_NUM_TUNNEL_BUFS            = 256,
+       MLX4_NUM_TUNNEL_BUFS            = 512,
+       MLX4_NUM_WIRE_BUFS              = 2048,
 };
 
 struct mlx4_ib_tunnel_header {
@@ -298,6 +299,26 @@ struct mlx4_ib_rss {
        u8                      rss_key[MLX4_EN_RSS_KEY_SIZE];
 };
 
+enum {
+       /*
+        * Largest possible UD header: send with GRH and immediate
+        * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+        * tag.  (LRH would only use 8 bytes, so Ethernet is the
+        * biggest case)
+        */
+       MLX4_IB_UD_HEADER_SIZE          = 82,
+       MLX4_IB_LSO_HEADER_SPARE        = 128,
+};
+
+struct mlx4_ib_sqp {
+       int pkey_index;
+       u32 qkey;
+       u32 send_psn;
+       struct ib_ud_header ud_header;
+       u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+       struct ib_qp *roce_v2_gsi;
+};
+
 struct mlx4_ib_qp {
        union {
                struct ib_qp    ibqp;
@@ -343,7 +364,10 @@ struct mlx4_ib_qp {
        struct mlx4_wqn_range   *wqn_range;
        /* Number of RSS QP parents that uses this WQ */
        u32                     rss_usecnt;
-       struct mlx4_ib_rss      *rss_ctx;
+       union {
+               struct mlx4_ib_rss *rss_ctx;
+               struct mlx4_ib_sqp *sqp;
+       };
 };
 
 struct mlx4_ib_srq {
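The 82 in MLX4_IB_UD_HEADER_SIZE above adds up for the RoCE worst case the comment describes; a hedged breakdown using standard header sizes (not spelled out in the patch): 14-byte Ethernet header + 4-byte 802.1Q tag + 40-byte GRH + 12-byte BTH + 8-byte DETH + 4 bytes of immediate data.

	/* Sanity check of the constant, assuming standard IB/RoCE header sizes. */
	enum { ETH_HDR = 14, VLAN_TAG = 4, GRH = 40, BTH = 12, DETH = 8, IMM = 4 };
	_Static_assert(ETH_HDR + VLAN_TAG + GRH + BTH + DETH + IMM == 82,
		       "matches MLX4_IB_UD_HEADER_SIZE");

Replacing the 18 bytes of Ethernet framing with an 8-byte LRH gives the smaller native-IB case the comment mentions.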
@@ -366,6 +390,10 @@ struct mlx4_ib_ah {
        union mlx4_ext_av       av;
 };
 
+struct mlx4_ib_rwq_ind_table {
+       struct ib_rwq_ind_table ib_rwq_ind_tbl;
+};
+
 /****************************************/
 /* alias guid support */
 /****************************************/
@@ -454,6 +482,7 @@ struct mlx4_ib_demux_pv_ctx {
        struct ib_pd *pd;
        struct work_struct work;
        struct workqueue_struct *wq;
+       struct workqueue_struct *wi_wq;
        struct mlx4_ib_demux_pv_qp qp[2];
 };
 
@@ -461,6 +490,7 @@ struct mlx4_ib_demux_ctx {
        struct ib_device *ib_dev;
        int port;
        struct workqueue_struct *wq;
+       struct workqueue_struct *wi_wq;
        struct workqueue_struct *ud_wq;
        spinlock_t ud_lock;
        atomic64_t subnet_prefix;
@@ -492,6 +522,7 @@ struct mlx4_ib_sriov {
        spinlock_t id_map_lock;
        struct rb_root sl_id_map;
        struct list_head cm_list;
+       struct xarray xa_rej_tmout;
 };
 
 struct gid_cache_context {
@@ -725,8 +756,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-                              struct ib_udata *udata);
+int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
                               u32 max_num_sg);
@@ -736,7 +766,7 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
 int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                      struct ib_udata *udata);
-void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
 void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
@@ -747,14 +777,17 @@ int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
 int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
                            int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+       return 0;
+}
 
 int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
                       struct ib_udata *udata);
 int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
 void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
 int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                          const struct ib_recv_wr **bad_wr);
@@ -890,15 +923,18 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
 struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
                                struct ib_wq_init_attr *init_attr,
                                struct ib_udata *udata);
-void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
 int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
                      u32 wq_attr_mask, struct ib_udata *udata);
 
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
-                             struct ib_rwq_ind_table_init_attr *init_attr,
-                             struct ib_udata *udata);
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl,
+                                struct ib_rwq_ind_table_init_attr *init_attr,
+                                struct ib_udata *udata);
+static inline int
+mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
+{
+       return 0;
+}
 int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
                                       int *num_of_mtts);
 
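The prototype churn in this header tracks the core's switch to destroy callbacks that return int: drivers whose hardware destroy can fail report the error instead of tearing down their state regardless, and objects with nothing to undo (the AH and the RWQ indirection table here) become trivial inline stubs returning 0. A hedged sketch of the resulting shape, with hw_destroy_cq() and free_sw_cq_state() as hypothetical stand-ins for the device-specific calls:

	/* Sketch of the convention, not driver code: keep state intact and
	 * propagate the error if the HW object cannot be destroyed. */
	static int example_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
	{
		int err = hw_destroy_cq(ibcq);		/* hypothetical */

		if (err)
			return err;	/* nothing freed; the core sees the failure */

		free_sw_cq_state(ibcq);			/* hypothetical */
		return 0;
	}

The mlx5_ib_destroy_cq() hunk later in this commit follows this ordering.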
index 1d5ef0d..426fed0 100644
@@ -271,6 +271,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
        u64 total_len = 0;
        int i;
 
+       *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                /*
                 * Initialization - save the first chunk start as the
@@ -421,7 +423,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                goto err_free;
        }
 
-       n = ib_umem_page_count(mr->umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
 
        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
@@ -511,7 +512,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
                        mmr->umem = NULL;
                        goto release_mpt_entry;
                }
-               n = ib_umem_page_count(mmr->umem);
+               n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
                shift = PAGE_SHIFT;
 
                err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
@@ -610,37 +611,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
        return 0;
 }
 
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-                              struct ib_udata *udata)
+int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 {
-       struct mlx4_ib_dev *dev = to_mdev(pd->device);
-       struct mlx4_ib_mw *mw;
+       struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
+       struct mlx4_ib_mw *mw = to_mmw(ibmw);
        int err;
 
-       mw = kmalloc(sizeof(*mw), GFP_KERNEL);
-       if (!mw)
-               return ERR_PTR(-ENOMEM);
-
-       err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
-                           to_mlx4_type(type), &mw->mmw);
+       err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
+                           to_mlx4_type(ibmw->type), &mw->mmw);
        if (err)
-               goto err_free;
+               return err;
 
        err = mlx4_mw_enable(dev->dev, &mw->mmw);
        if (err)
                goto err_mw;
 
-       mw->ibmw.rkey = mw->mmw.key;
-
-       return &mw->ibmw;
+       ibmw->rkey = mw->mmw.key;
+       return 0;
 
 err_mw:
        mlx4_mw_free(dev->dev, &mw->mmw);
-
-err_free:
-       kfree(mw);
-
-       return ERR_PTR(err);
+       return err;
 }
 
 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
@@ -648,8 +639,6 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
        struct mlx4_ib_mw *mw = to_mmw(ibmw);
 
        mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
-       kfree(mw);
-
        return 0;
 }
 
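The umem hunks above (and the matching SRQ change later) replace ib_umem_page_count() with ib_umem_num_dma_blocks(umem, PAGE_SIZE), i.e. they count aligned DMA blocks of the requested size over the mapped range rather than CPU pages. A paraphrased sketch of what that computation amounts to (an approximation of the helper, not a quote of it):

	/* Assumed arithmetic: blocks of size pgsz needed to cover
	 * [iova, iova + length), with both ends aligned to pgsz. */
	static inline size_t num_dma_blocks(u64 iova, u64 length, unsigned long pgsz)
	{
		u64 first = round_down(iova, pgsz);
		u64 last  = round_up(iova + length, pgsz);

		return (size_t)((last - first) / pgsz);
	}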
index 2975f35..5cb8e60 100644
@@ -67,27 +67,6 @@ enum {
        MLX4_IB_LINK_TYPE_ETH           = 1
 };
 
-enum {
-       /*
-        * Largest possible UD header: send with GRH and immediate
-        * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
-        * tag.  (LRH would only use 8 bytes, so Ethernet is the
-        * biggest case)
-        */
-       MLX4_IB_UD_HEADER_SIZE          = 82,
-       MLX4_IB_LSO_HEADER_SPARE        = 128,
-};
-
-struct mlx4_ib_sqp {
-       struct mlx4_ib_qp       qp;
-       int                     pkey_index;
-       u32                     qkey;
-       u32                     send_psn;
-       struct ib_ud_header     ud_header;
-       u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
-       struct ib_qp            *roce_v2_gsi;
-};
-
 enum {
        MLX4_IB_MIN_SQ_STRIDE   = 6,
        MLX4_IB_CACHE_LINE_SIZE = 64,
@@ -123,11 +102,6 @@ enum mlx4_ib_source_type {
        MLX4_IB_RWQ_SRC = 1,
 };
 
-static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
-{
-       return container_of(mqp, struct mlx4_ib_sqp, qp);
-}
-
 static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 {
        if (!mlx4_is_master(dev->dev))
@@ -656,8 +630,6 @@ static int create_qp_rss(struct mlx4_ib_dev *dev,
        if (err)
                goto err_qpn;
 
-       mutex_init(&qp->mutex);
-
        INIT_LIST_HEAD(&qp->gid_list);
        INIT_LIST_HEAD(&qp->steering_rules);
 
@@ -696,80 +668,72 @@ err_qpn:
        return err;
 }
 
-static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
-                                           struct ib_qp_init_attr *init_attr,
-                                           struct ib_udata *udata)
+static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+                                 struct ib_qp_init_attr *init_attr,
+                                 struct ib_udata *udata)
 {
-       struct mlx4_ib_qp *qp;
        struct mlx4_ib_create_qp_rss ucmd = {};
        size_t required_cmd_sz;
        int err;
 
        if (!udata) {
                pr_debug("RSS QP with NULL udata\n");
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
        }
 
        if (udata->outlen)
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
                                        sizeof(ucmd.reserved1);
        if (udata->inlen < required_cmd_sz) {
                pr_debug("invalid inlen\n");
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
        }
 
        if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
                pr_debug("copy failed\n");
-               return ERR_PTR(-EFAULT);
+               return -EFAULT;
        }
 
        if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)))
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        if (ucmd.comp_mask || ucmd.reserved1)
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        if (udata->inlen > sizeof(ucmd) &&
            !ib_is_udata_cleared(udata, sizeof(ucmd),
                                 udata->inlen - sizeof(ucmd))) {
                pr_debug("inlen is not supported\n");
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        }
 
        if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
                pr_debug("RSS QP with unsupported QP type %d\n",
                         init_attr->qp_type);
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        }
 
        if (init_attr->create_flags) {
                pr_debug("RSS QP doesn't support create flags\n");
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        }
 
        if (init_attr->send_cq || init_attr->cap.max_send_wr) {
                pr_debug("RSS QP with unsupported send attributes\n");
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        }
 
-       qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-       if (!qp)
-               return ERR_PTR(-ENOMEM);
-
        qp->pri.vid = 0xFFFF;
        qp->alt.vid = 0xFFFF;
 
        err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
-       if (err) {
-               kfree(qp);
-               return ERR_PTR(err);
-       }
+       if (err)
+               return err;
 
        qp->ibqp.qp_num = qp->mqp.qpn;
-
-       return &qp->ibqp;
+       return 0;
 }
 
 /*
@@ -873,7 +837,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
 
        qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
 
-       mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);
        INIT_LIST_HEAD(&qp->gid_list);
@@ -922,7 +885,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
                goto err;
        }
 
-       n = ib_umem_page_count(qp->umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
        err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
 
@@ -989,13 +951,11 @@ err:
 
 static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, int sqpn,
-                           struct mlx4_ib_qp **caller_qp)
+                           struct mlx4_ib_qp *qp)
 {
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        int qpn;
        int err;
-       struct mlx4_ib_sqp *sqp = NULL;
-       struct mlx4_ib_qp *qp;
        struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
                udata, struct mlx4_ib_ucontext, ibucontext);
        enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
@@ -1043,27 +1003,18 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
                sqpn = qpn;
        }
 
-       if (!*caller_qp) {
-               if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
-                   (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
-                               MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
-                       sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
-                       if (!sqp)
-                               return -ENOMEM;
-                       qp = &sqp->qp;
-               } else {
-                       qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
-                       if (!qp)
-                               return -ENOMEM;
-               }
-               qp->pri.vid = 0xFFFF;
-               qp->alt.vid = 0xFFFF;
-       } else
-               qp = *caller_qp;
+       if (init_attr->qp_type == IB_QPT_SMI ||
+           init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI ||
+           qp_type == MLX4_IB_QPT_GSI ||
+           (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+                       MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+               qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
+               if (!qp->sqp)
+                       return -ENOMEM;
+       }
 
        qp->mlx4_ib_qp_type = qp_type;
 
-       mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);
        INIT_LIST_HEAD(&qp->gid_list);
@@ -1117,7 +1068,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
                        goto err;
                }
 
-               n = ib_umem_page_count(qp->umem);
                shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
                err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
 
@@ -1239,9 +1189,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
 
        qp->mqp.event = mlx4_ib_qp_event;
 
-       if (!*caller_qp)
-               *caller_qp = qp;
-
        spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
        mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
                         to_mcq(init_attr->recv_cq));
@@ -1293,10 +1240,7 @@ err_db:
                mlx4_db_free(dev->dev, &qp->db);
 
 err:
-       if (!sqp && !*caller_qp)
-               kfree(qp);
-       kfree(sqp);
-
+       kfree(qp->sqp);
        return err;
 }
 
@@ -1410,7 +1354,6 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
        mlx4_qp_free(dev->dev, &qp->mqp);
        mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
        del_gid_entries(qp);
-       kfree(qp->rss_ctx);
 }
 
 static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
@@ -1529,17 +1472,16 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
                return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy;
 }
 
-static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
-                                       struct ib_qp_init_attr *init_attr,
-                                       struct ib_udata *udata)
+static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+                             struct ib_qp_init_attr *init_attr,
+                             struct ib_udata *udata)
 {
-       struct mlx4_ib_qp *qp = NULL;
        int err;
        int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
        u16 xrcdn = 0;
 
        if (init_attr->rwq_ind_tbl)
-               return _mlx4_ib_create_qp_rss(pd, init_attr, udata);
+               return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata);
 
        /*
         * We only support LSO, vendor flag1, and multicast loopback blocking,
@@ -1551,16 +1493,16 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
                                        MLX4_IB_SRIOV_SQP |
                                        MLX4_IB_QP_NETIF |
                                        MLX4_IB_QP_CREATE_ROCE_V2_GSI))
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
                if (init_attr->qp_type != IB_QPT_UD)
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
        }
 
        if (init_attr->create_flags) {
                if (udata && init_attr->create_flags & ~(sup_u_create_flags))
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
 
                if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
                                                 MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
@@ -1570,7 +1512,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
                     init_attr->qp_type > IB_QPT_GSI) ||
                    (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
                     init_attr->qp_type != IB_QPT_GSI))
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
        }
 
        switch (init_attr->qp_type) {
@@ -1581,53 +1523,43 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
                fallthrough;
        case IB_QPT_XRC_INI:
                if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
-                       return ERR_PTR(-ENOSYS);
+                       return -ENOSYS;
                init_attr->recv_cq = init_attr->send_cq;
                fallthrough;
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_RAW_PACKET:
-               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-               if (!qp)
-                       return ERR_PTR(-ENOMEM);
+       case IB_QPT_UD:
                qp->pri.vid = 0xFFFF;
                qp->alt.vid = 0xFFFF;
-               fallthrough;
-       case IB_QPT_UD:
-       {
-               err = create_qp_common(pd, init_attr, udata, 0, &qp);
-               if (err) {
-                       kfree(qp);
-                       return ERR_PTR(err);
-               }
+               err = create_qp_common(pd, init_attr, udata, 0, qp);
+               if (err)
+                       return err;
 
                qp->ibqp.qp_num = qp->mqp.qpn;
                qp->xrcdn = xrcdn;
-
                break;
-       }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                int sqpn;
 
-               /* Userspace is not allowed to create special QPs: */
-               if (udata)
-                       return ERR_PTR(-EINVAL);
                if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
                        int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
                                                        1, 1, &sqpn, 0,
                                                        MLX4_RES_USAGE_DRIVER);
 
                        if (res)
-                               return ERR_PTR(res);
+                               return res;
                } else {
                        sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
                }
 
-               err = create_qp_common(pd, init_attr, udata, sqpn, &qp);
+               qp->pri.vid = 0xFFFF;
+               qp->alt.vid = 0xFFFF;
+               err = create_qp_common(pd, init_attr, udata, sqpn, qp);
                if (err)
-                       return ERR_PTR(err);
+                       return err;
 
                qp->port        = init_attr->port_num;
                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
@@ -1636,25 +1568,33 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
        }
        default:
                /* Don't support raw QPs */
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        }
-
-       return &qp->ibqp;
+       return 0;
 }
 
 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                struct ib_qp_init_attr *init_attr,
                                struct ib_udata *udata) {
        struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
-       struct ib_qp *ibqp;
        struct mlx4_ib_dev *dev = to_mdev(device);
+       struct mlx4_ib_qp *qp;
+       int ret;
 
-       ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+       qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+       if (!qp)
+               return ERR_PTR(-ENOMEM);
 
-       if (!IS_ERR(ibqp) &&
-           (init_attr->qp_type == IB_QPT_GSI) &&
+       mutex_init(&qp->mutex);
+       ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata);
+       if (ret) {
+               kfree(qp);
+               return ERR_PTR(ret);
+       }
+
+       if (init_attr->qp_type == IB_QPT_GSI &&
            !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
-               struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+               struct mlx4_ib_sqp *sqp = qp->sqp;
                int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
 
                if (is_eth &&
@@ -1666,14 +1606,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
                                sqp->roce_v2_gsi = NULL;
                        } else {
-                               sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
-                               sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+                               to_mqp(sqp->roce_v2_gsi)->flags |=
+                                       MLX4_IB_ROCE_V2_GSI_QP;
                        }
 
                        init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
                }
        }
-       return ibqp;
+       return &qp->ibqp;
 }
 
 static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
@@ -1700,10 +1640,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
                destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata);
        }
 
-       if (is_sqp(dev, mqp))
-               kfree(to_msqp(mqp));
-       else
-               kfree(mqp);
+       kfree(mqp->sqp);
+       kfree(mqp);
 
        return 0;
 }
@@ -1713,7 +1651,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
        struct mlx4_ib_qp *mqp = to_mqp(qp);
 
        if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
-               struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+               struct mlx4_ib_sqp *sqp = mqp->sqp;
 
                if (sqp->roce_v2_gsi)
                        ib_destroy_qp(sqp->roce_v2_gsi);
@@ -2575,7 +2513,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
                qp->alt_port = attr->alt_port_num;
 
        if (is_sqp(dev, qp))
-               store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+               store_sqp_attrs(qp->sqp, attr, attr_mask);
 
        /*
         * If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -2852,7 +2790,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
 
        if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
-               struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+               struct mlx4_ib_sqp *sqp = mqp->sqp;
                int err = 0;
 
                if (sqp->roce_v2_gsi)
@@ -2877,12 +2815,13 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
        return -EINVAL;
 }
 
-static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+static int build_sriov_qp0_header(struct mlx4_ib_qp *qp,
                                  const struct ib_ud_wr *wr,
                                  void *wqe, unsigned *mlx_seg_len)
 {
-       struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
-       struct ib_device *ib_dev = &mdev->ib_dev;
+       struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device);
+       struct mlx4_ib_sqp *sqp = qp->sqp;
+       struct ib_device *ib_dev = qp->ibqp.device;
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
        struct mlx4_ib_ah *ah = to_mah(wr->ah);
@@ -2904,12 +2843,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 
        /* for proxy-qp0 sends, need to add in size of tunnel header */
        /* for tunnel-qp0 sends, tunnel header is already in s/g list */
-       if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+       if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
                send_size += sizeof (struct mlx4_ib_tunnel_header);
 
        ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
 
-       if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+       if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
                sqp->ud_header.lrh.service_level =
                        be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
                sqp->ud_header.lrh.destination_lid =
@@ -2926,26 +2865,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 
        sqp->ud_header.lrh.virtual_lane    = 0;
        sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
-       err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+       err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey);
        if (err)
                return err;
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
-       if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+       if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
                sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        else
                sqp->ud_header.bth.destination_qpn =
-                       cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel);
+                       cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel);
 
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
        if (mlx4_is_master(mdev->dev)) {
-               if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+               if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey))
                        return -EINVAL;
        } else {
-               if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+               if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey))
                        return -EINVAL;
        }
        sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
-       sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
+       sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn);
 
        sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
        sqp->ud_header.immediate_present = 0;
@@ -3029,10 +2968,11 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
 }
 
 #define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
-       struct ib_device *ib_dev = sqp->qp.ibqp.device;
+       struct mlx4_ib_sqp *sqp = qp->sqp;
+       struct ib_device *ib_dev = qp->ibqp.device;
        struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_ctrl_seg *ctrl = wqe;
@@ -3056,7 +2996,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
        for (i = 0; i < wr->wr.num_sge; ++i)
                send_size += wr->wr.sg_list[i].length;
 
-       is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
+       is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
        if (is_eth) {
                enum ib_gid_type gid_type;
@@ -3070,9 +3010,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
                        if (err)
                                return err;
                } else  {
-                       err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
-                                           ah->av.ib.gid_index,
-                                           &sgid, &gid_type);
+                       err = fill_gid_by_hw_index(ibdev, qp->port,
+                                                  ah->av.ib.gid_index, &sgid,
+                                                  &gid_type);
                        if (!err) {
                                is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
                                if (is_udp) {
@@ -3117,13 +3057,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
                                 * indexes don't necessarily match the hw ones, so
                                 * we must use our own cache
                                 */
-                               sqp->ud_header.grh.source_gid.global.subnet_prefix =
-                                       cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
-                                                                   demux[sqp->qp.port - 1].
-                                                                   subnet_prefix)));
-                               sqp->ud_header.grh.source_gid.global.interface_id =
-                                       to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-                                                      guid_cache[ah->av.ib.gid_index];
+                               sqp->ud_header.grh.source_gid.global
+                                       .subnet_prefix =
+                                       cpu_to_be64(atomic64_read(
+                                               &(to_mdev(ib_dev)
+                                                         ->sriov
+                                                         .demux[qp->port - 1]
+                                                         .subnet_prefix)));
+                               sqp->ud_header.grh.source_gid.global
+                                       .interface_id =
+                                       to_mdev(ib_dev)
+                                               ->sriov.demux[qp->port - 1]
+                                               .guid_cache[ah->av.ib.gid_index];
                        } else {
                                sqp->ud_header.grh.source_gid =
                                        ah->ibah.sgid_attr->gid;
@@ -3155,10 +3100,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
        mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
        if (!is_eth) {
-               mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
-                                         (sqp->ud_header.lrh.destination_lid ==
-                                          IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
-                                         (sqp->ud_header.lrh.service_level << 8));
+               mlx->flags |=
+                       cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+                                   (sqp->ud_header.lrh.destination_lid ==
+                                                    IB_LID_PERMISSIVE ?
+                                            MLX4_WQE_MLX_SLR :
+                                            0) |
+                                   (sqp->ud_header.lrh.service_level << 8));
                if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
                        mlx->flags |= cpu_to_be32(0x1); /* force loopback */
                mlx->rlid = sqp->ud_header.lrh.destination_lid;
@@ -3204,21 +3152,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
                        sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
                }
        } else {
-               sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 :
-                                                       sl_to_vl(to_mdev(ib_dev),
-                                                                sqp->ud_header.lrh.service_level,
-                                                                sqp->qp.port);
-               if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
+               sqp->ud_header.lrh.virtual_lane =
+                       !qp->ibqp.qp_num ?
+                               15 :
+                               sl_to_vl(to_mdev(ib_dev),
+                                        sqp->ud_header.lrh.service_level,
+                                        qp->port);
+               if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
                        return -EINVAL;
                if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                        sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        }
        sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
-       if (!sqp->qp.ibqp.qp_num)
-               err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index,
+       if (!qp->ibqp.qp_num)
+               err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index,
                                         &pkey);
        else
-               err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index,
+               err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index,
                                         &pkey);
        if (err)
                return err;
@@ -3228,7 +3178,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
        sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
                                               sqp->qkey : wr->remote_qkey);
-       sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+       sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
 
        header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
 
@@ -3551,14 +3501,14 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 
        if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
-               struct mlx4_ib_sqp *sqp = to_msqp(qp);
+               struct mlx4_ib_sqp *sqp = qp->sqp;
 
                if (sqp->roce_v2_gsi) {
                        struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
                        enum ib_gid_type gid_type;
                        union ib_gid gid;
 
-                       if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+                       if (!fill_gid_by_hw_index(mdev, qp->port,
                                           ah->av.ib.gid_index,
                                           &gid, &gid_type))
                                qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
@@ -3678,8 +3628,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_TUN_SMI_OWNER:
-                       err =  build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
-                                       ctrl, &seglen);
+                       err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+                                                    &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -3715,8 +3665,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_PROXY_SMI_OWNER:
-                       err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
-                                       ctrl, &seglen);
+                       err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+                                                    &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -3749,8 +3699,7 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 
                case MLX4_IB_QPT_SMI:
                case MLX4_IB_QPT_GSI:
-                       err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
-                                       &seglen);
+                       err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -4172,6 +4121,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
        if (!qp)
                return ERR_PTR(-ENOMEM);
 
+       mutex_init(&qp->mutex);
        qp->pri.vid = 0xFFFF;
        qp->alt.vid = 0xFFFF;
 
@@ -4327,7 +4277,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
        return err;
 }
 
-void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
 {
        struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
        struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
@@ -4338,36 +4288,35 @@ void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
        destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata);
 
        kfree(qp);
+       return 0;
 }
 
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
-                             struct ib_rwq_ind_table_init_attr *init_attr,
-                             struct ib_udata *udata)
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
+                                struct ib_rwq_ind_table_init_attr *init_attr,
+                                struct ib_udata *udata)
 {
-       struct ib_rwq_ind_table *rwq_ind_table;
        struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
        unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+       struct ib_device *device = rwq_ind_table->device;
        unsigned int base_wqn;
        size_t min_resp_len;
-       int i;
-       int err;
+       int i, err = 0;
 
        if (udata->inlen > 0 &&
            !ib_is_udata_cleared(udata, 0,
                                 udata->inlen))
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
        if (udata->outlen && udata->outlen < min_resp_len)
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        if (ind_tbl_size >
            device->attrs.rss_caps.max_rwq_indirection_table_size) {
                pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
                         ind_tbl_size,
                         device->attrs.rss_caps.max_rwq_indirection_table_size);
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
        }
 
        base_wqn = init_attr->ind_tbl[0]->wq_num;
@@ -4375,39 +4324,23 @@ struct ib_rwq_ind_table
        if (base_wqn % ind_tbl_size) {
                pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
                         base_wqn);
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
        }
 
        for (i = 1; i < ind_tbl_size; i++) {
                if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
                        pr_debug("indirection table's WQNs aren't consecutive\n");
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
                }
        }
 
-       rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL);
-       if (!rwq_ind_table)
-               return ERR_PTR(-ENOMEM);
-
        if (udata->outlen) {
                resp.response_length = offsetof(typeof(resp), response_length) +
                                        sizeof(resp.response_length);
                err = ib_copy_to_udata(udata, &resp, resp.response_length);
-               if (err)
-                       goto err;
        }
 
-       return rwq_ind_table;
-
-err:
-       kfree(rwq_ind_table);
-       return ERR_PTR(err);
-}
-
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
-{
-       kfree(ib_rwq_ind_tbl);
-       return 0;
+       return err;
 }
 
 struct mlx4_ib_drain_cqe {
index 8f9d503..bf61852 100644
@@ -115,8 +115,9 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
                if (IS_ERR(srq->umem))
                        return PTR_ERR(srq->umem);
 
-               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
-                                   PAGE_SHIFT, &srq->mtt);
+               err = mlx4_mtt_init(
+                       dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE),
+                       PAGE_SHIFT, &srq->mtt);
                if (err)
                        goto err_buf;
 
@@ -260,7 +261,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
        return 0;
 }
 
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
 {
        struct mlx4_ib_dev *dev = to_mdev(srq->device);
        struct mlx4_ib_srq *msrq = to_msrq(srq);
@@ -282,6 +283,7 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
                mlx4_db_free(dev->dev, &msrq->db);
        }
        ib_umem_release(msrq->umem);
+       return 0;
 }
 
 void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
index 59e5ec3..505bc47 100644
@@ -106,8 +106,8 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
        if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
                int err;
                struct mlx5_ib_create_ah_resp resp = {};
-               u32 min_resp_len = offsetof(typeof(resp), dmac) +
-                                  sizeof(resp.dmac);
+               u32 min_resp_len =
+                       offsetofend(struct mlx5_ib_create_ah_resp, dmac);
 
                if (udata->outlen < min_resp_len)
                        return -EINVAL;
@@ -147,8 +147,3 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
 
        return 0;
 }
-
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
-       return;
-}
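min_resp_len above is now written with offsetofend(), the linux/stddef.h helper for "offset of the end of a member"; the same substitution shows up again in the flow-steering hunks below. The identity being relied on, as a local illustration only:

	/* Equivalent to the open-coded form this commit replaces. */
	#define offsetofend_sketch(TYPE, MEMBER) \
		(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

	/* e.g. the response must be large enough to reach the end of 'dmac':
	 *   u32 min_resp_len = offsetofend(struct mlx5_ib_create_ah_resp, dmac); */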
index ebb2f10..234f299 100644
@@ -168,14 +168,14 @@ void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
        mlx5_cmd_exec_in(dev, destroy_tis, in);
 }
 
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
 
        MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
        MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
        MLX5_SET(destroy_rqt_in, in, uid, uid);
-       mlx5_cmd_exec_in(dev, destroy_rqt, in);
+       return mlx5_cmd_exec_in(dev, destroy_rqt, in);
 }
 
 int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
@@ -209,14 +209,14 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
        mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
 }
 
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
 {
        u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
 
        MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
        MLX5_SET(dealloc_pd_in, in, pd, pdn);
        MLX5_SET(dealloc_pd_in, in, uid, uid);
-       mlx5_cmd_exec_in(dev, dealloc_pd, in);
+       return mlx5_cmd_exec_in(dev, dealloc_pd, in);
 }
 
 int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
index 1d192a8..88ea6ef 100644
@@ -44,10 +44,10 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
 int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
                         u64 length, u32 alignment);
 void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
 void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
 void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
 int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
                                    u16 uid);
 void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
index 145f3cb..70c8fd6 100644
@@ -117,7 +117,7 @@ err_bound:
        return ret;
 }
 
-static void mlx5_ib_destroy_counters(struct ib_counters *counters)
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
 {
        struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 
@@ -125,6 +125,7 @@ static void mlx5_ib_destroy_counters(struct ib_counters *counters)
        if (mcounters->hw_cntrs_hndl)
                mlx5_fc_destroy(to_mdev(counters->device)->mdev,
                                mcounters->hw_cntrs_hndl);
+       return 0;
 }
 
 static int mlx5_ib_create_counters(struct ib_counters *counters,
@@ -456,12 +457,12 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
                cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
                num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
        }
-       cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+       cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
        if (!cnts->names)
                return -ENOMEM;
 
        cnts->offsets = kcalloc(num_counters,
-                               sizeof(cnts->offsets), GFP_KERNEL);
+                               sizeof(*cnts->offsets), GFP_KERNEL);
        if (!cnts->offsets)
                goto err_names;
 
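The kcalloc() tweaks above switch the element size from sizeof(ptr) to sizeof(*ptr). For these pointer-sized elements the two happen to coincide, so this is hardening rather than a functional fix: sizing the allocation from the pointed-to object keeps it correct if the field's type ever changes. A minimal sketch of the idiom with a hypothetical structure:

	/* Size the allocation from the object, not from the pointer type. */
	struct counter_names {
		const char **names;	/* hypothetical, mirrors cnts->names */
	};

	static int alloc_names(struct counter_names *c, int num)
	{
		c->names = kcalloc(num, sizeof(*c->names), GFP_KERNEL);
		return c->names ? 0 : -ENOMEM;
	}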
index dceb0eb..fb62f1d 100644
@@ -168,7 +168,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 {
        enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
-       struct mlx5_ib_srq *srq;
+       struct mlx5_ib_srq *srq = NULL;
        struct mlx5_ib_wq *wq;
        u16 wqe_ctr;
        u8  roce_packet_type;
@@ -180,7 +180,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 
                if (qp->ibqp.xrcd) {
                        msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
-                       srq = to_mibsrq(msrq);
+                       if (msrq)
+                               srq = to_mibsrq(msrq);
                } else {
                        srq = to_msrq(qp->ibqp.srq);
                }
@@ -254,7 +255,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 
        switch (roce_packet_type) {
        case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
-               wc->network_hdr_type = RDMA_NETWORK_IB;
+               wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
                break;
        case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
                wc->network_hdr_type = RDMA_NETWORK_IPV6;
@@ -1023,16 +1024,21 @@ err_cqb:
        return err;
 }
 
-void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(cq->device);
        struct mlx5_ib_cq *mcq = to_mcq(cq);
+       int ret;
+
+       ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+       if (ret)
+               return ret;
 
-       mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
        if (udata)
                destroy_cq_user(mcq, udata);
        else
                destroy_cq_kernel(dev, mcq);
+       return 0;
 }
 
 static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
index e9cfb9a..492cfe0 100644
@@ -136,12 +136,9 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
 #define LAST_COUNTERS_FIELD counters
 
 /* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
-       memchr_inv((void *)&filter.field  +\
-                  sizeof(filter.field), 0,\
-                  sizeof(filter) -\
-                  offsetof(typeof(filter), field) -\
-                  sizeof(filter.field))
+#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
+       memchr_inv((void *)&filter.field + sizeof(filter.field), 0,            \
+                  sizeof(filter) - offsetofend(typeof(filter), field))
 
 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
                           bool is_egress,
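The reworked FIELDS_NOT_SUPPORTED() above keeps the original check in a tighter form: memchr_inv() returns non-NULL if any byte after the last field this kernel understands is set in the user-supplied filter, which is the cue to reject the request. A usage sketch with a made-up filter layout (only 'vlan' and earlier fields are assumed to be parsed):

	/* Hypothetical filter; everything past 'vlan' is unknown to this kernel. */
	struct example_filter {
		u32 proto;
		u32 vlan;
		u32 future_stuff;	/* added by a newer uAPI */
	};

	static bool has_unsupported_fields(struct example_filter *f)
	{
		/* true if any byte after 'vlan' is non-zero */
		return FIELDS_NOT_SUPPORTED(*f, vlan);
	}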
@@ -767,6 +764,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 {
        bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
        struct mlx5_flow_namespace *ns = NULL;
+       enum mlx5_flow_namespace_type fn_type;
        struct mlx5_ib_flow_prio *prio;
        struct mlx5_flow_table *ft;
        int max_table_size;
@@ -780,11 +778,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                                       log_max_ft_size));
        esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
                DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-       if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-               enum mlx5_flow_namespace_type fn_type;
-
-               if (flow_is_multicast_only(flow_attr) &&
-                   !dont_trap)
+       switch (flow_attr->type) {
+       case IB_FLOW_ATTR_NORMAL:
+               if (flow_is_multicast_only(flow_attr) && !dont_trap)
                        priority = MLX5_IB_FLOW_MCAST_PRIO;
                else
                        priority = ib_prio_to_core_prio(flow_attr->priority,
@@ -797,12 +793,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
                        if (!dev->is_rep && !esw_encap &&
                            MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
-                                       reformat_l3_tunnel_to_l2))
+                                                     reformat_l3_tunnel_to_l2))
                                flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
                } else {
-                       max_table_size =
-                               BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
-                                                             log_max_ft_size));
+                       max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
+                               dev->mdev, log_max_ft_size));
                        fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
                        prio = &dev->flow_db->egress_prios[priority];
                        if (!dev->is_rep && !esw_encap &&
@@ -812,27 +807,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
                num_entries = MLX5_FS_MAX_ENTRIES;
                num_groups = MLX5_FS_MAX_TYPES;
-       } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-                  flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+               break;
+       case IB_FLOW_ATTR_ALL_DEFAULT:
+       case IB_FLOW_ATTR_MC_DEFAULT:
                ns = mlx5_get_flow_namespace(dev->mdev,
                                             MLX5_FLOW_NAMESPACE_LEFTOVERS);
-               build_leftovers_ft_param(&priority,
-                                        &num_entries,
-                                        &num_groups);
+               build_leftovers_ft_param(&priority, &num_entries, &num_groups);
                prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
-       } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+               break;
+       case IB_FLOW_ATTR_SNIFFER:
                if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                                        allow_sniffer_and_nic_rx_shared_tir))
                        return ERR_PTR(-EOPNOTSUPP);
 
-               ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
-                                            MLX5_FLOW_NAMESPACE_SNIFFER_RX :
-                                            MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+               ns = mlx5_get_flow_namespace(
+                       dev->mdev, ft_type == MLX5_IB_FT_RX ?
+                                          MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+                                          MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 
                prio = &dev->flow_db->sniffer[ft_type];
                priority = 0;
                num_entries = 1;
                num_groups = 1;
+               break;
+       default:
+               break;
        }
 
        if (!ns)
@@ -954,7 +953,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
        if (!flow_is_multicast_only(flow_attr))
                set_underlay_qp(dev, spec, underlay_qpn);
 
-       if (dev->is_rep) {
+       if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
                struct mlx5_eswitch_rep *rep;
 
                rep = dev->port[flow_attr->port - 1].rep;
@@ -1116,6 +1115,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
        int err;
        static const struct ib_flow_attr flow_attr  = {
                .num_of_specs = 0,
+               .type = IB_FLOW_ATTR_SNIFFER,
                .size = sizeof(flow_attr)
        };
 
@@ -1143,10 +1143,8 @@ err:
        return ERR_PTR(err);
 }
 
-
 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                                           struct ib_flow_attr *flow_attr,
-                                          int domain,
                                           struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
@@ -1162,8 +1160,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        int underlay_qpn;
 
        if (udata && udata->inlen) {
-               min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
-                               sizeof(ucmd_hdr.reserved);
+               min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
                if (udata->inlen < min_ucmd_sz)
                        return ERR_PTR(-EOPNOTSUPP);
 
@@ -1197,10 +1194,9 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                goto free_ucmd;
        }
 
-       if (domain != IB_FLOW_DOMAIN_USER ||
-           flow_attr->port > dev->num_ports ||
-           (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
-                                 IB_FLOW_ATTR_FLAGS_EGRESS))) {
+       if (flow_attr->port > dev->num_ports ||
+           (flow_attr->flags &
+            ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) {
                err = -EINVAL;
                goto free_ucmd;
        }
@@ -1245,19 +1241,22 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                        dst->tir_num = mqp->raw_packet_qp.rq.tirn;
        }
 
-       if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+       switch (flow_attr->type) {
+       case IB_FLOW_ATTR_NORMAL:
                underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
                                       mqp->underlay_qpn :
                                       0;
                handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
                                            underlay_qpn, ucmd);
-       } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-                  flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
-               handler = create_leftovers_rule(dev, ft_prio, flow_attr,
-                                               dst);
-       } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+               break;
+       case IB_FLOW_ATTR_ALL_DEFAULT:
+       case IB_FLOW_ATTR_MC_DEFAULT:
+               handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
+               break;
+       case IB_FLOW_ATTR_SNIFFER:
                handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
-       } else {
+               break;
+       default:
                err = -EINVAL;
                goto destroy_ft;
        }
@@ -1305,39 +1304,47 @@ _get_flow_table(struct mlx5_ib_dev *dev,
 
        esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
                DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-       if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
-               max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
-                                       log_max_ft_size));
+       switch (fs_matcher->ns_type) {
+       case MLX5_FLOW_NAMESPACE_BYPASS:
+               max_table_size = BIT(
+                       MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
                if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
                        flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
                if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
                                              reformat_l3_tunnel_to_l2) &&
                    !esw_encap)
                        flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
-       } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+               break;
+       case MLX5_FLOW_NAMESPACE_EGRESS:
                max_table_size = BIT(
                        MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
-               if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+               if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
+                   !esw_encap)
                        flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
-       } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+               break;
+       case MLX5_FLOW_NAMESPACE_FDB:
                max_table_size = BIT(
                        MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
                if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
                        flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
-               if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+               if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
+                                              reformat_l3_tunnel_to_l2) &&
                    esw_encap)
                        flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
                priority = FDB_BYPASS_PATH;
-       } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
-               max_table_size =
-                       BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
-                                                      log_max_ft_size));
+               break;
+       case MLX5_FLOW_NAMESPACE_RDMA_RX:
+               max_table_size = BIT(
+                       MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
                priority = fs_matcher->priority;
-       } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
-               max_table_size =
-                       BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
-                                                      log_max_ft_size));
+               break;
+       case MLX5_FLOW_NAMESPACE_RDMA_TX:
+               max_table_size = BIT(
+                       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
                priority = fs_matcher->priority;
+               break;
+       default:
+               break;
        }
 
        max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -1346,16 +1353,24 @@ _get_flow_table(struct mlx5_ib_dev *dev,
        if (!ns)
                return ERR_PTR(-EOPNOTSUPP);
 
-       if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+       switch (fs_matcher->ns_type) {
+       case MLX5_FLOW_NAMESPACE_BYPASS:
                prio = &dev->flow_db->prios[priority];
-       else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+               break;
+       case MLX5_FLOW_NAMESPACE_EGRESS:
                prio = &dev->flow_db->egress_prios[priority];
-       else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+               break;
+       case MLX5_FLOW_NAMESPACE_FDB:
                prio = &dev->flow_db->fdb;
-       else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+               break;
+       case MLX5_FLOW_NAMESPACE_RDMA_RX:
                prio = &dev->flow_db->rdma_rx[priority];
-       else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+               break;
+       case MLX5_FLOW_NAMESPACE_RDMA_TX:
                prio = &dev->flow_db->rdma_tx[priority];
+               break;
+       default: return ERR_PTR(-EINVAL);
+       }
 
        if (!prio)
                return ERR_PTR(-EINVAL);
@@ -1488,20 +1503,25 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
                goto unlock;
        }
 
-       if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+       switch (dest_type) {
+       case MLX5_FLOW_DESTINATION_TYPE_TIR:
                dst[dst_num].type = dest_type;
                dst[dst_num++].tir_num = dest_id;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-       } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+               break;
+       case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
                dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
                dst[dst_num++].ft_num = dest_id;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-       } else  if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+               break;
+       case MLX5_FLOW_DESTINATION_TYPE_PORT:
                dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+               break;
+       default:
+               break;
        }
 
-
        if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dst[dst_num].counter_id = counter_id;
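
A note on the ucmd-size check rewritten in the mlx5_ib_create_flow() hunk above: offsetofend() folds the old offsetof() + sizeof() pair into a single expression naming the last member userspace must supply. A minimal userspace sketch of the same arithmetic, using a hypothetical header layout rather than the real mlx5 uapi struct:

#include <stddef.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's offsetofend() helper. */
#define offsetofend(TYPE, MEMBER) \
        (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

/* Hypothetical command header; the real layout lives in the mlx5 uapi. */
struct demo_create_flow_hdr {
        unsigned long long cookie;
        unsigned int flags;
        unsigned int reserved;
};

int main(void)
{
        /* Minimum inlen: everything up to and including 'reserved'. */
        size_t old_way = offsetof(struct demo_create_flow_hdr, reserved) +
                         sizeof(((struct demo_create_flow_hdr *)0)->reserved);
        size_t new_way = offsetofend(struct demo_create_flow_hdr, reserved);

        printf("min ucmd size: %zu (old) vs %zu (new)\n", old_way, new_way);
        return old_way == new_way ? 0 : 1;
}
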
index 40d4181..7fcad91 100644
 struct mlx5_ib_gsi_wr {
        struct ib_cqe cqe;
        struct ib_wc wc;
-       int send_flags;
        bool completed:1;
 };
 
-struct mlx5_ib_gsi_qp {
-       struct ib_qp ibqp;
-       struct ib_qp *rx_qp;
-       u8 port_num;
-       struct ib_qp_cap cap;
-       enum ib_sig_type sq_sig_type;
-       /* Serialize qp state modifications */
-       struct mutex mutex;
-       struct ib_cq *cq;
-       struct mlx5_ib_gsi_wr *outstanding_wrs;
-       u32 outstanding_pi, outstanding_ci;
-       int num_qps;
-       /* Protects access to the tx_qps. Post send operations synchronize
-        * with tx_qp creation in setup_qp(). Also protects the
-        * outstanding_wrs array and indices.
-        */
-       spinlock_t lock;
-       struct ib_qp **tx_qps;
-};
-
-static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
-{
-       return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
-}
-
 static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
 {
        return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
 }
 
 /* Call with gsi->lock locked */
-static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+static void generate_completions(struct mlx5_ib_qp *mqp)
 {
-       struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+       struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
        struct mlx5_ib_gsi_wr *wr;
        u32 index;
 
@@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
                if (!wr->completed)
                        break;
 
-               if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
-                   wr->send_flags & IB_SEND_SIGNALED)
-                       WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
-
+               WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
                wr->completed = false;
        }
 
@@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
        struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
        struct mlx5_ib_gsi_wr *wr =
                container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+       struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
        u64 wr_id;
        unsigned long flags;
 
@@ -106,19 +79,19 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
        wr_id = wr->wc.wr_id;
        wr->wc = *wc;
        wr->wc.wr_id = wr_id;
-       wr->wc.qp = &gsi->ibqp;
+       wr->wc.qp = &mqp->ibqp;
 
-       generate_completions(gsi);
+       generate_completions(mqp);
        spin_unlock_irqrestore(&gsi->lock, flags);
 }
 
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
-                                   struct ib_qp_init_attr *init_attr)
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+                      struct ib_qp_init_attr *attr)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_gsi_qp *gsi;
-       struct ib_qp_init_attr hw_init_attr = *init_attr;
-       const u8 port_num = init_attr->port_num;
+       struct ib_qp_init_attr hw_init_attr = *attr;
+       const u8 port_num = attr->port_num;
        int num_qps = 0;
        int ret;
 
@@ -130,26 +103,19 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
                        num_qps = MLX5_MAX_PORTS;
        }
 
-       gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
-       if (!gsi)
-               return ERR_PTR(-ENOMEM);
-
+       gsi = &mqp->gsi;
        gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
-       if (!gsi->tx_qps) {
-               ret = -ENOMEM;
-               goto err_free;
-       }
+       if (!gsi->tx_qps)
+               return -ENOMEM;
 
-       gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
-                                      sizeof(*gsi->outstanding_wrs),
-                                      GFP_KERNEL);
+       gsi->outstanding_wrs =
+               kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
+                       GFP_KERNEL);
        if (!gsi->outstanding_wrs) {
                ret = -ENOMEM;
                goto err_free_tx;
        }
 
-       mutex_init(&gsi->mutex);
-
        mutex_lock(&dev->devr.mutex);
 
        if (dev->devr.ports[port_num - 1].gsi) {
@@ -161,12 +127,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
        gsi->num_qps = num_qps;
        spin_lock_init(&gsi->lock);
 
-       gsi->cap = init_attr->cap;
-       gsi->sq_sig_type = init_attr->sq_sig_type;
-       gsi->ibqp.qp_num = 1;
+       gsi->cap = attr->cap;
        gsi->port_num = port_num;
 
-       gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+       gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
                              IB_POLL_SOFTIRQ);
        if (IS_ERR(gsi->cq)) {
                mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
@@ -182,19 +146,31 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
                hw_init_attr.cap.max_send_sge = 0;
                hw_init_attr.cap.max_inline_data = 0;
        }
-       gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
+
+       gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL);
        if (IS_ERR(gsi->rx_qp)) {
                mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
                             PTR_ERR(gsi->rx_qp));
                ret = PTR_ERR(gsi->rx_qp);
                goto err_destroy_cq;
        }
+       gsi->rx_qp->device = pd->device;
+       gsi->rx_qp->pd = pd;
+       gsi->rx_qp->real_qp = gsi->rx_qp;
+
+       gsi->rx_qp->qp_type = hw_init_attr.qp_type;
+       gsi->rx_qp->send_cq = hw_init_attr.send_cq;
+       gsi->rx_qp->recv_cq = hw_init_attr.recv_cq;
+       gsi->rx_qp->event_handler = hw_init_attr.event_handler;
+       spin_lock_init(&gsi->rx_qp->mr_lock);
+       INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs);
+       INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs);
 
-       dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
+       dev->devr.ports[attr->port_num - 1].gsi = gsi;
 
        mutex_unlock(&dev->devr.mutex);
 
-       return &gsi->ibqp;
+       return 0;
 
 err_destroy_cq:
        ib_free_cq(gsi->cq);
@@ -203,23 +179,19 @@ err_free_wrs:
        kfree(gsi->outstanding_wrs);
 err_free_tx:
        kfree(gsi->tx_qps);
-err_free:
-       kfree(gsi);
-       return ERR_PTR(ret);
+       return ret;
 }
 
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
 {
-       struct mlx5_ib_dev *dev = to_mdev(qp->device);
-       struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+       struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
        const int port_num = gsi->port_num;
        int qp_index;
        int ret;
 
-       mlx5_ib_dbg(dev, "destroying GSI QP\n");
-
        mutex_lock(&dev->devr.mutex);
-       ret = ib_destroy_qp(gsi->rx_qp);
+       ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL);
        if (ret) {
                mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
                             ret);
@@ -241,7 +213,7 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
 
        kfree(gsi->outstanding_wrs);
        kfree(gsi->tx_qps);
-       kfree(gsi);
+       kfree(mqp);
 
        return 0;
 }
@@ -259,7 +231,6 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
                        .max_send_sge = gsi->cap.max_send_sge,
                        .max_inline_data = gsi->cap.max_inline_data,
                },
-               .sq_sig_type = gsi->sq_sig_type,
                .qp_type = IB_QPT_UD,
                .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
        };
@@ -370,56 +341,54 @@ err_destroy_qp:
 
 static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
 {
+       struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
        u16 qp_index;
 
+       mutex_lock(&dev->devr.mutex);
        for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
                setup_qp(gsi, qp_index);
+       mutex_unlock(&dev->devr.mutex);
 }
 
 int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
                          int attr_mask)
 {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
-       struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
        int ret;
 
        mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
 
-       mutex_lock(&gsi->mutex);
        ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
        if (ret) {
                mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
-               goto unlock;
+               return ret;
        }
 
        if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
                setup_qps(gsi);
-
-unlock:
-       mutex_unlock(&gsi->mutex);
-
-       return ret;
+       return 0;
 }
 
 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
                         int qp_attr_mask,
                         struct ib_qp_init_attr *qp_init_attr)
 {
-       struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
        int ret;
 
-       mutex_lock(&gsi->mutex);
        ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
        qp_init_attr->cap = gsi->cap;
-       mutex_unlock(&gsi->mutex);
-
        return ret;
 }
 
 /* Call with gsi->lock locked */
-static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
                                      struct ib_ud_wr *wr, struct ib_wc *wc)
 {
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
        struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
        struct mlx5_ib_gsi_wr *gsi_wr;
 
@@ -448,22 +417,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
 }
 
 /* Call with gsi->lock locked */
-static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
-                                   struct ib_ud_wr *wr)
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
 {
        struct ib_wc wc = {
                { .wr_id = wr->wr.wr_id },
                .status = IB_WC_SUCCESS,
                .opcode = IB_WC_SEND,
-               .qp = &gsi->ibqp,
+               .qp = &mqp->ibqp,
        };
        int ret;
 
-       ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+       ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
        if (ret)
                return ret;
 
-       generate_completions(gsi);
+       generate_completions(mqp);
 
        return 0;
 }
@@ -490,7 +458,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
 int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
                          const struct ib_send_wr **bad_wr)
 {
-       struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
        struct ib_qp *tx_qp;
        unsigned long flags;
        int ret;
@@ -503,14 +472,14 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
                spin_lock_irqsave(&gsi->lock, flags);
                tx_qp = get_tx_qp(gsi, &cur_wr);
                if (!tx_qp) {
-                       ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+                       ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
                        if (ret)
                                goto err;
                        spin_unlock_irqrestore(&gsi->lock, flags);
                        continue;
                }
 
-               ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+               ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
                if (ret)
                        goto err;
 
@@ -534,7 +503,8 @@ err:
 int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
                          const struct ib_recv_wr **bad_wr)
 {
-       struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
+       struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
 
        return ib_post_recv(gsi->rx_qp, wr, bad_wr);
 }
@@ -544,7 +514,5 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
        if (!gsi)
                return;
 
-       mutex_lock(&gsi->mutex);
        setup_qps(gsi);
-       mutex_unlock(&gsi->mutex);
 }
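
The GSI rework above stops allocating struct mlx5_ib_gsi_qp separately and embeds it in struct mlx5_ib_qp, so the completion path recovers the owning QP with container_of() instead of the removed gsi_qp() wrapper. A small self-contained sketch of that recovery pattern, with simplified stand-in structs rather than the driver's real ones:

#include <stddef.h>
#include <stdio.h>

/* Userspace rendering of the kernel's container_of() idea. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_gsi_qp {
        int num_qps;
};

struct demo_qp {
        int qp_num;
        struct demo_gsi_qp gsi;        /* embedded, no separate allocation */
};

static void handle_completion(struct demo_gsi_qp *gsi)
{
        /* Walk back from the embedded member to the enclosing QP. */
        struct demo_qp *qp = container_of(gsi, struct demo_qp, gsi);

        printf("completion on qp %d (%d tx qps)\n", qp->qp_num, gsi->num_qps);
}

int main(void)
{
        struct demo_qp qp = { .qp_num = 1, .gsi = { .num_qps = 2 } };

        handle_completion(&qp.gsi);
        return 0;
}
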
index d60d632..7082172 100644
@@ -326,8 +326,8 @@ out:
        spin_unlock(&port->mp.mpi_lock);
 }
 
-static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
-                                          u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper,
+                                          u16 *active_speed, u8 *active_width)
 {
        switch (eth_proto_oper) {
        case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
@@ -384,7 +384,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
        return 0;
 }
 
-static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
                                        u8 *active_width)
 {
        switch (eth_proto_oper) {
@@ -436,7 +436,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
        return 0;
 }
 
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
                                    u8 *active_width, bool ext)
 {
        return ext ?
@@ -546,7 +546,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
                         unsigned int index, const union ib_gid *gid,
                         const struct ib_gid_attr *attr)
 {
-       enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+       enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
        u16 vlan_id = 0xffff;
        u8 roce_version = 0;
        u8 roce_l3_type = 0;
@@ -561,7 +561,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
        }
 
        switch (gid_type) {
-       case IB_GID_TYPE_IB:
+       case IB_GID_TYPE_ROCE:
                roce_version = MLX5_ROCE_VERSION_1;
                break;
        case IB_GID_TYPE_ROCE_UDP_ENCAP:
@@ -840,7 +840,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                /* We support 'Gappy' memory registration too */
                props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
        }
-       props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+       /* IB_WR_REG_MR always requires changing the entity size with UMR */
+       if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+               props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
        if (MLX5_CAP_GEN(mdev, sho)) {
                props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
                /* At this stage no support for signature handover */
@@ -1175,32 +1177,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        return 0;
 }
 
-enum mlx5_ib_width {
-       MLX5_IB_WIDTH_1X        = 1 << 0,
-       MLX5_IB_WIDTH_2X        = 1 << 1,
-       MLX5_IB_WIDTH_4X        = 1 << 2,
-       MLX5_IB_WIDTH_8X        = 1 << 3,
-       MLX5_IB_WIDTH_12X       = 1 << 4
-};
-
-static void translate_active_width(struct ib_device *ibdev, u8 active_width,
-                                 u8 *ib_width)
+static void translate_active_width(struct ib_device *ibdev, u16 active_width,
+                                  u8 *ib_width)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
 
-       if (active_width & MLX5_IB_WIDTH_1X)
+       if (active_width & MLX5_PTYS_WIDTH_1X)
                *ib_width = IB_WIDTH_1X;
-       else if (active_width & MLX5_IB_WIDTH_2X)
+       else if (active_width & MLX5_PTYS_WIDTH_2X)
                *ib_width = IB_WIDTH_2X;
-       else if (active_width & MLX5_IB_WIDTH_4X)
+       else if (active_width & MLX5_PTYS_WIDTH_4X)
                *ib_width = IB_WIDTH_4X;
-       else if (active_width & MLX5_IB_WIDTH_8X)
+       else if (active_width & MLX5_PTYS_WIDTH_8X)
                *ib_width = IB_WIDTH_8X;
-       else if (active_width & MLX5_IB_WIDTH_12X)
+       else if (active_width & MLX5_PTYS_WIDTH_12X)
                *ib_width = IB_WIDTH_12X;
        else {
                mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
-                           (int)active_width);
+                           active_width);
                *ib_width = IB_WIDTH_4X;
        }
 
@@ -1277,7 +1271,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
        u16 max_mtu;
        u16 oper_mtu;
        int err;
-       u8 ib_link_width_oper;
+       u16 ib_link_width_oper;
        u8 vl_hw_cap;
 
        rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -1310,16 +1304,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
        if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
                props->port_cap_flags2 = rep->cap_mask2;
 
-       err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
+       err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
+                                     &props->active_speed, port);
        if (err)
                goto out;
 
        translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
 
-       err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
-       if (err)
-               goto out;
-
        mlx5_query_port_max_mtu(mdev, &max_mtu, port);
 
        props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
@@ -2354,7 +2345,9 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
                        return -EPERM;
 
                if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
-                     MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
+                     MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) ||
+                     MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) ||
+                     MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2)))
                        return -EOPNOTSUPP;
                break;
        }
@@ -2569,12 +2562,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return 0;
 }
 
-static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        struct mlx5_ib_dev *mdev = to_mdev(pd->device);
        struct mlx5_ib_pd *mpd = to_mpd(pd);
 
-       mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
+       return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
 }
 
 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
@@ -2699,9 +2692,7 @@ static void pkey_change_handler(struct work_struct *work)
                container_of(work, struct mlx5_ib_port_resources,
                             pkey_change_work);
 
-       mutex_lock(&ports->devr->mutex);
        mlx5_ib_gsi_pkey_change(ports->gsi);
-       mutex_unlock(&ports->devr->mutex);
 }
 
 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
@@ -3127,11 +3118,9 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
        atomic_inc(&devr->p0->usecnt);
        atomic_set(&devr->s1->usecnt, 0);
 
-       for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
+       for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
                INIT_WORK(&devr->ports[port].pkey_change_work,
                          pkey_change_handler);
-               devr->ports[port].devr = devr;
-       }
 
        return 0;
 
@@ -4098,6 +4087,8 @@ static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
 static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
        .alloc_mw = mlx5_ib_alloc_mw,
        .dealloc_mw = mlx5_ib_dealloc_mw,
+
+       INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw),
 };
 
 static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
@@ -4268,6 +4259,9 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
        .destroy_wq = mlx5_ib_destroy_wq,
        .get_netdev = mlx5_ib_get_netdev,
        .modify_wq = mlx5_ib_modify_wq,
+
+       INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
+                          ib_rwq_ind_tbl),
 };
 
 static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
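
The main.c hunks above widen the speed/width plumbing to u16 and move translate_active_width() onto the MLX5_PTYS_WIDTH_* bits. A compact sketch of the same first-match mapping, assuming the one-hot bit layout shown in the removed MLX5_IB_WIDTH_* enum:

#include <stdint.h>
#include <stdio.h>

/* Assumed one-hot width bits, mirroring the removed MLX5_IB_WIDTH_* enum. */
enum {
        DEMO_WIDTH_1X  = 1 << 0,
        DEMO_WIDTH_2X  = 1 << 1,
        DEMO_WIDTH_4X  = 1 << 2,
        DEMO_WIDTH_8X  = 1 << 3,
        DEMO_WIDTH_12X = 1 << 4,
};

/* First set bit wins; fall back to 4X like the driver does. */
static unsigned int demo_translate_width(uint16_t active_width)
{
        if (active_width & DEMO_WIDTH_1X)
                return 1;
        if (active_width & DEMO_WIDTH_2X)
                return 2;
        if (active_width & DEMO_WIDTH_4X)
                return 4;
        if (active_width & DEMO_WIDTH_8X)
                return 8;
        if (active_width & DEMO_WIDTH_12X)
                return 12;
        return 4;        /* unknown encoding: default to 4X */
}

int main(void)
{
        printf("width: %ux\n", demo_translate_width(DEMO_WIDTH_4X));
        return 0;
}
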
index c19ec9f..13de3d2 100644
@@ -169,8 +169,8 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
                          int page_shift, __be64 *pas, int access_flags)
 {
        return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
-                                     ib_umem_num_pages(umem), pas,
-                                     access_flags);
+                                     ib_umem_num_dma_blocks(umem, PAGE_SIZE),
+                                     pas, access_flags);
 }
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
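
The mem.c change above populates the PAS list from ib_umem_num_dma_blocks(umem, PAGE_SIZE) rather than ib_umem_num_pages(). The rounding that block count is expected to perform, aligning the start down and the end up to the block size, can be sketched in plain C as below; this is an illustration of the arithmetic under that assumption, not a copy of the kernel helper:

#include <stdint.h>
#include <stdio.h>

/* Round v down/up to a power-of-two block size. */
static uint64_t align_down(uint64_t v, uint64_t blk) { return v & ~(blk - 1); }
static uint64_t align_up(uint64_t v, uint64_t blk)   { return align_down(v + blk - 1, blk); }

/* Blocks of size 'blk' needed to cover [iova, iova + length). */
static uint64_t num_dma_blocks(uint64_t iova, uint64_t length, uint64_t blk)
{
        return (align_up(iova + length, blk) - align_down(iova, blk)) / blk;
}

int main(void)
{
        /* 6000 bytes starting 100 bytes into a 4 KiB block span 2 blocks. */
        printf("%llu\n", (unsigned long long)num_dma_blocks(100, 6000, 4096));
        return 0;
}
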
index 5287fc8..b1f2b34 100644
@@ -384,6 +384,22 @@ struct mlx5_ib_dct {
        u32                     *in;
 };
 
+struct mlx5_ib_gsi_qp {
+       struct ib_qp *rx_qp;
+       u8 port_num;
+       struct ib_qp_cap cap;
+       struct ib_cq *cq;
+       struct mlx5_ib_gsi_wr *outstanding_wrs;
+       u32 outstanding_pi, outstanding_ci;
+       int num_qps;
+       /* Protects access to the tx_qps. Post send operations synchronize
+        * with tx_qp creation in setup_qp(). Also protects the
+        * outstanding_wrs array and indices.
+        */
+       spinlock_t lock;
+       struct ib_qp **tx_qps;
+};
+
 struct mlx5_ib_qp {
        struct ib_qp            ibqp;
        union {
@@ -391,6 +407,7 @@ struct mlx5_ib_qp {
                struct mlx5_ib_raw_packet_qp raw_packet_qp;
                struct mlx5_ib_rss_qp rss_qp;
                struct mlx5_ib_dct dct;
+               struct mlx5_ib_gsi_qp gsi;
        };
        struct mlx5_frag_buf    buf;
 
@@ -693,10 +710,7 @@ struct mlx5_mr_cache {
        unsigned long           last_add;
 };
 
-struct mlx5_ib_gsi_qp;
-
 struct mlx5_ib_port_resources {
-       struct mlx5_ib_resources *devr;
        struct mlx5_ib_gsi_qp *gsi;
        struct work_struct pkey_change_work;
 };
@@ -1119,13 +1133,16 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
 int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
                      struct ib_udata *udata);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+       return 0;
+}
 int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
                       struct ib_udata *udata);
 int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
 int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                          const struct ib_recv_wr **bad_wr);
 int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
@@ -1148,7 +1165,7 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
                         size_t buflen, size_t *bc);
 int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                      struct ib_udata *udata);
-void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
@@ -1163,8 +1180,7 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
                      struct ib_sge *sg_list,
                      u32 num_sge,
                      struct uverbs_attr_bundle *attrs);
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-                              struct ib_udata *udata);
+int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
                       int page_shift, int flags);
@@ -1193,7 +1209,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                        const struct ib_mad *in, struct ib_mad *out,
                        size_t *out_mad_size, u16 *out_mad_pkey_index);
 int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
 int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
@@ -1229,7 +1245,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-                                      unsigned int entry);
+                                      unsigned int entry, int access_flags);
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
 
@@ -1238,12 +1254,12 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
                                struct ib_wq_init_attr *init_attr,
                                struct ib_udata *udata);
-void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
 int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
                      u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
-                                                     struct ib_rwq_ind_table_init_attr *init_attr,
-                                                     struct ib_udata *udata);
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+                                struct ib_rwq_ind_table_init_attr *init_attr,
+                                struct ib_udata *udata);
 int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
 struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
                               struct ib_ucontext *context,
@@ -1267,6 +1283,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
                               enum ib_uverbs_advise_mr_advice advice,
                               u32 flags, struct ib_sge *sg_list, u32 num_sge);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 {
@@ -1288,6 +1305,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 {
        return -EOPNOTSUPP;
 }
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+{
+       return -EOPNOTSUPP;
+}
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
 extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
@@ -1318,9 +1339,9 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
 void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
 
 /* GSI QP helper functions */
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
-                                   struct ib_qp_init_attr *init_attr);
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+                      struct ib_qp_init_attr *attr);
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp);
 int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
                          int attr_mask);
 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -1358,7 +1379,7 @@ static inline void init_query_mad(struct ib_smp *mad)
 
 static inline int is_qp1(enum ib_qp_type qp_type)
 {
-       return qp_type == MLX5_IB_QPT_HW_GSI;
+       return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
 }
 
 #define MLX5_MAX_UMR_SHIFT 16
@@ -1442,25 +1463,54 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
                        struct mlx5_bfreg_info *bfregi, u32 bfregn,
                        bool dyn_bfreg);
 
-static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
-                                      bool do_modify_atomic, int access_flags)
+static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
+                                                size_t length)
 {
+       /*
+        * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
+        * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
+        * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
+        * can never be enabled without this capability. Simplify this weird
+        * quirky hardware by just saying it can't use PAS lists with UMR at
+        * all.
+        */
        if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
                return false;
 
-       if (do_modify_atomic &&
+       /*
+        * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
+        * used.
+        */
+       if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+           length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
+               return false;
+       return true;
+}
+
+/*
+ * true if an existing MR can be reconfigured to new access_flags using UMR.
+ * Older HW cannot use UMR to update certain elements of the MKC. See
+ * umr_check_mkey_mask() and get_umr_update_access_mask().
+ */
+static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
+                                                unsigned int current_access_flags,
+                                                unsigned int target_access_flags)
+{
+       unsigned int diffs = current_access_flags ^ target_access_flags;
+
+       if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
            MLX5_CAP_GEN(dev->mdev, atomic) &&
            MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
                return false;
 
-       if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+       if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
            MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
            !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
                return false;
 
-       if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
-            MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
-            !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+       if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+           MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+           !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
                return false;
 
        return true;
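
mlx5_ib_can_reconfig_with_umr() above keys its capability checks off the XOR of the current and target access flags, so only bits that would actually change are tested. A stand-alone sketch of that diff-then-test pattern, with demo flag and capability values that are placeholders rather than the real IB_ACCESS_* or firmware bits:

#include <stdbool.h>
#include <stdio.h>

/* Demo flag bits standing in for IB_ACCESS_* values. */
#define DEMO_REMOTE_ATOMIC    (1u << 0)
#define DEMO_RELAXED_ORDERING (1u << 1)

/* Pretend capability bits: what this demo "HW" can retune via UMR. */
static bool hw_can_toggle_atomic = false;
static bool hw_can_toggle_relaxed = true;

static bool can_reconfig(unsigned int cur, unsigned int target)
{
        unsigned int diffs = cur ^ target;        /* bits that would change */

        if ((diffs & DEMO_REMOTE_ATOMIC) && !hw_can_toggle_atomic)
                return false;
        if ((diffs & DEMO_RELAXED_ORDERING) && !hw_can_toggle_relaxed)
                return false;
        return true;
}

int main(void)
{
        /* Enabling relaxed ordering only: allowed on this demo HW. */
        printf("%d\n", can_reconfig(0, DEMO_RELAXED_ORDERING));
        /* Flipping remote atomic: rejected, the bit changes and HW can't. */
        printf("%d\n", can_reconfig(DEMO_REMOTE_ATOMIC, 0));
        return 0;
}
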
index 3e6f2f9..b261797 100644
@@ -50,6 +50,29 @@ enum {
 static void
 create_mkey_callback(int status, struct mlx5_async_work *context);
 
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+                                         struct ib_pd *pd)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+       MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+       MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+       MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+       MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+       MLX5_SET(mkc, mkc, lr, 1);
+
+       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+               MLX5_SET(mkc, mkc, relaxed_ordering_write,
+                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
+       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+               MLX5_SET(mkc, mkc, relaxed_ordering_read,
+                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
+
+       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
 static void
 assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
                    u32 *in)
@@ -100,7 +123,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
        return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 }
 
-static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
+                                         u64 length)
 {
        return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
                length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
@@ -152,12 +176,12 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
        mr->cache_ent = ent;
        mr->dev = ent->dev;
 
+       set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
        MLX5_SET(mkc, mkc, free, 1);
        MLX5_SET(mkc, mkc, umr_en, 1);
        MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
        MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
 
-       MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
        MLX5_SET(mkc, mkc, log_page_size, ent->page);
        return mr;
@@ -534,7 +558,7 @@ static void cache_work_func(struct work_struct *work)
 
 /* Allocate a special entry from the cache */
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-                                      unsigned int entry)
+                                      unsigned int entry, int access_flags)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
@@ -544,6 +568,10 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
                    entry >= ARRAY_SIZE(cache->ent)))
                return ERR_PTR(-EINVAL);
 
+       /* Matches access in alloc_cache_mr() */
+       if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+               return ERR_PTR(-EOPNOTSUPP);
+
        ent = &cache->ent[entry];
        spin_lock_irq(&ent->lock);
        if (list_empty(&ent->head)) {
@@ -558,6 +586,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
                queue_adjust_cache_locked(ent);
                spin_unlock_irq(&ent->lock);
        }
+       mr->access_flags = access_flags;
        return mr;
 }
 
@@ -730,8 +759,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                           MLX5_IB_UMR_OCTOWORD;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
-                   !dev->is_rep &&
-                   mlx5_core_is_pf(dev->mdev))
+                   !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
+                   mlx5_ib_can_load_pas_with_umr(dev, 0))
                        ent->limit = dev->mdev->profile->mr_cache[i].limit;
                else
                        ent->limit = 0;
@@ -774,29 +803,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
-                                         struct ib_pd *pd)
-{
-       struct mlx5_ib_dev *dev = to_mdev(pd->device);
-
-       MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
-       MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
-       MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
-       MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
-       MLX5_SET(mkc, mkc, lr, 1);
-
-       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
-               MLX5_SET(mkc, mkc, relaxed_ordering_write,
-                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
-       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
-               MLX5_SET(mkc, mkc, relaxed_ordering_read,
-                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
-
-       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
-       MLX5_SET(mkc, mkc, qpn, 0xffffff);
-       MLX5_SET64(mkc, mkc, start_addr, start_addr);
-}
-
 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -979,6 +985,11 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
 
        if (!ent)
                return ERR_PTR(-E2BIG);
+
+       /* Matches access in alloc_cache_mr() */
+       if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+               return ERR_PTR(-EOPNOTSUPP);
+
        mr = get_cache_mr(ent);
        if (!mr) {
                mr = create_cache_mr(ent);
@@ -1181,38 +1192,31 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
                goto err_1;
        }
        pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
-       if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
+       if (populate) {
+               if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
+                       err = -EINVAL;
+                       goto err_2;
+               }
                mlx5_ib_populate_pas(dev, umem, page_shift, pas,
                                     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+       }
 
        /* The pg_access bit allows setting the access flags
         * in the page list submitted with the command. */
        MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
 
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+       set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
+                                     populate ? pd : dev->umrc.pd);
        MLX5_SET(mkc, mkc, free, !populate);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
-       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
-               MLX5_SET(mkc, mkc, relaxed_ordering_write,
-                        !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
-       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
-               MLX5_SET(mkc, mkc, relaxed_ordering_read,
-                        !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
-       MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
-       MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
-       MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
-       MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
-       MLX5_SET(mkc, mkc, lr, 1);
        MLX5_SET(mkc, mkc, umr_en, 1);
 
-       MLX5_SET64(mkc, mkc, start_addr, virt_addr);
        MLX5_SET64(mkc, mkc, len, length);
-       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
        MLX5_SET(mkc, mkc, bsf_octword_size, 0);
        MLX5_SET(mkc, mkc, translations_octword_size,
                 get_octo_len(virt_addr, length, page_shift));
        MLX5_SET(mkc, mkc, log_page_size, page_shift);
-       MLX5_SET(mkc, mkc, qpn, 0xffffff);
        if (populate) {
                MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
                         get_octo_len(virt_addr, length, page_shift));
@@ -1308,7 +1312,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
                      struct uverbs_attr_bundle *attrs)
 {
        if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
-           advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+           advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+           advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
                return -EOPNOTSUPP;
 
        return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
@@ -1353,7 +1358,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_mr *mr = NULL;
-       bool use_umr;
+       bool xlt_with_umr;
        struct ib_umem *umem;
        int page_shift;
        int npages;
@@ -1367,6 +1372,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
                    start, virt_addr, length, access_flags);
 
+       xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
+       /* ODP requires xlt update via umr to work. */
+       if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
+               return ERR_PTR(-EINVAL);
+
        if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
            length == U64_MAX) {
                if (virt_addr != start)
@@ -1387,28 +1397,17 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (err < 0)
                return ERR_PTR(err);
 
-       use_umr = mlx5_ib_can_use_umr(dev, true, access_flags);
-
-       if (order <= mr_cache_max_order(dev) && use_umr) {
+       if (xlt_with_umr) {
                mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
                                         page_shift, order, access_flags);
-               if (PTR_ERR(mr) == -EAGAIN) {
-                       mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
+               if (IS_ERR(mr))
                        mr = NULL;
-               }
-       } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
-               if (access_flags & IB_ACCESS_ON_DEMAND) {
-                       err = -EINVAL;
-                       pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
-                       goto error;
-               }
-               use_umr = false;
        }
 
        if (!mr) {
                mutex_lock(&dev->slow_path_mutex);
                mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
-                               page_shift, access_flags, !use_umr);
+                               page_shift, access_flags, !xlt_with_umr);
                mutex_unlock(&dev->slow_path_mutex);
        }
 
@@ -1422,15 +1421,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        mr->umem = umem;
        set_mr_fields(dev, mr, npages, length, access_flags);
 
-       if (use_umr) {
+       if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+               /*
+                * If the MR was created with reg_create then it will be
+                * configured properly but left disabled. It is safe to go ahead
+                * and configure it again via UMR while enabling it.
+                */
                int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
 
-               if (access_flags & IB_ACCESS_ON_DEMAND)
-                       update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
-
                err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
                                         update_xlt_flags);
-
                if (err) {
                        dereg_mr(dev, mr);
                        return ERR_PTR(err);
@@ -1448,6 +1448,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                        dereg_mr(dev, mr);
                        return ERR_PTR(err);
                }
+
+               err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
+               if (err) {
+                       dereg_mr(dev, mr);
+                       return ERR_PTR(err);
+               }
        }
 
        return &mr->ibmr;
@@ -1555,8 +1561,11 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                        goto err;
        }
 
-       if (!mlx5_ib_can_use_umr(dev, true, access_flags) ||
-           (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
+       if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
+                                          access_flags) ||
+           !mlx5_ib_can_load_pas_with_umr(dev, len) ||
+           (flags & IB_MR_REREG_TRANS &&
+            !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
                /*
                 * UMR can't be used - MKey needs to be replaced.
                 */
@@ -1727,9 +1736,9 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
 
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
+       /* This is only used from the kernel, so setting the PD is OK. */
+       set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
        MLX5_SET(mkc, mkc, free, 1);
-       MLX5_SET(mkc, mkc, qpn, 0xffffff);
-       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
        MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
        MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
@@ -1973,12 +1982,11 @@ struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
                                  max_num_meta_sg);
 }
 
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-                              struct ib_udata *udata)
+int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 {
-       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-       struct mlx5_ib_mw *mw = NULL;
+       struct mlx5_ib_mw *mw = to_mmw(ibmw);
        u32 *in = NULL;
        void *mkc;
        int ndescs;
@@ -1991,21 +1999,20 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 
        err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
        if (err)
-               return ERR_PTR(err);
+               return err;
 
        if (req.comp_mask || req.reserved1 || req.reserved2)
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        if (udata->inlen > sizeof(req) &&
            !ib_is_udata_cleared(udata, sizeof(req),
                                 udata->inlen - sizeof(req)))
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
 
-       mw = kzalloc(sizeof(*mw), GFP_KERNEL);
        in = kzalloc(inlen, GFP_KERNEL);
-       if (!mw || !in) {
+       if (!in) {
                err = -ENOMEM;
                goto free;
        }
@@ -2014,11 +2021,11 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 
        MLX5_SET(mkc, mkc, free, 1);
        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
-       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+       MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
        MLX5_SET(mkc, mkc, umr_en, 1);
        MLX5_SET(mkc, mkc, lr, 1);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
-       MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+       MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
 
        err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
@@ -2026,17 +2033,15 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
                goto free;
 
        mw->mmkey.type = MLX5_MKEY_MW;
-       mw->ibmw.rkey = mw->mmkey.key;
+       ibmw->rkey = mw->mmkey.key;
        mw->ndescs = ndescs;
 
-       resp.response_length = min(offsetof(typeof(resp), response_length) +
-                                  sizeof(resp.response_length), udata->outlen);
+       resp.response_length =
+               min(offsetofend(typeof(resp), response_length), udata->outlen);
        if (resp.response_length) {
                err = ib_copy_to_udata(udata, &resp, resp.response_length);
-               if (err) {
-                       mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
-                       goto free;
-               }
+               if (err)
+                       goto free_mkey;
        }
 
        if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
@@ -2048,21 +2053,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
        }
 
        kfree(in);
-       return &mw->ibmw;
+       return 0;
 
 free_mkey:
        mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
 free:
-       kfree(mw);
        kfree(in);
-       return ERR_PTR(err);
+       return err;
 }
 
 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
 {
        struct mlx5_ib_dev *dev = to_mdev(mw->device);
        struct mlx5_ib_mw *mmw = to_mmw(mw);
-       int err;
 
        if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
                xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
@@ -2073,11 +2076,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
                synchronize_srcu(&dev->odp_srcu);
        }
 
-       err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
-       if (err)
-               return err;
-       kfree(mmw);
-       return 0;
+       return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
 }
 
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
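The response_length handling above replaces open-coded offsetof() + sizeof() with offsetofend(), which evaluates to the offset of a member plus its own size, clamped to what the user buffer can actually receive. A minimal sketch of the pattern, with a hypothetical response struct:

/* Sketch only -- the struct and field names below are hypothetical. */
#include <linux/kernel.h>	/* min() */
#include <linux/stddef.h>	/* offsetofend() */
#include <linux/types.h>

struct example_resp {
	__u32 comp_mask;
	__u32 response_length;
	__u32 newer_field;	/* fields added after response_length */
};

static size_t example_resp_len(size_t udata_outlen)
{
	/* Copy no more than the user buffer holds, and no more than up to
	 * (and including) response_length itself.
	 */
	return min(offsetofend(struct example_resp, response_length),
		   udata_outlen);
}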
index cfd7efa..5c853ec 100644 (file)
@@ -382,7 +382,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
        memset(caps, 0, sizeof(*caps));
 
        if (!MLX5_CAP_GEN(dev->mdev, pg) ||
-           !mlx5_ib_can_use_umr(dev, true, 0))
+           !mlx5_ib_can_load_pas_with_umr(dev, 0))
                return;
 
        caps->general_caps = IB_ODP_SUPPORT;
@@ -476,12 +476,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
        if (IS_ERR(odp))
                return ERR_CAST(odp);
 
-       ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY);
+       ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY,
+                                      imr->access_flags);
        if (IS_ERR(mr))
                goto out_umem;
 
        mr->ibmr.pd = imr->ibmr.pd;
-       mr->access_flags = imr->access_flags;
        mr->umem = &odp->umem;
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
@@ -540,14 +540,13 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
        if (IS_ERR(umem_odp))
                return ERR_CAST(umem_odp);
 
-       imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY);
+       imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags);
        if (IS_ERR(imr)) {
                err = PTR_ERR(imr);
                goto out_umem;
        }
 
        imr->ibmr.pd = &pd->ibpd;
-       imr->access_flags = access_flags;
        imr->mmkey.iova = 0;
        imr->umem = &umem_odp->umem;
        imr->ibmr.lkey = imr->mmkey.key;
@@ -666,15 +665,21 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
 }
 
 #define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
+#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
+#define MLX5_PF_FLAGS_ENABLE BIT(3)
 static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
                             u64 user_va, size_t bcnt, u32 *bytes_mapped,
                             u32 flags)
 {
        int page_shift, ret, np;
        bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
-       unsigned long current_seq;
        u64 access_mask;
        u64 start_idx;
+       bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
+       u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
+
+       if (flags & MLX5_PF_FLAGS_ENABLE)
+               xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
 
        page_shift = odp->page_shift;
        start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
@@ -683,25 +688,15 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
        if (odp->umem.writable && !downgrade)
                access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-       current_seq = mmu_interval_read_begin(&odp->notifier);
-
-       np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
-                                      current_seq);
+       np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
        if (np < 0)
                return np;
 
-       mutex_lock(&odp->umem_mutex);
-       if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
-               /*
-                * No need to check whether the MTTs really belong to
-                * this MR, since ib_umem_odp_map_dma_pages already
-                * checks this.
-                */
-               ret = mlx5_ib_update_xlt(mr, start_idx, np,
-                                        page_shift, MLX5_IB_UPD_XLT_ATOMIC);
-       } else {
-               ret = -EAGAIN;
-       }
+       /*
+        * No need to check whether the MTTs really belong to this MR, since
+        * ib_umem_odp_map_dma_and_lock already checks this.
+        */
+       ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
        mutex_unlock(&odp->umem_mutex);
 
        if (ret < 0) {
@@ -836,6 +831,20 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
                                     flags);
 }
 
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+{
+       u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
+       int ret;
+
+       if (enable)
+               flags |= MLX5_PF_FLAGS_ENABLE;
+
+       ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
+                               mr->umem->address, mr->umem->length, NULL,
+                               flags);
+       return ret >= 0 ? 0 : ret;
+}
+
 struct pf_frame {
        struct pf_frame *next;
        u32 key;
@@ -1862,6 +1871,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
        if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
                pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
 
+       if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+               pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;
+
        if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
                return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
                                                num_sge);
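The removed mmu_interval_read_begin()/mmu_interval_read_retry() sequence is no longer needed because ib_umem_odp_map_dma_and_lock() performs the notifier sequence check itself and, on success, returns with odp->umem_mutex held; the caller only pushes the XLT update and unlocks. A hedged sketch of the resulting calling convention (variable names reused from the hunk above):

/* Sketch of the locking contract relied on by pagefault_real_mr(). */
np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
if (np < 0)
	return np;	/* nothing was mapped and umem_mutex is not held */

/*
 * On success umem_mutex is held and the mapping is stable, so the device
 * translation entries can be updated without an explicit retry loop.
 */
ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
mutex_unlock(&odp->umem_mutex);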
index 5758dbe..600e056 100644 (file)
@@ -1477,7 +1477,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                        resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
                        resp->tirn = rq->tirn;
                        resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
-                       if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+                       if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+                           MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
                                resp->tir_icm_addr = MLX5_GET(
                                        create_tir_out, out, icm_address_31_0);
                                resp->tir_icm_addr |=
@@ -1739,7 +1740,8 @@ create_tir:
        if (mucontext->devx_uid) {
                params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
                params->resp.tirn = qp->rss_qp.tirn;
-               if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+               if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+                   MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
                        params->resp.tir_icm_addr =
                                MLX5_GET(create_tir_out, out, icm_address_31_0);
                        params->resp.tir_icm_addr |=
@@ -2409,6 +2411,9 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        u32 uidx = params->uidx;
        void *dctc;
 
+       if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
+               return -EOPNOTSUPP;
+
        qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
        if (!qp->dct.in)
                return -ENOMEM;
@@ -2506,18 +2511,6 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                return -EINVAL;
        }
 
-       switch (attr->qp_type) {
-       case IB_QPT_SMI:
-       case MLX5_IB_QPT_HW_GSI:
-       case MLX5_IB_QPT_REG_UMR:
-       case IB_QPT_GSI:
-               mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n",
-                           attr->qp_type);
-               return -EINVAL;
-       default:
-               break;
-       }
-
        /*
         * We don't need to see this warning; it means that kernel code is
         * missing an ib_pd. Placed here to catch developers' mistakes.
@@ -2780,21 +2773,23 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                goto out;
        }
 
-       if (qp->type == MLX5_IB_QPT_DCT) {
+       switch (qp->type) {
+       case MLX5_IB_QPT_DCT:
                err = create_dct(dev, pd, qp, params);
-               goto out;
-       }
-
-       if (qp->type == IB_QPT_XRC_TGT) {
+               break;
+       case IB_QPT_XRC_TGT:
                err = create_xrc_tgt_qp(dev, qp, params);
-               goto out;
+               break;
+       case IB_QPT_GSI:
+               err = mlx5_ib_create_gsi(pd, qp, params->attr);
+               break;
+       default:
+               if (params->udata)
+                       err = create_user_qp(dev, pd, qp, params);
+               else
+                       err = create_kernel_qp(dev, pd, qp, params);
        }
 
-       if (params->udata)
-               err = create_user_qp(dev, pd, qp, params);
-       else
-               err = create_kernel_qp(dev, pd, qp, params);
-
 out:
        if (err) {
                mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
@@ -2934,9 +2929,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
        if (err)
                return ERR_PTR(err);
 
-       if (attr->qp_type == IB_QPT_GSI)
-               return mlx5_ib_gsi_create_qp(pd, attr);
-
        params.udata = udata;
        params.uidx = MLX5_IB_DEFAULT_UIDX;
        params.attr = attr;
@@ -3005,9 +2997,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
        return &qp->ibqp;
 
 destroy_qp:
-       if (qp->type == MLX5_IB_QPT_DCT) {
+       switch (qp->type) {
+       case MLX5_IB_QPT_DCT:
                mlx5_ib_destroy_dct(qp);
-       } else {
+               break;
+       case IB_QPT_GSI:
+               mlx5_ib_destroy_gsi(qp);
+               break;
+       default:
                /*
                 * The lines below are a temporary solution until QP
                 * allocation is moved under IB/core responsibility.
@@ -3032,7 +3029,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
        struct mlx5_ib_qp *mqp = to_mqp(qp);
 
        if (unlikely(qp->qp_type == IB_QPT_GSI))
-               return mlx5_ib_gsi_destroy_qp(qp);
+               return mlx5_ib_destroy_gsi(mqp);
 
        if (mqp->type == MLX5_IB_QPT_DCT)
                return mlx5_ib_destroy_dct(mqp);
@@ -3088,20 +3085,44 @@ enum {
        MLX5_PATH_FLAG_COUNTER  = 1 << 2,
 };
 
+static int ib_to_mlx5_rate_map(u8 rate)
+{
+       switch (rate) {
+       case IB_RATE_PORT_CURRENT:
+               return 0;
+       case IB_RATE_56_GBPS:
+               return 1;
+       case IB_RATE_25_GBPS:
+               return 2;
+       case IB_RATE_100_GBPS:
+               return 3;
+       case IB_RATE_200_GBPS:
+               return 4;
+       case IB_RATE_50_GBPS:
+               return 5;
+       default:
+               return rate + MLX5_STAT_RATE_OFFSET;
+       };
+
+       return 0;
+}
+
 static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
 {
+       u32 stat_rate_support;
+
        if (rate == IB_RATE_PORT_CURRENT)
                return 0;
 
        if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
                return -EINVAL;
 
+       stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
        while (rate != IB_RATE_PORT_CURRENT &&
-              !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
-                MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
+              !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
                --rate;
 
-       return rate ? rate + MLX5_STAT_RATE_OFFSET : rate;
+       return ib_to_mlx5_rate_map(rate);
 }
 
 static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
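With the explicit mapping above, stat_rate_support is consulted as a bitmask indexed by the device rate code, rather than by rate + MLX5_STAT_RATE_OFFSET for every enum value. A small worked example (the capability value is hypothetical):

/* Sketch: can the HCA report statistics at 100 Gb/s? */
u32 stat_rate_support = 0x0000003f;			/* hypothetical CAP value */
int code = ib_to_mlx5_rate_map(IB_RATE_100_GBPS);	/* maps to 3 */
bool supported = stat_rate_support & BIT(code);		/* bit 3 -> true here */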
@@ -3643,14 +3664,12 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
                MLX5_MAX_PORTS + 1;
 }
 
-static bool qp_supports_affinity(struct ib_qp *qp)
+static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
 {
-       if ((qp->qp_type == IB_QPT_RC) ||
-           (qp->qp_type == IB_QPT_UD) ||
-           (qp->qp_type == IB_QPT_UC) ||
-           (qp->qp_type == IB_QPT_RAW_PACKET) ||
-           (qp->qp_type == IB_QPT_XRC_INI) ||
-           (qp->qp_type == IB_QPT_XRC_TGT))
+       if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
+           (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
+           (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
+           (qp->type == MLX5_IB_QPT_DCI))
                return true;
        return false;
 }
@@ -3668,7 +3687,7 @@ static unsigned int get_tx_affinity(struct ib_qp *qp,
        unsigned int tx_affinity;
 
        if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
-             qp_supports_affinity(qp)))
+             qp_supports_affinity(mqp)))
                return 0;
 
        if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
@@ -4161,7 +4180,11 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        MLX5_SET(dctc, dctc, rae, 1);
                }
                MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
-               MLX5_SET(dctc, dctc, port, attr->port_num);
+               if (mlx5_lag_is_active(dev->mdev))
+                       MLX5_SET(dctc, dctc, port,
+                                get_tx_affinity_rr(dev, udata));
+               else
+                       MLX5_SET(dctc, dctc, port, attr->port_num);
 
                set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
                MLX5_SET(dctc, dctc, counter_set_id, set_id);
@@ -4716,12 +4739,12 @@ int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
        return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
 }
 
-void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
        u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
 
-       mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
+       return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
 }
 
 static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
@@ -4921,8 +4944,8 @@ static int prepare_user_rq(struct ib_pd *pd,
        int err;
        size_t required_cmd_sz;
 
-       required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes)
-               + sizeof(ucmd.single_stride_log_num_of_bytes);
+       required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
+                                     single_stride_log_num_of_bytes);
        if (udata->inlen < required_cmd_sz) {
                mlx5_ib_dbg(dev, "invalid inlen\n");
                return -EINVAL;
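The required_cmd_sz and min_resp_len computations in this file all encode the same uverbs ABI rule: everything up to and including a given field must be supplied, and any extra tail a newer userspace sends must be zeroed. A hedged sketch of that input-validation idiom (the command struct name is hypothetical):

/* Sketch: uverbs forward/backward compatibility check on an input struct. */
size_t required = offsetofend(struct example_cmd, last_known_field);

if (udata->inlen < required)
	return -EINVAL;			/* userspace struct is too short */

if (udata->inlen > sizeof(struct example_cmd) &&
    !ib_is_udata_cleared(udata, sizeof(struct example_cmd),
			 udata->inlen - sizeof(struct example_cmd)))
	return -EOPNOTSUPP;		/* unknown, non-zero tail bytes */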
@@ -5006,7 +5029,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
        if (!udata)
                return ERR_PTR(-ENOSYS);
 
-       min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+       min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
        if (udata->outlen && udata->outlen < min_resp_len)
                return ERR_PTR(-EINVAL);
 
@@ -5036,8 +5059,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
        rwq->ibwq.wq_num = rwq->core_qp.qpn;
        rwq->ibwq.state = IB_WQS_RESET;
        if (udata->outlen) {
-               resp.response_length = offsetof(typeof(resp), response_length) +
-                               sizeof(resp.response_length);
+               resp.response_length = offsetofend(
+                       struct mlx5_ib_create_wq_resp, response_length);
                err = ib_copy_to_udata(udata, &resp, resp.response_length);
                if (err)
                        goto err_copy;
@@ -5056,22 +5079,27 @@ err:
        return ERR_PTR(err);
 }
 
-void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(wq->device);
        struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+       int ret;
 
-       mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+       ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+       if (ret)
+               return ret;
        destroy_user_rq(dev, wq->pd, rwq, udata);
        kfree(rwq);
+       return 0;
 }
 
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
-                                                     struct ib_rwq_ind_table_init_attr *init_attr,
-                                                     struct ib_udata *udata)
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+                                struct ib_rwq_ind_table_init_attr *init_attr,
+                                struct ib_udata *udata)
 {
-       struct mlx5_ib_dev *dev = to_mdev(device);
-       struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+       struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
+               to_mrwq_ind_table(ib_rwq_ind_table);
+       struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
        int sz = 1 << init_attr->log_ind_tbl_size;
        struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
        size_t min_resp_len;
@@ -5084,30 +5112,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
        if (udata->inlen > 0 &&
            !ib_is_udata_cleared(udata, 0,
                                 udata->inlen))
-               return ERR_PTR(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
 
        if (init_attr->log_ind_tbl_size >
            MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
                mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
                            init_attr->log_ind_tbl_size,
                            MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
        }
 
-       min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+       min_resp_len =
+               offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
        if (udata->outlen && udata->outlen < min_resp_len)
-               return ERR_PTR(-EINVAL);
-
-       rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
-       if (!rwq_ind_tbl)
-               return ERR_PTR(-ENOMEM);
+               return -EINVAL;
 
        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               err = -ENOMEM;
-               goto err;
-       }
+       if (!in)
+               return -ENOMEM;
 
        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
 
@@ -5122,26 +5145,24 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 
        err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
        kvfree(in);
-
        if (err)
-               goto err;
+               return err;
 
        rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
        if (udata->outlen) {
-               resp.response_length = offsetof(typeof(resp), response_length) +
-                                       sizeof(resp.response_length);
+               resp.response_length =
+                       offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
+                                   response_length);
                err = ib_copy_to_udata(udata, &resp, resp.response_length);
                if (err)
                        goto err_copy;
        }
 
-       return &rwq_ind_tbl->ib_rwq_ind_tbl;
+       return 0;
 
 err_copy:
        mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-err:
-       kfree(rwq_ind_tbl);
-       return ERR_PTR(err);
+       return err;
 }
 
 int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
@@ -5149,10 +5170,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
        struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
        struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
 
-       mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-
-       kfree(rwq_ind_tbl);
-       return 0;
+       return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
 }
 
 int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
@@ -5169,7 +5187,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
        void *rqc;
        void *in;
 
-       required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+       required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
        if (udata->inlen < required_cmd_sz)
                return -EINVAL;
 
index ba899df..5d4e140 100644 (file)
@@ -26,8 +26,8 @@ int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
 
 int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec);
 
-void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
-                                 struct mlx5_core_qp *rq);
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+                                struct mlx5_core_qp *rq);
 int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
                                struct mlx5_core_qp *sq);
 void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
index 7c3968e..c683d70 100644 (file)
@@ -576,11 +576,12 @@ err_destroy_rq:
        return err;
 }
 
-void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
-                                 struct mlx5_core_qp *rq)
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+                                struct mlx5_core_qp *rq)
 {
        destroy_resource_common(dev, rq);
        destroy_rq_tracked(dev, rq->qpn, rq->uid);
+       return 0;
 }
 
 static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
index 7e10cbc..e2f720e 100644 (file)
@@ -389,24 +389,21 @@ out_box:
        return ret;
 }
 
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(srq->device);
        struct mlx5_ib_srq *msrq = to_msrq(srq);
+       int ret;
+
+       ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq);
+       if (ret)
+               return ret;
 
-       mlx5_cmd_destroy_srq(dev, &msrq->msrq);
-
-       if (srq->uobject) {
-               mlx5_ib_db_unmap_user(
-                       rdma_udata_to_drv_context(
-                               udata,
-                               struct mlx5_ib_ucontext,
-                               ibucontext),
-                       &msrq->db);
-               ib_umem_release(msrq->umem);
-       } else {
+       if (udata)
+               destroy_srq_user(srq->pd, msrq, udata);
+       else
                destroy_srq_kernel(dev, msrq);
-       }
+       return 0;
 }
 
 void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
index af197c3..2c3627b 100644 (file)
@@ -56,7 +56,7 @@ struct mlx5_srq_table {
 
 int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                        struct mlx5_srq_attr *in);
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
 int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                       struct mlx5_srq_attr *out);
 int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
index 37aaace..db889ec 100644 (file)
@@ -590,22 +590,32 @@ err_destroy_srq_split:
        return err;
 }
 
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 {
        struct mlx5_srq_table *table = &dev->srq_table;
        struct mlx5_core_srq *tmp;
        int err;
 
-       tmp = xa_erase_irq(&table->array, srq->srqn);
-       if (!tmp || tmp != srq)
-               return;
+       /* Delete entry, but leave index occupied */
+       tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0);
+       if (WARN_ON(tmp != srq))
+               return xa_err(tmp) ?: -EINVAL;
 
        err = destroy_srq_split(dev, srq);
-       if (err)
-               return;
+       if (err) {
+               /*
+                * We don't need to check the returned result for an error,
+                * because we are restoring the entry into pre-allocated
+                * xarray space and it can't fail at this stage.
+                */
+               xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
+               return err;
+       }
+       xa_erase_irq(&table->array, srq->srqn);
 
        mlx5_core_res_put(&srq->common);
        wait_for_completion(&srq->common.free);
+       return 0;
 }
 
 int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
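The destroy path above must be able to fail without losing the SRQ lookup slot, so instead of erasing the entry up front it swaps it for XA_ZERO_ENTRY (the index stays allocated but reads back as NULL) and restores the pointer if the firmware destroy fails. A hedged, generic sketch of that reversible-removal idiom:

/* Sketch of the "remove but keep the slot reserved" xarray idiom. */
old = xa_cmpxchg_irq(&table->array, id, obj, XA_ZERO_ENTRY, 0);
if (WARN_ON(old != obj))		/* the slot changed underneath us */
	return xa_err(old) ?: -EINVAL;

err = destroy_in_hardware(obj);		/* hypothetical call, may fail */
if (err) {
	/* The slot is still reserved, so restoring the pointer cannot fail. */
	xa_cmpxchg_irq(&table->array, id, XA_ZERO_ENTRY, obj, 0);
	return err;
}
xa_erase_irq(&table->array, id);	/* now release the index for real */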
index 4388097..d6038fb 100644 (file)
@@ -398,7 +398,8 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
        seg->status = MLX5_MKEY_STATUS_FREE;
 }
 
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
+                                struct mlx5_mkey_seg *seg,
                                 const struct ib_send_wr *wr)
 {
        const struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -414,10 +415,12 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
        MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
        MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
        MLX5_SET(mkc, seg, lr, 1);
-       MLX5_SET(mkc, seg, relaxed_ordering_write,
-                !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
-       MLX5_SET(mkc, seg, relaxed_ordering_read,
-                !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+               MLX5_SET(mkc, seg, relaxed_ordering_write,
+                        !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+       if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+               MLX5_SET(mkc, seg, relaxed_ordering_read,
+                        !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
 
        if (umrwr->pd)
                MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
@@ -863,13 +866,11 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
        bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
        u8 flags = 0;
 
-       if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
-               mlx5_ib_warn(to_mdev(qp->ibqp.device),
-                            "Fast update of %s for MR is disabled\n",
-                            (MLX5_CAP_GEN(dev->mdev,
-                                          umr_modify_entity_size_disabled)) ?
-                                    "entity size" :
-                                    "atomic access");
+       /* Matches access in mlx5_set_umr_free_mkey() */
+       if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
+               mlx5_ib_warn(
+                       to_mdev(qp->ibqp.device),
+                       "Fast update for MR access flags is not possible\n");
                return -EINVAL;
        }
 
@@ -1263,7 +1264,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-       set_reg_mkey_segment(*seg, wr);
+       set_reg_mkey_segment(dev, *seg, wr);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
index 7550e9d..9dbbf4d 100644 (file)
@@ -548,7 +548,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
                    struct ib_qp_cap *cap,
                    int qpn,
                    int port,
-                   struct mthca_sqp *sqp,
+                   struct mthca_qp *qp,
                    struct ib_udata *udata);
 void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
 int mthca_create_ah(struct mthca_dev *dev,
index 9fa2f91..31b558f 100644 (file)
@@ -373,9 +373,10 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return 0;
 }
 
-static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
+       return 0;
 }
 
 static int mthca_ah_create(struct ib_ah *ibah,
@@ -389,9 +390,10 @@ static int mthca_ah_create(struct ib_ah *ibah,
                               init_attr->ah_attr, ah);
 }
 
-static void mthca_ah_destroy(struct ib_ah *ah, u32 flags)
+static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
 {
        mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
+       return 0;
 }
 
 static int mthca_create_srq(struct ib_srq *ibsrq,
@@ -440,7 +442,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq,
        return 0;
 }
 
-static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
 {
        if (udata) {
                struct mthca_ucontext *context =
@@ -454,6 +456,7 @@ static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
        }
 
        mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
+       return 0;
 }
 
 static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
@@ -532,13 +535,14 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
-               /* Don't allow userspace to create special QPs */
-               if (udata)
-                       return ERR_PTR(-EINVAL);
-
-               qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);
+               qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+               if (!qp->sqp) {
+                       kfree(qp);
+                       return ERR_PTR(-ENOMEM);
+               }
 
                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
 
@@ -547,7 +551,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
                                      to_mcq(init_attr->recv_cq),
                                      init_attr->sq_sig_type, &init_attr->cap,
                                      qp->ibqp.qp_num, init_attr->port_num,
-                                     to_msqp(qp), udata);
+                                     qp, udata);
                break;
        }
        default:
@@ -556,6 +560,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
        }
 
        if (err) {
+               kfree(qp->sqp);
                kfree(qp);
                return ERR_PTR(err);
        }
@@ -588,7 +593,8 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
                                    to_mqp(qp)->rq.db_index);
        }
        mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
-       kfree(qp);
+       kfree(to_mqp(qp)->sqp);
+       kfree(to_mqp(qp));
        return 0;
 }
 
@@ -789,7 +795,7 @@ out:
        return ret;
 }
 
-static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 {
        if (udata) {
                struct mthca_ucontext *context =
@@ -808,6 +814,7 @@ static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
                                    to_mcq(cq)->set_ci_db_index);
        }
        mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
+       return 0;
 }
 
 static inline u32 convert_access(int acc)
@@ -846,7 +853,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
 {
        struct mthca_dev *dev = to_mdev(pd->device);
-       struct sg_dma_page_iter sg_iter;
+       struct ib_block_iter biter;
        struct mthca_ucontext *context = rdma_udata_to_drv_context(
                udata, struct mthca_ucontext, ibucontext);
        struct mthca_mr *mr;
@@ -877,7 +884,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                goto err;
        }
 
-       n = ib_umem_num_pages(mr->umem);
+       n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
 
        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
@@ -895,8 +902,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
 
-       for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
-               pages[i++] = sg_page_iter_dma_address(&sg_iter);
+       rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) {
+               pages[i++] = rdma_block_iter_dma_address(&biter);
 
                /*
                 * Be friendly to write_mtt and pass it chunks
index 84c64bf..8a77483 100644 (file)
@@ -240,6 +240,16 @@ struct mthca_wq {
        __be32    *db;
 };
 
+struct mthca_sqp {
+       int             pkey_index;
+       u32             qkey;
+       u32             send_psn;
+       struct ib_ud_header ud_header;
+       int             header_buf_size;
+       void           *header_buf;
+       dma_addr_t      header_dma;
+};
+
 struct mthca_qp {
        struct ib_qp           ibqp;
        int                    refcount;
@@ -265,17 +275,7 @@ struct mthca_qp {
 
        wait_queue_head_t      wait;
        struct mutex           mutex;
-};
-
-struct mthca_sqp {
-       struct mthca_qp qp;
-       int             pkey_index;
-       u32             qkey;
-       u32             send_psn;
-       struct ib_ud_header ud_header;
-       int             header_buf_size;
-       void           *header_buf;
-       dma_addr_t      header_dma;
+       struct mthca_sqp *sqp;
 };
 
 static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -313,9 +313,4 @@ static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
        return container_of(ibqp, struct mthca_qp, ibqp);
 }
 
-static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
-{
-       return container_of(qp, struct mthca_sqp, qp);
-}
-
 #endif /* MTHCA_PROVIDER_H */
index c6e95d0..08a2a7a 100644 (file)
@@ -809,7 +809,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
                qp->alt_port = attr->alt_port_num;
 
        if (is_sqp(dev, qp))
-               store_attrs(to_msqp(qp), attr, attr_mask);
+               store_attrs(qp->sqp, attr, attr_mask);
 
        /*
         * If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -1368,39 +1368,40 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
                    struct ib_qp_cap *cap,
                    int qpn,
                    int port,
-                   struct mthca_sqp *sqp,
+                   struct mthca_qp *qp,
                    struct ib_udata *udata)
 {
        u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
        int err;
 
-       sqp->qp.transport = MLX;
-       err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
+       qp->transport = MLX;
+       err = mthca_set_qp_size(dev, cap, pd, qp);
        if (err)
                return err;
 
-       sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
-       sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
-                                            &sqp->header_dma, GFP_KERNEL);
-       if (!sqp->header_buf)
+       qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE;
+       qp->sqp->header_buf =
+               dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+                                  &qp->sqp->header_dma, GFP_KERNEL);
+       if (!qp->sqp->header_buf)
                return -ENOMEM;
 
        spin_lock_irq(&dev->qp_table.lock);
        if (mthca_array_get(&dev->qp_table.qp, mqpn))
                err = -EBUSY;
        else
-               mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
+               mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
        spin_unlock_irq(&dev->qp_table.lock);
 
        if (err)
                goto err_out;
 
-       sqp->qp.port      = port;
-       sqp->qp.qpn       = mqpn;
-       sqp->qp.transport = MLX;
+       qp->port      = port;
+       qp->qpn       = mqpn;
+       qp->transport = MLX;
 
        err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
-                                   send_policy, &sqp->qp, udata);
+                                   send_policy, qp, udata);
        if (err)
                goto err_out_free;
 
@@ -1421,10 +1422,9 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
 
        mthca_unlock_cqs(send_cq, recv_cq);
 
- err_out:
-       dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
-                         sqp->header_buf, sqp->header_dma);
-
+err_out:
+       dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+                         qp->sqp->header_buf, qp->sqp->header_dma);
        return err;
 }
 
@@ -1487,20 +1487,19 @@ void mthca_free_qp(struct mthca_dev *dev,
 
        if (is_sqp(dev, qp)) {
                atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
-               dma_free_coherent(&dev->pdev->dev,
-                                 to_msqp(qp)->header_buf_size,
-                                 to_msqp(qp)->header_buf,
-                                 to_msqp(qp)->header_dma);
+               dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+                                 qp->sqp->header_buf, qp->sqp->header_dma);
        } else
                mthca_free(&dev->qp_table.alloc, qp->qpn);
 }
 
 /* Create UD header for an MLX send and build a data segment for it */
-static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
-                           int ind, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind,
+                           const struct ib_ud_wr *wr,
                            struct mthca_mlx_seg *mlx,
                            struct mthca_data_seg *data)
 {
+       struct mthca_sqp *sqp = qp->sqp;
        int header_size;
        int err;
        u16 pkey;
@@ -1513,7 +1512,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        if (err)
                return err;
        mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
-       mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
+       mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
                                  (sqp->ud_header.lrh.destination_lid ==
                                   IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
                                  (sqp->ud_header.lrh.service_level << 8));
@@ -1534,29 +1533,29 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
                return -EINVAL;
        }
 
-       sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
+       sqp->ud_header.lrh.virtual_lane    = !qp->ibqp.qp_num ? 15 : 0;
        if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
-       if (!sqp->qp.ibqp.qp_num)
-               ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
-                                  sqp->pkey_index, &pkey);
+       if (!qp->ibqp.qp_num)
+               ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index,
+                                  &pkey);
        else
-               ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
-                                  wr->pkey_index, &pkey);
+               ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index,
+                                  &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
        sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
                                               sqp->qkey : wr->remote_qkey);
-       sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+       sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
 
        header_size = ib_ud_header_pack(&sqp->ud_header,
                                        sqp->header_buf +
                                        ind * MTHCA_UD_HEADER_SIZE);
 
        data->byte_count = cpu_to_be32(header_size);
-       data->lkey       = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
+       data->lkey       = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey);
        data->addr       = cpu_to_be64(sqp->header_dma +
                                       ind * MTHCA_UD_HEADER_SIZE);
 
@@ -1735,9 +1734,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        break;
 
                case MLX:
-                       err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
-                                              wqe - sizeof (struct mthca_next_seg),
-                                              wqe);
+                       err = build_mlx_header(
+                               dev, qp, ind, ud_wr(wr),
+                               wqe - sizeof(struct mthca_next_seg), wqe);
                        if (err) {
                                *bad_wr = wr;
                                goto out;
@@ -2065,9 +2064,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        break;
 
                case MLX:
-                       err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
-                                              wqe - sizeof (struct mthca_next_seg),
-                                              wqe);
+                       err = build_mlx_header(
+                               dev, qp, ind, ud_wr(wr),
+                               wqe - sizeof(struct mthca_next_seg), wqe);
                        if (err) {
                                *bad_wr = wr;
                                goto out;
index fcfe0e8..5eb61c1 100644 (file)
@@ -185,7 +185,6 @@ struct ocrdma_hw_mr {
        u32 num_pbes;
        u32 pbl_size;
        u32 pbe_size;
-       u64 fbo;
        u64 va;
 };
 
index 6eea02b..699a8b7 100644 (file)
@@ -215,12 +215,13 @@ av_err:
        return status;
 }
 
-void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
 {
        struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
        struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
 
        ocrdma_free_av(dev, ah);
+       return 0;
 }
 
 int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
index 8b73b34..35cf2e2 100644 (file)
@@ -53,7 +53,7 @@ enum {
 
 int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
                     struct ib_udata *udata);
-void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
 int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
 
 int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags,
index e07bf0b..c51c3f4 100644 (file)
@@ -1962,6 +1962,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
        int i;
        struct ocrdma_reg_nsmr *cmd;
        struct ocrdma_reg_nsmr_rsp *rsp;
+       u64 fbo = hwmr->va & (hwmr->pbe_size - 1);
 
        cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
        if (!cmd)
@@ -1987,8 +1988,8 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
                                        OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
        cmd->totlen_low = hwmr->len;
        cmd->totlen_high = upper_32_bits(hwmr->len);
-       cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
-       cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+       cmd->fbo_low = lower_32_bits(fbo);
+       cmd->fbo_high = upper_32_bits(fbo);
        cmd->va_loaddr = (u32) hwmr->va;
        cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
 
index c1751c9..7350fe1 100644 (file)
@@ -112,7 +112,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
 }
 
 static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
-                                           u8 *ib_speed, u8 *ib_width)
+                                           u16 *ib_speed, u8 *ib_width)
 {
        int status;
        u8 speed;
@@ -664,7 +664,7 @@ exit:
        return status;
 }
 
-void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
        struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
@@ -682,10 +682,11 @@ void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 
                if (is_ucontext_pd(uctx, pd)) {
                        ocrdma_release_ucontext_pd(uctx);
-                       return;
+                       return 0;
                }
        }
        _ocrdma_dealloc_pd(dev, pd);
+       return 0;
 }
 
 static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
@@ -810,14 +811,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
        return status;
 }
 
-static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
-                           u32 num_pbes)
+static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
 {
        struct ocrdma_pbe *pbe;
-       struct sg_dma_page_iter sg_iter;
+       struct ib_block_iter biter;
        struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
-       struct ib_umem *umem = mr->umem;
-       int pbe_cnt, total_num_pbes = 0;
+       int pbe_cnt;
        u64 pg_addr;
 
        if (!mr->hwmr.num_pbes)
@@ -826,19 +825,14 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
        pbe = (struct ocrdma_pbe *)pbl_tbl->va;
        pbe_cnt = 0;
 
-       for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+       rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
                /* store the page address in pbe */
-               pg_addr = sg_page_iter_dma_address(&sg_iter);
+               pg_addr = rdma_block_iter_dma_address(&biter);
                pbe->pa_lo = cpu_to_le32(pg_addr);
                pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
                pbe_cnt += 1;
-               total_num_pbes += 1;
                pbe++;
 
-               /* if done building pbes, issue the mbx cmd. */
-               if (total_num_pbes == num_pbes)
-                       return;
-
                /* if the given pbl is full storing the pbes,
                 * move to next pbl.
                 */
@@ -857,7 +851,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
        struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
        struct ocrdma_mr *mr;
        struct ocrdma_pd *pd;
-       u32 num_pbes;
 
        pd = get_ocrdma_pd(ibpd);
 
@@ -872,13 +865,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                status = -EFAULT;
                goto umem_err;
        }
-       num_pbes = ib_umem_page_count(mr->umem);
-       status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+       status = ocrdma_get_pbl_info(
+               dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
        if (status)
                goto umem_err;
 
        mr->hwmr.pbe_size = PAGE_SIZE;
-       mr->hwmr.fbo = ib_umem_offset(mr->umem);
        mr->hwmr.va = usr_addr;
        mr->hwmr.len = len;
        mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -889,7 +881,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
        status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
        if (status)
                goto umem_err;
-       build_user_pbes(dev, mr, num_pbes);
+       build_user_pbes(dev, mr);
        status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
        if (status)
                goto mbx_err;
@@ -1056,7 +1048,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq)
        spin_unlock_irqrestore(&cq->cq_lock, flags);
 }
 
-void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
        struct ocrdma_eq *eq = NULL;
@@ -1081,6 +1073,7 @@ void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
                                ocrdma_get_db_addr(dev, pdid),
                                dev->nic_info.db_page_size);
        }
+       return 0;
 }
 
 static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
@@ -1857,7 +1850,7 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
        return status;
 }
 
-void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
        struct ocrdma_srq *srq;
        struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
@@ -1872,6 +1865,7 @@ void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 
        kfree(srq->idx_bit_fields);
        kfree(srq->rqe_wr_id_tbl);
+       return 0;
 }
 
 /* unprivileged verbs and their support functions. */
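The ocrdma_reg_user_mr()/build_user_pbes() conversion above follows the same recipe as the other drivers in this series: size the PBL with ib_umem_num_dma_blocks() and walk the umem in fixed-size DMA blocks instead of iterating raw SG pages. A minimal hedged sketch of the iteration pattern (the umem variable is assumed to be an already-pinned struct ib_umem):

/* Sketch: walking a umem in PAGE_SIZE DMA blocks. */
struct ib_block_iter biter;
size_t nblocks = ib_umem_num_dma_blocks(umem, PAGE_SIZE); /* entries to allocate */

rdma_umem_for_each_dma_block(umem, &biter, PAGE_SIZE) {
	dma_addr_t addr = rdma_block_iter_dma_address(&biter);
	/* program 'addr' into the next PBE/MTT slot */
}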
index df8e3b9..425d554 100644 (file)
@@ -67,12 +67,12 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx);
 int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
 
 int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 
 int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                     struct ib_udata *udata);
 int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 
 struct ib_qp *ocrdma_create_qp(struct ib_pd *,
                               struct ib_qp_init_attr *attrs,
@@ -92,7 +92,7 @@ int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr,
 int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
                      enum ib_srq_attr_mask, struct ib_udata *);
 int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
-void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
                         const struct ib_recv_wr **bad_recv_wr);
 
index d85f992..7c0aac3 100644 (file)
@@ -177,6 +177,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
 }
 
 static const struct ib_device_ops qedr_roce_dev_ops = {
+       .alloc_xrcd = qedr_alloc_xrcd,
+       .dealloc_xrcd = qedr_dealloc_xrcd,
        .get_port_immutable = qedr_roce_port_immutable,
        .query_pkey = qedr_query_pkey,
 };
@@ -186,6 +188,10 @@ static void qedr_roce_register_device(struct qedr_dev *dev)
        dev->ibdev.node_type = RDMA_NODE_IB_CA;
 
        ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
+
+       dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) |
+               QEDR_UVERBS(CLOSE_XRCD) |
+               QEDR_UVERBS(CREATE_XSRQ);
 }
 
 static const struct ib_device_ops qedr_dev_ops = {
@@ -232,6 +238,7 @@ static const struct ib_device_ops qedr_dev_ops = {
        INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
        INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq),
+       INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
 };
 
@@ -602,7 +609,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev)
        qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx);
 
        /* Part 2 - check capabilities */
-       page_size = ~dev->attr.page_size_caps + 1;
+       page_size = ~qed_attr->page_size_caps + 1;
        if (page_size > PAGE_SIZE) {
                DP_ERR(dev,
                       "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n",
@@ -705,6 +712,18 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
                        event.event = IB_EVENT_SRQ_ERR;
                        event_type = EVENT_TYPE_SRQ;
                        break;
+               case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR:
+                       event.event = IB_EVENT_QP_ACCESS_ERR;
+                       event_type = EVENT_TYPE_QP;
+                       break;
+               case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR:
+                       event.event = IB_EVENT_QP_ACCESS_ERR;
+                       event_type = EVENT_TYPE_QP;
+                       break;
+               case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR:
+                       event.event = IB_EVENT_CQ_ERR;
+                       event_type = EVENT_TYPE_CQ;
+                       break;
                default:
                        DP_ERR(dev, "unsupported event %d on handle=%llx\n",
                               e_code, roce_handle64);
@@ -1026,6 +1045,13 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event)
        case QEDE_CHANGE_ADDR:
                qedr_mac_address_change(dev);
                break;
+       case QEDE_CHANGE_MTU:
+               if (rdma_protocol_iwarp(&dev->ibdev, 1))
+                       if (dev->ndev->mtu != dev->iwarp_max_mtu)
+                               DP_NOTICE(dev,
+                                         "MTU was changed from %d to %d. This will not take effect for iWARP until qedr is reloaded\n",
+                                         dev->iwarp_max_mtu, dev->ndev->mtu);
+               break;
        default:
                pr_err("Event not supported\n");
        }
index 4602921..9dde703 100644 (file)
@@ -310,6 +310,11 @@ struct qedr_pd {
        struct qedr_ucontext *uctx;
 };
 
+struct qedr_xrcd {
+       struct ib_xrcd ibxrcd;
+       u16 xrcd_id;
+};
+
 struct qedr_qp_hwq_info {
        /* WQE Elements */
        struct qed_chain pbl;
@@ -361,6 +366,7 @@ struct qedr_srq {
        struct ib_umem *prod_umem;
        u16 srq_id;
        u32 srq_limit;
+       bool is_xrc;
        /* lock to protect srq recv post */
        spinlock_t lock;
 };
@@ -573,6 +579,11 @@ static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd)
        return container_of(ibpd, struct qedr_pd, ibpd);
 }
 
+static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd)
+{
+       return container_of(ibxrcd, struct qedr_xrcd, ibxrcd);
+}
+
 static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
 {
        return container_of(ibcq, struct qedr_cq, ibcq);
@@ -598,6 +609,28 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
        return container_of(ibsrq, struct qedr_srq, ibsrq);
 }
 
+static inline bool qedr_qp_has_srq(struct qedr_qp *qp)
+{
+       return qp->srq;
+}
+
+static inline bool qedr_qp_has_sq(struct qedr_qp *qp)
+{
+       if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT)
+               return 0;
+
+       return 1;
+}
+
+static inline bool qedr_qp_has_rq(struct qedr_qp *qp)
+{
+       if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI ||
+           qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp))
+               return 0;
+
+       return 1;
+}
+
 static inline struct qedr_user_mmap_entry *
 get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
 {
index 97fc7dd..c7169d2 100644 (file)
@@ -736,7 +736,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct qedr_dev *dev = ep->dev;
        struct qedr_qp *qp;
        struct qed_iwarp_accept_in params;
-       int rc = 0;
+       int rc;
 
        DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
 
@@ -759,8 +759,10 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        params.ord = conn_param->ord;
 
        if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
-                            &qp->iwarp_cm_flags))
+                            &qp->iwarp_cm_flags)) {
+               rc = -EINVAL;
                goto err; /* QP already destroyed */
+       }
 
        rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params);
        if (rc) {
index b49bef9..019642f 100644 (file)
@@ -136,6 +136,8 @@ int qedr_query_device(struct ib_device *ibdev,
            IB_DEVICE_RC_RNR_NAK_GEN |
            IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
 
+       if (!rdma_protocol_iwarp(&dev->ibdev, 1))
+               attr->device_cap_flags |= IB_DEVICE_XRC;
        attr->max_send_sge = qattr->max_sge;
        attr->max_recv_sge = qattr->max_sge;
        attr->max_sge_rd = qattr->max_sge;
@@ -157,13 +159,13 @@ int qedr_query_device(struct ib_device *ibdev,
 
        attr->local_ca_ack_delay = qattr->dev_ack_delay;
        attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
-       attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+       attr->max_pkeys = qattr->max_pkey;
        attr->max_ah = qattr->max_ah;
 
        return 0;
 }
 
-static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
                                            u8 *ib_width)
 {
        switch (speed) {
@@ -231,15 +233,16 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
                attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
        }
        attr->max_mtu = IB_MTU_4096;
-       attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
        attr->lid = 0;
        attr->lmc = 0;
        attr->sm_lid = 0;
        attr->sm_sl = 0;
        attr->ip_gids = true;
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+               attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
                attr->gid_tbl_len = 1;
        } else {
+               attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
                attr->gid_tbl_len = QEDR_MAX_SGID;
                attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
        }
@@ -471,15 +474,33 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return 0;
 }
 
-void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct qedr_dev *dev = get_qedr_dev(ibpd->device);
        struct qedr_pd *pd = get_qedr_pd(ibpd);
 
        DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
        dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+       return 0;
+}
+
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+       struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
+
+       return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
 }
 
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+       u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
+
+       dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
+       return 0;
+}
+
 static void qedr_free_pbl(struct qedr_dev *dev,
                          struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
 {
@@ -600,11 +621,9 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
                               struct qedr_pbl_info *pbl_info, u32 pg_shift)
 {
        int pbe_cnt, total_num_pbes = 0;
-       u32 fw_pg_cnt, fw_pg_per_umem_pg;
        struct qedr_pbl *pbl_tbl;
-       struct sg_dma_page_iter sg_iter;
+       struct ib_block_iter biter;
        struct regpair *pbe;
-       u64 pg_addr;
 
        if (!pbl_info->num_pbes)
                return;
@@ -625,32 +644,25 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 
        pbe_cnt = 0;
 
-       fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
+       rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
+               u64 pg_addr = rdma_block_iter_dma_address(&biter);
 
-       for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
-               pg_addr = sg_page_iter_dma_address(&sg_iter);
-               for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
-                       pbe->lo = cpu_to_le32(pg_addr);
-                       pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+               pbe->lo = cpu_to_le32(pg_addr);
+               pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
 
-                       pg_addr += BIT(pg_shift);
-                       pbe_cnt++;
-                       total_num_pbes++;
-                       pbe++;
+               pbe_cnt++;
+               total_num_pbes++;
+               pbe++;
 
-                       if (total_num_pbes == pbl_info->num_pbes)
-                               return;
+               if (total_num_pbes == pbl_info->num_pbes)
+                       return;
 
-                       /* If the given pbl is full storing the pbes,
-                        * move to next pbl.
-                        */
-                       if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
-                               pbl_tbl++;
-                               pbe = (struct regpair *)pbl_tbl->va;
-                               pbe_cnt = 0;
-                       }
-
-                       fw_pg_cnt++;
+               /* If the given pbl is full of pbes, move to the next pbl.
+                */
+               if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+                       pbl_tbl++;
+                       pbe = (struct regpair *)pbl_tbl->va;
+                       pbe_cnt = 0;
                }
        }
 }
@@ -792,9 +804,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
                return PTR_ERR(q->umem);
        }
 
-       fw_pages = ib_umem_page_count(q->umem) <<
-           (PAGE_SHIFT - FW_PAGE_SHIFT);
-
+       fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
        rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
        if (rc)
                goto err0;
@@ -999,7 +1009,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                /* Generate doorbell address. */
                cq->db.data.icid = cq->icid;
                cq->db_addr = dev->db_addr + db_offset;
-               cq->db.data.params = DB_AGG_CMD_SET <<
+               cq->db.data.params = DB_AGG_CMD_MAX <<
                    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
 
                /* point to the very last element, passing it we will toggle */
@@ -1051,7 +1061,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
 #define QEDR_DESTROY_CQ_MAX_ITERATIONS         (10)
 #define QEDR_DESTROY_CQ_ITER_DURATION          (10)
 
-void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qed_rdma_destroy_cq_out_params oparams;
@@ -1066,7 +1076,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
        /* GSIs CQs are handled by driver, so they don't exist in the FW */
        if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
                qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
-               return;
+               return 0;
        }
 
        iparams.icid = cq->icid;
@@ -1114,6 +1124,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
         * Since the destroy CQ ramrod has also been received on the EQ we can
         * be certain that there's no event handler in process.
         */
+       return 0;
 }
 
 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
@@ -1146,7 +1157,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                SET_FIELD(qp_params->modify_flags,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                break;
-       case RDMA_NETWORK_IB:
+       case RDMA_NETWORK_ROCE_V1:
                memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
@@ -1166,6 +1177,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                qp_params->roce_mode = ROCE_V2_IPV4;
                break;
+       default:
+               return -EINVAL;
        }
 
        for (i = 0; i < 4; i++) {
@@ -1186,7 +1199,10 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
        struct qedr_device_attr *qattr = &dev->attr;
 
        /* QP0... attrs->qp_type == IB_QPT_GSI */
-       if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
+       if (attrs->qp_type != IB_QPT_RC &&
+           attrs->qp_type != IB_QPT_GSI &&
+           attrs->qp_type != IB_QPT_XRC_INI &&
+           attrs->qp_type != IB_QPT_XRC_TGT) {
                DP_DEBUG(dev, QEDR_MSG_QP,
                         "create qp: unsupported qp type=0x%x requested\n",
                         attrs->qp_type);
@@ -1221,12 +1237,20 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
                return -EINVAL;
        }
 
-       /* Unprivileged user space cannot create special QP */
-       if (udata && attrs->qp_type == IB_QPT_GSI) {
-               DP_ERR(dev,
-                      "create qp: userspace can't create special QPs of type=0x%x\n",
-                      attrs->qp_type);
-               return -EINVAL;
+       /* Verify that consumer QPs are not trying to use the GSI QP's CQ.
+        * A TGT QP isn't associated with an RQ/SQ.
+        */
+       if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
+           (attrs->qp_type != IB_QPT_XRC_TGT)) {
+               struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
+               struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
+
+               if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
+                   (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
+                       DP_ERR(dev,
+                              "create qp: consumer QP cannot use GSI CQs.\n");
+                       return -EINVAL;
+               }
        }
 
        return 0;
@@ -1248,8 +1272,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev,
 }
 
 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
-                             struct qedr_create_qp_uresp *uresp,
-                             struct qedr_qp *qp)
+                              struct qedr_create_qp_uresp *uresp,
+                              struct qedr_qp *qp)
 {
        /* iWARP requires two doorbells per RQ. */
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
@@ -1291,8 +1315,12 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
        int rc;
 
        memset(uresp, 0, sizeof(*uresp));
-       qedr_copy_sq_uresp(dev, uresp, qp);
-       qedr_copy_rq_uresp(dev, uresp, qp);
+
+       if (qedr_qp_has_sq(qp))
+               qedr_copy_sq_uresp(dev, uresp, qp);
+
+       if (qedr_qp_has_rq(qp))
+               qedr_copy_rq_uresp(dev, uresp, qp);
 
        uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
        uresp->qp_id = qp->qp_id;
@@ -1316,18 +1344,25 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
                kref_init(&qp->refcnt);
                init_completion(&qp->iwarp_cm_comp);
        }
+
        qp->pd = pd;
        qp->qp_type = attrs->qp_type;
        qp->max_inline_data = attrs->cap.max_inline_data;
-       qp->sq.max_sges = attrs->cap.max_send_sge;
        qp->state = QED_ROCE_QP_STATE_RESET;
        qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
-       qp->sq_cq = get_qedr_cq(attrs->send_cq);
        qp->dev = dev;
+       if (qedr_qp_has_sq(qp)) {
+               qp->sq.max_sges = attrs->cap.max_send_sge;
+               qp->sq_cq = get_qedr_cq(attrs->send_cq);
+               DP_DEBUG(dev, QEDR_MSG_QP,
+                        "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
+                        qp->sq.max_sges, qp->sq_cq->icid);
+       }
 
-       if (attrs->srq) {
+       if (attrs->srq)
                qp->srq = get_qedr_srq(attrs->srq);
-       } else {
+
+       if (qedr_qp_has_rq(qp)) {
                qp->rq_cq = get_qedr_cq(attrs->recv_cq);
                qp->rq.max_sges = attrs->cap.max_recv_sge;
                DP_DEBUG(dev, QEDR_MSG_QP,
@@ -1346,30 +1381,26 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
 
 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
 {
-       int rc;
+       int rc = 0;
 
-       qp->sq.db = dev->db_addr +
-                   DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
-       qp->sq.db_data.data.icid = qp->icid + 1;
-       rc = qedr_db_recovery_add(dev, qp->sq.db,
-                                 &qp->sq.db_data,
-                                 DB_REC_WIDTH_32B,
-                                 DB_REC_KERNEL);
-       if (rc)
-               return rc;
+       if (qedr_qp_has_sq(qp)) {
+               qp->sq.db = dev->db_addr +
+                           DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+               qp->sq.db_data.data.icid = qp->icid + 1;
+               rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
+                                         DB_REC_WIDTH_32B, DB_REC_KERNEL);
+               if (rc)
+                       return rc;
+       }
 
-       if (!qp->srq) {
+       if (qedr_qp_has_rq(qp)) {
                qp->rq.db = dev->db_addr +
                            DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
                qp->rq.db_data.data.icid = qp->icid;
-
-               rc = qedr_db_recovery_add(dev, qp->rq.db,
-                                         &qp->rq.db_data,
-                                         DB_REC_WIDTH_32B,
-                                         DB_REC_KERNEL);
-               if (rc)
-                       qedr_db_recovery_del(dev, qp->sq.db,
-                                            &qp->sq.db_data);
+               rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
+                                         DB_REC_WIDTH_32B, DB_REC_KERNEL);
+               if (rc && qedr_qp_has_sq(qp))
+                       qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
        }
 
        return rc;
@@ -1392,6 +1423,10 @@ static int qedr_check_srq_params(struct qedr_dev *dev,
                DP_ERR(dev,
                       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
                       attrs->attr.max_sge, qattr->max_sge);
+       }
+
+       if (!udata && attrs->srq_type == IB_SRQT_XRC) {
+               DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
                return -EINVAL;
        }
 
@@ -1516,6 +1551,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
                return -EINVAL;
 
        srq->dev = dev;
+       srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
        hw_srq = &srq->hw_srq;
        spin_lock_init(&srq->lock);
 
@@ -1557,6 +1593,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
        in_params.prod_pair_addr = phy_prod_pair_addr;
        in_params.num_pages = page_cnt;
        in_params.page_size = page_size;
+       if (srq->is_xrc) {
+               struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
+               struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
+
+               in_params.is_xrc = 1;
+               in_params.xrcd_id = xrcd->xrcd_id;
+               in_params.cq_cid = cq->icid;
+       }
 
        rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
        if (rc)
@@ -1591,7 +1635,7 @@ err0:
        return -EFAULT;
 }
 
-void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
        struct qed_rdma_destroy_srq_in_params in_params = {};
        struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
@@ -1599,6 +1643,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 
        xa_erase_irq(&dev->srqs, srq->srq_id);
        in_params.srq_id = srq->srq_id;
+       in_params.is_xrc = srq->is_xrc;
        dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
 
        if (ibsrq->uobject)
@@ -1609,6 +1654,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
        DP_DEBUG(dev, QEDR_MSG_SRQ,
                 "destroy srq: destroyed srq with srq_id=0x%0x\n",
                 srq->srq_id);
+       return 0;
 }
 
 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
@@ -1649,6 +1695,20 @@ int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
        return 0;
 }
 
+static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
+{
+       switch (ib_qp_type) {
+       case IB_QPT_RC:
+               return QED_RDMA_QP_TYPE_RC;
+       case IB_QPT_XRC_INI:
+               return QED_RDMA_QP_TYPE_XRC_INI;
+       case IB_QPT_XRC_TGT:
+               return QED_RDMA_QP_TYPE_XRC_TGT;
+       default:
+               return QED_RDMA_QP_TYPE_INVAL;
+       }
+}
+
 static inline void
 qedr_init_common_qp_in_params(struct qedr_dev *dev,
                              struct qedr_pd *pd,
@@ -1663,20 +1723,27 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev,
 
        params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
        params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
-       params->pd = pd->pd_id;
-       params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
-       params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+       params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
        params->stats_queue = 0;
-       params->srq_id = 0;
-       params->use_srq = false;
 
-       if (!qp->srq) {
+       if (pd) {
+               params->pd = pd->pd_id;
+               params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+       }
+
+       if (qedr_qp_has_sq(qp))
+               params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+
+       if (qedr_qp_has_rq(qp))
                params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
 
-       } else {
+       if (qedr_qp_has_srq(qp)) {
                params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
                params->srq_id = qp->srq->srq_id;
                params->use_srq = true;
+       } else {
+               params->srq_id = 0;
+               params->use_srq = false;
        }
 }
 
@@ -1690,8 +1757,10 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
                 "rq_len=%zd"
                 "\n",
                 qp,
-                qp->usq.buf_addr,
-                qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
+                qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
+                qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
+                qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
+                qedr_qp_has_rq(qp) ? qp->urq.buf_len : 0);
 }
 
 static inline void
@@ -1717,11 +1786,15 @@ static void qedr_cleanup_user(struct qedr_dev *dev,
                              struct qedr_ucontext *ctx,
                              struct qedr_qp *qp)
 {
-       ib_umem_release(qp->usq.umem);
-       qp->usq.umem = NULL;
+       if (qedr_qp_has_sq(qp)) {
+               ib_umem_release(qp->usq.umem);
+               qp->usq.umem = NULL;
+       }
 
-       ib_umem_release(qp->urq.umem);
-       qp->urq.umem = NULL;
+       if (qedr_qp_has_rq(qp)) {
+               ib_umem_release(qp->urq.umem);
+               qp->urq.umem = NULL;
+       }
 
        if (rdma_protocol_roce(&dev->ibdev, 1)) {
                qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
@@ -1756,28 +1829,38 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 {
        struct qed_rdma_create_qp_in_params in_params;
        struct qed_rdma_create_qp_out_params out_params;
-       struct qedr_pd *pd = get_qedr_pd(ibpd);
-       struct qedr_create_qp_uresp uresp;
-       struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
-       struct qedr_create_qp_ureq ureq;
+       struct qedr_create_qp_uresp uresp = {};
+       struct qedr_create_qp_ureq ureq = {};
        int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
-       int rc = -EINVAL;
+       struct qedr_ucontext *ctx = NULL;
+       struct qedr_pd *pd = NULL;
+       int rc = 0;
 
        qp->create_type = QEDR_QP_CREATE_USER;
-       memset(&ureq, 0, sizeof(ureq));
-       rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
-       if (rc) {
-               DP_ERR(dev, "Problem copying data from user space\n");
-               return rc;
+
+       if (ibpd) {
+               pd = get_qedr_pd(ibpd);
+               ctx = pd->uctx;
        }
 
-       /* SQ - read access only (0) */
-       rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
-                                 ureq.sq_len, true, 0, alloc_and_init);
-       if (rc)
-               return rc;
+       if (udata) {
+               rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+                                       udata->inlen));
+               if (rc) {
+                       DP_ERR(dev, "Problem copying data from user space\n");
+                       return rc;
+               }
+       }
 
-       if (!qp->srq) {
+       if (qedr_qp_has_sq(qp)) {
+               /* SQ - read access only (0) */
+               rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
+                                         ureq.sq_len, true, 0, alloc_and_init);
+               if (rc)
+                       return rc;
+       }
+
+       if (qedr_qp_has_rq(qp)) {
                /* RQ - read access only (0) */
                rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
                                          ureq.rq_len, true, 0, alloc_and_init);
@@ -1789,9 +1872,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
        qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
        in_params.qp_handle_lo = ureq.qp_handle_lo;
        in_params.qp_handle_hi = ureq.qp_handle_hi;
-       in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
-       in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
-       if (!qp->srq) {
+
+       if (qp->qp_type == IB_QPT_XRC_TGT) {
+               struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
+
+               in_params.xrcd_id = xrcd->xrcd_id;
+               in_params.qp_handle_lo = qp->qp_id;
+               in_params.use_srq = 1;
+       }
+
+       if (qedr_qp_has_sq(qp)) {
+               in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
+               in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+       }
+
+       if (qedr_qp_has_rq(qp)) {
                in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
                in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
        }
@@ -1813,39 +1908,32 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
        qp->qp_id = out_params.qp_id;
        qp->icid = out_params.icid;
 
-       rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
-       if (rc)
-               goto err;
+       if (udata) {
+               rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
+               if (rc)
+                       goto err;
+       }
 
        /* db offset was calculated in copy_qp_uresp, now set in the user q */
-       ctx = pd->uctx;
-       qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
-       qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
-
-       if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
-               qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
-
-               /* calculate the db_rec_db2 data since it is constant so no
-                *  need to reflect from user
-                */
-               qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
-               qp->urq.db_rec_db2_data.data.value =
-                       cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+       if (qedr_qp_has_sq(qp)) {
+               qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+               rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+                                         &qp->usq.db_rec_data->db_data,
+                                         DB_REC_WIDTH_32B,
+                                         DB_REC_USER);
+               if (rc)
+                       goto err;
        }
 
-       rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
-                                 &qp->usq.db_rec_data->db_data,
-                                 DB_REC_WIDTH_32B,
-                                 DB_REC_USER);
-       if (rc)
-               goto err;
-
-       rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
-                                 &qp->urq.db_rec_data->db_data,
-                                 DB_REC_WIDTH_32B,
-                                 DB_REC_USER);
-       if (rc)
-               goto err;
+       if (qedr_qp_has_rq(qp)) {
+               qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+               rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+                                         &qp->urq.db_rec_data->db_data,
+                                         DB_REC_WIDTH_32B,
+                                         DB_REC_USER);
+               if (rc)
+                       goto err;
+       }
 
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
@@ -1856,7 +1944,6 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
                        goto err;
        }
        qedr_qp_user_print(dev, qp);
-
        return rc;
 err:
        rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -2112,16 +2199,47 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
        return rc;
 }
 
+static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
+                                 struct ib_udata *udata)
+{
+       struct qedr_ucontext *ctx =
+               rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+                                         ibucontext);
+       int rc;
+
+       if (qp->qp_type != IB_QPT_GSI) {
+               rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+               if (rc)
+                       return rc;
+       }
+
+       if (qp->create_type == QEDR_QP_CREATE_USER)
+               qedr_cleanup_user(dev, ctx, qp);
+       else
+               qedr_cleanup_kernel(dev, qp);
+
+       return 0;
+}
+
 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
                             struct ib_qp_init_attr *attrs,
                             struct ib_udata *udata)
 {
-       struct qedr_dev *dev = get_qedr_dev(ibpd->device);
-       struct qedr_pd *pd = get_qedr_pd(ibpd);
+       struct qedr_xrcd *xrcd = NULL;
+       struct qedr_pd *pd = NULL;
+       struct qedr_dev *dev;
        struct qedr_qp *qp;
        struct ib_qp *ibqp;
        int rc = 0;
 
+       if (attrs->qp_type == IB_QPT_XRC_TGT) {
+               xrcd = get_qedr_xrcd(attrs->xrcd);
+               dev = get_qedr_dev(xrcd->ibxrcd.device);
+       } else {
+               pd = get_qedr_pd(ibpd);
+               dev = get_qedr_dev(ibpd->device);
+       }
+
        DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
                 udata ? "user library" : "kernel", pd);
 
@@ -2152,25 +2270,27 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
                return ibqp;
        }
 
-       if (udata)
+       if (udata || xrcd)
                rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
        else
                rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
 
        if (rc)
-               goto err;
+               goto out_free_qp;
 
        qp->ibqp.qp_num = qp->qp_id;
 
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
                if (rc)
-                       goto err;
+                       goto out_free_qp_resources;
        }
 
        return &qp->ibqp;
 
-err:
+out_free_qp_resources:
+       qedr_free_qp_resources(dev, qp, udata);
+out_free_qp:
        kfree(qp);
 
        return ERR_PTR(-EFAULT);
@@ -2636,7 +2756,7 @@ int qedr_query_qp(struct ib_qp *ibqp,
        qp_attr->cap.max_recv_wr = qp->rq.max_wr;
        qp_attr->cap.max_send_sge = qp->sq.max_sges;
        qp_attr->cap.max_recv_sge = qp->rq.max_sges;
-       qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
+       qp_attr->cap.max_inline_data = dev->attr.max_inline;
        qp_init_attr->cap = qp_attr->cap;
 
        qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
@@ -2671,28 +2791,6 @@ err:
        return rc;
 }
 
-static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
-                                 struct ib_udata *udata)
-{
-       struct qedr_ucontext *ctx =
-               rdma_udata_to_drv_context(udata, struct qedr_ucontext,
-                                         ibucontext);
-       int rc;
-
-       if (qp->qp_type != IB_QPT_GSI) {
-               rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
-               if (rc)
-                       return rc;
-       }
-
-       if (qp->create_type == QEDR_QP_CREATE_USER)
-               qedr_cleanup_user(dev, ctx, qp);
-       else
-               qedr_cleanup_kernel(dev, qp);
-
-       return 0;
-}
-
 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
        struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -2752,6 +2850,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
        if (rdma_protocol_iwarp(&dev->ibdev, 1))
                qedr_iw_qp_rem_ref(&qp->ibqp);
+       else
+               kfree(qp);
 
        return 0;
 }
@@ -2766,11 +2866,12 @@ int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
        return 0;
 }
 
-void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
 {
        struct qedr_ah *ah = get_qedr_ah(ibah);
 
        rdma_destroy_ah_attr(&ah->attr);
+       return 0;
 }
 
 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
@@ -2861,7 +2962,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                goto err0;
        }
 
-       rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
+       rc = init_mr_info(dev, &mr->info,
+                         ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
        if (rc)
                goto err1;
 
@@ -2888,10 +2990,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
        mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
        mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
        mr->hw_mr.page_size_log = PAGE_SHIFT;
-       mr->hw_mr.fbo = ib_umem_offset(mr->umem);
        mr->hw_mr.length = len;
        mr->hw_mr.vaddr = usr_addr;
-       mr->hw_mr.zbva = false;
        mr->hw_mr.phy_mr = false;
        mr->hw_mr.dma_mr = false;
 
@@ -2984,10 +3084,8 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
        mr->hw_mr.pbl_ptr = 0;
        mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
        mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
-       mr->hw_mr.fbo = 0;
        mr->hw_mr.length = 0;
        mr->hw_mr.vaddr = 0;
-       mr->hw_mr.zbva = false;
        mr->hw_mr.phy_mr = true;
        mr->hw_mr.dma_mr = false;
 
@@ -3765,10 +3863,10 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                 * in first 4 bytes and need to update WQE producer in
                 * next 4 bytes.
                 */
-               srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
+               srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
                /* Make sure sge producer is updated first */
                dma_wmb();
-               srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
+               srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
 
                wr = wr->next;
        }
index 39dd628..2672c32 100644 (file)
@@ -47,12 +47,13 @@ void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-
+int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                   struct ib_udata *udata);
 int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
                             struct ib_udata *);
@@ -67,12 +68,12 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr,
 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                       const struct ib_recv_wr **bad_recv_wr);
 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
                   struct ib_udata *udata);
-void qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
 
 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
 struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
index 432d6d0..ee21142 100644 (file)
@@ -619,11 +619,11 @@ struct qib_pportdata {
        /* LID mask control */
        u8 lmc;
        u8 link_width_supported;
-       u8 link_speed_supported;
+       u16 link_speed_supported;
        u8 link_width_enabled;
-       u8 link_speed_enabled;
+       u16 link_speed_enabled;
        u8 link_width_active;
-       u8 link_speed_active;
+       u16 link_speed_active;
        u8 vls_supported;
        u8 vls_operational;
        /* Rx Polarity inversion (compensate for ~tx on partner) */
index a10eab8..189a0ce 100644 (file)
@@ -1733,9 +1733,9 @@ done:
        return;
 }
 
-static void qib_error_tasklet(unsigned long data)
+static void qib_error_tasklet(struct tasklet_struct *t)
 {
-       struct qib_devdata *dd = (struct qib_devdata *)data;
+       struct qib_devdata *dd = from_tasklet(dd, t, error_tasklet);
 
        handle_7322_errors(dd);
        qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
@@ -3537,8 +3537,7 @@ try_intx:
        for (i = 0; i < ARRAY_SIZE(redirect); i++)
                qib_write_kreg(dd, kr_intredirect + i, redirect[i]);
        dd->cspec->main_int_mask = mask;
-       tasklet_init(&dd->error_tasklet, qib_error_tasklet,
-               (unsigned long)dd);
+       tasklet_setup(&dd->error_tasklet, qib_error_tasklet);
 }
 
 /**
index e7789e7..f83e331 100644 (file)
@@ -2293,76 +2293,50 @@ static int process_cc(struct ib_device *ibdev, int mad_flags,
                        struct ib_mad *out_mad)
 {
        struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
-       int ret;
-
        *out_mad = *in_mad;
 
        if (ccp->class_version != 2) {
                ccp->status |= IB_SMP_UNSUP_VERSION;
-               ret = reply((struct ib_smp *)ccp);
-               goto bail;
+               return reply((struct ib_smp *)ccp);
        }
 
        switch (ccp->method) {
        case IB_MGMT_METHOD_GET:
                switch (ccp->attr_id) {
                case IB_CC_ATTR_CLASSPORTINFO:
-                       ret = cc_get_classportinfo(ccp, ibdev);
-                       goto bail;
-
+                       return cc_get_classportinfo(ccp, ibdev);
                case IB_CC_ATTR_CONGESTION_INFO:
-                       ret = cc_get_congestion_info(ccp, ibdev, port);
-                       goto bail;
-
+                       return cc_get_congestion_info(ccp, ibdev, port);
                case IB_CC_ATTR_CA_CONGESTION_SETTING:
-                       ret = cc_get_congestion_setting(ccp, ibdev, port);
-                       goto bail;
-
+                       return cc_get_congestion_setting(ccp, ibdev, port);
                case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
-                       ret = cc_get_congestion_control_table(ccp, ibdev, port);
-                       goto bail;
-
-                       fallthrough;
+                       return cc_get_congestion_control_table(ccp, ibdev, port);
                default:
                        ccp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) ccp);
-                       goto bail;
+                       return reply((struct ib_smp *) ccp);
                }
-
        case IB_MGMT_METHOD_SET:
                switch (ccp->attr_id) {
                case IB_CC_ATTR_CA_CONGESTION_SETTING:
-                       ret = cc_set_congestion_setting(ccp, ibdev, port);
-                       goto bail;
-
+                       return cc_set_congestion_setting(ccp, ibdev, port);
                case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
-                       ret = cc_set_congestion_control_table(ccp, ibdev, port);
-                       goto bail;
-
-                       fallthrough;
+                       return cc_set_congestion_control_table(ccp, ibdev, port);
                default:
                        ccp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) ccp);
-                       goto bail;
+                       return reply((struct ib_smp *) ccp);
                }
-
        case IB_MGMT_METHOD_GET_RESP:
                /*
                 * The ib_mad module will call us to process responses
                 * before checking for other consumers.
                 * Just tell the caller to process it normally.
                 */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-
-       case IB_MGMT_METHOD_TRAP:
-       default:
-               ccp->status |= IB_SMP_UNSUP_METHOD;
-               ret = reply((struct ib_smp *) ccp);
+               return IB_MAD_RESULT_SUCCESS;
        }
 
-bail:
-       return ret;
+       /* method is unsupported */
+       ccp->status |= IB_SMP_UNSUP_METHOD;
+       return reply((struct ib_smp *) ccp);
 }
 
 /**
index 8f8d617..5e86cbf 100644 (file)
@@ -62,7 +62,7 @@ static void sdma_get(struct qib_sdma_state *);
 static void sdma_put(struct qib_sdma_state *);
 static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states);
 static void sdma_start_sw_clean_up(struct qib_pportdata *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
 static void unmap_desc(struct qib_pportdata *, unsigned);
 
 static void sdma_get(struct qib_sdma_state *ss)
@@ -119,9 +119,10 @@ static void clear_sdma_activelist(struct qib_pportdata *ppd)
        }
 }
 
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
 {
-       struct qib_pportdata *ppd = (struct qib_pportdata *) opaque;
+       struct qib_pportdata *ppd = from_tasklet(ppd, t,
+                                                sdma_sw_clean_up_task);
        unsigned long flags;
 
        spin_lock_irqsave(&ppd->sdma_lock, flags);
@@ -436,8 +437,7 @@ int qib_setup_sdma(struct qib_pportdata *ppd)
 
        INIT_LIST_HEAD(&ppd->sdma_activelist);
 
-       tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
-               (unsigned long)ppd);
+       tasklet_setup(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task);
 
        ret = dd->f_init_sdma_regs(ppd);
        if (ret)
index 662e7fc..462ed71 100644 (file)
@@ -315,7 +315,6 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
        if (err)
                return err;
 
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 
        return 0;
@@ -355,7 +354,6 @@ static const struct ib_device_ops usnic_dev_ops = {
        .modify_qp = usnic_ib_modify_qp,
        .query_device = usnic_ib_query_device,
        .query_gid = usnic_ib_query_gid,
-       .query_pkey = usnic_ib_query_pkey,
        .query_port = usnic_ib_query_port,
        .query_qp = usnic_ib_query_qp,
        .reg_user_mr = usnic_ib_reg_mr,
index b8a77ce..9e961f8 100644 (file)
@@ -367,7 +367,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
 
        props->port_cap_flags = 0;
        props->gid_tbl_len = 1;
-       props->pkey_tbl_len = 1;
        props->bad_pkey_cntr = 0;
        props->qkey_viol_cntr = 0;
        props->max_mtu = IB_MTU_4096;
@@ -437,16 +436,6 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
        return 0;
 }
 
-int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-                               u16 *pkey)
-{
-       if (index > 0)
-               return -EINVAL;
-
-       *pkey = 0xffff;
-       return 0;
-}
-
 int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct usnic_ib_pd *pd = to_upd(ibpd);
@@ -460,9 +449,10 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return 0;
 }
 
-void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+       return 0;
 }
 
 struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
@@ -596,9 +586,9 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
        return 0;
 }
 
-void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 {
-       return;
+       return 0;
 }
 
 struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
index 2aedf78..11fe1ba 100644 (file)
@@ -48,10 +48,8 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
                                struct ib_qp_init_attr *qp_init_attr);
 int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                                union ib_gid *gid);
-int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-                               u16 *pkey);
 int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
                                        struct ib_qp_init_attr *init_attr,
                                        struct ib_udata *udata);
@@ -60,7 +58,7 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                int attr_mask, struct ib_udata *udata);
 int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                       struct ib_udata *udata);
-void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
                                u64 virt_addr, int access_flags,
                                struct ib_udata *udata);
index 4f6cc0d..319546a 100644 (file)
@@ -142,7 +142,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                        goto err_cq;
                }
 
-               npages = ib_umem_page_count(cq->umem);
+               npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
        } else {
                /* One extra page for shared ring state */
                npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
@@ -235,7 +235,7 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
  * @cq: the completion queue to destroy.
  * @udata: user data or null for kernel object
  */
-void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 {
        struct pvrdma_cq *vcq = to_vcq(cq);
        union pvrdma_cmd_req req;
@@ -261,6 +261,7 @@ void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 
        pvrdma_free_cq(dev, vcq);
        atomic_dec(&dev->num_cqs);
+       return 0;
 }
 
 static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
@@ -375,7 +376,7 @@ retry:
  * pvrdma_poll_cq - poll for work completion queue entries
  * @ibcq: completion queue
  * @num_entries: the maximum number of entries
- * @entry: pointer to work completion array
+ * @wc: pointer to work completion array
  *
  * @return: number of polled completion entries
  */
index 7944c58..ba43ad0 100644 (file)
@@ -182,17 +182,16 @@ int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
 int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
                                struct ib_umem *umem, u64 offset)
 {
+       struct ib_block_iter biter;
        u64 i = offset;
        int ret = 0;
-       struct sg_dma_page_iter sg_iter;
 
        if (offset >= pdir->npages)
                return -EINVAL;
 
-       for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
-               dma_addr_t addr = sg_page_iter_dma_address(&sg_iter);
-
-               ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
+       rdma_umem_for_each_dma_block (umem, &biter, PAGE_SIZE) {
+               ret = pvrdma_page_dir_insert_dma(
+                       pdir, i, rdma_block_iter_dma_address(&biter));
                if (ret)
                        goto exit;
 
index 77a010e..e80848b 100644 (file)
@@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                return ERR_CAST(umem);
        }
 
-       npages = ib_umem_num_pages(umem);
+       npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
        if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
                         npages);
@@ -270,6 +270,7 @@ freemr:
 /**
  * pvrdma_dereg_mr - deregister a memory region
  * @ibmr: memory region
+ * @udata: pointer to user data
  *
  * @return: 0 on success.
  */
index 9a8f2a9..428256c 100644 (file)
@@ -232,8 +232,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
        switch (init_attr->qp_type) {
        case IB_QPT_GSI:
                if (init_attr->port_num == 0 ||
-                   init_attr->port_num > pd->device->phys_port_cnt ||
-                   udata) {
+                   init_attr->port_num > pd->device->phys_port_cnt) {
                        dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
                        ret = -EINVAL;
                        goto err_qp;
@@ -298,9 +297,11 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                                goto err_qp;
                        }
 
-                       qp->npages_send = ib_umem_page_count(qp->sumem);
+                       qp->npages_send =
+                               ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
                        if (!is_srq)
-                               qp->npages_recv = ib_umem_page_count(qp->rumem);
+                               qp->npages_recv = ib_umem_num_dma_blocks(
+                                       qp->rumem, PAGE_SIZE);
                        else
                                qp->npages_recv = 0;
                        qp->npages = qp->npages_send + qp->npages_recv;
index d330dec..082208f 100644 (file)
@@ -90,7 +90,7 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
 
 /**
  * pvrdma_create_srq - create shared receive queue
- * @pd: protection domain
+ * @ibsrq: the IB shared receive queue
  * @init_attr: shared receive queue attributes
  * @udata: user data
  *
@@ -152,7 +152,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
                goto err_srq;
        }
 
-       srq->npages = ib_umem_page_count(srq->umem);
+       srq->npages = ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE);
 
        if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                dev_warn(&dev->pdev->dev,
@@ -240,7 +240,7 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
  *
  * @return: 0 for success.
  */
-void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
 {
        struct pvrdma_srq *vsrq = to_vsrq(srq);
        union pvrdma_cmd_req req;
@@ -259,6 +259,7 @@ void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
                         ret);
 
        pvrdma_free_srq(dev, vsrq);
+       return 0;
 }
 
 /**
index ccbded2..fc412cb 100644 (file)
@@ -479,9 +479,9 @@ err:
  * @pd: the protection domain to be released
  * @udata: user data or null for kernel object
  *
- * @return: 0 on success, otherwise errno.
+ * @return: Always 0
  */
-void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        struct pvrdma_dev *dev = to_vdev(pd->device);
        union pvrdma_cmd_req req = {};
@@ -498,14 +498,14 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
                         ret);
 
        atomic_dec(&dev->num_pds);
+       return 0;
 }
 
 /**
  * pvrdma_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- * @udata: user data blob
- * @flags: create address handle flags (see enum rdma_create_ah_flags)
+ * @ibah: the IB address handle
+ * @init_attr: the attributes of the AH
+ * @udata: pointer to user data
  *
  * @return: 0 on success, otherwise errno.
  */
@@ -548,9 +548,10 @@ int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
  * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
  *
  */
-void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
 {
        struct pvrdma_dev *dev = to_vdev(ah->device);
 
        atomic_dec(&dev->num_ahs);
+       return 0;
 }
index 699b208..f0e5ffb 100644 (file)
@@ -176,7 +176,7 @@ struct pvrdma_port_attr {
        u8                      subnet_timeout;
        u8                      init_type_reply;
        u8                      active_width;
-       u8                      active_speed;
+       u16                     active_speed;
        u8                      phys_state;
        u8                      reserved[2];
 };
@@ -399,7 +399,7 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
 void pvrdma_dealloc_ucontext(struct ib_ucontext *context);
 int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
 struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                 u64 virt_addr, int access_flags,
@@ -411,19 +411,19 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                     int sg_nents, unsigned int *sg_offset);
 int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                     struct ib_udata *udata);
-void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
 int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
                     struct ib_udata *udata);
-void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
 
 int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
                      struct ib_udata *udata);
 int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
 
 struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                               struct ib_qp_init_attr *init_attr,
index 75a04b1..b938c4f 100644 (file)
@@ -132,7 +132,7 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
  *
  * Return: 0 on success
  */
-void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
 {
        struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
        struct rvt_ah *ah = ibah_to_rvtah(ibah);
@@ -143,6 +143,7 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
        rdma_destroy_ah_attr(&ah->attr);
+       return 0;
 }
 
 /**
index 40b7123..5a85edd 100644 (file)
@@ -52,7 +52,7 @@
 
 int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
                  struct ib_udata *udata);
-void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
 int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
 int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
 
index 04d2e72..19248be 100644 (file)
@@ -315,7 +315,7 @@ bail_wc:
  *
  * Called by ib_destroy_cq() in the generic verbs code.
  */
-void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_dev_info *rdi = cq->rdi;
@@ -328,6 +328,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
                kref_put(&cq->ip->ref, rvt_release_mmap_info);
        else
                vfree(cq->kqueue);
+       return 0;
 }
 
 /**
index 5e26a2e..feb01e7 100644 (file)
@@ -53,7 +53,7 @@
 
 int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                  struct ib_udata *udata);
-void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
 int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
index a403718..01b7abf 100644 (file)
@@ -95,11 +95,12 @@ bail:
  *
  * Return: always 0
  */
-void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
 
        spin_lock(&dev->n_pds_lock);
        dev->n_pds_allocated--;
        spin_unlock(&dev->n_pds_lock);
+       return 0;
 }
index 71ba76d..06a6a38 100644 (file)
@@ -51,6 +51,6 @@
 #include <rdma/rdma_vt.h>
 
 int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
 
 #endif          /* DEF_RDMAVTPD_H */
index f547c11..64d98bf 100644 (file)
@@ -332,7 +332,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
  * @ibsrq: srq object to destroy
  *
  */
-void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
        struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
        struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
@@ -343,4 +343,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
        if (srq->ip)
                kref_put(&srq->ip->ref, rvt_release_mmap_info);
        kvfree(srq->rq.kwq);
+       return 0;
 }
index 6427d7d..d5a1a05 100644 (file)
@@ -56,6 +56,6 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                   enum ib_srq_attr_mask attr_mask,
                   struct ib_udata *udata);
 int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 
 #endif          /* DEF_RVTSRQ_H */
index f904bb3..2d534c4 100644 (file)
@@ -95,9 +95,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
        if (!rdi)
                return rdi;
 
-       rdi->ports = kcalloc(nports,
-                            sizeof(struct rvt_ibport **),
-                            GFP_KERNEL);
+       rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL);
        if (!rdi->ports)
                ib_dealloc_device(&rdi->ibdev);
 
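
The kcalloc() change above switches to the sizeof(*ptr) idiom: the element size is taken from the pointer being assigned, so it stays correct if the element type ever changes, whereas sizeof(struct rvt_ibport **) only gave the right size by coincidence of pointer sizes. A small illustration with hypothetical types:

	#include <linux/errno.h>
	#include <linux/slab.h>

	struct item;

	struct table {
		struct item **slots;	/* array of pointers */
	};

	/* sizeof(*t->slots) is the size of one element of the array being
	 * allocated, whatever type slots happens to have.
	 */
	static int table_init(struct table *t, unsigned int n)
	{
		t->slots = kcalloc(n, sizeof(*t->slots), GFP_KERNEL);
		return t->slots ? 0 : -ENOMEM;
	}
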
index 77f2c7c..95f0de0 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <rdma/rdma_netlink.h>
@@ -279,6 +252,12 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
        struct rxe_dev *exists;
        int err = 0;
 
+       if (is_vlan_dev(ndev)) {
+               pr_err("rxe creation allowed on top of a real device only\n");
+               err = -EPERM;
+               goto err;
+       }
+
        exists = rxe_get_dev_from_net(ndev);
        if (exists) {
                ib_device_put(&exists->ib_dev);
@@ -305,13 +284,6 @@ static int __init rxe_module_init(void)
 {
        int err;
 
-       /* initialize slab caches for managed objects */
-       err = rxe_cache_init();
-       if (err) {
-               pr_err("unable to init object pools\n");
-               return err;
-       }
-
        err = rxe_net_init();
        if (err)
                return err;
@@ -327,7 +299,6 @@ static void __exit rxe_module_exit(void)
        rdma_link_unregister(&rxe_link_ops);
        ib_unregister_driver(RDMA_DRIVER_RXE);
        rxe_net_exit();
-       rxe_cache_exit();
 
        rxe_initialized = false;
        pr_info("unloaded\n");
index cae1b0a..623fd17 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_H
index 81ee756..38021e2 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *             - Redistributions of source code must retain the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer.
- *
- *             - Redistributions in binary form must reproduce the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer in the documentation and/or other materials
- *               provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
index 7b4df00..0a1e639 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
@@ -690,9 +663,8 @@ int rxe_completer(void *arg)
                         */
 
                        /* there is nothing to retry in this case */
-                       if (!wqe || (wqe->state == wqe_state_posted)) {
+                       if (!wqe || (wqe->state == wqe_state_posted))
                                goto exit;
-                       }
 
                        /* if we've started a retry, don't start another
                         * retry sequence, unless this is a timeout.
index ad30901..43394c3 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 #include <linux/vmalloc.h>
 #include "rxe.h"
@@ -66,9 +39,9 @@ err1:
        return -EINVAL;
 }
 
-static void rxe_send_complete(unsigned long data)
+static void rxe_send_complete(struct tasklet_struct *t)
 {
-       struct rxe_cq *cq = (struct rxe_cq *)data;
+       struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
        unsigned long flags;
 
        spin_lock_irqsave(&cq->cq_lock, flags);
@@ -107,7 +80,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
 
        cq->is_dying = false;
 
-       tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
+       tasklet_setup(&cq->comp_task, rxe_send_complete);
 
        spin_lock_init(&cq->cq_lock);
        cq->ibcq.cqe = cqe;
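
The conversion above moves the completion tasklet to the tasklet_setup()/from_tasklet() API: the callback receives the tasklet_struct pointer and recovers its container via container_of() rather than a cast unsigned long. A minimal sketch with hypothetical names:

	#include <linux/interrupt.h>

	struct my_cq {
		struct tasklet_struct comp_task;
		bool armed;
	};

	/* The callback maps the tasklet pointer back to its enclosing
	 * object; no (unsigned long) casting is needed.
	 */
	static void my_cq_complete(struct tasklet_struct *t)
	{
		struct my_cq *cq = from_tasklet(cq, t, comp_task);

		cq->armed = false;
	}

	static void my_cq_init(struct my_cq *cq)
	{
		tasklet_setup(&cq->comp_task, my_cq_complete);
	}
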
index ce00366..3b483b7 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_HDR_H
index 636edb5..ac9154f 100644 (file)
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
index 72c0d63..49ee6f9 100644 (file)
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_HW_COUNTERS_H
index 39e0be3..66b2aad 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
index 39dc3bf..0d75876 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_LOC_H
index 522a794..c02315a 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *             - Redistributions of source code must retain the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer.
- *
- *             - Redistributions in binary form must reproduce the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer in the documentation and/or other materials
- *               provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
index 7887f62..035f226 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/module.h>
index ce24144..d2ce852 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
@@ -79,13 +52,8 @@ static void rxe_mem_init(int access, struct rxe_mem *mem)
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
 
-       if (mem->pelem.pool->type == RXE_TYPE_MR) {
-               mem->ibmr.lkey          = lkey;
-               mem->ibmr.rkey          = rkey;
-       }
-
-       mem->lkey               = lkey;
-       mem->rkey               = rkey;
+       mem->ibmr.lkey          = lkey;
+       mem->ibmr.rkey          = rkey;
        mem->state              = RXE_MEM_STATE_INVALID;
        mem->type               = RXE_MEM_TYPE_NONE;
        mem->map_shift          = ilog2(RXE_BUF_PER_MAP);
@@ -149,7 +117,7 @@ void rxe_mem_init_dma(struct rxe_pd *pd,
 {
        rxe_mem_init(access, mem);
 
-       mem->pd                 = pd;
+       mem->ibmr.pd            = &pd->ibpd;
        mem->access             = access;
        mem->state              = RXE_MEM_STATE_VALID;
        mem->type               = RXE_MEM_TYPE_DMA;
@@ -218,7 +186,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
                }
        }
 
-       mem->pd                 = pd;
+       mem->ibmr.pd            = &pd->ibpd;
        mem->umem               = umem;
        mem->access             = access;
        mem->length             = length;
@@ -248,7 +216,7 @@ int rxe_mem_init_fast(struct rxe_pd *pd,
        if (err)
                goto err1;
 
-       mem->pd                 = pd;
+       mem->ibmr.pd            = &pd->ibpd;
        mem->max_buf            = max_pages;
        mem->state              = RXE_MEM_STATE_FREE;
        mem->type               = RXE_MEM_TYPE_MR;
@@ -368,7 +336,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                memcpy(dest, src, length);
 
                if (crcp)
-                       *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+                       *crcp = rxe_crc32(to_rdev(mem->ibmr.device),
                                        *crcp, dest, length);
 
                return 0;
@@ -402,7 +370,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                memcpy(dest, src, bytes);
 
                if (crcp)
-                       crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+                       crc = rxe_crc32(to_rdev(mem->ibmr.device),
                                        crc, dest, bytes);
 
                length  -= bytes;
@@ -575,9 +543,9 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
        if (!mem)
                return NULL;
 
-       if (unlikely((type == lookup_local && mem->lkey != key) ||
-                    (type == lookup_remote && mem->rkey != key) ||
-                    mem->pd != pd ||
+       if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
+                    (type == lookup_remote && mr_rkey(mem) != key) ||
+                    mr_pd(mem) != pd ||
                     (access && !(access & mem->access)) ||
                     mem->state != RXE_MEM_STATE_VALID)) {
                rxe_drop_ref(mem);
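
The lookup above reads MR identity through the embedded struct ib_mr instead of duplicated lkey/rkey/pd fields. The mr_pd()/mr_lkey()/mr_rkey() helpers it calls are introduced elsewhere in this series; presumably they are thin wrappers along these lines:

	/* Hedged sketch of the accessors (the real definitions live in the
	 * rxe headers): everything is derived from mem->ibmr.
	 */
	static inline struct rxe_pd *mr_pd(struct rxe_mem *mr)
	{
		return to_rpd(mr->ibmr.pd);
	}

	static inline u32 mr_lkey(struct rxe_mem *mr)
	{
		return mr->ibmr.lkey;
	}

	static inline u32 mr_rkey(struct rxe_mem *mr)
	{
		return mr->ibmr.rkey;
	}
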
index 0c38086..31b93e7 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
@@ -120,7 +93,7 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
        ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
                                               recv_sockets.sk6->sk, &fl6,
                                               NULL);
-       if (unlikely(IS_ERR(ndst))) {
+       if (IS_ERR(ndst)) {
                pr_err_ratelimited("no route to %pI6\n", daddr);
                return NULL;
        }
index 2ca71d3..45d80d0 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_NET_H
index 4cf1106..0cb4b01 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <rdma/ib_pack.h>
index 307604e..1041ac9 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_OPCODE_H
index 2f381ae..25ab50d 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_PARAM_H
index fbcbac5..b374eb5 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *             - Redistributions of source code must retain the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer.
- *
- *             - Redistributions in binary form must reproduce the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer in the documentation and/or other materials
- *               provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
@@ -110,62 +83,6 @@ static inline const char *pool_name(struct rxe_pool *pool)
        return rxe_type_info[pool->type].name;
 }
 
-static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
-{
-       return rxe_type_info[pool->type].cache;
-}
-
-static void rxe_cache_clean(size_t cnt)
-{
-       int i;
-       struct rxe_type_info *type;
-
-       for (i = 0; i < cnt; i++) {
-               type = &rxe_type_info[i];
-               if (!(type->flags & RXE_POOL_NO_ALLOC)) {
-                       kmem_cache_destroy(type->cache);
-                       type->cache = NULL;
-               }
-       }
-}
-
-int rxe_cache_init(void)
-{
-       int err;
-       int i;
-       size_t size;
-       struct rxe_type_info *type;
-
-       for (i = 0; i < RXE_NUM_TYPES; i++) {
-               type = &rxe_type_info[i];
-               size = ALIGN(type->size, RXE_POOL_ALIGN);
-               if (!(type->flags & RXE_POOL_NO_ALLOC)) {
-                       type->cache =
-                               kmem_cache_create(type->name, size,
-                                                 RXE_POOL_ALIGN,
-                                                 RXE_POOL_CACHE_FLAGS, NULL);
-                       if (!type->cache) {
-                               pr_err("Unable to init kmem cache for %s\n",
-                                      type->name);
-                               err = -ENOMEM;
-                               goto err1;
-                       }
-               }
-       }
-
-       return 0;
-
-err1:
-       rxe_cache_clean(i);
-
-       return err;
-}
-
-void rxe_cache_exit(void)
-{
-       rxe_cache_clean(RXE_NUM_TYPES);
-}
-
 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
 {
        int err = 0;
@@ -406,7 +323,7 @@ void *rxe_alloc(struct rxe_pool *pool)
        if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
                goto out_cnt;
 
-       elem = kmem_cache_zalloc(pool_cache(pool),
+       elem = kzalloc(rxe_type_info[pool->type].size,
                                 (pool->flags & RXE_POOL_ATOMIC) ?
                                 GFP_ATOMIC : GFP_KERNEL);
        if (!elem)
@@ -468,7 +385,7 @@ void rxe_elem_release(struct kref *kref)
                pool->cleanup(elem);
 
        if (!(pool->flags & RXE_POOL_NO_ALLOC))
-               kmem_cache_free(pool_cache(pool), elem);
+               kfree(elem);
        atomic_dec(&pool->num_elem);
        ib_device_put(&pool->rxe->ib_dev);
        rxe_pool_put(pool);
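
With the per-type kmem caches removed, pool objects come straight from the generic slab allocator, sized by the per-type size already kept in rxe_type_info, and are released with a plain kfree(). A reduced sketch of the allocate/release pair (names are illustrative):

	#include <linux/slab.h>

	/* Allocation honours the pool's atomic flag; release needs no
	 * per-type cache lookup anymore.
	 */
	static void *pool_elem_alloc(size_t size, bool atomic)
	{
		return kzalloc(size, atomic ? GFP_ATOMIC : GFP_KERNEL);
	}

	static void pool_elem_free(void *elem)
	{
		kfree(elem);
	}
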
index 2f2cff1..432745f 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *             - Redistributions of source code must retain the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer.
- *
- *             - Redistributions in binary form must reproduce the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer in the documentation and/or other materials
- *               provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_POOL_H
@@ -69,7 +42,6 @@ struct rxe_type_info {
        u32                     min_index;
        size_t                  key_offset;
        size_t                  key_size;
-       struct kmem_cache       *cache;
 };
 
 extern struct rxe_type_info rxe_type_info[];
@@ -113,12 +85,6 @@ struct rxe_pool {
        size_t                  key_size;
 };
 
-/* initialize slab caches for managed objects */
-int rxe_cache_init(void);
-
-/* cleanup slab caches for managed objects */
-void rxe_cache_exit(void);
-
 /* initialize a pool of objects with given limit on
  * number of elements. gets parameters from rxe_type_info
  * pool elements will be allocated out of a slab cache
index 6c11c3a..656a5b4 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *             - Redistributions of source code must retain the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer.
- *
- *             - Redistributions in binary form must reproduce the above
- *               copyright notice, this list of conditions and the following
- *               disclaimer in the documentation and/or other materials
- *               provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
@@ -628,9 +601,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
        if (mask & IB_QP_QKEY)
                qp->attr.qkey = attr->qkey;
 
-       if (mask & IB_QP_AV) {
+       if (mask & IB_QP_AV)
                rxe_init_av(&attr->ah_attr, &qp->pri_av);
-       }
 
        if (mask & IB_QP_ALT_PATH) {
                rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
index 245040c..fa69241 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/vmalloc.h>
index 8ef17d6..7d434a6 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_QUEUE_H
index 7e123d3..11f3daf 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
@@ -260,6 +233,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
        struct rxe_mc_elem *mce;
        struct rxe_qp *qp;
        union ib_gid dgid;
+       struct sk_buff *per_qp_skb;
+       struct rxe_pkt_info *per_qp_pkt;
        int err;
 
        if (skb->protocol == htons(ETH_P_IP))
@@ -288,26 +263,41 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
                if (err)
                        continue;
 
-               /* if *not* the last qp in the list
-                * increase the users of the skb then post to the next qp
+               /* for all but the last qp create a new clone of the
+                * skb and pass to the qp.
                 */
                if (mce->qp_list.next != &mcg->qp_list)
-                       skb_get(skb);
+                       per_qp_skb = skb_clone(skb, GFP_ATOMIC);
+               else
+                       per_qp_skb = skb;
 
-               pkt->qp = qp;
+               per_qp_pkt = SKB_TO_PKT(per_qp_skb);
+               per_qp_pkt->qp = qp;
                rxe_add_ref(qp);
-               rxe_rcv_pkt(pkt, skb);
+               rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
        }
 
        spin_unlock_bh(&mcg->mcg_lock);
 
        rxe_drop_ref(mcg);      /* drop ref from rxe_pool_get_key. */
 
+       return;
+
 err1:
        kfree_skb(skb);
 }
 
-static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
+/**
+ * rxe_chk_dgid - validate destination IP address
+ * @rxe: rxe device that received packet
+ * @skb: the received packet buffer
+ *
+ * Accept any loopback packets
+ * Extract IP address from packet and
+ * Accept if multicast packet
+ * Accept if matches an SGID table entry
+ */
+static int rxe_chk_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
 {
        struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
        const struct ib_gid_attr *gid_attr;
@@ -325,6 +315,9 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
                pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
        }
 
+       if (rdma_is_multicast_addr((struct in6_addr *)pdgid))
+               return 0;
+
        gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid,
                                         IB_GID_TYPE_ROCE_UDP_ENCAP,
                                         1, skb->dev);
@@ -349,8 +342,8 @@ void rxe_rcv(struct sk_buff *skb)
        if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
                goto drop;
 
-       if (rxe_match_dgid(rxe, skb) < 0) {
-               pr_warn_ratelimited("failed matching dgid\n");
+       if (rxe_chk_dgid(rxe, skb) < 0) {
+               pr_warn_ratelimited("failed checking dgid\n");
                goto drop;
        }
 
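
Two points in the hunks above: each multicast consumer now gets its own skb_clone() (only the last QP receives the original), so the per-packet state kept in skb->cb is no longer shared and overwritten between QPs, and the renamed rxe_chk_dgid() accepts multicast destinations before consulting the GID table. A compressed sketch of the clone-per-consumer step, with hypothetical names:

	#include <linux/skbuff.h>

	/* Every consumer but the last gets a private clone so the metadata
	 * in skb->cb stays per-delivery; the original goes to the last one.
	 */
	static struct sk_buff *next_delivery_skb(struct sk_buff *skb, bool last)
	{
		return last ? skb : skb_clone(skb, GFP_ATOMIC);
	}
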
index 34df2b5..af3923b 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
@@ -644,8 +617,8 @@ next_wqe:
 
                        rmr->state = RXE_MEM_STATE_VALID;
                        rmr->access = wqe->wr.wr.reg.access;
-                       rmr->lkey = wqe->wr.wr.reg.key;
-                       rmr->rkey = wqe->wr.wr.reg.key;
+                       rmr->ibmr.lkey = wqe->wr.wr.reg.key;
+                       rmr->ibmr.rkey = wqe->wr.wr.reg.key;
                        rmr->iova = wqe->wr.wr.reg.mr->iova;
                        wqe->state = wqe_state_done;
                        wqe->status = IB_WC_SUCCESS;
index c4a8195..c7e3b6a 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
index d845943..41b0d1e 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/vmalloc.h>
index 2af31d4..666202d 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include "rxe.h"
@@ -78,6 +51,12 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
                return -EINVAL;
        }
 
+       if (is_vlan_dev(ndev)) {
+               pr_err("rxe creation allowed on top of a real device only\n");
+               err = -EPERM;
+               goto err;
+       }
+
        exists = rxe_get_dev_from_net(ndev);
        if (exists) {
                ib_device_put(&exists->ib_dev);
index ecdac3f..6951fdc 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/kernel.h>
@@ -55,12 +28,12 @@ int __rxe_do_task(struct rxe_task *task)
  * a second caller finds the task already running
  * but looks just after the last call to func
  */
-void rxe_do_task(unsigned long data)
+void rxe_do_task(struct tasklet_struct *t)
 {
        int cont;
        int ret;
        unsigned long flags;
-       struct rxe_task *task = (struct rxe_task *)data;
+       struct rxe_task *task = from_tasklet(task, t, tasklet);
 
        spin_lock_irqsave(&task->state_lock, flags);
        switch (task->state) {
@@ -123,7 +96,7 @@ int rxe_init_task(void *obj, struct rxe_task *task,
        snprintf(task->name, sizeof(task->name), "%s", name);
        task->destroyed = false;
 
-       tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
+       tasklet_setup(&task->tasklet, rxe_do_task);
 
        task->state = TASK_STATE_START;
        spin_lock_init(&task->state_lock);
@@ -159,7 +132,7 @@ void rxe_run_task(struct rxe_task *task, int sched)
        if (sched)
                tasklet_schedule(&task->tasklet);
        else
-               rxe_do_task((unsigned long)task);
+               rxe_do_task(&task->tasklet);
 }
 
 void rxe_disable_task(struct rxe_task *task)
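
The rxe_task hunks above follow the generic tasklet API conversion: the callback now receives the tasklet_struct itself and recovers its container with from_tasklet(), a container_of() wrapper, instead of casting an unsigned long cookie. A minimal sketch of the same pattern, assuming a hypothetical driver structure my_task that embeds the tasklet:

#include <linux/interrupt.h>

struct my_task {
	struct tasklet_struct tasklet;
	int pending;
};

static void my_task_func(struct tasklet_struct *t)
{
	/* from_tasklet() is container_of(): recover the embedding struct */
	struct my_task *task = from_tasklet(task, t, tasklet);

	task->pending = 0;		/* deferred work goes here */
}

static void my_task_init(struct my_task *task)
{
	task->pending = 1;
	tasklet_setup(&task->tasklet, my_task_func);
	tasklet_schedule(&task->tasklet);
}
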
index 08ff42d..11d183f 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_TASK_H
@@ -60,7 +33,7 @@ struct rxe_task {
 /*
  * init rxe_task structure
  *     arg  => parameter to pass to fcn
- *     fcn  => function to call until it returns != 0
+ *     func => function to call until it returns != 0
  */
 int rxe_init_task(void *obj, struct rxe_task *task,
                  void *arg, int (*func)(void *), char *name);
@@ -80,7 +53,7 @@ int __rxe_do_task(struct rxe_task *task);
  * work to do someone must reschedule the task before
  * leaving
  */
-void rxe_do_task(unsigned long data);
+void rxe_do_task(struct tasklet_struct *t);
 
 /* run a task, else schedule it to run as a tasklet, The decision
  * to run or schedule tasklet is based on the parameter sched.
index 8522e9a..ba8faa3 100644 (file)
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/dma-mapping.h>
@@ -175,11 +148,12 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
        return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
 }
 
-static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
        struct rxe_pd *pd = to_rpd(ibpd);
 
        rxe_drop_ref(pd);
+       return 0;
 }
 
 static int rxe_create_ah(struct ib_ah *ibah,
@@ -227,11 +201,12 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
        return 0;
 }
 
-static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
+static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
 {
        struct rxe_ah *ah = to_rah(ibah);
 
        rxe_drop_ref(ah);
+       return 0;
 }
 
 static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
@@ -365,7 +340,7 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
        return 0;
 }
 
-static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
        struct rxe_srq *srq = to_rsrq(ibsrq);
 
@@ -374,6 +349,7 @@ static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 
        rxe_drop_ref(srq->pd);
        rxe_drop_ref(srq);
+       return 0;
 }
 
 static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
@@ -803,13 +779,14 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
        return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
 }
 
-static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
        struct rxe_cq *cq = to_rcq(ibcq);
 
        rxe_cq_disable(cq);
 
        rxe_drop_ref(cq);
+       return 0;
 }
 
 static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
@@ -944,7 +921,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
        struct rxe_mem *mr = to_rmr(ibmr);
 
        mr->state = RXE_MEM_STATE_ZOMBIE;
-       rxe_drop_ref(mr->pd);
+       rxe_drop_ref(mr_pd(mr));
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
        return 0;
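
These rxe hunks track the ib_device_ops change (further down, in include/rdma/ib_verbs.h) that turns the dealloc/destroy callbacks from void into int so drivers can report a failed HW destroy instead of silently leaking the object. A minimal sketch of the new convention; my_cq, my_hw_destroy_cq and the sw_queue field are purely illustrative:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct my_cq {
	struct ib_cq ibcq;		/* core-allocated, embedded first */
	void *sw_queue;			/* illustrative driver state */
};

/* stands in for a firmware/HW teardown command */
static int my_hw_destroy_cq(struct my_cq *cq)
{
	return 0;
}

static int my_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct my_cq *cq = container_of(ibcq, struct my_cq, ibcq);
	int err;

	err = my_hw_destroy_cq(cq);
	if (err)
		return err;	/* core keeps the object; nothing freed yet */

	kfree(cq->sw_queue);
	return 0;		/* core may now free the embedding ib_cq */
}
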
index c664c7f..3414b34 100644 (file)
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *        Redistribution and use in source and binary forms, with or
- *        without modification, are permitted provided that the following
- *        conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #ifndef RXE_VERBS_H
@@ -322,12 +295,8 @@ struct rxe_mem {
                struct ib_mw            ibmw;
        };
 
-       struct rxe_pd           *pd;
        struct ib_umem          *umem;
 
-       u32                     lkey;
-       u32                     rkey;
-
        enum rxe_mem_state      state;
        enum rxe_mem_type       type;
        u64                     va;
@@ -465,6 +434,21 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
        return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
 }
 
+static inline struct rxe_pd *mr_pd(struct rxe_mem *mr)
+{
+       return to_rpd(mr->ibmr.pd);
+}
+
+static inline u32 mr_lkey(struct rxe_mem *mr)
+{
+       return mr->ibmr.lkey;
+}
+
+static inline u32 mr_rkey(struct rxe_mem *mr)
+{
+       return mr->ibmr.rkey;
+}
+
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
 
 void rxe_mc_cleanup(struct rxe_pool_entry *arg);
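
With the duplicated pd/lkey/rkey fields removed from struct rxe_mem, callers go through the new accessors, which simply read the embedded struct ib_mr. A short hedged sketch of such a caller; only rxe_mem, mr_rkey() and mr_pd() come from the hunks above, my_check_rkey is hypothetical:

static int my_check_rkey(struct rxe_mem *mr, u32 rkey, struct rxe_pd *pd)
{
	/* previously mr->rkey and mr->pd; both now live only in mr->ibmr */
	if (mr_rkey(mr) != rkey)
		return -EINVAL;

	if (mr_pd(mr) != pd)
		return -EINVAL;

	return 0;
}
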
index adafa1b..7cf3242 100644 (file)
@@ -234,12 +234,13 @@ int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
        return 0;
 }
 
-void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
        struct siw_device *sdev = to_siw_dev(pd->device);
 
        siw_dbg_pd(pd, "free PD\n");
        atomic_dec(&sdev->num_pd);
+       return 0;
 }
 
 void siw_qp_get_ref(struct ib_qp *base_qp)
@@ -1055,7 +1056,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
        return rv > 0 ? 0 : rv;
 }
 
-void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
+int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
 {
        struct siw_cq *cq = to_siw_cq(base_cq);
        struct siw_device *sdev = to_siw_dev(base_cq->device);
@@ -1073,6 +1074,7 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
        atomic_dec(&sdev->num_cq);
 
        vfree(cq->queue);
+       return 0;
 }
 
 /*
@@ -1690,7 +1692,7 @@ int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs)
  * QP anymore - the code trusts the RDMA core environment to keep track
  * of QP references.
  */
-void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
+int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
 {
        struct siw_srq *srq = to_siw_srq(base_srq);
        struct siw_device *sdev = to_siw_dev(base_srq->device);
@@ -1702,6 +1704,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
                rdma_user_mmap_entry_remove(srq->srq_entry);
        vfree(srq->recvq);
        atomic_dec(&sdev->num_srq);
+       return 0;
 }
 
 /*
index d957227..6374545 100644 (file)
@@ -49,7 +49,7 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
 int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
                  union ib_gid *gid);
 int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
-void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
+int siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
 struct ib_qp *siw_create_qp(struct ib_pd *base_pd,
                            struct ib_qp_init_attr *attr,
                            struct ib_udata *udata);
@@ -62,7 +62,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
                  const struct ib_send_wr **bad_wr);
 int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
                     const struct ib_recv_wr **bad_wr);
-void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
+int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
 int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc);
 int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags);
 struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len,
@@ -78,7 +78,7 @@ int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *attr,
 int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr,
                   enum ib_srq_attr_mask mask, struct ib_udata *udata);
 int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr);
-void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
+int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
 int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
                      const struct ib_recv_wr **bad_wr);
 int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
index 7c41fb0..8f0b598 100644 (file)
@@ -1647,17 +1647,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
 void ipoib_cm_dev_cleanup(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
-       int ret;
 
        if (!priv->cm.srq)
                return;
 
        ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
 
-       ret = ib_destroy_srq(priv->cm.srq);
-       if (ret)
-               ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);
-
+       ib_destroy_srq(priv->cm.srq);
        priv->cm.srq = NULL;
        if (!priv->cm.srq_ring)
                return;
index 64c19f6..12ba7a0 100644 (file)
@@ -124,35 +124,14 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
        return 0;
 }
 
-static const struct seq_operations ipoib_mcg_seq_ops = {
+static const struct seq_operations ipoib_mcg_sops = {
        .start = ipoib_mcg_seq_start,
        .next  = ipoib_mcg_seq_next,
        .stop  = ipoib_mcg_seq_stop,
        .show  = ipoib_mcg_seq_show,
 };
 
-static int ipoib_mcg_open(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq;
-       int ret;
-
-       ret = seq_open(file, &ipoib_mcg_seq_ops);
-       if (ret)
-               return ret;
-
-       seq = file->private_data;
-       seq->private = inode->i_private;
-
-       return 0;
-}
-
-static const struct file_operations ipoib_mcg_fops = {
-       .owner   = THIS_MODULE,
-       .open    = ipoib_mcg_open,
-       .read    = seq_read,
-       .llseek  = seq_lseek,
-       .release = seq_release
-};
+DEFINE_SEQ_ATTRIBUTE(ipoib_mcg);
 
 static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos)
 {
@@ -229,35 +208,14 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
        return 0;
 }
 
-static const struct seq_operations ipoib_path_seq_ops = {
+static const struct seq_operations ipoib_path_sops = {
        .start = ipoib_path_seq_start,
        .next  = ipoib_path_seq_next,
        .stop  = ipoib_path_seq_stop,
        .show  = ipoib_path_seq_show,
 };
 
-static int ipoib_path_open(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq;
-       int ret;
-
-       ret = seq_open(file, &ipoib_path_seq_ops);
-       if (ret)
-               return ret;
-
-       seq = file->private_data;
-       seq->private = inode->i_private;
-
-       return 0;
-}
-
-static const struct file_operations ipoib_path_fops = {
-       .owner   = THIS_MODULE,
-       .open    = ipoib_path_open,
-       .read    = seq_read,
-       .llseek  = seq_lseek,
-       .release = seq_release
-};
+DEFINE_SEQ_ATTRIBUTE(ipoib_path);
 
 void ipoib_create_debug_files(struct net_device *dev)
 {
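
DEFINE_SEQ_ATTRIBUTE(name) generates name_open() and name_fops from name_sops, copying inode->i_private into seq->private, which is exactly what the removed open helpers did by hand. A self-contained sketch under the assumption of a trivial one-record seq_file named my_list:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static void *my_list_seq_start(struct seq_file *file, loff_t *pos)
{
	return *pos == 0 ? file->private : NULL;	/* single record */
}

static void *my_list_seq_next(struct seq_file *file, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void my_list_seq_stop(struct seq_file *file, void *v)
{
}

static int my_list_seq_show(struct seq_file *file, void *v)
{
	seq_printf(file, "private=%p\n", v);
	return 0;
}

static const struct seq_operations my_list_sops = {
	.start = my_list_seq_start,
	.next  = my_list_seq_next,
	.stop  = my_list_seq_stop,
	.show  = my_list_seq_show,
};

/* generates my_list_open() and my_list_fops, wiring inode->i_private
 * into seq->private just like the removed open helpers did */
DEFINE_SEQ_ATTRIBUTE(my_list);

static void my_create_debugfs(struct dentry *dir, void *priv)
{
	debugfs_create_file("my_list", 0444, dir, priv, &my_list_fops);
}
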
index ab75b7f..96b6be5 100644 (file)
@@ -2477,6 +2477,8 @@ static struct net_device *ipoib_add_port(const char *format,
        /* call event handler to ensure pkey in sync */
        queue_work(ipoib_workqueue, &priv->flush_heavy);
 
+       ndev->rtnl_link_ops = ipoib_get_link_ops();
+
        result = register_netdev(ndev);
        if (result) {
                pr_warn("%s: couldn't register ipoib port %d; error %d\n",
index 38c984d..d5a90a6 100644 (file)
@@ -144,6 +144,16 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
        return 0;
 }
 
+static void ipoib_del_child_link(struct net_device *dev, struct list_head *head)
+{
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+       if (!priv->parent)
+               return;
+
+       unregister_netdevice_queue(dev, head);
+}
+
 static size_t ipoib_get_size(const struct net_device *dev)
 {
        return nla_total_size(2) +      /* IFLA_IPOIB_PKEY   */
@@ -158,6 +168,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
        .priv_size      = sizeof(struct ipoib_dev_priv),
        .setup          = ipoib_setup_common,
        .newlink        = ipoib_new_child_link,
+       .dellink        = ipoib_del_child_link,
        .changelink     = ipoib_changelink,
        .get_size       = ipoib_get_size,
        .fill_info      = ipoib_fill_info,
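
Setting ndev->rtnl_link_ops on the port and vlan devices and providing a .dellink hook lets rtnetlink (e.g. "ip link del") remove IPoIB child interfaces, while parent devices bail out early. A hedged sketch of that dellink pattern with illustrative names (my_priv, my_link_ops):

#include <linux/netdevice.h>
#include <net/rtnetlink.h>

struct my_priv {
	struct net_device *parent;	/* NULL on a parent device */
};

static void my_dellink(struct net_device *dev, struct list_head *head)
{
	struct my_priv *priv = netdev_priv(dev);

	if (!priv->parent)
		return;			/* parents are not deletable this way */

	unregister_netdevice_queue(dev, head);
}

static struct rtnl_link_ops my_link_ops __read_mostly = {
	.kind		= "my_link",
	.priv_size	= sizeof(struct my_priv),
	.dellink	= my_dellink,
};
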
index 3086560..4c50a87 100644 (file)
@@ -195,6 +195,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
        }
        priv = ipoib_priv(ndev);
 
+       ndev->rtnl_link_ops = ipoib_get_link_ops();
+
        result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
 
        if (result && ndev->reg_state == NETREG_UNINITIALIZED)
index 695f701..436e17f 100644 (file)
@@ -1141,12 +1141,7 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
         * multiple data-outs on the same command can arrive -
         * so post the buffer before hand
         */
-       rc = isert_post_recv(isert_conn, rx_desc);
-       if (rc) {
-               isert_err("ib_post_recv failed with %d\n", rc);
-               return rc;
-       }
-       return 0;
+       return isert_post_recv(isert_conn, rx_desc);
 }
 
 static int
@@ -1723,10 +1718,8 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
        int ret;
 
        ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
-       if (ret) {
-               isert_err("ib_post_recv failed with %d\n", ret);
+       if (ret)
                return ret;
-       }
 
        ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL);
        if (ret) {
@@ -2098,10 +2091,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                                   &isert_cmd->tx_desc.send_wr);
 
                rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
-               if (rc) {
-                       isert_err("ib_post_recv failed with %d\n", rc);
+               if (rc)
                        return rc;
-               }
 
                chain_wr = &isert_cmd->tx_desc.send_wr;
        }
index 298b747..ac4c49c 100644 (file)
@@ -312,7 +312,7 @@ static struct attribute *rtrs_clt_stats_attrs[] = {
        NULL
 };
 
-static struct attribute_group rtrs_clt_stats_attr_group = {
+static const struct attribute_group rtrs_clt_stats_attr_group = {
        .attrs = rtrs_clt_stats_attrs,
 };
 
@@ -388,7 +388,7 @@ static struct attribute *rtrs_clt_sess_attrs[] = {
        NULL,
 };
 
-static struct attribute_group rtrs_clt_sess_attr_group = {
+static const struct attribute_group rtrs_clt_sess_attr_group = {
        .attrs = rtrs_clt_sess_attrs,
 };
 
@@ -460,7 +460,7 @@ static struct attribute *rtrs_clt_attrs[] = {
        NULL,
 };
 
-static struct attribute_group rtrs_clt_attr_group = {
+static const struct attribute_group rtrs_clt_attr_group = {
        .attrs = rtrs_clt_attrs,
 };
 
index 0a93c87..b8e43dc 100644 (file)
@@ -115,7 +115,6 @@ struct rtrs_sess {
 
 /* rtrs information unit */
 struct rtrs_iu {
-       struct list_head        list;
        struct ib_cqe           cqe;
        dma_addr_t              dma_addr;
        void                    *buf;
index cf6a2be..07fbb06 100644 (file)
@@ -135,7 +135,7 @@ static struct attribute *rtrs_srv_sess_attrs[] = {
        NULL,
 };
 
-static struct attribute_group rtrs_srv_sess_attr_group = {
+static const struct attribute_group rtrs_srv_sess_attr_group = {
        .attrs = rtrs_srv_sess_attrs,
 };
 
@@ -148,7 +148,7 @@ static struct attribute *rtrs_srv_stats_attrs[] = {
        NULL,
 };
 
-static struct attribute_group rtrs_srv_stats_attr_group = {
+static const struct attribute_group rtrs_srv_stats_attr_group = {
        .attrs = rtrs_srv_stats_attrs,
 };
 
index 28f6414..d6f9360 100644 (file)
@@ -16,6 +16,7 @@
 #include "rtrs-srv.h"
 #include "rtrs-log.h"
 #include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
 
 MODULE_DESCRIPTION("RDMA Transport Server");
 MODULE_LICENSE("GPL");
@@ -31,6 +32,7 @@ MODULE_LICENSE("GPL");
 static struct rtrs_rdma_dev_pd dev_pd;
 static mempool_t *chunk_pool;
 struct class *rtrs_dev_class;
+static struct rtrs_srv_ib_ctx ib_ctx;
 
 static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
 static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
@@ -2042,6 +2044,70 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx)
        kfree(ctx);
 }
 
+static int rtrs_srv_add_one(struct ib_device *device)
+{
+       struct rtrs_srv_ctx *ctx;
+       int ret = 0;
+
+       mutex_lock(&ib_ctx.ib_dev_mutex);
+       if (ib_ctx.ib_dev_count)
+               goto out;
+
+       /*
+        * Since our CM IDs are NOT bound to any ib device we will create them
+        * only once
+        */
+       ctx = ib_ctx.srv_ctx;
+       ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
+       if (ret) {
+               /*
+                * We errored out here.
+                * According to the ib code, if we encounter an error here then the
+                * error code is ignored, and no more calls to our ops are made.
+                */
+               pr_err("Failed to initialize RDMA connection");
+               goto err_out;
+       }
+
+out:
+       /*
+        * Keep a track on the number of ib devices added
+        */
+       ib_ctx.ib_dev_count++;
+
+err_out:
+       mutex_unlock(&ib_ctx.ib_dev_mutex);
+       return ret;
+}
+
+static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
+{
+       struct rtrs_srv_ctx *ctx;
+
+       mutex_lock(&ib_ctx.ib_dev_mutex);
+       ib_ctx.ib_dev_count--;
+
+       if (ib_ctx.ib_dev_count)
+               goto out;
+
+       /*
+        * Since our CM IDs are NOT bound to any ib device we will remove them
+        * only once, when the last device is removed
+        */
+       ctx = ib_ctx.srv_ctx;
+       rdma_destroy_id(ctx->cm_id_ip);
+       rdma_destroy_id(ctx->cm_id_ib);
+
+out:
+       mutex_unlock(&ib_ctx.ib_dev_mutex);
+}
+
+static struct ib_client rtrs_srv_client = {
+       .name   = "rtrs_server",
+       .add    = rtrs_srv_add_one,
+       .remove = rtrs_srv_remove_one
+};
+
 /**
  * rtrs_srv_open() - open RTRS server context
  * @ops:               callback functions
@@ -2060,7 +2126,11 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
        if (!ctx)
                return ERR_PTR(-ENOMEM);
 
-       err = rtrs_srv_rdma_init(ctx, port);
+       mutex_init(&ib_ctx.ib_dev_mutex);
+       ib_ctx.srv_ctx = ctx;
+       ib_ctx.port = port;
+
+       err = ib_register_client(&rtrs_srv_client);
        if (err) {
                free_srv_ctx(ctx);
                return ERR_PTR(err);
@@ -2099,8 +2169,8 @@ static void close_ctx(struct rtrs_srv_ctx *ctx)
  */
 void rtrs_srv_close(struct rtrs_srv_ctx *ctx)
 {
-       rdma_destroy_id(ctx->cm_id_ip);
-       rdma_destroy_id(ctx->cm_id_ib);
+       ib_unregister_client(&rtrs_srv_client);
+       mutex_destroy(&ib_ctx.ib_dev_mutex);
        close_ctx(ctx);
        free_srv_ctx(ctx);
 }
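
rtrs-server now creates its CM IDs lazily through an ib_client, so rtrs_srv_open() can succeed before any RDMA device exists and the IDs are torn down when the last device goes away. A minimal, self-contained sketch of the ib_client pattern itself; names are illustrative, and a non-zero return from .add rejects that device:

#include <linux/atomic.h>
#include <linux/module.h>
#include <rdma/ib_verbs.h>

static atomic_t my_dev_count = ATOMIC_INIT(0);

static int my_add_one(struct ib_device *device)
{
	/* one-time setup when the first RDMA device appears */
	if (atomic_inc_return(&my_dev_count) == 1)
		pr_info("first ib_device added: %s\n", dev_name(&device->dev));
	return 0;			/* non-zero rejects this device */
}

static void my_remove_one(struct ib_device *device, void *client_data)
{
	/* one-time teardown when the last RDMA device goes away */
	if (atomic_dec_and_test(&my_dev_count))
		pr_info("last ib_device removed\n");
}

static struct ib_client my_client = {
	.name	= "my_client",
	.add	= my_add_one,
	.remove	= my_remove_one,
};

static int __init my_init(void)
{
	return ib_register_client(&my_client);
}

static void __exit my_exit(void)
{
	ib_unregister_client(&my_client);
}
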
index dc95b09..08b0b8a 100644 (file)
@@ -118,6 +118,13 @@ struct rtrs_srv_ctx {
        struct list_head srv_list;
 };
 
+struct rtrs_srv_ib_ctx {
+       struct rtrs_srv_ctx     *srv_ctx;
+       u16                     port;
+       struct mutex            ib_dev_mutex;
+       int                     ib_dev_count;
+};
+
 extern struct class *rtrs_dev_class;
 
 void close_sess(struct rtrs_srv_sess *sess);
index 1eef66e..cac8f08 100644 (file)
@@ -130,14 +130,6 @@ static int mlx5i_flash_device(struct net_device *netdev,
        return mlx5e_ethtool_flash_device(priv, flash);
 }
 
-enum mlx5_ptys_width {
-       MLX5_PTYS_WIDTH_1X      = 1 << 0,
-       MLX5_PTYS_WIDTH_2X      = 1 << 1,
-       MLX5_PTYS_WIDTH_4X      = 1 << 2,
-       MLX5_PTYS_WIDTH_8X      = 1 << 3,
-       MLX5_PTYS_WIDTH_12X     = 1 << 4,
-};
-
 static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
 {
        switch (width) {
@@ -174,24 +166,6 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
        }
 }
 
-static int mlx5i_get_port_settings(struct net_device *netdev,
-                                  u16 *ib_link_width_oper, u16 *ib_proto_oper)
-{
-       struct mlx5e_priv *priv    = mlx5i_epriv(netdev);
-       struct mlx5_core_dev *mdev = priv->mdev;
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
-       int ret;
-
-       ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
-       if (ret)
-               return ret;
-
-       *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
-       *ib_proto_oper      = MLX5_GET(ptys_reg, out, ib_proto_oper);
-
-       return 0;
-}
-
 static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
 {
        int rate, width;
@@ -209,11 +183,14 @@ static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
 static int mlx5i_get_link_ksettings(struct net_device *netdev,
                                    struct ethtool_link_ksettings *link_ksettings)
 {
+       struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
        u16 ib_link_width_oper;
        u16 ib_proto_oper;
        int speed, ret;
 
-       ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+       ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper,
+                                     1);
        if (ret)
                return ret;
 
index e4186e8..4bb2195 100644 (file)
@@ -154,24 +154,8 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
                                    sizeof(out), MLX5_REG_MLCR, 0, 1);
 }
 
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
-                                   u8 *link_width_oper, u8 local_port)
-{
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-       int err;
-
-       err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
-       if (err)
-               return err;
-
-       *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
-
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
-                                 u8 *proto_oper, u8 local_port)
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+                           u16 *proto_oper, u8 local_port)
 {
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        int err;
@@ -181,11 +165,12 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
        if (err)
                return err;
 
+       *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
        *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
 
        return 0;
 }
-EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+EXPORT_SYMBOL(mlx5_query_ib_port_oper);
 
 /* This function should be used after setting a port register only */
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
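
mlx5_query_ib_port_oper() folds the two removed single-field helpers into one PTYS register read that returns both the operational link width and protocol, now as u16. A hedged caller sketch using only the new signature; my_report_ib_link and the fixed port number 1 are illustrative:

#include <linux/mlx5/driver.h>
#include <linux/mlx5/port.h>

static int my_report_ib_link(struct mlx5_core_dev *mdev)
{
	u16 width_oper, proto_oper;
	int err;

	/* one PTYS query now yields both operational fields */
	err = mlx5_query_ib_port_oper(mdev, &width_oper, &proto_oper, 1);
	if (err)
		return err;

	pr_info("ib link: width_oper=0x%x proto_oper=0x%x\n",
		width_oper, proto_oper);
	return 0;
}
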
index a4bcde5..0df6e05 100644 (file)
@@ -504,7 +504,8 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
        dev->max_mw = 0;
        dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8);
        dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE;
-       dev->max_pkey = QED_RDMA_MAX_P_KEY;
+       if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+               dev->max_pkey = QED_RDMA_MAX_P_KEY;
 
        dev->max_srq = p_hwfn->p_rdma_info->num_srqs;
        dev->max_srq_wr = QED_RDMA_MAX_SRQ_WQE_ELEM;
@@ -1520,7 +1521,7 @@ qed_rdma_register_tid(void *rdma_cxt,
                  params->pbl_two_level);
 
        SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED,
-                 params->zbva);
+                 false);
 
        SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr);
 
@@ -1582,15 +1583,7 @@ qed_rdma_register_tid(void *rdma_cxt,
        p_ramrod->pd = cpu_to_le16(params->pd);
        p_ramrod->length_hi = (u8)(params->length >> 32);
        p_ramrod->length_lo = DMA_LO_LE(params->length);
-       if (params->zbva) {
-               /* Lower 32 bits of the registered MR address.
-                * In case of zero based MR, will hold FBO
-                */
-               p_ramrod->va.hi = 0;
-               p_ramrod->va.lo = cpu_to_le32(params->fbo);
-       } else {
-               DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
-       }
+       DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
        DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr);
 
        /* DIF */
index b9aa638..bedbb85 100644 (file)
@@ -1026,7 +1026,9 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu)
        args.u.mtu = new_mtu;
        args.func = &qede_update_mtu;
        qede_reload(edev, &args, false);
-
+#if IS_ENABLED(CONFIG_QED_RDMA)
+       qede_rdma_event_change_mtu(edev);
+#endif
        edev->ops->common->update_mtu(edev->cdev, new_mtu);
 
        return 0;
index 769ec2f..2f65980 100644 (file)
@@ -234,6 +234,15 @@ static void qede_rdma_changeaddr(struct qede_dev *edev)
                qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
 }
 
+static void qede_rdma_change_mtu(struct qede_dev *edev)
+{
+       if (qede_rdma_supported(edev)) {
+               if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+                       qedr_drv->notify(edev->rdma_info.qedr_dev,
+                                        QEDE_CHANGE_MTU);
+       }
+}
+
 static struct qede_rdma_event_work *
 qede_rdma_get_free_event_node(struct qede_dev *edev)
 {
@@ -287,6 +296,9 @@ static void qede_rdma_handle_event(struct work_struct *work)
        case QEDE_CHANGE_ADDR:
                qede_rdma_changeaddr(edev);
                break;
+       case QEDE_CHANGE_MTU:
+               qede_rdma_change_mtu(edev);
+               break;
        default:
                DP_NOTICE(edev, "Invalid rdma event %d", event);
        }
@@ -338,3 +350,8 @@ void qede_rdma_event_changeaddr(struct qede_dev *edev)
 {
        qede_rdma_add_event(edev, QEDE_CHANGE_ADDR);
 }
+
+void qede_rdma_event_change_mtu(struct qede_dev *edev)
+{
+       qede_rdma_add_event(edev, QEDE_CHANGE_MTU);
+}
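
The MTU change reaches the RDMA driver through the existing deferred-event machinery: qede_change_mtu() calls qede_rdma_event_change_mtu(), which queues QEDE_CHANGE_MTU for the worker that invokes qedr_drv->notify(). A hypothetical consumer-side sketch; only the enum value and the notify() shape come from these hunks, the handler body is illustrative:

#include <linux/qed/qede_rdma.h>	/* QEDE_CHANGE_MTU, struct qedr_dev */

static void my_rdma_notify(struct qedr_dev *dev, enum qede_rdma_event event)
{
	switch (event) {
	case QEDE_CHANGE_MTU:
		/* re-read the L2 device MTU and refresh any cached copy */
		break;
	default:
		break;
	}
}
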
index de1ffb4..651591a 100644 (file)
@@ -420,7 +420,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         reserved_at_1a[0x2];
        u8         ipsec_encrypt[0x1];
        u8         ipsec_decrypt[0x1];
-       u8         reserved_at_1e[0x2];
+       u8         sw_owner_v2[0x1];
+       u8         reserved_at_1f[0x1];
 
        u8         termination_table_raw_traffic[0x1];
        u8         reserved_at_21[0x1];
@@ -1430,7 +1431,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
        u8         log_bf_reg_size[0x5];
 
-       u8         reserved_at_270[0x8];
+       u8         reserved_at_270[0x6];
+       u8         lag_dct[0x2];
        u8         lag_tx_port_affinity[0x1];
        u8         reserved_at_279[0x2];
        u8         lag_master[0x1];
index 2d45a6a..23edd2d 100644 (file)
@@ -125,6 +125,14 @@ enum mlx5e_connector_type {
        MLX5E_CONNECTOR_TYPE_NUMBER,
 };
 
+enum mlx5_ptys_width {
+       MLX5_PTYS_WIDTH_1X      = 1 << 0,
+       MLX5_PTYS_WIDTH_2X      = 1 << 1,
+       MLX5_PTYS_WIDTH_4X      = 1 << 2,
+       MLX5_PTYS_WIDTH_8X      = 1 << 3,
+       MLX5_PTYS_WIDTH_12X     = 1 << 4,
+};
+
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 #define MLX5_GET_ETH_PROTO(reg, out, ext, field)       \
        (ext ? MLX5_GET(reg, out, ext_##field) :        \
@@ -133,10 +141,9 @@ enum mlx5e_connector_type {
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port);
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
-                                   u8 *link_width_oper, u8 local_port);
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
-                                 u8 *proto_oper, u8 local_port);
+
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+                           u16 *proto_oper, u8 local_port);
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
 int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
                               enum mlx5_port_status status);
index 93fcef1..ff3c48f 100644 (file)
@@ -3,6 +3,7 @@
 #define __LINUX_OVERFLOW_H
 
 #include <linux/compiler.h>
+#include <linux/limits.h>
 
 /*
  * In the fallback code below, we need to compute the minimum and
index f464d85..aeb242c 100644 (file)
@@ -242,10 +242,8 @@ struct qed_rdma_register_tid_in_params {
        bool pbl_two_level;
        u8 pbl_page_size_log;
        u8 page_size_log;
-       u32 fbo;
        u64 length;
        u64 vaddr;
-       bool zbva;
        bool phy_mr;
        bool dma_mr;
 
index 072da2f..0d5564a 100644 (file)
@@ -20,7 +20,8 @@ enum qede_rdma_event {
        QEDE_UP,
        QEDE_DOWN,
        QEDE_CHANGE_ADDR,
-       QEDE_CLOSE
+       QEDE_CLOSE,
+       QEDE_CHANGE_MTU,
 };
 
 struct qede_rdma_event_work {
@@ -54,6 +55,7 @@ void qede_rdma_dev_event_open(struct qede_dev *dev);
 void qede_rdma_dev_event_close(struct qede_dev *dev);
 void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
 void qede_rdma_event_changeaddr(struct qede_dev *edr);
+void qede_rdma_event_change_mtu(struct qede_dev *edev);
 
 #else
 static inline int qede_rdma_dev_add(struct qede_dev *dev,
index 66a8f36..bae29f5 100644 (file)
@@ -110,5 +110,8 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
                                            u8 port_num, int index);
 void rdma_put_gid_attr(const struct ib_gid_attr *attr);
 void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+ssize_t rdma_query_gid_table(struct ib_device *device,
+                            struct ib_uverbs_gid_entry *entries,
+                            size_t max_entries);
 
 #endif /* _IB_CACHE_H */
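
rdma_query_gid_table() is a new bulk query that fills a caller-supplied array of ib_uverbs_gid_entry; the ssize_t return suggests count-or-negative-errno semantics. A hedged sketch relying only on that contract, without touching the entry layout (my_dump_gid_count is illustrative; the entry type comes from the RDMA uapi headers):

#include <linux/slab.h>
#include <rdma/ib_cache.h>

static int my_dump_gid_count(struct ib_device *device, size_t max_entries)
{
	struct ib_uverbs_gid_entry *entries;
	ssize_t n;

	entries = kcalloc(max_entries, sizeof(*entries), GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	n = rdma_query_gid_table(device, entries, max_entries);
	if (n >= 0)
		pr_info("%zd valid GID table entries\n", n);

	kfree(entries);
	return n < 0 ? n : 0;
}
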
index 382427a..e23eb35 100644 (file)
@@ -14,9 +14,6 @@
 #include <rdma/ib_sa.h>
 #include <rdma/rdma_cm.h>
 
-/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
-extern struct class cm_class;
-
 enum ib_cm_state {
        IB_CM_IDLE,
        IB_CM_LISTEN,
index 71f573a..7059750 100644 (file)
@@ -17,6 +17,7 @@ struct ib_umem_odp;
 struct ib_umem {
        struct ib_device       *ibdev;
        struct mm_struct       *owning_mm;
+       u64 iova;
        size_t                  length;
        unsigned long           address;
        u32 writable : 1;
@@ -33,19 +34,46 @@ static inline int ib_umem_offset(struct ib_umem *umem)
        return umem->address & ~PAGE_MASK;
 }
 
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+                                           unsigned long pgsz)
+{
+       return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+                        ALIGN_DOWN(umem->iova, pgsz))) /
+              pgsz;
+}
+
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-       return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
-               ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
-              PAGE_SHIFT;
+       return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+}
+
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
+                                               struct ib_umem *umem,
+                                               unsigned long pgsz)
+{
+       __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz);
 }
 
+/**
+ * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
+ * @umem: umem to iterate over
+ * @pgsz: Page size to split the list into
+ *
+ * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
+ * returned DMA blocks will be aligned to pgsz and span the range:
+ * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
+ */
+#define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
+       for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
+            __rdma_block_iter_next(biter);)
+
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
 struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
                            size_t size, int access);
 void ib_umem_release(struct ib_umem *umem);
-int ib_umem_page_count(struct ib_umem *umem);
 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
                      size_t length);
 unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
@@ -63,15 +91,15 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device,
        return ERR_PTR(-EINVAL);
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
-static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
 static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
                                    size_t length) {
        return -EINVAL;
 }
-static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
-                                        unsigned long pgsz_bitmap,
-                                        unsigned long virt) {
-       return -EINVAL;
+static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+                                                  unsigned long pgsz_bitmap,
+                                                  unsigned long virt)
+{
+       return 0;
 }
 
 #endif /* CONFIG_INFINIBAND_USER_MEM */
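
rdma_umem_for_each_dma_block() and ib_umem_num_dma_blocks() are the heart of the series: drivers walk the umem in device-sized DMA blocks instead of fixed PAGE_SIZE pages. A hedged sketch of a driver filling a HW page-address array with them; my_fill_page_array and the pas layout are illustrative, rdma_block_iter_dma_address() is the existing block-iterator accessor from ib_verbs.h:

#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int my_fill_page_array(struct ib_umem *umem, u64 iova,
			      unsigned long hw_pgsz_bitmap, __le64 *pas)
{
	struct ib_block_iter biter;
	unsigned long pgsz;
	size_t i = 0;

	/* largest page size the HW bitmap and the mapping alignment allow */
	pgsz = ib_umem_find_best_pgsz(umem, hw_pgsz_bitmap, iova);
	if (!pgsz)
		return -EINVAL;

	/* exactly ib_umem_num_dma_blocks(umem, pgsz) iterations */
	rdma_umem_for_each_dma_block(umem, &biter, pgsz)
		pas[i++] = cpu_to_le64(rdma_block_iter_dma_address(&biter));

	return 0;
}
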
index d16d2c1..0844c1d 100644 (file)
@@ -14,17 +14,13 @@ struct ib_umem_odp {
        struct mmu_interval_notifier notifier;
        struct pid *tgid;
 
+       /* An array of the pfns included in the on-demand paging umem. */
+       unsigned long *pfn_list;
+
        /*
-        * An array of the pages included in the on-demand paging umem.
-        * Indices of pages that are currently not mapped into the device will
-        * contain NULL.
-        */
-       struct page             **page_list;
-       /*
-        * An array of the same size as page_list, with DMA addresses mapped
-        * for pages the pages in page_list. The lower two bits designate
-        * access permissions. See ODP_READ_ALLOWED_BIT and
-        * ODP_WRITE_ALLOWED_BIT.
+        * An array with DMA addresses mapped for pfns in pfn_list.
+        * The lower two bits designate access permissions.
+        * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
         */
        dma_addr_t              *dma_list;
        /*
@@ -97,9 +93,8 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
                        const struct mmu_interval_notifier_ops *ops);
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
-                             u64 bcnt, u64 access_mask,
-                             unsigned long current_seq);
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
+                                u64 bcnt, u64 access_mask, bool fault);
 
 void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
                                 u64 bound);
index c0b2fa7..ce935d7 100644 (file)
@@ -138,10 +138,9 @@ union ib_gid {
 extern union ib_gid zgid;
 
 enum ib_gid_type {
-       /* If link layer is Ethernet, this is RoCE V1 */
-       IB_GID_TYPE_IB        = 0,
-       IB_GID_TYPE_ROCE      = 0,
-       IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+       IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+       IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+       IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
        IB_GID_TYPE_SIZE
 };
 
@@ -180,7 +179,7 @@ rdma_node_get_transport(unsigned int node_type);
 
 enum rdma_network_type {
        RDMA_NETWORK_IB,
-       RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+       RDMA_NETWORK_ROCE_V1,
        RDMA_NETWORK_IPV4,
        RDMA_NETWORK_IPV6
 };
@@ -190,9 +189,10 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
        if (network_type == RDMA_NETWORK_IPV4 ||
            network_type == RDMA_NETWORK_IPV6)
                return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
-       /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
-       return IB_GID_TYPE_IB;
+       else if (network_type == RDMA_NETWORK_ROCE_V1)
+               return IB_GID_TYPE_ROCE;
+       else
+               return IB_GID_TYPE_IB;
 }
 
 static inline enum rdma_network_type
@@ -201,6 +201,9 @@ rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
        if (attr->gid_type == IB_GID_TYPE_IB)
                return RDMA_NETWORK_IB;
 
+       if (attr->gid_type == IB_GID_TYPE_ROCE)
+               return RDMA_NETWORK_ROCE_V1;
+
        if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
                return RDMA_NETWORK_IPV4;
        else
@@ -535,7 +538,8 @@ enum ib_port_speed {
        IB_SPEED_FDR10  = 8,
        IB_SPEED_FDR    = 16,
        IB_SPEED_EDR    = 32,
-       IB_SPEED_HDR    = 64
+       IB_SPEED_HDR    = 64,
+       IB_SPEED_NDR    = 128,
 };
 
 /**
@@ -669,7 +673,7 @@ struct ib_port_attr {
        u8                      subnet_timeout;
        u8                      init_type_reply;
        u8                      active_width;
-       u8                      active_speed;
+       u16                     active_speed;
        u8                      phys_state;
        u16                     port_cap_flags2;
 };
@@ -952,13 +956,14 @@ enum ib_wc_status {
 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
 
 enum ib_wc_opcode {
-       IB_WC_SEND,
-       IB_WC_RDMA_WRITE,
-       IB_WC_RDMA_READ,
-       IB_WC_COMP_SWAP,
-       IB_WC_FETCH_ADD,
-       IB_WC_LSO,
-       IB_WC_LOCAL_INV,
+       IB_WC_SEND = IB_UVERBS_WC_SEND,
+       IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+       IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+       IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+       IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+       IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+       IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+       IB_WC_LSO = IB_UVERBS_WC_TSO,
        IB_WC_REG_MR,
        IB_WC_MASKED_COMP_SWAP,
        IB_WC_MASKED_FETCH_ADD,
@@ -1291,6 +1296,7 @@ enum ib_wr_opcode {
        IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
        IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
        IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+       IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
        IB_WR_LSO = IB_UVERBS_WR_TSO,
        IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
        IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
@@ -1463,11 +1469,6 @@ enum rdma_remove_reason {
        RDMA_REMOVE_DRIVER_REMOVE,
        /* uobj is being cleaned-up before being committed */
        RDMA_REMOVE_ABORT,
-       /*
-        * uobj has been fully created, with the uobj->object set, but is being
-        * cleaned up before being comitted
-        */
-       RDMA_REMOVE_ABORT_HWOBJ,
 };
 
 struct ib_rdmacg_object {
@@ -1479,12 +1480,6 @@ struct ib_rdmacg_object {
 struct ib_ucontext {
        struct ib_device       *device;
        struct ib_uverbs_file  *ufile;
-       /*
-        * 'closing' can be read by the driver only during a destroy callback,
-        * it is set when we are closing the file descriptor and indicates
-        * that mm_sem may be locked.
-        */
-       bool closing;
 
        bool cleanup_retryable;
 
@@ -1863,17 +1858,6 @@ enum ib_flow_spec_type {
 #define IB_FLOW_SPEC_LAYER_MASK        0xF0
 #define IB_FLOW_SPEC_SUPPORT_LAYERS 10
 
-/* Flow steering rule priority is set according to it's domain.
- * Lower domain value means higher priority.
- */
-enum ib_flow_domain {
-       IB_FLOW_DOMAIN_USER,
-       IB_FLOW_DOMAIN_ETHTOOL,
-       IB_FLOW_DOMAIN_RFS,
-       IB_FLOW_DOMAIN_NIC,
-       IB_FLOW_DOMAIN_NUM /* Must be last */
-};
-
 enum ib_flow_flags {
        IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
        IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
@@ -2414,12 +2398,12 @@ struct ib_device_ops {
        void (*mmap_free)(struct rdma_user_mmap_entry *entry);
        void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
        int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
-       void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+       int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
        int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
                         struct ib_udata *udata);
        int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
        int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-       void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+       int (*destroy_ah)(struct ib_ah *ah, u32 flags);
        int (*create_srq)(struct ib_srq *srq,
                          struct ib_srq_init_attr *srq_init_attr,
                          struct ib_udata *udata);
@@ -2427,7 +2411,7 @@ struct ib_device_ops {
                          enum ib_srq_attr_mask srq_attr_mask,
                          struct ib_udata *udata);
        int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-       void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+       int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
        struct ib_qp *(*create_qp)(struct ib_pd *pd,
                                   struct ib_qp_init_attr *qp_init_attr,
                                   struct ib_udata *udata);
@@ -2439,7 +2423,7 @@ struct ib_device_ops {
        int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
                         struct ib_udata *udata);
        int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
-       void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+       int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
        int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
        struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
        struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
@@ -2462,16 +2446,15 @@ struct ib_device_ops {
                         unsigned int *sg_offset);
        int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
                               struct ib_mr_status *mr_status);
-       struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
-                                 struct ib_udata *udata);
+       int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
        int (*dealloc_mw)(struct ib_mw *mw);
        int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
        int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
        int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
-       void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+       int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
        struct ib_flow *(*create_flow)(struct ib_qp *qp,
                                       struct ib_flow_attr *flow_attr,
-                                      int domain, struct ib_udata *udata);
+                                      struct ib_udata *udata);
        int (*destroy_flow)(struct ib_flow *flow_id);
        struct ib_flow_action *(*create_flow_action_esp)(
                struct ib_device *device,
@@ -2496,13 +2479,12 @@ struct ib_device_ops {
        struct ib_wq *(*create_wq)(struct ib_pd *pd,
                                   struct ib_wq_init_attr *init_attr,
                                   struct ib_udata *udata);
-       void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+       int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
        int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
                         u32 wq_attr_mask, struct ib_udata *udata);
-       struct ib_rwq_ind_table *(*create_rwq_ind_table)(
-               struct ib_device *device,
-               struct ib_rwq_ind_table_init_attr *init_attr,
-               struct ib_udata *udata);
+       int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+                                   struct ib_rwq_ind_table_init_attr *init_attr,
+                                   struct ib_udata *udata);
        int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
        struct ib_dm *(*alloc_dm)(struct ib_device *device,
                                  struct ib_ucontext *context,
@@ -2514,7 +2496,7 @@ struct ib_device_ops {
                                   struct uverbs_attr_bundle *attrs);
        int (*create_counters)(struct ib_counters *counters,
                               struct uverbs_attr_bundle *attrs);
-       void (*destroy_counters)(struct ib_counters *counters);
+       int (*destroy_counters)(struct ib_counters *counters);
        int (*read_counters)(struct ib_counters *counters,
                             struct ib_counters_read_attr *counters_read_attr,
                             struct uverbs_attr_bundle *attrs);
@@ -2624,7 +2606,9 @@ struct ib_device_ops {
        DECLARE_RDMA_OBJ_SIZE(ib_ah);
        DECLARE_RDMA_OBJ_SIZE(ib_counters);
        DECLARE_RDMA_OBJ_SIZE(ib_cq);
+       DECLARE_RDMA_OBJ_SIZE(ib_mw);
        DECLARE_RDMA_OBJ_SIZE(ib_pd);
+       DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
        DECLARE_RDMA_OBJ_SIZE(ib_srq);
        DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
        DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
@@ -3351,30 +3335,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
        return rdma_protocol_iwarp(dev, port_num);
 }
 
-/**
- * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
- *
- * @addr: address
- * @pgsz_bitmap: bitmap of HW supported page sizes
- */
-static inline unsigned int rdma_find_pg_bit(unsigned long addr,
-                                           unsigned long pgsz_bitmap)
-{
-       unsigned long align;
-       unsigned long pgsz;
-
-       align = addr & -addr;
-
-       /* Find page bit such that addr is aligned to the highest supported
-        * HW page size
-        */
-       pgsz = pgsz_bitmap & ~(-align << 1);
-       if (!pgsz)
-               return __ffs(pgsz_bitmap);
-
-       return __fls(pgsz);
-}
-
 /**
  * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
  * @device: Device
@@ -3472,12 +3432,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 #define ib_alloc_pd(device, flags) \
        __ib_alloc_pd((device), (flags), KBUILD_MODNAME)
 
-/**
- * ib_dealloc_pd_user - Deallocate kernel/user PD
- * @pd: The protection domain
- * @udata: Valid user data or NULL for kernel objects
- */
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
 
 /**
  * ib_dealloc_pd - Deallocate kernel PD
@@ -3487,7 +3442,9 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
  */
 static inline void ib_dealloc_pd(struct ib_pd *pd)
 {
-       ib_dealloc_pd_user(pd, NULL);
+       int ret = ib_dealloc_pd_user(pd, NULL);
+
+       WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
 }
 
 enum rdma_create_ah_flags {
@@ -3615,9 +3572,11 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
  *
  * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
  */
-static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
 {
-       return rdma_destroy_ah_user(ah, flags, NULL);
+       int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+       WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
 }
 
 struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
@@ -3671,9 +3630,11 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
  *
  * NOTE: for user srq use ib_destroy_srq_user with valid udata!
  */
-static inline int ib_destroy_srq(struct ib_srq *srq)
+static inline void ib_destroy_srq(struct ib_srq *srq)
 {
-       return ib_destroy_srq_user(srq, NULL);
+       int ret = ib_destroy_srq_user(srq, NULL);
+
+       WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
 }
 
 /**
@@ -3817,46 +3778,15 @@ static inline int ib_post_recv(struct ib_qp *qp,
        return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
 }
 
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
-                                int nr_cqe, int comp_vector,
-                                enum ib_poll_context poll_ctx,
-                                const char *caller, struct ib_udata *udata);
-
-/**
- * ib_alloc_cq_user: Allocate kernel/user CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- * @udata: Valid user data or NULL for kernel objects
- */
-static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
-                                            void *private, int nr_cqe,
-                                            int comp_vector,
-                                            enum ib_poll_context poll_ctx,
-                                            struct ib_udata *udata)
-{
-       return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
-                                 KBUILD_MODNAME, udata);
-}
-
-/**
- * ib_alloc_cq: Allocate kernel CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- *
- * NOTE: for user cq use ib_alloc_cq_user with valid udata!
- */
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+                           int comp_vector, enum ib_poll_context poll_ctx,
+                           const char *caller);
 static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
                                        int nr_cqe, int comp_vector,
                                        enum ib_poll_context poll_ctx)
 {
-       return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
-                               NULL);
+       return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+                            KBUILD_MODNAME);
 }
 
 struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
@@ -3878,26 +3808,7 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
                                 KBUILD_MODNAME);
 }
 
-/**
- * ib_free_cq_user - Free kernel/user CQ
- * @cq: The CQ to free
- * @udata: Valid user data or NULL for kernel objects
- *
- * NOTE: This function shouldn't be called on shared CQs.
- */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
-
-/**
- * ib_free_cq - Free kernel CQ
- * @cq: The CQ to free
- *
- * NOTE: for user cq use ib_free_cq_user with valid udata!
- */
-static inline void ib_free_cq(struct ib_cq *cq)
-{
-       ib_free_cq_user(cq, NULL);
-}
-
+void ib_free_cq(struct ib_cq *cq);
 int ib_process_cq_direct(struct ib_cq *cq, int budget);
 
 /**
@@ -3955,7 +3866,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
  */
 static inline void ib_destroy_cq(struct ib_cq *cq)
 {
-       ib_destroy_cq_user(cq, NULL);
+       int ret = ib_destroy_cq_user(cq, NULL);
+
+       WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
 }
 
 /**
@@ -4379,10 +4292,9 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
 
 struct ib_wq *ib_create_wq(struct ib_pd *pd,
                           struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
                 u32 wq_attr_mask);
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
 
 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
                 unsigned int *sg_offset, unsigned int page_size);
@@ -4410,7 +4322,7 @@ void ib_drain_rq(struct ib_qp *qp);
 void ib_drain_sq(struct ib_qp *qp);
 void ib_drain_qp(struct ib_qp *qp);
 
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width);
 
 static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
 {
@@ -4717,6 +4629,7 @@ bool rdma_dev_access_netns(const struct ib_device *device,
                           const struct net *net);
 
 #define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
 #define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
 
 /**
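
The ib_verbs.h hunks above change the kernel-object teardown convention: driver destroy callbacks now return int, the kernel-only inline wrappers (ib_dealloc_pd(), rdma_destroy_ah(), ib_destroy_srq(), ib_destroy_cq()) stay void and WARN_ONCE() if a driver reports failure, and the *_user CQ helpers are folded into __ib_alloc_cq()/ib_free_cq(). A minimal, hypothetical ULP sketch of what a kernel consumer looks like after this series (example_ctx, the CQE count and poll context are illustrative only):

#include <rdma/ib_verbs.h>

struct example_ctx {
        struct ib_pd *pd;
        struct ib_cq *cq;
};

static int example_setup(struct example_ctx *ctx, struct ib_device *dev)
{
        ctx->pd = ib_alloc_pd(dev, 0);
        if (IS_ERR(ctx->pd))
                return PTR_ERR(ctx->pd);

        /* Kernel callers keep using ib_alloc_cq(); only the _user variants are gone. */
        ctx->cq = ib_alloc_cq(dev, ctx, 128, 0, IB_POLL_SOFTIRQ);
        if (IS_ERR(ctx->cq)) {
                ib_dealloc_pd(ctx->pd);
                return PTR_ERR(ctx->cq);
        }
        return 0;
}

static void example_teardown(struct example_ctx *ctx)
{
        /* Both helpers are void; a failing driver destroy now trips WARN_ONCE()
         * inside the wrapper instead of returning an error the caller cannot handle.
         */
        ib_free_cq(ctx->cq);
        ib_dealloc_pd(ctx->pd);
}
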
index cf5da2a..c672ae1 100644
@@ -110,11 +110,14 @@ struct rdma_cm_id {
        u8                       port_num;
 };
 
-struct rdma_cm_id *__rdma_create_id(struct net *net,
-                                   rdma_cm_event_handler event_handler,
-                                   void *context, enum rdma_ucm_port_space ps,
-                                   enum ib_qp_type qp_type,
-                                   const char *caller);
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+                       void *context, enum rdma_ucm_port_space ps,
+                       enum ib_qp_type qp_type, const char *caller);
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+                                      void *context,
+                                      enum rdma_ucm_port_space ps,
+                                      enum ib_qp_type qp_type);
 
 /**
  * rdma_create_id - Create an RDMA identifier.
@@ -132,9 +135,9 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
  * The event handler callback serializes on the id's mutex and is
  * allowed to sleep.
  */
-#define rdma_create_id(net, event_handler, context, ps, qp_type) \
-       __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
-                        KBUILD_MODNAME)
+#define rdma_create_id(net, event_handler, context, ps, qp_type)               \
+       __rdma_create_kernel_id(net, event_handler, context, ps, qp_type,      \
+                               KBUILD_MODNAME)
 
 /**
   * rdma_destroy_id - Destroys an RDMA identifier.
@@ -250,29 +253,12 @@ int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
  */
 int rdma_listen(struct rdma_cm_id *id, int backlog);
 
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
-                 const char *caller);
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
-                     const char *caller, struct rdma_ucm_ece *ece);
-
-/**
- * rdma_accept - Called to accept a connection request or response.
- * @id: Connection identifier associated with the request.
- * @conn_param: Information needed to establish the connection.  This must be
- *   provided if accepting a connection request.  If accepting a connection
- *   response, this parameter must be NULL.
- *
- * Typically, this routine is only called by the listener to accept a connection
- * request.  It must also be called on the active side of a connection if the
- * user is performing their own QP transitions.
- *
- * In the case of error, a reject message is sent to the remote side and the
- * state of the qp associated with the id is modified to error, such that any
- * previously posted receive buffers would be flushed.
- */
-#define rdma_accept(id, conn_param) \
-       __rdma_accept((id), (conn_param),  KBUILD_MODNAME)
+void rdma_lock_handler(struct rdma_cm_id *id);
+void rdma_unlock_handler(struct rdma_cm_id *id);
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+                   struct rdma_ucm_ece *ece);
 
 /**
  * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
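
For the rdma_cm.h changes above, rdma_accept() is no longer a macro over __rdma_accept() but an ordinary exported function, the ECE variant becomes rdma_accept_ece(), and rdma_lock_handler()/rdma_unlock_handler() are added for callers that need the handler lock outside the event callback. A hedged sketch of the common kernel pattern, accepting from inside the CM event handler (the handler name and conn_param values are illustrative only):

#include <rdma/rdma_cm.h>

static int example_cm_handler(struct rdma_cm_id *id,
                              struct rdma_cm_event *event)
{
        struct rdma_conn_param param = {
                .responder_resources = 1,
                .initiator_depth = 1,
        };

        switch (event->event) {
        case RDMA_CM_EVENT_CONNECT_REQUEST:
                /* Invoked from the handler, so the id's handler lock is already
                 * held; out-of-handler callers would bracket the accept with
                 * rdma_lock_handler()/rdma_unlock_handler().
                 */
                return rdma_accept(id, &param);
        default:
                return 0;
        }
}
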
index 7682d1b..d3a1cc5 100644
@@ -106,22 +106,11 @@ struct rdma_restrack_entry {
 
 int rdma_restrack_count(struct ib_device *dev,
                        enum rdma_restrack_type type);
-
-void rdma_restrack_kadd(struct rdma_restrack_entry *res);
-void rdma_restrack_uadd(struct rdma_restrack_entry *res);
-
-/**
- * rdma_restrack_del() - delete object from the reource tracking database
- * @res:  resource entry
- * @type: actual type of object to operate
- */
-void rdma_restrack_del(struct rdma_restrack_entry *res);
-
 /**
  * rdma_is_kernel_res() - check the owner of resource
  * @res:  resource entry
  */
-static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
+static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res)
 {
        return !res->user;
 }
@@ -138,14 +127,6 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
  */
 int rdma_restrack_put(struct rdma_restrack_entry *res);
 
-/**
- * rdma_restrack_set_task() - set the task for this resource
- * @res:  resource entry
- * @caller: kernel name, the current task will be used if the caller is NULL.
- */
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
-                           const char *caller);
-
 /*
  * Helper functions for rdma drivers when filling out
  * nldev driver attributes.
index aa19afc..81bb454 100644
@@ -6,7 +6,6 @@
 /*
  * enum ib_event_type, from include/rdma/ib_verbs.h
  */
-
 #define IB_EVENT_LIST                          \
        ib_event(CQ_ERR)                        \
        ib_event(QP_FATAL)                      \
@@ -90,6 +89,46 @@ IB_WC_STATUS_LIST
 #define rdma_show_wc_status(x) \
                __print_symbolic(x, IB_WC_STATUS_LIST)
 
+/*
+ * enum ib_cm_event_type, from include/rdma/ib_cm.h
+ */
+#define IB_CM_EVENT_LIST                       \
+       ib_cm_event(REQ_ERROR)                  \
+       ib_cm_event(REQ_RECEIVED)               \
+       ib_cm_event(REP_ERROR)                  \
+       ib_cm_event(REP_RECEIVED)               \
+       ib_cm_event(RTU_RECEIVED)               \
+       ib_cm_event(USER_ESTABLISHED)           \
+       ib_cm_event(DREQ_ERROR)                 \
+       ib_cm_event(DREQ_RECEIVED)              \
+       ib_cm_event(DREP_RECEIVED)              \
+       ib_cm_event(TIMEWAIT_EXIT)              \
+       ib_cm_event(MRA_RECEIVED)               \
+       ib_cm_event(REJ_RECEIVED)               \
+       ib_cm_event(LAP_ERROR)                  \
+       ib_cm_event(LAP_RECEIVED)               \
+       ib_cm_event(APR_RECEIVED)               \
+       ib_cm_event(SIDR_REQ_ERROR)             \
+       ib_cm_event(SIDR_REQ_RECEIVED)          \
+       ib_cm_event_end(SIDR_REP_RECEIVED)
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x)         TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_event_end(x)     TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_EVENT_LIST
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x)         { IB_CM_##x, #x },
+#define ib_cm_event_end(x)     { IB_CM_##x, #x }
+
+#define rdma_show_ib_cm_event(x) \
+               __print_symbolic(x, IB_CM_EVENT_LIST)
+
 /*
  * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
  */
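
The new IB_CM_EVENT_LIST block follows the usual trace-header idiom: expand the list once as TRACE_DEFINE_ENUM() so the enum values are resolvable by trace tooling, then re-expand it as a { value, "name" } table for __print_symbolic(). A hypothetical tracepoint (not part of this series) showing how rdma_show_ib_cm_event() would be consumed:

TRACE_EVENT(example_cm_event,
        TP_PROTO(unsigned int event),
        TP_ARGS(event),

        TP_STRUCT__entry(
                __field(unsigned int, event)
        ),

        TP_fast_assign(
                __entry->event = event;
        ),

        TP_printk("event=%s", rdma_show_ib_cm_event(__entry->event))
);
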
index abe9422..b6aad52 100644
@@ -13,6 +13,7 @@
 #include <linux/scatterlist.h>
 #include <linux/sunrpc/rpc_rdma_cid.h>
 #include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
 #include <trace/events/rdma.h>
 
 /**
index 507a286..f89fbb5 100644
@@ -105,6 +105,7 @@ struct efa_ibv_create_ah_resp {
 
 enum {
        EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0,
+       EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
 };
 
 struct efa_ibv_ex_query_device_resp {
index eb76b38..9ec85f7 100644
@@ -39,6 +39,8 @@
 struct hns_roce_ib_create_cq {
        __aligned_u64 buf_addr;
        __aligned_u64 db_addr;
+       __u32 cqe_size;
+       __u32 reserved;
 };
 
 struct hns_roce_ib_create_cq_resp {
@@ -73,7 +75,7 @@ struct hns_roce_ib_create_qp_resp {
 
 struct hns_roce_ib_alloc_ucontext_resp {
        __u32   qp_tab_size;
-       __u32   reserved;
+       __u32   cqe_size;
 };
 
 struct hns_roce_ib_alloc_pd_resp {
index 99dcabf..7968a18 100644
@@ -70,6 +70,8 @@ enum uverbs_methods_device {
        UVERBS_METHOD_QUERY_PORT,
        UVERBS_METHOD_GET_CONTEXT,
        UVERBS_METHOD_QUERY_CONTEXT,
+       UVERBS_METHOD_QUERY_GID_TABLE,
+       UVERBS_METHOD_QUERY_GID_ENTRY,
 };
 
 enum uverbs_attrs_invoke_write_cmd_attr_ids {
@@ -352,4 +354,18 @@ enum uverbs_attrs_async_event_create {
        UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
 };
 
+enum uverbs_attrs_query_gid_table_cmd_attr_ids {
+       UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE,
+       UVERBS_ATTR_QUERY_GID_TABLE_FLAGS,
+       UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+       UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+};
+
+enum uverbs_attrs_query_gid_entry_cmd_attr_ids {
+       UVERBS_ATTR_QUERY_GID_ENTRY_PORT,
+       UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX,
+       UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS,
+       UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+};
+
 #endif
index 5debab4..2248379 100644
@@ -208,6 +208,7 @@ enum ib_uverbs_read_counters_flags {
 enum ib_uverbs_advise_mr_advice {
        IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH,
        IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+       IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT,
 };
 
 enum ib_uverbs_advise_mr_flag {
@@ -250,4 +251,18 @@ enum rdma_driver_id {
        RDMA_DRIVER_SIW,
 };
 
+enum ib_uverbs_gid_type {
+       IB_UVERBS_GID_TYPE_IB,
+       IB_UVERBS_GID_TYPE_ROCE_V1,
+       IB_UVERBS_GID_TYPE_ROCE_V2,
+};
+
+struct ib_uverbs_gid_entry {
+       __aligned_u64 gid[2];
+       __u32 gid_index;
+       __u32 port_num;
+       __u32 gid_type;
+       __u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */
+};
+
 #endif
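
struct ib_uverbs_gid_entry and enum ib_uverbs_gid_type above are the records returned through the new UVERBS_METHOD_QUERY_GID_TABLE/QUERY_GID_ENTRY methods. A hedged userspace-side sketch of decoding such an array once it has been obtained (the dump helper and how the array gets filled are illustrative, not part of the ABI):

#include <stdio.h>
#include <rdma/ib_user_ioctl_verbs.h>

static void dump_gid_entries(const struct ib_uverbs_gid_entry *entries,
                             unsigned int nents)
{
        for (unsigned int i = 0; i < nents; i++)
                printf("port %u gid_index %u type %u ifindex %u\n",
                       entries[i].port_num, entries[i].gid_index,
                       entries[i].gid_type, entries[i].netdev_ifindex);
}
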
index 0474c74..456438c 100644
@@ -457,6 +457,17 @@ struct ib_uverbs_poll_cq {
        __u32 ne;
 };
 
+enum ib_uverbs_wc_opcode {
+       IB_UVERBS_WC_SEND = 0,
+       IB_UVERBS_WC_RDMA_WRITE = 1,
+       IB_UVERBS_WC_RDMA_READ = 2,
+       IB_UVERBS_WC_COMP_SWAP = 3,
+       IB_UVERBS_WC_FETCH_ADD = 4,
+       IB_UVERBS_WC_BIND_MW = 5,
+       IB_UVERBS_WC_LOCAL_INV = 6,
+       IB_UVERBS_WC_TSO = 7,
+};
+
 struct ib_uverbs_wc {
        __aligned_u64 wr_id;
        __u32 status;
index aae2e69..d8f2e0e 100644
@@ -99,8 +99,8 @@ struct rxe_send_wr {
                                struct ib_mr *mr;
                                __aligned_u64 reserved;
                        };
-                       __u32        key;
-                       __u32        access;
+                       __u32        key;
+                       __u32        access;
                } reg;
        } wr;
 };
@@ -112,7 +112,7 @@ struct rxe_sge {
 };
 
 struct mminfo {
-       __aligned_u64           offset;
+       __aligned_u64           offset;
        __u32                   size;
        __u32                   pad;
 };