- 'rbtree_postorder_for_each_entry_safe'
- 'rdma_for_each_block'
- 'rdma_for_each_port'
+ - 'rdma_umem_for_each_dma_block'
- 'resource_list_for_each_entry'
- 'resource_list_for_each_entry_safe'
- 'rhl_for_each_entry_rcu'
userspace ABI compatibility of umad & issm devices.
-What: /sys/class/infiniband_cm/ucmN/ibdev
-Date: Oct, 2005
-KernelVersion: v2.6.14
-Contact: linux-rdma@vger.kernel.org
-Description:
- (RO) Display Infiniband (IB) device name
-
-
-What: /sys/class/infiniband_cm/abi_version
-Date: Oct, 2005
-KernelVersion: v2.6.14
-Contact: linux-rdma@vger.kernel.org
-Description:
- (RO) Value is incremented if any changes are made that break
- userspace ABI compatibility of ucm devices.
-
-
What: /sys/class/infiniband_verbs/uverbsN/ibdev
What: /sys/class/infiniband_verbs/uverbsN/abi_version
Date: Sept, 2005
CISCO VIC LOW LATENCY NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Nelson Escobar <neescoba@cisco.com>
-M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/infiniband/hw/usnic/
F: include/uapi/linux/cciss*.h
HFI1 DRIVER
-M: Mike Marciniszyn <mike.marciniszyn@intel.com>
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/hfi1
F: drivers/char/hw_random/optee-rng.c
OPA-VNIC DRIVER
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
-M: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/ulp/opa_vnic
F: include/uapi/linux/qemu_fw_cfg.h
QIB DRIVER
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
-M: Mike Marciniszyn <mike.marciniszyn@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qib/
F: drivers/net/ethernet/rdc/r6040.c
RDMAVT - RDMA verbs software
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
-M: Mike Marciniszyn <mike.marciniszyn@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/sw/rdmavt
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
select INTERVAL_TREE
+ select HMM_MIRROR
default y
help
On demand paging support for the InfiniBand subsystem.
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
-ib_cm-y := cm.o
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
req->callback = NULL;
spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
}
static const char * const gid_type_str[] = {
+ /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+ * user space compatibility reasons.
+ */
[IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};
const struct ib_gid_attr *
rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
{
- const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
struct ib_gid_table *table;
unsigned long flags;
}
EXPORT_SYMBOL(rdma_get_gid_attr);
+/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Array in which the GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * The entries array must be allocated to hold max_entries entries.
+ *
+ * Returns the number of entries that were successfully read on success, or an
+ * appropriate error code on failure.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries)
+{
+ const struct ib_gid_attr *gid_attr;
+ ssize_t num_entries = 0, ret;
+ struct ib_gid_table *table;
+ unsigned int port_num, i;
+ struct net_device *ndev;
+ unsigned long flags;
+
+ rdma_for_each_port(device, port_num) {
+ if (!rdma_ib_or_roce(device, port_num))
+ continue;
+
+ table = rdma_gid_table(device, port_num);
+ read_lock_irqsave(&table->rwlock, flags);
+ for (i = 0; i < table->sz; i++) {
+ if (!is_gid_entry_valid(table->data_vec[i]))
+ continue;
+ if (num_entries >= max_entries) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ gid_attr = &table->data_vec[i]->attr;
+
+ memcpy(&entries->gid, &gid_attr->gid,
+ sizeof(gid_attr->gid));
+ entries->gid_index = gid_attr->index;
+ entries->port_num = gid_attr->port_num;
+ entries->gid_type = gid_attr->gid_type;
+ ndev = rcu_dereference_protected(
+ gid_attr->ndev,
+ lockdep_is_held(&table->rwlock));
+ if (ndev)
+ entries->netdev_ifindex = ndev->ifindex;
+
+ num_entries++;
+ entries++;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
+
+ return num_entries;
+err:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
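For orientation, a minimal caller sketch of the new export (hypothetical, not part of this patch): example_dump_gids() and the choice of max_entries are placeholders, and struct ib_uverbs_gid_entry comes from include/uapi/rdma/ib_user_ioctl_verbs.h.

static void example_dump_gids(struct ib_device *device, size_t max_entries)
{
	struct ib_uverbs_gid_entry *entries;
	ssize_t n, i;

	entries = kcalloc(max_entries, sizeof(*entries), GFP_KERNEL);
	if (!entries)
		return;

	n = rdma_query_gid_table(device, entries, max_entries);
	if (n < 0) {		/* -EINVAL when max_entries was too small */
		kfree(entries);
		return;
	}

	for (i = 0; i < n; i++)
		pr_info("port %u gid_index %u ifindex %u\n",
			entries[i].port_num, entries[i].gid_index,
			entries[i].netdev_ifindex);
	kfree(entries);
}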
/**
* rdma_put_gid_attr - Release reference to the GID attribute
* @attr: Pointer to the GID attribute whose reference
struct ib_gid_table_entry *entry =
container_of(attr, struct ib_gid_table_entry, attr);
struct ib_device *device = entry->attr.device;
- struct net_device *ndev = ERR_PTR(-ENODEV);
+ struct net_device *ndev = ERR_PTR(-EINVAL);
u8 port_num = entry->attr.port_num;
struct ib_gid_table *table;
unsigned long flags;
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid) {
ndev = rcu_dereference(attr->ndev);
- if (!ndev ||
- (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+ if (!ndev)
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
- struct kobject port_obj;
u8 port_num;
struct list_head cm_priv_prim_list;
struct list_head cm_priv_altr_list;
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ trace_icm_send_req(&cm_id_priv->id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = ib_post_send_mad(cm_id_priv->msg, NULL);
if (ret) {
IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
- pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id));
+ trace_icm_no_listener_err(&cm_id_priv->id);
cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
goto destroy;
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ trace_icm_remote_no_priv_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
break;
default:
ret = -EINVAL;
- pr_debug(
- "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
+ trace_icm_rep_unknown_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
- IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ trace_icm_insert_failed_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug(
- "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_skipped(&cm_id_priv->id);
return -EINVAL;
}
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
- pr_debug(
- "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_drep_err(&cm_id_priv->id);
kfree(private_data);
return -EINVAL;
}
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug(
- "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
state);
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
return -EINVAL;
}
+ trace_icm_send_rej(&cm_id_priv->id, reason);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
}
fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
goto out;
}
}
fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
goto error1;
}
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
msg_response, service_timeout,
private_data, private_data_len);
+ trace_icm_send_mra(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
counter[CM_MRA_COUNTER]);
fallthrough;
default:
- pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
+ if (cm_id->state == IB_CM_IDLE) {
+ trace_icm_send_sidr_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
- else
+ } else {
ret = -EINVAL;
+ }
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
- pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
- state, ib_wc_status_msg(wc_status));
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
ret = -EISCONN;
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
.default_attrs = cm_counter_default_attrs
};
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
-}
-
-struct class cm_class = {
- .owner = THIS_MODULE,
- .name = "infiniband_cm",
- .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
static int cm_create_port_fs(struct cm_port *port)
{
int i, ret;
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
- ret = class_register(&cm_class);
- if (ret) {
- ret = -ENOMEM;
- goto error1;
- }
-
cm.wq = alloc_workqueue("ib_cm", 0, 1);
if (!cm.wq) {
ret = -ENOMEM;
error3:
destroy_workqueue(cm.wq);
error2:
- class_unregister(&cm_class);
-error1:
return ret;
}
kfree(timewait_info);
}
- class_unregister(&cm_class);
WARN_ON(!xa_empty(&cm.local_id_table));
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class, \
+ icm_dreq_skipped, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id) \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
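The DEFINE_CM_SEND_EVENT()/DEFINE_CM_ERR_EVENT() helpers above stamp out one tracepoint per CM message from the shared icm_id_class, so covering another message type is a two-line change. A sketch (hypothetical, not part of this patch):

/* cm_trace.h: define the event; this expands to trace_icm_send_lap() */
DEFINE_CM_SEND_EVENT(lap);

/* cm.c: fire it next to the ib_post_send_mad() call for the LAP MAD */
trace_icm_send_lap(&cm_id_priv->id);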
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
+static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
+ union ib_gid *mgid);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;
+
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
struct cma_multicast {
struct rdma_id_private *id_priv;
- union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
struct list_head list;
void *context;
struct sockaddr_storage addr;
- struct kref mcref;
u8 join_state;
};
struct rdma_cm_event event;
};
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
-};
-
union cma_ip_addr {
struct in6_addr ip6;
struct {
u16 pkey;
};
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * away from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
- if (id_priv->res.kern_name)
- rdma_restrack_kadd(&id_priv->res);
- else
- rdma_restrack_uadd(&id_priv->res);
+ rdma_restrack_add(&id_priv->res);
+
trace_cm_id_attach(id_priv, cma_dev->device);
}
rdma_start_port(cma_dev->device)];
}
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
complete(&id_priv->comp);
}
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
{
struct rdma_id_private *id_priv;
if (!id_priv)
return ERR_PTR(-ENOMEM);
- rdma_restrack_set_task(&id_priv->res, caller);
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
- return &id_priv->id;
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+ if (parent)
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+ return id_priv;
+}
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, caller);
+ return &ret->id;
}
-EXPORT_SYMBOL(__rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+ ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, NULL);
+ return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
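Taken together, the split means kernel ULPs keep the existing calling convention through the rdma_create_id() wrapper while ucma switches to rdma_create_user_id(). A hedged usage sketch; my_event_handler and my_context are placeholders, and the wrapper macro passing KBUILD_MODNAME is assumed to live in include/rdma/rdma_cm.h:

/* Kernel ULP: the restrack owner name is taken from KBUILD_MODNAME */
struct rdma_cm_id *cm_id;

cm_id = rdma_create_id(&init_net, my_event_handler, my_context,
		       RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
	return PTR_ERR(cm_id);

/* User-space path (ucma): namespace and owner name come from "current" */
cm_id = rdma_create_user_id(my_event_handler, my_context,
			    RDMA_PS_TCP, IB_QPT_RC);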
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
mutex_unlock(&lock);
}
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
- struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
{
- struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+ ib_sa_free_multicast(mc->sa_mc);
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
- dev_put(ndev);
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ union ib_gid mgid;
+
+ cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
+ &mgid);
+ cma_igmp_send(ndev, &mgid, false);
+ dev_put(ndev);
+ }
}
- kref_put(&mc->mcref, release_mc);
+ kfree(mc);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
- mc = container_of(id_priv->mc_list.next,
- struct cma_multicast, list);
+ mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+ list);
list_del(&mc->list);
- if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
- id_priv->id.port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else {
- cma_leave_roce_mc_group(id_priv, mc);
- }
+ destroy_mc(id_priv, mc);
}
}
{
cma_cancel_operation(id_priv, state);
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
put_net(id_priv->id.route.addr.dev_addr.net);
+ rdma_restrack_del(&id_priv->res);
kfree(id_priv);
}
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event = {};
+ enum rdma_cm_state state;
int ret;
mutex_lock(&id_priv->handler_mutex);
+ state = READ_ONCE(id_priv->state);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_CONNECT) ||
+ state != RDMA_CM_CONNECT) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_DISCONNECT))
+ state != RDMA_CM_DISCONNECT))
goto out;
switch (ib_event->event) {
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ if (state == RDMA_CM_CONNECT &&
(id_priv->id.qp_type != IB_QPT_UD)) {
trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
- listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
+ listen_id->ps,
+ ib_event->param.req_rcvd.qp_type,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
+ listen_id->context, listen_id->ps, IB_QPT_UD,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
}
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN) {
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
goto err_unlock;
}
goto net_dev_put;
}
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD)) {
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+ conn_id->id.qp_type != IB_QPT_UD) {
trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (iw_event->event) {
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
- struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN)
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC,
- listen_id->res.kern_name);
- if (IS_ERR(new_cm_id)) {
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context, RDMA_PS_TCP,
+ IB_QPT_RC, listen_id);
+ if (IS_ERR(conn_id)) {
ret = -ENOMEM;
goto out;
}
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
struct cma_device *cma_dev)
{
struct rdma_id_private *dev_id_priv;
- struct rdma_cm_id *id;
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type, id_priv->res.kern_name);
- if (IS_ERR(id))
+ dev_id_priv =
+ __rdma_create_id(net, cma_listen_handler, id_priv,
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
+ if (IS_ERR(dev_id_priv))
return;
- dev_id_priv = container_of(id, struct rdma_id_private, id);
-
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
dev_id_priv->tos_set = id_priv->tos_set;
dev_id_priv->tos = id_priv->tos;
- ret = rdma_listen(id, id_priv->backlog);
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
dev_warn(&cma_dev->device->dev,
"RDMA CMA: cma_listen_on_dev, error %d\n", ret);
struct rdma_id_private *id_priv = work->id;
mutex_lock(&id_priv->handler_mutex);
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
goto out_unlock;
-
- if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_id_put(id_priv);
- destroy_id_handler_unlock(id_priv);
- goto out_free;
+ if (work->old_state != 0 || work->new_state != 0) {
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out_unlock;
}
-out_unlock:
- mutex_unlock(&id_priv->handler_mutex);
- cma_id_put(id_priv);
-out_free:
- kfree(work);
-}
-
-static void cma_ndev_work_handler(struct work_struct *_work)
-{
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
- struct rdma_id_private *id_priv = work->id;
-
- mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == RDMA_CM_DESTROYING ||
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out_unlock;
-
if (cma_cm_event_handler(id_priv, &work->event)) {
cma_id_put(id_priv);
destroy_id_handler_unlock(id_priv);
mutex_unlock(&id_priv->handler_mutex);
cma_id_put(id_priv);
out_free:
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
kfree(work);
}
return rdma_bind_addr(id, src_addr);
}
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by the ULP; a previously bound ID will not be re-bound, and
+ * src_addr is ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
{
- struct rdma_id_private *id_priv;
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (id_priv->state == RDMA_CM_IDLE) {
- ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret) {
- memset(cma_dst_addr(id_priv), 0,
- rdma_addr_size(dst_addr));
- return ret;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ goto err_dst;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY))) {
+ ret = -EINVAL;
+ goto err_dst;
}
}
if (cma_family(id_priv) != dst_addr->sa_family) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_state;
}
+ return 0;
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
- }
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+err_dst:
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+ id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
if (id_priv == cur_id)
continue;
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
- cur_id->reuseaddr)
+ if (reuseaddr && cur_id->reuseaddr)
continue;
cur_addr = cma_src_addr(cur_id);
return ret;
}
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
- int ret = 0;
-
- mutex_lock(&lock);
- if (bind_list->owners.first->next)
- ret = cma_check_port(bind_list, id_priv, 0);
- mutex_unlock(&lock);
- return ret;
-}
-
static enum rdma_ucm_port_space
cma_select_inet_ps(struct rdma_id_private *id_priv)
{
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
id->route.addr.src_addr.ss_family = AF_INET;
ret = rdma_bind_addr(id, cma_src_addr(id_priv));
if (ret)
return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN)))
+ return -EINVAL;
}
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
- return -EINVAL;
-
+ /*
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+ * any more, and has to be unique in the bind list.
+ */
if (id_priv->reuseaddr) {
- ret = cma_bind_listen(id_priv);
+ mutex_lock(&lock);
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+ if (!ret)
+ id_priv->reuseaddr = 0;
+ mutex_unlock(&lock);
if (ret)
goto err;
}
return 0;
err:
id_priv->backlog = 0;
+ /*
+ * All the failure paths that lead here will not allow the req handlers
+ * to have run.
+ */
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
return 0;
err2:
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev)
cma_release_dev(id_priv);
err1:
int ret;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (ib_event->event) {
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
- return -EINVAL;
+ mutex_lock(&id_priv->handler_mutex);
+ if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
if (!id->qp) {
id_priv->qp_num = conn_param->qp_num;
else
ret = -ENOSYS;
if (ret)
- goto err;
-
+ goto err_state;
+ mutex_unlock(&id_priv->handler_mutex);
return 0;
-err:
+err_state:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
+err_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
return ret;
}
EXPORT_SYMBOL(rdma_connect);
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
-
- rdma_restrack_set_task(&id_priv->res, caller);
+ lockdep_assert_held(&id_priv->handler_mutex);
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
return -EINVAL;
if (!id->qp && conn_param) {
rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller, struct rdma_ucm_ece *ece)
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
{
struct rdma_id_private *id_priv =
container_of(id, struct rdma_id_private, id);
id_priv->ece.vendor_id = ece->vendor_id;
id_priv->ece.attr_mod = ece->attr_mod;
- return __rdma_accept(id, conn_param, caller);
+ return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
}
-EXPORT_SYMBOL(__rdma_accept_ece);
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
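Because rdma_accept() now asserts that the handler_mutex is held, a ULP that defers the accept decision out of the CM callback brackets the call with the new helpers. A minimal sketch (hypothetical ULP flow; cm_id, ret and conn_param setup are omitted):

/* Process context, after the REQ event handler stashed the cm_id and
 * returned without accepting.
 */
rdma_lock_handler(cm_id);
ret = rdma_accept(cm_id, &conn_param);	/* or rdma_reject() on failure */
rdma_unlock_handler(cm_id);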
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
}
EXPORT_SYMBOL(rdma_disconnect);
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ struct ib_sa_multicast *multicast,
+ struct rdma_cm_event *event,
+ struct cma_multicast *mc)
{
- struct rdma_id_private *id_priv;
- struct cma_multicast *mc = multicast->context;
- struct rdma_cm_event event = {};
- int ret = 0;
-
- id_priv = mc->id_priv;
- mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
- goto out;
+ struct rdma_dev_addr *dev_addr;
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
else
pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
status);
- mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp) {
- status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- be16_to_cpu(multicast->rec.mlid));
- if (status)
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
- status);
+
+ event->status = status;
+ event->param.ud.private_data = mc->context;
+ if (status) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ return;
}
- mutex_unlock(&id_priv->qp_mutex);
- event.status = status;
- event.param.ud.private_data = mc->context;
- if (!status) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev =
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- enum ib_gid_type gid_type =
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
-
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num,
- &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
- if (ret)
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ gid_type =
+ id_priv->cma_dev
+ ->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+ &multicast->rec, ndev, gid_type,
+ &event->param.ud.ah_attr)) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ goto out;
+ }
- event.param.ud.qp_num = 0xFFFFFF;
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- if (ndev)
- dev_put(ndev);
- } else
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ event->param.ud.qp_num = 0xFFFFFF;
+ event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- ret = cma_cm_event_handler(id_priv, &event);
+out:
+ if (ndev)
+ dev_put(ndev);
+}
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ struct rdma_cm_event event = {};
+ int ret = 0;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+ goto out;
+
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
destroy_id_handler_unlock(id_priv);
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
- id_priv->id.port_num, &rec,
- comp_mask, GFP_KERNEL,
- cma_ib_mc_handler, mc);
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
- struct cma_multicast *mc = mw->mc;
- struct ib_sa_multicast *m = mc->multicast.ib;
-
- mc->multicast.ib->context = mc;
- cma_ib_mc_handler(0, m);
- kref_put(&mc->mcref, release_mc);
- kfree(mw);
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec, comp_mask,
+ GFP_KERNEL, cma_ib_mc_handler, mc);
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct iboe_mcast_work *work;
+ struct cma_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ struct ib_sa_multicast ib;
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ if (cma_zero_addr(addr))
return -EINVAL;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
- if (!mc->multicast.ib) {
- err = -ENOMEM;
- goto out1;
- }
-
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+ ib.rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
- goto out2;
+ goto err_free;
}
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
- mc->multicast.ib->rec.hop_limit = 1;
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
if (!send_only) {
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
true);
}
}
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !mc->multicast.ib->rec.mtu) {
+ if (err || !ib.rec.mtu) {
if (!err)
err = -EINVAL;
- goto out2;
+ goto err_free;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
+ &ib.rec.port_gid);
work->id = id_priv;
- work->mc = mc;
- INIT_WORK(&work->work, iboe_mcast_work_handler);
- kref_get(&mc->mcref);
+ INIT_WORK(&work->work, cma_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &work->event, mc);
+ /* Balances with cma_id_put() in cma_work_handler */
+ cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
-
return 0;
-out2:
- kfree(mc->multicast.ib);
-out1:
+err_free:
kfree(work);
return err;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
u8 join_state, void *context)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
struct cma_multicast *mc;
int ret;
- if (!id->device)
+ /* Not supported for kernel QPs */
+ if (WARN_ON(id->qp))
return -EINVAL;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+ /* ULP is calling this wrong. */
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
mc->join_state = join_state;
if (rdma_protocol_roce(id->device, id->port_num)) {
- kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
if (ret)
goto out_err;
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
-
- if (id->qp)
- ib_detach_mcast(id->qp,
- &mc->multicast.ib->rec.mgid,
- be16_to_cpu(mc->multicast.ib->rec.mlid));
-
- BUG_ON(id_priv->cma_dev->device != id->device);
-
- if (rdma_cap_ib_mcast(id->device, id->port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num)) {
- cma_leave_roce_mc_group(id_priv, mc);
- }
- return;
- }
+ if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+ continue;
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ WARN_ON(id_priv->cma_dev->device != id->device);
+ destroy_mc(id_priv, mc);
+ return;
}
spin_unlock_irq(&id_priv->lock);
}
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
- struct cma_ndev_work *work;
+ struct cma_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, cma_ndev_work_handler);
+ INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
cma_id_get(id_priv);
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
- int gid_type = ib_cache_gid_parse_type_str(buf);
+ int gid_type;
ssize_t ret;
- if (gid_type < 0)
- return -EINVAL;
-
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
+ gid_type = ib_cache_gid_parse_type_str(buf);
+ if (gid_type < 0)
+ return -EINVAL;
+
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
#include <linux/tracepoint.h>
#include <trace/events/rdma.h>
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST \
- ib_cm_event(REQ_ERROR) \
- ib_cm_event(REQ_RECEIVED) \
- ib_cm_event(REP_ERROR) \
- ib_cm_event(REP_RECEIVED) \
- ib_cm_event(RTU_RECEIVED) \
- ib_cm_event(USER_ESTABLISHED) \
- ib_cm_event(DREQ_ERROR) \
- ib_cm_event(DREQ_RECEIVED) \
- ib_cm_event(DREP_RECEIVED) \
- ib_cm_event(TIMEWAIT_EXIT) \
- ib_cm_event(MRA_RECEIVED) \
- ib_cm_event(REJ_RECEIVED) \
- ib_cm_event(LAP_ERROR) \
- ib_cm_event(LAP_RECEIVED) \
- ib_cm_event(APR_RECEIVED) \
- ib_cm_event(SIDR_REQ_ERROR) \
- ib_cm_event(SIDR_REQ_RECEIVED) \
- ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) { IB_CM_##x, #x },
-#define ib_cm_event_end(x) { IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
- __print_symbolic(x, IB_CM_EVENT_LIST)
-
DECLARE_EVENT_CLASS(cma_fsm_class,
TP_PROTO(
#include <rdma/ib_mad.h>
#include <rdma/restrack.h>
#include "mad_priv.h"
+#include "restrack.h"
/* Total number of ports combined across all struct ib_devices's */
#define RDMA_MAX_PORTS 8192
INIT_LIST_HEAD(&qp->rdma_mrs);
INIT_LIST_HEAD(&qp->sig_mrs);
+ rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internaly by driver,
*/
is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
- qp->res.type = RDMA_RESTRACK_QP;
- if (uobj)
- rdma_restrack_uadd(&qp->res);
- else
- rdma_restrack_kadd(&qp->res);
- } else
- qp->res.valid = false;
-
+ rdma_restrack_parent_name(&qp->res, &pd->res);
+ rdma_restrack_add(&qp->res);
+ }
return qp;
}
counter->device = dev;
counter->port = port;
- counter->res.type = RDMA_RESTRACK_COUNTER;
- counter->stats = dev->ops.counter_alloc_stats(counter);
+
+ rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+ counter->stats = dev->ops.counter_alloc_stats(counter);
if (!counter->stats)
goto err_stats;
mutex_unlock(&port_counter->lock);
kfree(counter->stats);
err_stats:
+ rdma_restrack_put(&counter->res);
kfree(counter);
return NULL;
}
static void rdma_counter_res_add(struct rdma_counter *counter,
struct ib_qp *qp)
{
- if (rdma_is_kernel_res(&qp->res)) {
- rdma_restrack_set_task(&counter->res, qp->res.kern_name);
- rdma_restrack_kadd(&counter->res);
- } else {
- rdma_restrack_attach_task(&counter->res, qp->res.task);
- rdma_restrack_uadd(&counter->res);
- }
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
}
static void counter_release(struct kref *kref)
}
/**
- * __ib_alloc_cq_user - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
- * @udata: Valid user data or NULL for kernel object
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
if (!cq->wc)
goto out_free_cq;
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = dev->ops.create_cq(cq, &cq_attr, NULL);
if (ret)
goto out_free_wc;
- rdma_restrack_kadd(&cq->res);
-
rdma_dim_init(cq);
switch (cq->poll_ctx) {
goto out_destroy_cq;
}
+ rdma_restrack_add(&cq->res);
trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
out_destroy_cq:
rdma_dim_destroy(cq);
- rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
+ cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
+ rdma_restrack_put(&cq->res);
kfree(cq->wc);
out_free_cq:
kfree(cq);
trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
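
With the udata argument gone, in-kernel ULPs keep using the ib_alloc_cq()/ib_free_cq() wrappers (ib_alloc_cq() supplies KBUILD_MODNAME as @caller). A hedged usage sketch; my_ctx and the completion handler are hypothetical:

struct my_ctx {
        struct ib_cq *cq;
        struct ib_cqe cqe;              /* wr->wr_cqe must point at this */
};

static void my_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct my_ctx *ctx = container_of(wc->wr_cqe, struct my_ctx, cqe);

        if (wc->status != IB_WC_SUCCESS)
                return;
        pr_debug("completion for %p\n", ctx);   /* handle the completion */
}

static int my_setup(struct ib_device *dev, struct my_ctx *ctx)
{
        ctx->cq = ib_alloc_cq(dev, ctx, 128, 0, IB_POLL_SOFTIRQ);
        if (IS_ERR(ctx->cq))
                return PTR_ERR(ctx->cq);
        ctx->cqe.done = my_done;
        return 0;
}

static void my_teardown(struct my_ctx *ctx)
{
        ib_free_cq(ctx->cq);            /* kernel CQs no longer take udata */
}
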
/**
* __ib_alloc_cq_any - allocate a completion queue
atomic_inc_return(&counter) %
min_t(int, dev->num_comp_vectors, num_online_cpus());
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- caller, NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);
/**
- * ib_free_cq_user - free a completion queue
+ * ib_free_cq - free a completion queue
* @cq: completion queue to free.
- * @udata: User data or NULL for kernel object
*/
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
{
+ int ret;
+
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
if (WARN_ON_ONCE(cq->cqe_used))
rdma_dim_destroy(cq);
trace_cq_free(cq);
+ ret = cq->device->ops.destroy_cq(cq, NULL);
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
kfree(cq->wc);
kfree(cq);
}
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_free_cq);
void ib_cq_pool_init(struct ib_device *dev)
{
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
+ SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
SET_OBJ_SIZE(dev_ops, ib_xrcd);
lockdep_assert_held(&ufile->hw_destroy_rwsem);
assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
- if (reason == RDMA_REMOVE_ABORT_HWOBJ) {
- reason = RDMA_REMOVE_ABORT;
- ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
- attrs);
- /*
- * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see
- * ib_is_destroy_retryable, cleanup_retryable == false here.
- */
- WARN_ON(ret);
- }
-
if (reason == RDMA_REMOVE_ABORT) {
WARN_ON(!list_empty(&uobj->list));
WARN_ON(!uobj->context);
bool hw_obj_valid)
{
struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
+
+ if (hw_obj_valid) {
+ ret = uobj->uapi_object->type_class->destroy_hw(
+ uobj, RDMA_REMOVE_ABORT, attrs);
+ /*
+ * If the driver couldn't destroy the object then go ahead and
+ * commit it. Leaking objects that can't be destroyed is only
+ * done during FD close after the driver has a few more tries to
+ * destroy it.
+ */
+ if (WARN_ON(ret))
+ return rdma_alloc_commit_uobject(uobj, attrs);
+ }
- uverbs_destroy_uobject(uobj,
- hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ :
- RDMA_REMOVE_ABORT,
- attrs);
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
if (!ufile->ucontext)
goto done;
- ufile->ucontext->closing = true;
ufile->ucontext->cleanup_retryable = true;
while (!list_empty(&ufile->uobjects))
if (__uverbs_cleanup_ufile(ufile, reason)) {
/*
* No entry was cleaned-up successfully during this
- * iteration
+ * iteration. It is a driver bug to fail destruction.
*/
+ WARN_ON(!list_empty(&ufile->uobjects));
break;
}
}
EXPORT_SYMBOL(rdma_restrack_count);
-static void set_kern_name(struct rdma_restrack_entry *res)
-{
- struct ib_pd *pd;
-
- switch (res->type) {
- case RDMA_RESTRACK_QP:
- pd = container_of(res, struct ib_qp, res)->pd;
- if (!pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- }
- break;
- case RDMA_RESTRACK_MR:
- pd = container_of(res, struct ib_mr, res)->pd;
- break;
- default:
- /* Other types set kern_name directly */
- pd = NULL;
- break;
- }
-
- if (pd)
- res->kern_name = pd->res.kern_name;
-}
-
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
switch (res->type) {
}
}
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller)
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res: resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
{
- if (caller) {
- res->kern_name = caller;
+ if (WARN_ON_ONCE(!task))
return;
- }
if (res->task)
put_task_struct(res->task);
- get_task_struct(current);
- res->task = current;
+ get_task_struct(task);
+ res->task = task;
+ res->user = true;
}
-EXPORT_SYMBOL(rdma_restrack_set_task);
/**
- * rdma_restrack_attach_task() - attach the task onto this resource
+ * rdma_restrack_set_name() - set the name for this resource
* @res: resource entry
- * @task: the task to attach, the current task will be used if it is NULL.
+ * @caller: kernel name, the current task will be used if the caller is NULL.
*/
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
- struct task_struct *task)
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
{
- if (res->task)
- put_task_struct(res->task);
- get_task_struct(task);
- res->task = task;
+ if (caller) {
+ res->kern_name = caller;
+ return;
+ }
+
+ rdma_restrack_attach_task(res, current);
+}
+EXPORT_SYMBOL(rdma_restrack_set_name);
+
+/**
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
+ */
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent)
+{
+ if (rdma_is_kernel_res(parent))
+ dst->kern_name = parent->kern_name;
+ else
+ rdma_restrack_attach_task(dst, parent->task);
+}
+EXPORT_SYMBOL(rdma_restrack_parent_name);
+
+/**
+ * rdma_restrack_new() - initialize a new restrack entry to allow the _put()
+ * interface to release memory in a fully automatic way
+ * @res: entry to initialize
+ * @type: restrack type
+ */
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type)
+{
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->type = type;
}
+EXPORT_SYMBOL(rdma_restrack_new);
-static void rdma_restrack_add(struct rdma_restrack_entry *res)
+/**
+ * rdma_restrack_add() - add object to the resource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
struct rdma_restrack_root *rt;
rt = &dev->res[res->type];
- kref_init(&res->kref);
- init_completion(&res->comp);
if (res->type == RDMA_RESTRACK_QP) {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
if (!ret)
res->valid = true;
}
-
-/**
- * rdma_restrack_kadd() - add kernel object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_kadd(struct rdma_restrack_entry *res)
-{
- res->task = NULL;
- set_kern_name(res);
- res->user = false;
- rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_kadd);
-
-/**
- * rdma_restrack_uadd() - add user object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_uadd(struct rdma_restrack_entry *res)
-{
- if ((res->type != RDMA_RESTRACK_CM_ID) &&
- (res->type != RDMA_RESTRACK_COUNTER))
- res->task = NULL;
-
- if (!res->task)
- rdma_restrack_set_task(res, NULL);
- res->kern_name = NULL;
-
- res->user = true;
- rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_uadd);
+EXPORT_SYMBOL(rdma_restrack_add);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *res;
res = container_of(kref, struct rdma_restrack_entry, kref);
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
complete(&res->comp);
}
}
EXPORT_SYMBOL(rdma_restrack_put);
+/**
+ * rdma_restrack_del() - delete object from the resource tracking database
+ * @res: resource entry
+ */
void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *old;
struct rdma_restrack_root *rt;
struct ib_device *dev;
- if (!res->valid)
- goto out;
+ if (!res->valid) {
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
+ return;
+ }
dev = res_to_dev(res);
if (WARN_ON(!dev))
rt = &dev->res[res->type];
old = xa_erase(&rt->xa, res->id);
+ if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
+ return;
WARN_ON(old != res);
res->valid = false;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
-
-out:
- if (res->task) {
- put_task_struct(res->task);
- res->task = NULL;
- }
}
EXPORT_SYMBOL(rdma_restrack_del);
int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
- struct task_struct *task);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+ const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent);
#endif /* _RDMA_CORE_RESTRACK_H_ */
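
The declarations above replace the old kadd/uadd entry points with a new/set_name/add lifecycle. A sketch of how a core object is now tracked, assembled from the cq.c and uverbs hunks in this series (the error handling is illustrative):

static int my_create_tracked_cq(struct ib_device *dev, struct ib_cq *cq,
                                const struct ib_cq_init_attr *attr,
                                const char *caller)
{
        int ret;

        rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
        /* a NULL caller marks the entry as user owned and attaches current */
        rdma_restrack_set_name(&cq->res, caller);

        ret = dev->ops.create_cq(cq, attr, NULL);
        if (ret) {
                /* never added to the XArray, so just drop the kref */
                rdma_restrack_put(&cq->res);
                return ret;
        }

        /* publish the entry to the per-device resource XArray */
        rdma_restrack_add(&cq->res);
        return 0;
}

On destroy, rdma_restrack_del() unpublishes the entry and waits out any readers before the memory is freed.
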
struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group *pkey_group;
- struct attribute_group *pma_table;
+ const struct attribute_group *pma_table;
struct attribute_group *hw_stats_ag;
struct rdma_hw_stats *hw_stats;
u8 port_num;
gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
if (IS_ERR(gid_attr))
- return PTR_ERR(gid_attr);
+ /* -EINVAL is returned for user space compatibility reasons. */
+ return -EINVAL;
ret = print(gid_attr, buf);
rdma_put_gid_attr(gid_attr);
NULL
};
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
.name = "counters",
.attrs = pma_attrs_ext
};
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
.name = "counters",
.attrs = pma_attrs_noietf
};
* Figure out which counter table to use depending on
* the device capabilities.
*/
-static struct attribute_group *get_counter_table(struct ib_device *dev,
- int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
{
struct ib_class_port_info cpi;
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
- struct workqueue_struct *close_wq;
};
struct ucma_context {
struct completion comp;
refcount_t ref;
int events_reported;
- int backlog;
+ atomic_t backlog;
struct ucma_file *file;
struct rdma_cm_id *cm_id;
u64 uid;
struct list_head list;
- struct list_head mc_list;
- /* mark that device is in process of destroying the internal HW
- * resources, protected by the ctx_table lock
- */
- int closing;
/* sync between removal event and id destroy, protected by file mut */
int destroying;
struct work_struct close_work;
u64 uid;
u8 join_state;
- struct list_head list;
struct sockaddr_storage addr;
};
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_context *listen_ctx;
struct ucma_multicast *mc;
struct list_head list;
- struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
- struct work_struct close_work;
};
static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);
static const struct file_operations ucma_fops;
+static int __destroy_id(struct ucma_context *ctx);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
ctx = xa_load(&ctx_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file || !ctx->cm_id)
+ else if (ctx->file != file)
ctx = ERR_PTR(-EINVAL);
return ctx;
}
xa_lock(&ctx_table);
ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx)) {
- if (ctx->closing)
- ctx = ERR_PTR(-EIO);
- else
- refcount_inc(&ctx->ref);
- }
+ if (!IS_ERR(ctx))
+ if (!refcount_inc_not_zero(&ctx->ref))
+ ctx = ERR_PTR(-ENXIO);
xa_unlock(&ctx_table);
return ctx;
}
return ctx;
}
-static void ucma_close_event_id(struct work_struct *work)
-{
- struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
-
- rdma_destroy_id(uevent_close->cm_id);
- kfree(uevent_close);
-}
-
static void ucma_close_id(struct work_struct *work)
{
struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
wait_for_completion(&ctx->comp);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+
+ /*
+ * At this point ctx->ref is zero so the only place the ctx can be is in
+ * a uevent or in __destroy_id(). Since the former doesn't touch
+	 * ctx->cm_id and the latter synchronously cancels this work, there are
+	 * no races with this store.
+ */
+ ctx->cm_id = NULL;
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_WORK(&ctx->close_work, ucma_close_id);
refcount_set(&ctx->ref, 1);
init_completion(&ctx->comp);
- INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
mutex_init(&ctx->mutex);
- if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&ctx->list, &file->ctx_list);
+ if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+ kfree(ctx);
+ return NULL;
+ }
return ctx;
-
-error:
- kfree(ctx);
- return NULL;
}
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_finish_ctx(struct ucma_context *ctx)
{
- struct ucma_multicast *mc;
-
- mc = kzalloc(sizeof(*mc), GFP_KERNEL);
- if (!mc)
- return NULL;
-
- mc->ctx = ctx;
- if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&mc->list, &ctx->mc_list);
- return mc;
-
-error:
- kfree(mc);
- return NULL;
+ lockdep_assert_held(&ctx->file->mut);
+ list_add_tail(&ctx->list, &ctx->file->ctx_list);
+ xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
dst->qkey = src->qkey;
}
-static void ucma_set_event_context(struct ucma_context *ctx,
- struct rdma_cm_event *event,
- struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+ struct rdma_cm_event *event)
{
+ struct ucma_event *uevent;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return NULL;
+
uevent->ctx = ctx;
switch (event->event) {
case RDMA_CM_EVENT_MULTICAST_JOIN:
uevent->resp.id = ctx->id;
break;
}
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+ if (ctx->cm_id->qp_type == IB_QPT_UD)
+ ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+ return uevent;
}
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
{
- struct ucma_context *ctx = cm_id->context;
- struct ucma_event *con_req_eve;
- int event_found = 0;
+ struct ucma_context *listen_ctx = cm_id->context;
+ struct ucma_context *ctx;
+ struct ucma_event *uevent;
- if (ctx->destroying)
- return;
+ if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+ return -ENOMEM;
+ ctx = ucma_alloc_ctx(listen_ctx->file);
+ if (!ctx)
+ goto err_backlog;
+ ctx->cm_id = cm_id;
- /* only if context is pointing to cm_id that it owns it and can be
- * queued to be closed, otherwise that cm_id is an inflight one that
- * is part of that context event list pending to be detached and
- * reattached to its new context as part of ucma_get_event,
- * handled separately below.
- */
- if (ctx->cm_id == cm_id) {
- xa_lock(&ctx_table);
- ctx->closing = 1;
- xa_unlock(&ctx_table);
- queue_work(ctx->file->close_wq, &ctx->close_work);
- return;
- }
+ uevent = ucma_create_uevent(listen_ctx, event);
+ if (!uevent)
+ goto err_alloc;
+ uevent->listen_ctx = listen_ctx;
+ uevent->resp.id = ctx->id;
- list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
- if (con_req_eve->cm_id == cm_id &&
- con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- list_del(&con_req_eve->list);
- INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
- queue_work(ctx->file->close_wq, &con_req_eve->close_work);
- event_found = 1;
- break;
- }
- }
- if (!event_found)
- pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ ctx->cm_id->context = ctx;
+
+ mutex_lock(&ctx->file->mut);
+ ucma_finish_ctx(ctx);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ return 0;
+
+err_alloc:
+ xa_erase(&ctx_table, ctx->id);
+ kfree(ctx);
+err_backlog:
+ atomic_inc(&listen_ctx->backlog);
+ /* Returning error causes the new ID to be destroyed */
+ return -ENOMEM;
}
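
The backlog above is now an atomic counter: atomic_add_unless(&backlog, -1, 0) decrements only while the counter is still positive, so concurrent handlers can never drive it negative, and the slot is handed back with atomic_inc() either on error or once userspace consumes the CONNECT_REQUEST event. A tiny sketch of that reserve/release idiom, with hypothetical names:

static bool my_reserve_slot(atomic_t *backlog)
{
        /* false (and no change) once the counter has reached zero */
        return atomic_add_unless(backlog, -1, 0);
}

static void my_release_slot(atomic_t *backlog)
{
        atomic_inc(backlog);
}
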
static int ucma_event_handler(struct rdma_cm_id *cm_id,
{
struct ucma_event *uevent;
struct ucma_context *ctx = cm_id->context;
- int ret = 0;
-
- uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
- if (!uevent)
- return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
- mutex_lock(&ctx->file->mut);
- uevent->cm_id = cm_id;
- ucma_set_event_context(ctx, event, uevent);
- uevent->resp.event = event->event;
- uevent->resp.status = event->status;
- if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
- &event->param.ud);
- else
- ucma_copy_conn_event(&uevent->resp.param.conn,
- &event->param.conn);
-
- uevent->resp.ece.vendor_id = event->ece.vendor_id;
- uevent->resp.ece.attr_mod = event->ece.attr_mod;
-
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- if (!ctx->backlog) {
- ret = -ENOMEM;
- kfree(uevent);
- goto out;
- }
- ctx->backlog--;
- } else if (!ctx->uid || ctx->cm_id != cm_id) {
- /*
- * We ignore events for new connections until userspace has set
- * their context. This can only happen if an error occurs on a
- * new connection before the user accepts it. This is okay,
- * since the accept will just fail later. However, we do need
- * to release the underlying HW resources in case of a device
- * removal event.
- */
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ return ucma_connect_event_handler(cm_id, event);
- kfree(uevent);
- goto out;
+ /*
+ * We ignore events for new connections until userspace has set their
+ * context. This can only happen if an error occurs on a new connection
+ * before the user accepts it. This is okay, since the accept will just
+ * fail later. However, we do need to release the underlying HW
+ * resources in case of a device removal event.
+ */
+ if (ctx->uid) {
+ uevent = ucma_create_uevent(ctx, event);
+ if (!uevent)
+ return 0;
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
}
- list_add_tail(&uevent->list, &ctx->file->event_list);
- wake_up_interruptible(&ctx->file->poll_wait);
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-out:
- mutex_unlock(&ctx->file->mut);
- return ret;
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
+ queue_work(system_unbound_wq, &ctx->close_work);
+ return 0;
}
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct ucma_context *ctx;
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
- int ret = 0;
/*
* Old 32 bit user space does not send the 4 byte padding in the
mutex_lock(&file->mut);
}
- uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- ctx = ucma_alloc_ctx(file);
- if (!ctx) {
- ret = -ENOMEM;
- goto done;
- }
- uevent->ctx->backlog++;
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
+ uevent = list_first_entry(&file->event_list, struct ucma_event, list);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp,
min_t(size_t, out_len, sizeof(uevent->resp)))) {
- ret = -EFAULT;
- goto done;
+ mutex_unlock(&file->mut);
+ return -EFAULT;
}
list_del(&uevent->list);
uevent->ctx->events_reported++;
if (uevent->mc)
uevent->mc->events_reported++;
- kfree(uevent);
-done:
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ atomic_inc(&uevent->ctx->backlog);
mutex_unlock(&file->mut);
- return ret;
+
+ kfree(uevent);
+ return 0;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
if (ret)
return ret;
- mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
- mutex_unlock(&file->mut);
if (!ctx)
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = __rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+ cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
+ ctx->cm_id = cm_id;
resp.id = ctx->id;
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
- ret = -EFAULT;
- goto err2;
+ xa_erase(&ctx_table, ctx->id);
+ __destroy_id(ctx);
+ return -EFAULT;
}
- ctx->cm_id = cm_id;
+ mutex_lock(&file->mut);
+ ucma_finish_ctx(ctx);
+ mutex_unlock(&file->mut);
return 0;
-err2:
- rdma_destroy_id(cm_id);
err1:
xa_erase(&ctx_table, ctx->id);
- mutex_lock(&file->mut);
- list_del(&ctx->list);
- mutex_unlock(&file->mut);
kfree(ctx);
return ret;
}
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
- struct ucma_multicast *mc, *tmp;
+ struct ucma_multicast *mc;
+ unsigned long index;
- mutex_lock(&ctx->file->mut);
- list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
- list_del(&mc->list);
- xa_erase(&multicast_table, mc->id);
+ xa_for_each(&multicast_table, index, mc) {
+ if (mc->ctx != ctx)
+ continue;
+ /*
+		 * At this point mc->ctx->ref is 0, so a concurrent reader cannot
+		 * take the mc out from under the xarray lock; that is enough
+		 * serialization.
+ */
+ xa_erase(&multicast_table, index);
kfree(mc);
}
- mutex_unlock(&ctx->file->mut);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
+ rdma_lock_handler(mc->ctx->cm_id);
+ mutex_lock(&mc->ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
if (uevent->mc != mc)
continue;
list_del(&uevent->list);
kfree(uevent);
}
+ mutex_unlock(&mc->ctx->file->mut);
+ rdma_unlock_handler(mc->ctx->cm_id);
}
/*
* this point, no new events will be reported from the hardware. However, we
* still need to cleanup the UCMA context for this ID. Specifically, there
* might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing. We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
* mutex. After that we release them as needed.
*/
static int ucma_free_ctx(struct ucma_context *ctx)
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
-
ucma_cleanup_multicast(ctx);
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx == ctx)
+ if (uevent->ctx == ctx || uevent->listen_ctx == ctx)
list_move_tail(&uevent->list, &list);
}
list_del(&ctx->list);
+ events_reported = ctx->events_reported;
mutex_unlock(&ctx->file->mut);
+ /*
+ * If this was a listening ID then any connections spawned from it
+ * that have not been delivered to userspace are cleaned up too.
+ * Must be done outside any locks.
+ */
list_for_each_entry_safe(uevent, tmp, &list, list) {
list_del(&uevent->list);
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
+ __destroy_id(uevent->ctx);
kfree(uevent);
}
- events_reported = ctx->events_reported;
mutex_destroy(&ctx->mutex);
kfree(ctx);
return events_reported;
}
+static int __destroy_id(struct ucma_context *ctx)
+{
+ /*
+ * If the refcount is already 0 then ucma_close_id() has already
+ * destroyed the cm_id, otherwise holding the refcount keeps cm_id
+ * valid. Prevent queue_work() from being called.
+ */
+ if (refcount_inc_not_zero(&ctx->ref)) {
+ rdma_lock_handler(ctx->cm_id);
+ ctx->destroying = 1;
+ rdma_unlock_handler(ctx->cm_id);
+ ucma_put_ctx(ctx);
+ }
+
+ cancel_work_sync(&ctx->close_work);
+ /* At this point it's guaranteed that there is no inflight closing task */
+ if (ctx->cm_id)
+ ucma_close_id(&ctx->close_work);
+ return ucma_free_ctx(ctx);
+}
+
static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&ctx->file->mut);
- ctx->destroying = 1;
- mutex_unlock(&ctx->file->mut);
-
- flush_workqueue(ctx->file->close_wq);
- /* At this point it's guaranteed that there is no inflight
- * closing task */
- xa_lock(&ctx_table);
- if (!ctx->closing) {
- xa_unlock(&ctx_table);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- rdma_destroy_id(ctx->cm_id);
- } else {
- xa_unlock(&ctx_table);
- }
-
- resp.events_reported = ucma_free_ctx(ctx);
+ resp.events_reported = __destroy_id(ctx);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog;
+ if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+ cmd.backlog = max_backlog;
+ atomic_set(&ctx->backlog, cmd.backlog);
+
mutex_lock(&ctx->mutex);
- ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ ret = rdma_listen(ctx->cm_id, cmd.backlog);
mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- mutex_lock(&file->mut);
mutex_lock(&ctx->mutex);
- ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece);
- mutex_unlock(&ctx->mutex);
- if (!ret)
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+ if (!ret) {
+ /* The uid must be set atomically with the handler */
ctx->uid = cmd.uid;
- mutex_unlock(&file->mut);
+ }
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
} else {
mutex_lock(&ctx->mutex);
- ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+ rdma_unlock_handler(ctx->cm_id);
mutex_unlock(&ctx->mutex);
}
ucma_put_ctx(ctx);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&file->mut);
- mc = ucma_alloc_multicast(ctx);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc) {
ret = -ENOMEM;
- goto err1;
+ goto err_put_ctx;
}
+
+ mc->ctx = ctx;
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
+
+ if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
mutex_unlock(&ctx->mutex);
if (ret)
- goto err2;
+ goto err_xa_erase;
resp.id = mc->id;
if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err3;
+ goto err_leave_multicast;
}
xa_store(&multicast_table, mc->id, mc, 0);
- mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
return 0;
-err3:
+err_leave_multicast:
+ mutex_lock(&ctx->mutex);
rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
-err2:
+err_xa_erase:
xa_erase(&multicast_table, mc->id);
- list_del(&mc->list);
+err_free_mc:
kfree(mc);
-err1:
- mutex_unlock(&file->mut);
+err_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
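
The join path now reserves the multicast ID with a NULL entry and only publishes the pointer with xa_store() after rdma_join_multicast() succeeded and the response reached userspace, so a concurrent lookup can never see a half-initialized mc. A sketch of that reserve-then-publish XArray idiom, with hypothetical my_* names:

struct my_obj {
        u32 id;
        /* ... */
};

static int my_publish(struct xarray *xa, struct my_obj *obj)
{
        int ret;

        /* reserve an ID; xa_load() returns NULL until the pointer is stored */
        ret = xa_alloc(xa, &obj->id, NULL, xa_limit_32b, GFP_KERNEL);
        if (ret)
                return ret;

        ret = my_obj_setup(obj);                /* hypothetical, may sleep */
        if (ret) {
                xa_erase(xa, obj->id);          /* hand the reserved ID back */
                return ret;
        }

        xa_store(xa, obj->id, obj, GFP_KERNEL); /* now visible to lookups */
        return 0;
}
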
mc = xa_load(&multicast_table, cmd.id);
if (!mc)
mc = ERR_PTR(-ENOENT);
- else if (mc->ctx->file != file)
+ else if (READ_ONCE(mc->ctx->file) != file)
mc = ERR_PTR(-EINVAL);
else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
mutex_unlock(&mc->ctx->mutex);
- mutex_lock(&mc->ctx->file->mut);
ucma_cleanup_mc_events(mc);
- list_del(&mc->list);
- mutex_unlock(&mc->ctx->file->mut);
ucma_put_ctx(mc->ctx);
resp.events_reported = mc->events_reported;
return ret;
}
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- /* Acquire mutex's based on pointer comparison to prevent deadlock. */
- if (file1 < file2) {
- mutex_lock(&file1->mut);
- mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
- } else {
- mutex_lock(&file2->mut);
- mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
- }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- if (file1 < file2) {
- mutex_unlock(&file2->mut);
- mutex_unlock(&file1->mut);
- } else {
- mutex_unlock(&file1->mut);
- mutex_unlock(&file2->mut);
- }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
- if (uevent->ctx == ctx)
- list_move_tail(&uevent->list, &file->event_list);
-}
-
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
+ struct ucma_event *uevent, *tmp;
struct ucma_context *ctx;
+ LIST_HEAD(event_list);
struct fd f;
struct ucma_file *cur_file;
int ret = 0;
ret = -EINVAL;
goto file_put;
}
+ cur_file = f.file->private_data;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(f.file->private_data, cmd.id);
+ ctx = ucma_get_ctx(cur_file, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto file_put;
}
- cur_file = ctx->file;
- if (cur_file == new_file) {
- resp.events_reported = ctx->events_reported;
- goto response;
- }
-
+ rdma_lock_handler(ctx->cm_id);
/*
- * Migrate events between fd's, maintaining order, and avoiding new
- * events being added before existing events.
+ * ctx->file can only be changed under the handler & xa_lock. xa_load()
+ * must be checked again to ensure the ctx hasn't begun destruction
+ * since the ucma_get_ctx().
*/
- ucma_lock_files(cur_file, new_file);
xa_lock(&ctx_table);
-
- list_move_tail(&ctx->list, &new_file->ctx_list);
- ucma_move_events(ctx, new_file);
+ if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+ xa_unlock(&ctx_table);
+ ret = -ENOENT;
+ goto err_unlock;
+ }
ctx->file = new_file;
+ xa_unlock(&ctx_table);
+
+ mutex_lock(&cur_file->mut);
+ list_del(&ctx->list);
+ /*
+ * At this point lock_handler() prevents addition of new uevents for
+ * this ctx.
+ */
+ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &event_list);
resp.events_reported = ctx->events_reported;
+ mutex_unlock(&cur_file->mut);
- xa_unlock(&ctx_table);
- ucma_unlock_files(cur_file, new_file);
+ mutex_lock(&new_file->mut);
+ list_add_tail(&ctx->list, &new_file->ctx_list);
+ list_splice_tail(&event_list, &new_file->event_list);
+ mutex_unlock(&new_file->mut);
-response:
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
+err_unlock:
+ rdma_unlock_handler(ctx->cm_id);
ucma_put_ctx(ctx);
file_put:
fdput(f);
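
Rather than the old ucma_lock_files() nesting, pending events are now drained into a local list under the source file's mutex and spliced under the destination's, with rdma_lock_handler() stopping new events from being queued in between. The generic shape of that two-lock migration, with hypothetical my_* types:

struct my_event {
        struct list_head list;
        void *ctx;
};

struct my_file {
        struct mutex mut;
        struct list_head event_list;
};

static void my_migrate_events(struct my_file *src, struct my_file *dst,
                              void *ctx)
{
        struct my_event *ev, *tmp;
        LIST_HEAD(tmp_list);

        mutex_lock(&src->mut);
        list_for_each_entry_safe(ev, tmp, &src->event_list, list)
                if (ev->ctx == ctx)
                        list_move_tail(&ev->list, &tmp_list);
        mutex_unlock(&src->mut);

        mutex_lock(&dst->mut);
        list_splice_tail(&tmp_list, &dst->event_list);
        mutex_unlock(&dst->mut);
}
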
if (!file)
return -ENOMEM;
- file->close_wq = alloc_ordered_workqueue("ucma_close_id",
- WQ_MEM_RECLAIM);
- if (!file->close_wq) {
- kfree(file);
- return -ENOMEM;
- }
-
INIT_LIST_HEAD(&file->event_list);
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
static int ucma_close(struct inode *inode, struct file *filp)
{
struct ucma_file *file = filp->private_data;
- struct ucma_context *ctx, *tmp;
- mutex_lock(&file->mut);
- list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
- ctx->destroying = 1;
- mutex_unlock(&file->mut);
+ /*
+	 * All paths that touch ctx_list starting from write() are
+	 * prevented by this being an FD release function. The list_add_tail() in
+ * ucma_connect_event_handler() can run concurrently, however it only
+ * adds to the list *after* a listening ID. By only reading the first of
+ * the list, and relying on __destroy_id() to block
+ * ucma_connect_event_handler(), no additional locking is needed.
+ */
+ while (!list_empty(&file->ctx_list)) {
+ struct ucma_context *ctx = list_first_entry(
+ &file->ctx_list, struct ucma_context, list);
xa_erase(&ctx_table, ctx->id);
- flush_workqueue(file->close_wq);
- /* At that step once ctx was marked as destroying and workqueue
- * was flushed we are safe from any inflights handlers that
- * might put other closing task.
- */
- xa_lock(&ctx_table);
- if (!ctx->closing) {
- xa_unlock(&ctx_table);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- /* rdma_destroy_id ensures that no event handlers are
- * inflight for that id before releasing it.
- */
- rdma_destroy_id(ctx->cm_id);
- } else {
- xa_unlock(&ctx_table);
- }
-
- ucma_free_ctx(ctx);
- mutex_lock(&file->mut);
+ __destroy_id(ctx);
}
- mutex_unlock(&file->mut);
- destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
unsigned long virt)
{
struct scatterlist *sg;
- unsigned int best_pg_bit;
unsigned long va, pgoff;
dma_addr_t mask;
int i;
+ /* rdma_for_each_block() has a bug if the page size is smaller than the
+ * page size used to build the umem. For now prevent smaller page sizes
+ * from being returned.
+ */
+ pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
+
/* At minimum, drivers must support PAGE_SIZE or smaller */
if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
return 0;
- va = virt;
- /* max page size not to exceed MR length */
- mask = roundup_pow_of_two(umem->length);
+ umem->iova = va = virt;
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+ */
+ mask = pgsz_bitmap &
+ GENMASK(BITS_PER_LONG - 1,
+ bits_per((umem->length - 1 + virt) ^ virt));
/* offset into first SGL */
pgoff = umem->address & ~PAGE_MASK;
mask |= va;
pgoff = 0;
}
- best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
- return BIT_ULL(best_pg_bit);
+ /* The mask accumulates 1's in each position where the VA and physical
+	 * address differ, thus the number of trailing zeros determines the
+	 * largest page size that can pass the VA through to the physical.
+ */
+ if (mask)
+ pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+ return rounddown_pow_of_two(pgsz_bitmap);
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
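
The selection boils down to bit arithmetic: every position where the IOVA and the physical layout disagree is set in the mask, and the run of trailing zeros bounds the usable page size. A loose, runnable userspace illustration of just that trailing-zeros idea (not the kernel helper; overflow guards omitted):

#include <stdint.h>
#include <stdio.h>

static uint64_t demo_best_pgsz(uint64_t pgsz_bitmap, uint64_t iova,
                               uint64_t length)
{
        /* bits where alignment is violated limit the page size */
        uint64_t mask = iova | length;

        if (mask)       /* keep only page sizes <= 1 << ctz(mask) */
                pgsz_bitmap &= (1ULL << (__builtin_ctzll(mask) + 1)) - 1;
        if (!pgsz_bitmap)
                return 0;
        /* rounddown_pow_of_two(): highest remaining bit */
        return 1ULL << (63 - __builtin_clzll(pgsz_bitmap));
}

int main(void)
{
        uint64_t bitmap = (1ULL << 12) | (1ULL << 21) | (1ULL << 30);

        /* 6 MiB at a 2 MiB-aligned IOVA -> 2 MiB pages (0x200000) */
        printf("%#llx\n", (unsigned long long)
               demo_best_pgsz(bitmap, 0x40200000ULL, 6ULL << 20));
        /* 8 KiB at a 4 KiB-aligned IOVA -> 4 KiB pages (0x1000) */
        printf("%#llx\n", (unsigned long long)
               demo_best_pgsz(bitmap, 0x1000ULL, 8192));
        return 0;
}
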
umem->ibdev = device;
umem->length = size;
umem->address = addr;
+ /*
+ * Drivers should call ib_umem_find_best_pgsz() to set the iova
+ * correctly.
+ */
+ umem->iova = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
}
EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
-{
- int i, n = 0;
- struct scatterlist *sg;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> PAGE_SHIFT;
-
- return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
/*
* Copy from the given ib_umem's pages to the given buffer.
*
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <linux/interval_tree.h>
+#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <rdma/ib_verbs.h>
size_t page_size = 1UL << umem_odp->page_shift;
unsigned long start;
unsigned long end;
- size_t pages;
+ size_t ndmas, npfns;
start = ALIGN_DOWN(umem_odp->umem.address, page_size);
if (check_add_overflow(umem_odp->umem.address,
if (unlikely(end < page_size))
return -EOVERFLOW;
- pages = (end - start) >> umem_odp->page_shift;
- if (!pages)
+ ndmas = (end - start) >> umem_odp->page_shift;
+ if (!ndmas)
return -EINVAL;
- umem_odp->page_list = kvcalloc(
- pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
- if (!umem_odp->page_list)
+ npfns = (end - start) >> PAGE_SHIFT;
+ umem_odp->pfn_list = kvcalloc(
+ npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+ if (!umem_odp->pfn_list)
return -ENOMEM;
umem_odp->dma_list = kvcalloc(
- pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+ ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
if (!umem_odp->dma_list) {
ret = -ENOMEM;
- goto out_page_list;
+ goto out_pfn_list;
}
ret = mmu_interval_notifier_insert(&umem_odp->notifier,
out_dma_list:
kvfree(umem_odp->dma_list);
-out_page_list:
- kvfree(umem_odp->page_list);
+out_pfn_list:
+ kvfree(umem_odp->pfn_list);
return ret;
}
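
The two arrays are intentionally sized differently: dma_list has one entry per umem page (page_shift), while pfn_list has one entry per CPU page, since hmm_range_fault() reports PFNs at PAGE_SIZE granularity. Illustrative arithmetic only, with hypothetical numbers:

#include <stdio.h>

int main(void)
{
        unsigned long length = 4ul << 20;       /* a 4 MiB ODP umem          */
        unsigned int page_shift = 21;           /* built from 2 MiB pages    */
        unsigned int cpu_page_shift = 12;       /* 4 KiB CPU pages           */

        printf("dma_list entries: %lu\n", length >> page_shift);     /* 2    */
        printf("pfn_list entries: %lu\n", length >> cpu_page_shift); /* 1024 */
        return 0;
}
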
mutex_unlock(&umem_odp->umem_mutex);
mmu_interval_notifier_remove(&umem_odp->notifier);
kvfree(umem_odp->dma_list);
- kvfree(umem_odp->page_list);
+ kvfree(umem_odp->pfn_list);
}
put_pid(umem_odp->tgid);
kfree(umem_odp);
* Map for DMA and insert a single page into the on-demand paging page tables.
*
* @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
+ * @dma_index: index in the umem to add the dma to.
* @page: the page struct to map and add.
* @access_mask: access permissions needed for this page.
* @current_seq: sequence number for synchronization with invalidations.
* the sequence number is taken from
* umem_odp->notifiers_seq.
*
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ * The function returns -EFAULT if the DMA mapping operation fails.
*
- * The page is released via put_page even if the operation failed. For on-demand
- * pinning, the page is released whenever it isn't stored in the umem.
*/
static int ib_umem_odp_map_dma_single_page(
struct ib_umem_odp *umem_odp,
- unsigned int page_index,
+ unsigned int dma_index,
struct page *page,
- u64 access_mask,
- unsigned long current_seq)
+ u64 access_mask)
{
struct ib_device *dev = umem_odp->umem.ibdev;
- dma_addr_t dma_addr;
- int ret = 0;
+ dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
- if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
- ret = -EAGAIN;
- goto out;
- }
- if (!(umem_odp->dma_list[page_index])) {
- dma_addr =
- ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(dev, dma_addr)) {
- ret = -EFAULT;
- goto out;
- }
- umem_odp->dma_list[page_index] = dma_addr | access_mask;
- umem_odp->page_list[page_index] = page;
- umem_odp->npages++;
- } else if (umem_odp->page_list[page_index] == page) {
- umem_odp->dma_list[page_index] |= access_mask;
- } else {
+ if (*dma_addr) {
/*
- * This is a race here where we could have done:
- *
- * CPU0 CPU1
- * get_user_pages()
- * invalidate()
- * page_fault()
- * mutex_lock(umem_mutex)
- * page from GUP != page in ODP
- *
- * It should be prevented by the retry test above as reading
- * the seq number should be reliable under the
- * umem_mutex. Thus something is really not working right if
- * things get here.
+ * If the page is already dma mapped it means it went through
+		 * a non-invalidating transition, like read-only to writable.
+ * Resync the flags.
*/
- WARN(true,
- "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem_odp->page_list[page_index], page);
- ret = -EAGAIN;
+ *dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
+ return 0;
}
-out:
- put_page(page);
- return ret;
+ *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
+ DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(dev, *dma_addr)) {
+ *dma_addr = 0;
+ return -EFAULT;
+ }
+ umem_odp->npages++;
+ *dma_addr |= access_mask;
+ return 0;
}
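
Because every mapped DMA address is aligned to the umem page size, its low bits are always zero, and the ODP access flags ride in those bits (masked off again with ODP_DMA_ADDR_MASK before unmapping). A runnable userspace sketch of the same packing, with made-up flag values:

#include <stdint.h>
#include <stdio.h>

#define DEMO_READ_BIT   (1ULL << 0)
#define DEMO_WRITE_BIT  (1ULL << 1)
#define DEMO_ADDR_MASK  (~((1ULL << 12) - 1))   /* 4 KiB-aligned addresses */

int main(void)
{
        uint64_t dma = 0x7f2a4000ULL;           /* page aligned, low bits free */
        uint64_t entry = dma | DEMO_READ_BIT | DEMO_WRITE_BIT;

        printf("address  %#llx\n", (unsigned long long)(entry & DEMO_ADDR_MASK));
        printf("writable %d\n", !!(entry & DEMO_WRITE_BIT));
        return 0;
}
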
/**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
*
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem_odp->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
+ * Upon success the ODP MR will be locked to let caller complete its device
+ * page table update.
*
* Returns the number of pages mapped in success, negative error code
* for failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- * An -ENOENT error code indicates that userspace process is being terminated
- * and mm was already destroyed.
* @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
* the return value.
* @access_mask: bit mask of the requested access permissions for the given
* range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- * invalidations. the sequance number is read from
- * umem_odp->notifiers_seq before calling this function
+ * @fault: is faulting required for the given range
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask, bool fault)
+ __acquires(&umem_odp->umem_mutex)
{
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
- struct page **local_page_list = NULL;
- u64 page_mask, off;
- int j, k, ret = 0, start_idx, npages = 0;
- unsigned int flags = 0, page_shift;
- phys_addr_t p = 0;
+ int pfn_index, dma_index, ret = 0, start_idx;
+ unsigned int page_shift, hmm_order, pfn_start_idx;
+ unsigned long num_pfns, current_seq;
+ struct hmm_range range = {};
+ unsigned long timeout;
if (access_mask == 0)
return -EINVAL;
user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
- local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
- if (!local_page_list)
- return -ENOMEM;
-
page_shift = umem_odp->page_shift;
- page_mask = ~(BIT(page_shift) - 1);
- off = user_virt & (~page_mask);
- user_virt = user_virt & page_mask;
- bcnt += off; /* Charge for the first page offset as well. */
/*
* owning_process is allowed to be NULL, this means somehow the mm is
goto out_put_task;
}
- if (access_mask & ODP_WRITE_ALLOWED_BIT)
- flags |= FOLL_WRITE;
+ range.notifier = &umem_odp->notifier;
+ range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+ range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+ pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+ if (fault) {
+ range.default_flags = HMM_PFN_REQ_FAULT;
- start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
- k = start_idx;
+ if (access_mask & ODP_WRITE_ALLOWED_BIT)
+ range.default_flags |= HMM_PFN_REQ_WRITE;
+ }
- while (bcnt > 0) {
- const size_t gup_num_pages = min_t(size_t,
- ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- mmap_read_lock(owning_mm);
- /*
- * Note: this might result in redundent page getting. We can
- * avoid this by checking dma_list to be 0 before calling
- * get_user_pages. However, this make the code much more
- * complex (and doesn't gain us much performance in most use
- * cases).
- */
- npages = get_user_pages_remote(owning_mm,
- user_virt, gup_num_pages,
- flags, local_page_list, NULL, NULL);
- mmap_read_unlock(owning_mm);
-
- if (npages < 0) {
- if (npages != -EAGAIN)
- pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- else
- pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- break;
- }
+retry:
+ current_seq = range.notifier_seq =
+ mmu_interval_read_begin(&umem_odp->notifier);
- bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- mutex_lock(&umem_odp->umem_mutex);
- for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
- if (user_virt & ~page_mask) {
- p += PAGE_SIZE;
- if (page_to_phys(local_page_list[j]) != p) {
- ret = -EFAULT;
- break;
- }
- put_page(local_page_list[j]);
- continue;
- }
+ mmap_read_lock(owning_mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(owning_mm);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_put_mm;
+ }
- ret = ib_umem_odp_map_dma_single_page(
- umem_odp, k, local_page_list[j],
- access_mask, current_seq);
- if (ret < 0) {
- if (ret != -EAGAIN)
- pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- else
- pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- break;
- }
+ start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+ dma_index = start_idx;
- p = page_to_phys(local_page_list[j]);
- k++;
- }
+ mutex_lock(&umem_odp->umem_mutex);
+ if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
mutex_unlock(&umem_odp->umem_mutex);
+ goto retry;
+ }
- if (ret < 0) {
+ for (pfn_index = 0; pfn_index < num_pfns;
+ pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
+
+ if (fault) {
/*
- * Release pages, remembering that the first page
- * to hit an error was already released by
- * ib_umem_odp_map_dma_single_page().
+			 * Since we asked hmm_range_fault() to populate the
+			 * pages, it shouldn't return an error entry on success.
*/
- if (npages - (j + 1) > 0)
- release_pages(&local_page_list[j+1],
- npages - (j + 1));
+ WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+ WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+ } else {
+ if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
+ WARN_ON(umem_odp->dma_list[dma_index]);
+ continue;
+ }
+ access_mask = ODP_READ_ALLOWED_BIT;
+ if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+ }
+
+ hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+		/* If a hugepage was detected and ODP wasn't set for it, the umem
+		 * page_shift will be used; the opposite case is an error.
+ */
+ if (hmm_order + PAGE_SHIFT < page_shift) {
+ ret = -EINVAL;
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "%s: un-expected hmm_order %d, page_shift %d\n",
+ __func__, hmm_order, page_shift);
break;
}
- }
- if (ret >= 0) {
- if (npages < 0 && k == start_idx)
- ret = npages;
- else
- ret = k - start_idx;
+ ret = ib_umem_odp_map_dma_single_page(
+ umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
+ access_mask);
+ if (ret < 0) {
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+ break;
+ }
}
+	/* upon success the lock should stay held for the caller */
+ if (!ret)
+ ret = dma_index - start_idx;
+ else
+ mutex_unlock(&umem_odp->umem_mutex);
+out_put_mm:
mmput(owning_mm);
out_put_task:
if (owning_process)
put_task_struct(owning_process);
- free_page((unsigned long)local_page_list);
return ret;
}
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
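
The faulting loop above follows the standard mmu_interval_notifier collision-retry pattern: read the sequence count, fault with hmm_range_fault() under mmap_read_lock(), then take the driver lock and re-check the sequence; if an invalidation raced in, everything is redone. A hedged, generic sketch of that pattern (the driver lock and pfn array are placeholders; see Documentation/vm/hmm.rst for the canonical form):

static int my_fault_range(struct mmu_interval_notifier *notifier,
                          struct mm_struct *mm, unsigned long *pfns,
                          unsigned long start, unsigned long end,
                          struct mutex *driver_lock)
{
        struct hmm_range range = {
                .notifier = notifier,
                .start = start,
                .end = end,
                .hmm_pfns = pfns,
                .default_flags = HMM_PFN_REQ_FAULT,
        };
        int ret;

again:
        range.notifier_seq = mmu_interval_read_begin(notifier);

        mmap_read_lock(mm);
        ret = hmm_range_fault(&range);
        mmap_read_unlock(mm);
        if (ret) {
                if (ret == -EBUSY)
                        goto again;     /* fault collided with an invalidation */
                return ret;
        }

        mutex_lock(driver_lock);
        if (mmu_interval_read_retry(notifier, range.notifier_seq)) {
                /* invalidated between fault and lock: results are stale */
                mutex_unlock(driver_lock);
                goto again;
        }
        /* ... program device page tables from pfns ... */
        mutex_unlock(driver_lock);
        return 0;
}
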
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
+ dma_addr_t dma_addr;
+ dma_addr_t dma;
int idx;
u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
- /* Note that during the run of this function, the
- * notifiers_count of the MR is > 0, preventing any racing
- * faults from completion. We might be racing with other
- * invalidations, so we must make sure we free each page only
- * once. */
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
- if (umem_odp->page_list[idx]) {
- struct page *page = umem_odp->page_list[idx];
- dma_addr_t dma = umem_odp->dma_list[idx];
- dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+ dma = umem_odp->dma_list[idx];
- WARN_ON(!dma_addr);
+		/* The access flags ORed into the entry guarantee that a
+		 * non-zero value holds a valid DMA address.
+		 */
+ if (dma) {
+ unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
+ dma_addr = dma & ODP_DMA_ADDR_MASK;
ib_dma_unmap_page(dev, dma_addr,
BIT(umem_odp->page_shift),
DMA_BIDIRECTIONAL);
*/
set_page_dirty(head_page);
}
- umem_odp->page_list[idx] = NULL;
umem_odp->dma_list[idx] = 0;
umem_odp->npages--;
}
if (!ucontext)
return -ENOMEM;
- ucontext->res.type = RDMA_RESTRACK_CTX;
ucontext->device = ib_dev;
ucontext->ufile = ufile;
xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
+ rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+ rdma_restrack_set_name(&ucontext->res, NULL);
attrs->context = ucontext;
return 0;
}
if (ret)
goto err_uncharge;
- rdma_restrack_uadd(&ucontext->res);
+ rdma_restrack_add(&ucontext->res);
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
err_uobj:
rdma_alloc_abort_uobject(uobj, attrs, false);
err_ucontext:
+ rdma_restrack_put(&attrs->context->res);
kfree(attrs->context);
attrs->context = NULL;
return ret;
pd->device = ib_dev;
pd->uobject = uobj;
atomic_set(&pd->usecnt, 0);
- pd->res.type = RDMA_RESTRACK_PD;
+
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, NULL);
ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
if (ret)
goto err_alloc;
- rdma_restrack_uadd(&pd->res);
+ rdma_restrack_add(&pd->res);
uobj->object = pd;
uobj_finalize_uobj_create(uobj, attrs);
return uverbs_response(attrs, &resp, sizeof(resp));
err_alloc:
+ rdma_restrack_put(&pd->res);
kfree(pd);
err:
uobj_alloc_abort(uobj, attrs);
mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
mr->iova = cmd.hca_va;
- rdma_restrack_uadd(&mr->res);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
uobj_put_obj_read(pd);
static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
- struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uverbs_alloc_mw_resp resp = {};
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
goto err_put;
}
- mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
- if (IS_ERR(mw)) {
- ret = PTR_ERR(mw);
+ mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+ if (!mw) {
+ ret = -ENOMEM;
goto err_put;
}
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = ib_dev;
+ mw->pd = pd;
mw->uobject = uobj;
+ mw->type = cmd.mw_type;
+
+ ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
+ if (ret)
+ goto err_alloc;
+
atomic_inc(&pd->usecnt);
uobj->object = mw;
resp.mw_handle = uobj->id;
return uverbs_response(attrs, &resp, sizeof(resp));
+err_alloc:
+ kfree(mw);
err_put:
uobj_put_obj_read(pd);
err_free:
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
goto err_free;
- rdma_restrack_uadd(&cq->res);
+ rdma_restrack_add(&cq->res);
obj->uevent.uobject.object = cq;
obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
return uverbs_response(attrs, &resp, sizeof(resp));
err_free:
+ rdma_restrack_put(&cq->res);
kfree(cq);
err_file:
if (ev_file)
bool has_sq = true;
struct ib_device *ib_dev;
- if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- return -EPERM;
+ switch (cmd->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+ break;
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_DRIVER:
+ break;
+ default:
+ return -EINVAL;
+ }
obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
{
struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_wq **wqs = NULL;
+ struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
int i, num_read_wqs;
goto put_wqs;
}
- init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
- init_attr.ind_tbl = wqs;
-
- rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
- &attrs->driver_udata);
-
- if (IS_ERR(rwq_ind_tbl)) {
- err = PTR_ERR(rwq_ind_tbl);
+ rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+ if (!rwq_ind_tbl) {
+ err = -ENOMEM;
goto err_uobj;
}
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+
rwq_ind_tbl->ind_tbl = wqs;
rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
rwq_ind_tbl->uobject = uobj;
rwq_ind_tbl->device = ib_dev;
atomic_set(&rwq_ind_tbl->usecnt, 0);
+ err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+ &attrs->driver_udata);
+ if (err)
+ goto err_create;
+
for (i = 0; i < num_wq_handles; i++)
rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
return uverbs_response(attrs, &resp, sizeof(resp));
+err_create:
+ kfree(rwq_ind_tbl);
err_uobj:
uobj_alloc_abort(uobj, attrs);
put_wqs:
goto err_free;
}
- flow_id = qp->device->ops.create_flow(
- qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+ flow_id = qp->device->ops.create_flow(qp, flow_attr,
+ &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
int ret;
ret = mw->device->ops.dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+ kfree(mw);
return ret;
}
{
struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
- int ret;
+ u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+ int ret, i;
+
+ if (atomic_read(&rwq_ind_tbl->usecnt))
+ return -EBUSY;
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
+
+ kfree(rwq_ind_tbl);
kfree(ind_tbl);
return ret;
}
if (ret)
return ret;
- ib_dealloc_pd_user(pd, &attrs->driver_udata);
- return 0;
+ return ib_dealloc_pd_user(pd, &attrs->driver_udata);
}
void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
if (ret)
return ret;
- counters->device->ops.destroy_counters(counters);
+ ret = counters->device->ops.destroy_counters(counters);
+ if (ret)
+ return ret;
kfree(counters);
return 0;
}
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
+#include "restrack.h"
static int uverbs_free_cq(struct ib_uobject *uobject,
enum rdma_remove_reason why,
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
obj->uevent.uobject.object = cq;
obj->uevent.uobject.user_handle = user_handle;
- rdma_restrack_uadd(&cq->res);
+ rdma_restrack_add(&cq->res);
uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
return ret;
err_free:
+ rdma_restrack_put(&cq->res);
kfree(cq);
err_event_file:
if (obj->uevent.event_file)
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
+#include <linux/overflow.h>
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_ioctl.h>
#include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
/*
* This ioctl method allows calling any defined write or write_ex
resp->subnet_timeout = attr->subnet_timeout;
resp->init_type_reply = attr->init_type_reply;
resp->active_width = attr->active_width;
- resp->active_speed = attr->active_speed;
+ /* This ABI needs to be extended to provide any speed more than IB_SPEED_NDR */
+ resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
resp->phys_state = attr->phys_state;
resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
}
return ucontext->device->ops.query_ucontext(ucontext, attrs);
}
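+/* Copy kernel GID entries to userspace, truncating or zero-padding each
+ * entry to match the entry size requested by the user.
+ */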
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_gid_entry *entries,
+ size_t num_entries, size_t user_entry_size)
+{
+ const struct uverbs_attr *attr;
+ void __user *user_entries;
+ size_t copy_len;
+ int ret;
+ int i;
+
+ if (user_entry_size == sizeof(*entries)) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ entries, sizeof(*entries) * num_entries);
+ return ret;
+ }
+
+ copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+ attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+ for (i = 0; i < num_entries; i++) {
+ if (copy_to_user(user_entries, entries, copy_len))
+ return -EFAULT;
+
+ if (user_entry_size > sizeof(*entries)) {
+ if (clear_user(user_entries + sizeof(*entries),
+ user_entry_size - sizeof(*entries)))
+ return -EFAULT;
+ }
+
+ entries++;
+ user_entries += user_entry_size;
+ }
+
+ return uverbs_output_written(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
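+/* Dump the device's whole GID table into a caller-supplied array of entries. */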
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry *entries;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ size_t user_entry_size;
+ ssize_t num_entries;
+ size_t max_entries;
+ size_t num_bytes;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&user_entry_size, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+ if (ret)
+ return ret;
+
+ max_entries = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ user_entry_size);
+ if (max_entries <= 0)
+ return -EINVAL;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes))
+ return -EINVAL;
+
+ entries = uverbs_zalloc(attrs, num_bytes);
+ if (!entries)
+ return -ENOMEM;
+
+ num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+ if (num_entries < 0)
+ return -EINVAL;
+
+ ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+ user_entry_size);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ &num_entries, sizeof(num_entries));
+ return ret;
+}
+
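+/* Return a single GID table entry for the requested port/index, including the
+ * ifindex of the associated netdev when one is bound to the GID.
+ */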
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry entry = {};
+ const struct ib_gid_attr *gid_attr;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ struct net_device *ndev;
+ u32 gid_index;
+ u32 port_num;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&gid_index, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+ if (ret)
+ return ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!rdma_is_port_valid(ib_dev, port_num))
+ return -EINVAL;
+
+ if (!rdma_ib_or_roce(ib_dev, port_num))
+ return -EOPNOTSUPP;
+
+ gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+
+ memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+ entry.gid_index = gid_attr->index;
+ entry.port_num = gid_attr->port_num;
+ entry.gid_type = gid_attr->gid_type;
+
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+ if (IS_ERR(ndev)) {
+ if (PTR_ERR(ndev) != -ENODEV) {
+ ret = PTR_ERR(ndev);
+ rcu_read_unlock();
+ goto out;
+ }
+ } else {
+ entry.netdev_ifindex = ndev->ifindex;
+ }
+ rcu_read_unlock();
+
+ ret = uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+ sizeof(entry));
+out:
+ rdma_put_gid_attr(gid_attr);
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_GET_CONTEXT,
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
reserved),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_ENTRY,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+ netdev_ifindex),
+ UA_MANDATORY));
+
DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
&UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
&UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
- &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
const struct uapi_definition uverbs_def_obj_device[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
container_of(uobject, struct ib_uwq_object, uevent.uobject);
int ret;
- ret = ib_destroy_wq(wq, &attrs->driver_udata);
+ ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
atomic_set(&pd->usecnt, 0);
pd->flags = flags;
- pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_set_task(&pd->res, caller);
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, caller);
ret = device->ops.alloc_pd(pd, NULL);
if (ret) {
+ rdma_restrack_put(&pd->res);
kfree(pd);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&pd->res);
+ rdma_restrack_add(&pd->res);
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
* exist. The caller is responsible to synchronously destroy them and
* guarantee no new allocations will happen.
*/
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
{
int ret;
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
+ ret = pd->device->ops.dealloc_pd(pd, udata);
+ if (ret)
+ return ret;
+
rdma_restrack_del(&pd->res);
- pd->device->ops.dealloc_pd(pd, udata);
kfree(pd);
+ return ret;
}
EXPORT_SYMBOL(ib_dealloc_pd_user);
(struct in6_addr *)dgid);
return 0;
} else if (net_type == RDMA_NETWORK_IPV6 ||
- net_type == RDMA_NETWORK_IB) {
+ net_type == RDMA_NETWORK_IB || net_type == RDMA_NETWORK_ROCE_V1) {
*dgid = hdr->ibgrh.dgid;
*sgid = hdr->ibgrh.sgid;
return 0;
{
const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
+ int ret;
might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
pd = ah->pd;
- ah->device->ops.destroy_ah(ah, flags);
+ ret = ah->device->ops.destroy_ah(ah, flags);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
if (sgid_attr)
rdma_put_gid_attr(sgid_attr);
kfree(ah);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(rdma_destroy_ah_user);
int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
+ int ret;
+
if (atomic_read(&srq->usecnt))
return -EBUSY;
- srq->device->ops.destroy_srq(srq, udata);
+ ret = srq->device->ops.destroy_srq(srq, udata);
+ if (ret)
+ return ret;
atomic_dec(&srq->pd->usecnt);
if (srq->srq_type == IB_SRQT_XRC)
atomic_dec(&srq->ext.cq->usecnt);
kfree(srq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_srq_user);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width)
{
int rc;
u32 netdev_speed;
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = device->ops.create_cq(cq, cq_attr, NULL);
if (ret) {
+ rdma_restrack_put(&cq->res);
kfree(cq);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&cq->res);
+ rdma_restrack_add(&cq->res);
return cq;
}
EXPORT_SYMBOL(__ib_create_cq);
int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
+ int ret;
+
if (WARN_ON_ONCE(cq->shared))
return -EOPNOTSUPP;
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ ret = cq->device->ops.destroy_cq(cq, udata);
+ if (ret)
+ return ret;
+
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
kfree(cq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_cq_user);
mr->pd = pd;
mr->dm = NULL;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
return mr;
}
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
mr->type = mr_type;
mr->sig_attrs = NULL;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
out:
trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
mr->type = IB_MR_TYPE_INTEGRITY;
mr->sig_attrs = sig_attrs;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
out:
trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
return mr;
*/
int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
+ int ret;
+
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
WARN_ON(!xa_empty(&xrcd->tgt_qps));
- xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ if (ret)
+ return ret;
kfree(xrcd);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_dealloc_xrcd_user);
EXPORT_SYMBOL(ib_create_wq);
/**
- * ib_destroy_wq - Destroys the specified user WQ.
+ * ib_destroy_wq_user - Destroys the specified user WQ.
* @wq: The WQ to destroy.
* @udata: Valid user data
*/
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
{
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
+ int ret;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- wq->device->ops.destroy_wq(wq, udata);
+ ret = wq->device->ops.destroy_wq(wq, udata);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
atomic_dec(&cq->usecnt);
-
- return 0;
+ return ret;
}
-EXPORT_SYMBOL(ib_destroy_wq);
+EXPORT_SYMBOL(ib_destroy_wq_user);
/**
* ib_modify_wq - Modifies the specified WQ.
}
EXPORT_SYMBOL(ib_modify_wq);
-/*
- * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
- * @wq_ind_table: The Indirection Table to destroy.
-*/
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
-{
- int err, i;
- u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
- struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
-
- if (atomic_read(&rwq_ind_table->usecnt))
- return -EBUSY;
-
- err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
- if (!err) {
- for (i = 0; i < table_size; i++)
- atomic_dec(&ind_tbl[i]->usecnt);
- }
-
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
struct delayed_work worker;
u8 cur_prio_map;
- u8 active_speed;
+ u16 active_speed;
u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
}
/* Protection Domains */
-void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
if (pd->qplib_pd.id)
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
+ return 0;
}
int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
}
/* Address Handles */
-void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
!(flags & RDMA_DESTROY_AH_SLEEPABLE));
+ return 0;
}
static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
return PTR_ERR(umem);
qp->sumem = umem;
- qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl;
- qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem);
- qplib_qp->sq.sg_info.nmap = umem->nmap;
+ qplib_qp->sq.sg_info.umem = umem;
qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
qplib_qp->qp_handle = ureq.qp_handle;
if (IS_ERR(umem))
goto rqfail;
qp->rumem = umem;
- qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl;
- qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem);
- qplib_qp->rq.sg_info.nmap = umem->nmap;
+ qplib_qp->rq.sg_info.umem = umem;
qplib_qp->rq.sg_info.pgsize = PAGE_SIZE;
qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT;
}
}
/* Shared Receive Queues */
-void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
+int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
{
struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
ib_srq);
atomic_dec(&rdev->srq_count);
if (nq)
nq->budget--;
+ return 0;
}
static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
return PTR_ERR(umem);
srq->umem = umem;
- qplib_srq->sg_info.sghead = umem->sg_head.sgl;
- qplib_srq->sg_info.npages = ib_umem_num_pages(umem);
- qplib_srq->sg_info.nmap = umem->nmap;
+ qplib_srq->sg_info.umem = umem;
qplib_srq->sg_info.pgsize = PAGE_SIZE;
qplib_srq->sg_info.pgshft = PAGE_SHIFT;
qplib_srq->srq_handle = ureq.srq_handle;
}
/* Completion Queues */
-void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct bnxt_re_cq *cq;
struct bnxt_qplib_nq *nq;
atomic_dec(&rdev->cq_count);
nq->budget--;
kfree(cq->cql);
+ return 0;
}
int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
rc = PTR_ERR(cq->umem);
goto fail;
}
- cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl;
- cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem);
- cq->qplib_cq.sg_info.nmap = cq->umem->nmap;
+ cq->qplib_cq.sg_info.umem = cq->umem;
cq->qplib_cq.dpi = &uctx->dpi;
} else {
cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
return rc;
}
-static int bnxt_re_page_size_ok(int page_shift)
-{
- switch (page_shift) {
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G:
- return 1;
- default:
- return 0;
- }
-}
-
static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
int page_shift)
{
u64 page_size = BIT_ULL(page_shift);
struct ib_block_iter biter;
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size)
+ rdma_umem_for_each_dma_block(umem, &biter, page_size)
*pbl_tbl++ = rdma_block_iter_dma_address(&biter);
return pbl_tbl - pbl_tbl_orig;
struct bnxt_re_mr *mr;
struct ib_umem *umem;
u64 *pbl_tbl = NULL;
- int umem_pgs, page_shift, rc;
+ unsigned long page_size;
+ int umem_pgs, rc;
if (length > BNXT_RE_MAX_MR_SIZE) {
ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
mr->ib_umem = umem;
mr->qplib_mr.va = virt_addr;
- umem_pgs = ib_umem_page_count(umem);
- if (!umem_pgs) {
- ibdev_err(&rdev->ibdev, "umem is invalid!");
- rc = -EINVAL;
- goto free_umem;
- }
- mr->qplib_mr.total_size = length;
-
- pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
- if (!pbl_tbl) {
- rc = -ENOMEM;
- goto free_umem;
- }
-
- page_shift = __ffs(ib_umem_find_best_pgsz(umem,
- BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M,
- virt_addr));
-
- if (!bnxt_re_page_size_ok(page_shift)) {
+ page_size = ib_umem_find_best_pgsz(
+ umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr);
+ if (!page_size) {
ibdev_err(&rdev->ibdev, "umem page size unsupported!");
rc = -EFAULT;
- goto fail;
+ goto free_umem;
}
+ mr->qplib_mr.total_size = length;
- if (page_shift == BNXT_RE_PAGE_SHIFT_4K &&
+ if (page_size == BNXT_RE_PAGE_SIZE_4K &&
length > BNXT_RE_MAX_MR_SIZE_LOW) {
ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu",
length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
rc = -EINVAL;
- goto fail;
+ goto free_umem;
+ }
+
+ umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
+ pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL);
+ if (!pbl_tbl) {
+ rc = -ENOMEM;
+ goto free_umem;
}
/* Map umem buf ptrs to the PBL */
- umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
+ umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size));
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
- umem_pgs, false, 1 << page_shift);
+ umem_pgs, false, page_size);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register user MR");
goto fail;
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
u8 port_num);
int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
int bnxt_re_create_srq(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
const struct ib_recv_wr **bad_recv_wr);
int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
}
}
-static void bnxt_qplib_service_nq(unsigned long data)
+static void bnxt_qplib_service_nq(struct tasklet_struct *t)
{
- struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
+ struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet);
struct bnxt_qplib_hwq *hwq = &nq->hwq;
int num_srqne_processed = 0;
int num_cqne_processed = 0;
nq->msix_vec = msix_vector;
if (need_init)
- tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq,
- (unsigned long)nq);
+ tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq);
else
tasklet_enable(&nq->nq_tasklet);
#include "qplib_sp.h"
#include "qplib_fp.h"
-static void bnxt_qplib_service_creq(unsigned long data);
+static void bnxt_qplib_service_creq(struct tasklet_struct *t);
/* Hardware communication channel */
static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
goto done;
do {
mdelay(1); /* 1m sec */
- bnxt_qplib_service_creq((unsigned long)rcfw);
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
} while (test_bit(cbit, cmdq->cmdq_bitmap) && --count);
done:
return count ? 0 : -ETIMEDOUT;
}
/* SP - CREQ Completion handlers */
-static void bnxt_qplib_service_creq(unsigned long data)
+static void bnxt_qplib_service_creq(struct tasklet_struct *t)
{
- struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
+ struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct bnxt_qplib_hwq *hwq = &creq->hwq;
creq->msix_vec = msix_vector;
if (need_init)
- tasklet_init(&creq->creq_tasklet,
- bnxt_qplib_service_creq, (unsigned long)rcfw);
+ tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
else
tasklet_enable(&creq->creq_tasklet);
rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
#include <linux/dma-mapping.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl,
struct bnxt_qplib_sg_info *sginfo)
{
- struct scatterlist *sghead = sginfo->sghead;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
int i = 0;
- for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) {
- pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
+ rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) {
+ pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter);
pbl->pg_arr[i] = NULL;
pbl->pg_count++;
i++;
struct bnxt_qplib_sg_info *sginfo)
{
struct pci_dev *pdev = res->pdev;
- struct scatterlist *sghead;
bool is_umem = false;
u32 pages;
int i;
if (sginfo->nopte)
return 0;
- pages = sginfo->npages;
- sghead = sginfo->sghead;
+ if (sginfo->umem)
+ pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize);
+ else
+ pages = sginfo->npages;
/* page ptr arrays */
pbl->pg_arr = vmalloc(pages * sizeof(void *));
if (!pbl->pg_arr)
pbl->pg_count = 0;
pbl->pg_size = sginfo->pgsize;
- if (!sghead) {
+ if (!sginfo->umem) {
for (i = 0; i < pages; i++) {
pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
pbl->pg_size,
struct bnxt_qplib_sg_info sginfo = {};
u32 depth, stride, npbl, npde;
dma_addr_t *src_phys_ptr, **dst_virt_ptr;
- struct scatterlist *sghead = NULL;
struct bnxt_qplib_res *res;
struct pci_dev *pdev;
int i, rc, lvl;
res = hwq_attr->res;
pdev = res->pdev;
- sghead = hwq_attr->sginfo->sghead;
pg_size = hwq_attr->sginfo->pgsize;
hwq->level = PBL_LVL_MAX;
aux_pages++;
}
- if (!sghead) {
+ if (!hwq_attr->sginfo->umem) {
hwq->is_user = false;
npages = (depth * stride) / pg_size + aux_pages;
if ((depth * stride) % pg_size)
return -EINVAL;
hwq_attr->sginfo->npages = npages;
} else {
+ unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
+ hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
+
hwq->is_user = true;
- npages = hwq_attr->sginfo->npages;
+ npages = sginfo_num_pages;
npages = (npages * PAGE_SIZE) /
BIT_ULL(hwq_attr->sginfo->pgshft);
- if ((hwq_attr->sginfo->npages * PAGE_SIZE) %
+ if ((sginfo_num_pages * PAGE_SIZE) %
BIT_ULL(hwq_attr->sginfo->pgshft))
if (!npages)
npages++;
};
struct bnxt_qplib_sg_info {
- struct scatterlist *sghead;
- u32 nmap;
+ struct ib_umem *umem;
u32 npages;
u32 pgshft;
u32 pgsize;
module_param(enable_ecn, int, 0644);
MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
-static int dack_mode = 1;
+static int dack_mode;
module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
uint c4iw_max_read_depth = 32;
module_param(c4iw_max_read_depth, int, 0644);
return !err || err == -ENODATA ? npolled : err;
}
-void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
chp->destroy_skb, chp->wr_waitp);
c4iw_put_wr_wait(chp->wr_waitp);
+ return 0;
}
int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
void c4iw_dealloc(struct uld_ctx *ctx);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
-void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
-void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
struct ib_udata *udata);
int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
__be64 *pages;
int shift, n, i;
int err = -ENOMEM;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
shift = PAGE_SHIFT;
- n = ib_umem_num_pages(mhp->umem);
+ n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
err = alloc_pbl(mhp, n);
if (err)
goto err_umem_release;
i = n = 0;
- for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
- pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
+ rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) {
+ pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));
if (i == PAGE_SIZE / sizeof(*pages)) {
err = write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (n << 3), i,
return ERR_PTR(err);
}
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
+ struct c4iw_mw *mhp = to_c4iw_mw(ibmw);
struct c4iw_dev *rhp;
struct c4iw_pd *php;
- struct c4iw_mw *mhp;
u32 mmid;
u32 stag = 0;
int ret;
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
+ if (ibmw->type != IB_MW_TYPE_1)
+ return -EINVAL;
- php = to_c4iw_pd(pd);
+ php = to_c4iw_pd(ibmw->pd);
rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
- if (!mhp->wr_waitp) {
- ret = -ENOMEM;
- goto free_mhp;
- }
+ if (!mhp->wr_waitp)
+ return -ENOMEM;
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
if (!mhp->dereg_skb) {
ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp);
if (ret)
goto free_skb;
+
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.type = FW_RI_STAG_MW;
mhp->attr.stag = stag;
mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
+ ibmw->rkey = stag;
if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
ret = -ENOMEM;
goto dealloc_win;
}
pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag);
- return &(mhp->ibmw);
+ return 0;
dealloc_win:
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
kfree_skb(mhp->dereg_skb);
free_wr_wait:
c4iw_put_wr_wait(mhp->wr_waitp);
-free_mhp:
- kfree(mhp);
- return ERR_PTR(ret);
+ return ret;
}
int c4iw_dealloc_mw(struct ib_mw *mw)
mhp->wr_waitp);
kfree_skb(mhp->dereg_skb);
c4iw_put_wr_wait(mhp->wr_waitp);
- pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
- kfree(mhp);
return 0;
}
return ret;
}
-static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
+ return 0;
}
static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
.req_notify_cq = c4iw_arm_cq,
- INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+
INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw),
+ INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
};
return ret;
}
-void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_srq *srq;
srq->wr_waitp);
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
c4iw_put_wr_wait(srq->wr_waitp);
+ return 0;
}
char name[EFA_IRQNAME_SIZE];
};
-struct efa_sw_stats {
+/* Don't use anything other than atomic64 */
+struct efa_stats {
atomic64_t alloc_pd_err;
atomic64_t create_qp_err;
atomic64_t create_cq_err;
atomic64_t alloc_ucontext_err;
atomic64_t create_ah_err;
atomic64_t mmap_err;
-};
-
-/* Don't use anything other than atomic64 */
-struct efa_stats {
- struct efa_sw_stats sw_stats;
atomic64_t keep_alive_rcvd;
};
int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey);
int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
-void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
int efa_create_ah(struct ib_ah *ibah,
struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags);
int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
enum efa_admin_get_stats_type {
EFA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+ EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2,
};
enum efa_admin_get_stats_scope {
EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1,
};
-enum efa_admin_modify_qp_mask_bits {
- EFA_ADMIN_QP_STATE_BIT = 0,
- EFA_ADMIN_CUR_QP_STATE_BIT = 1,
- EFA_ADMIN_QKEY_BIT = 2,
- EFA_ADMIN_SQ_PSN_BIT = 3,
- EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4,
-};
-
/*
* QP allocation sizes, converted by fabric QueuePair (QP) create command
* from QP capabilities.
struct efa_admin_aq_common_desc aq_common_desc;
/*
- * Mask indicating which fields should be updated see enum
- * efa_admin_modify_qp_mask_bits
+ * Mask indicating which fields should be updated
+ * 0 : qp_state
+ * 1 : cur_qp_state
+ * 2 : qkey
+ * 3 : sq_psn
+ * 4 : sq_drained_async_notify
+ * 5 : rnr_retry
+ * 31:6 : reserved
*/
u32 modify_mask;
/* Enable async notification when SQ is drained */
u8 sq_drained_async_notify;
- /* MBZ */
- u8 reserved1;
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
/* MBZ */
u16 reserved2;
/* Indicates that draining is in progress */
u8 sq_draining;
- /* MBZ */
- u8 reserved1;
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
/* MBZ */
u16 reserved2;
u64 rx_drops;
};
+struct efa_admin_messages_stats {
+ u64 send_bytes;
+
+ u64 send_wrs;
+
+ u64 recv_bytes;
+
+ u64 recv_wrs;
+};
+
+struct efa_admin_rdma_read_stats {
+ u64 read_wrs;
+
+ u64 read_bytes;
+
+ u64 read_wr_err;
+
+ u64 read_resp_bytes;
+};
+
struct efa_admin_acq_get_stats_resp {
struct efa_admin_acq_common_desc acq_common_desc;
- struct efa_admin_basic_stats basic_stats;
+ union {
+ struct efa_admin_basic_stats basic_stats;
+
+ struct efa_admin_messages_stats messages_stats;
+
+ struct efa_admin_rdma_read_stats rdma_read_stats;
+ } u;
};
struct efa_admin_get_set_feature_common_desc {
/*
* 0 : rdma_read - If set, RDMA Read is supported on
* TX queues
- * 31:1 : reserved - MBZ
+ * 1 : rnr_retry - If set, RNR retry is supported on
+ * modify QP command
+ * 31:2 : reserved - MBZ
*/
u32 device_caps;
#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+/* modify_qp_cmd */
+#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0)
+#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1)
+#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4)
+#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5)
+
/* reg_mr_cmd */
#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7)
/* feature_device_attr_desc */
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
/* host_info */
#define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0)
cmd.qkey = params->qkey;
cmd.sq_psn = params->sq_psn;
cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+ cmd.rnr_retry = params->rnr_retry;
err = efa_com_cmd_exec(aq,
(struct efa_admin_aq_entry *)&cmd,
result->qkey = resp.qkey;
result->sq_draining = resp.sq_draining;
result->sq_psn = resp.sq_psn;
+ result->rnr_retry = resp.rnr_retry;
return 0;
}
return err;
}
- result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes;
- result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts;
- result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes;
- result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts;
- result->basic_stats.rx_drops = resp.basic_stats.rx_drops;
+ switch (cmd.type) {
+ case EFA_ADMIN_GET_STATS_TYPE_BASIC:
+ result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes;
+ result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts;
+ result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes;
+ result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts;
+ result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_MESSAGES:
+ result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes;
+ result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs;
+ result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes;
+ result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ:
+ result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs;
+ result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes;
+ result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err;
+ result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes;
+ break;
+ }
return 0;
}
u32 qkey;
u32 sq_psn;
u8 sq_drained_async_notify;
+ u8 rnr_retry;
};
struct efa_com_query_qp_params {
u32 qkey;
u32 sq_draining;
u32 sq_psn;
+ u8 rnr_retry;
};
struct efa_com_destroy_qp_params {
u64 rx_drops;
};
+struct efa_com_messages_stats {
+ u64 send_bytes;
+ u64 send_wrs;
+ u64 recv_bytes;
+ u64 recv_wrs;
+};
+
+struct efa_com_rdma_read_stats {
+ u64 read_wrs;
+ u64 read_bytes;
+ u64 read_wr_err;
+ u64 read_resp_bytes;
+};
+
union efa_com_get_stats_result {
struct efa_com_basic_stats basic_stats;
+ struct efa_com_messages_stats messages_stats;
+ struct efa_com_rdma_read_stats rdma_read_stats;
};
void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
*/
#include <linux/vmalloc.h>
+#include <linux/log2.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
op(EFA_RX_BYTES, "rx_bytes") \
op(EFA_RX_PKTS, "rx_pkts") \
op(EFA_RX_DROPS, "rx_drops") \
+ op(EFA_SEND_BYTES, "send_bytes") \
+ op(EFA_SEND_WRS, "send_wrs") \
+ op(EFA_RECV_BYTES, "recv_bytes") \
+ op(EFA_RECV_WRS, "recv_wrs") \
+ op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
+ op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
+ op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
+ op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
op(EFA_COMPLETED_CMDS, "completed_cmds") \
op(EFA_CMDS_ERR, "cmds_err") \
return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
}
-static inline bool is_rdma_read_cap(struct efa_dev *dev)
-{
- return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
-}
+#define EFA_DEV_CAP(dev, cap) \
+ ((dev)->dev_attr.device_caps & \
+ EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
#define is_reserved_cleared(reserved) \
!memchr_inv(reserved, 0, sizeof(reserved))
resp.max_rq_wr = dev_attr->max_rq_depth;
resp.max_rdma_size = dev_attr->max_rdma_size;
- if (is_rdma_read_cap(dev))
+ if (EFA_DEV_CAP(dev, RDMA_READ))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
+ if (EFA_DEV_CAP(dev, RNR_RETRY))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
+
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
if (err) {
#define EFA_QUERY_QP_SUPP_MASK \
(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
- IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+ IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
ibdev_dbg(&dev->ibdev,
qp_attr->sq_psn = result.sq_psn;
qp_attr->sq_draining = result.sq_draining;
qp_attr->port_num = 1;
+ qp_attr->rnr_retry = result.rnr_retry;
qp_attr->cap.max_send_wr = qp->max_send_wr;
qp_attr->cap.max_recv_wr = qp->max_recv_wr;
err_dealloc_pd:
efa_pd_dealloc(dev, result.pdn);
err_out:
- atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+ atomic64_inc(&dev->stats.alloc_pd_err);
return err;
}
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_pd *pd = to_epd(ibpd);
ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
efa_pd_dealloc(dev, pd->pdn);
+ return 0;
}
static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
err_free_qp:
kfree(qp);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_qp_err);
+ atomic64_inc(&dev->stats.create_qp_err);
return ERR_PTR(err);
}
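+/* Allowed state transitions for SRD (IB_QPT_DRIVER) QPs, with the attribute
+ * masks that are required or optional for each transition.
+ */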
+static const struct {
+ int valid;
+ enum ib_qp_attr_mask req_param;
+ enum ib_qp_attr_mask opt_param;
+} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .req_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_RTR] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .req_param = IB_QP_SQ_PSN,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY |
+ IB_QP_RNR_RETRY,
+
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ }
+};
+
+static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
+ enum ib_qp_state next_state,
+ enum ib_qp_attr_mask mask)
+{
+ enum ib_qp_attr_mask req_param, opt_param;
+
+ if (mask & IB_QP_CUR_STATE &&
+ cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+ cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+ return false;
+
+ if (!srd_qp_state_table[cur_state][next_state].valid)
+ return false;
+
+ req_param = srd_qp_state_table[cur_state][next_state].req_param;
+ opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
+
+ if ((mask & req_param) != req_param)
+ return false;
+
+ if (mask & ~(req_param | opt_param | IB_QP_STATE))
+ return false;
+
+ return true;
+}
+
static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
struct ib_qp_attr *qp_attr, int qp_attr_mask,
enum ib_qp_state cur_state,
enum ib_qp_state new_state)
{
+ int err;
+
#define EFA_MODIFY_QP_SUPP_MASK \
(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
- IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+ IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
+ IB_QP_RNR_RETRY)
if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
ibdev_dbg(&dev->ibdev,
return -EOPNOTSUPP;
}
- if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
- qp_attr_mask)) {
+ if (qp->ibqp.qp_type == IB_QPT_DRIVER)
+ err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
+ qp_attr_mask);
+ else
+ err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+ qp_attr_mask);
+
+ if (err) {
ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
return -EINVAL;
}
params.qp_handle = qp->qp_handle;
if (qp_attr_mask & IB_QP_STATE) {
- params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
- BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
+ EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
+ 1);
+ EFA_SET(¶ms.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
params.cur_qp_state = qp_attr->cur_qp_state;
params.qp_state = qp_attr->qp_state;
}
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
- params.modify_mask |=
- BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
+ EFA_SET(¶ms.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
}
if (qp_attr_mask & IB_QP_QKEY) {
- params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
+ EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
params.qkey = qp_attr->qkey;
}
if (qp_attr_mask & IB_QP_SQ_PSN) {
- params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
+ EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
params.sq_psn = qp_attr->sq_psn;
}
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
+ 1);
+ params.rnr_retry = qp_attr->rnr_retry;
+ }
+
err = efa_com_modify_qp(&dev->edev, ¶ms);
if (err)
return err;
return efa_com_destroy_cq(&dev->edev, ¶ms);
}
-void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibcq->device);
struct efa_cq *cq = to_ecq(ibcq);
efa_destroy_cq_idx(dev, cq->cq_idx);
efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
+ return 0;
}
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
DMA_FROM_DEVICE);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_cq_err);
+ atomic64_inc(&dev->stats.create_cq_err);
return err;
}
ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
hp_cnt, pages_in_hp);
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
- BIT(hp_shift))
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
return 0;
struct page *pg;
int i;
- sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
+ sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
if (!sglist)
return NULL;
sg_init_table(sglist, page_cnt);
supp_access_flags =
IB_ACCESS_LOCAL_WRITE |
- (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
+ (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0);
access_flags &= ~IB_ACCESS_OPTIONAL;
if (access_flags & ~supp_access_flags) {
goto err_unmap;
}
- params.page_shift = __ffs(pg_sz);
- params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
- pg_sz);
+ params.page_shift = order_base_2(pg_sz);
+ params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
ibdev_dbg(&dev->ibdev,
"start %#llx length %#llx params.page_shift %u params.page_num %u\n",
err_free:
kfree(mr);
err_out:
- atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
+ atomic64_inc(&dev->stats.reg_mr_err);
return ERR_PTR(err);
}
resp.max_tx_batch = dev->dev_attr.max_tx_batch;
resp.min_sq_wr = dev->dev_attr.min_sq_depth;
- if (udata && udata->outlen) {
- err = ib_copy_to_udata(udata, &resp,
- min(sizeof(resp), udata->outlen));
- if (err)
- goto err_dealloc_uar;
- }
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err)
+ goto err_dealloc_uar;
return 0;
err_dealloc_uar:
efa_dealloc_uar(dev, result.uarn);
err_out:
- atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+ atomic64_inc(&dev->stats.alloc_ucontext_err);
return err;
}
ibdev_dbg(&dev->ibdev,
"pgoff[%#lx] does not have valid entry\n",
vma->vm_pgoff);
- atomic64_inc(&dev->stats.sw_stats.mmap_err);
+ atomic64_inc(&dev->stats.mmap_err);
return -EINVAL;
}
entry = to_emmap(rdma_entry);
"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
entry->address, rdma_entry->npages * PAGE_SIZE,
entry->mmap_flag, err);
- atomic64_inc(&dev->stats.sw_stats.mmap_err);
+ atomic64_inc(&dev->stats.mmap_err);
}
rdma_user_mmap_entry_put(rdma_entry);
err_destroy_ah:
efa_ah_destroy(dev, ah);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_ah_err);
+ atomic64_inc(&dev->stats.create_ah_err);
return err;
}
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct efa_dev *dev = to_edev(ibah->pd->device);
struct efa_ah *ah = to_eah(ibah);
if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
ibdev_dbg(&dev->ibdev,
"Destroy address handle is not supported in atomic context\n");
- return;
+ return -EOPNOTSUPP;
}
efa_ah_destroy(dev, ah);
+ return 0;
}
struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
struct efa_com_get_stats_params params = {};
union efa_com_get_stats_result result;
struct efa_dev *dev = to_edev(ibdev);
+ struct efa_com_rdma_read_stats *rrs;
+ struct efa_com_messages_stats *ms;
struct efa_com_basic_stats *bs;
struct efa_com_stats_admin *as;
struct efa_stats *s;
int err;
- params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+ params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
err = efa_com_get_stats(&dev->edev, ¶ms, &result);
if (err)
stats->value[EFA_RX_PKTS] = bs->rx_pkts;
stats->value[EFA_RX_DROPS] = bs->rx_drops;
+ params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
+ err = efa_com_get_stats(&dev->edev, ¶ms, &result);
+ if (err)
+ return err;
+
+ ms = &result.messages_stats;
+ stats->value[EFA_SEND_BYTES] = ms->send_bytes;
+ stats->value[EFA_SEND_WRS] = ms->send_wrs;
+ stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
+ stats->value[EFA_RECV_WRS] = ms->recv_wrs;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
+ err = efa_com_get_stats(&dev->edev, ¶ms, &result);
+ if (err)
+ return err;
+
+ rrs = &result.rdma_read_stats;
+ stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
+ stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
+ stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
+ stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
+
as = &dev->edev.aq.stats;
stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
s = &dev->stats;
stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
- stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
- stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
- stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err);
- stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
- stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
- stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
- stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err);
+ stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
+ stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
+ stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
+ stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
+ stats->value[EFA_ALLOC_UCONTEXT_ERR] =
+ atomic64_read(&s->alloc_ucontext_err);
+ stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
+ stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
return ARRAY_SIZE(efa_stats_names);
}
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
schedule_work(&sde->err_halt_worker);
}
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t,
+ sdma_hw_clean_up_task);
u64 statuscsr;
while (1) {
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
-
- tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ tasklet_setup(&sde->sdma_hw_clean_up_task,
+ sdma_hw_clean_up_task);
+ tasklet_setup(&sde->sdma_sw_clean_up_task,
+ sdma_sw_clean_up_task);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
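+/* Pick the RoCEv2 UDP source port: derive it from the flow label when one is
+ * set, otherwise choose a random port in the valid encap range.
+ */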
+static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
+{
+ u32 fl = ah_attr->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ sport = get_random_u32() %
+ (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 -
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) +
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN;
+ else
+ sport = rdma_flow_label_to_udp_sport(fl);
+
+ return sport;
+}
+
int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
ah->av.sl = rdma_ah_get_sl(ah_attr);
+ ah->av.flowlabel = grh->flow_label;
+ ah->av.udp_sport = get_ah_udp_sport(ah_attr);
return 0;
}
return 0;
}
-
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
}
/* convert system page cnt to hw page cnt */
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
- 1 << page_shift) {
+ rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
addr = rdma_block_iter_dma_address(&biter);
if (idx >= start) {
bufs[total++] = addr;
int err;
buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
- buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+ buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
buf_attr.region_count = 1;
buf_attr.fixed_page = true;
}
}
+static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+ if (udata) {
+ if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
+ hr_cq->cqe_size = ucmd->cqe_size;
+ else
+ hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+ } else {
+ hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+ }
+}
+
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
INIT_LIST_HEAD(&hr_cq->rq_list);
if (udata) {
- ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(sizeof(ucmd), udata->inlen));
if (ret) {
ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n",
ret);
}
}
+ set_cqe_size(hr_cq, udata, &ucmd);
+
ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
if (ret) {
ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret);
/*
* For the QP created by kernel space, tptr value should be initialized
* to zero; For the QP created by user space, it will cause synchronous
- * problems if tptr is set to zero here, so we initialze it in user
+ * problems if tptr is set to zero here, so we initialize it in user
* space.
*/
if (!udata && hr_cq->tptr_addr)
return ret;
}
-void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
free_cq_buf(hr_dev, hr_cq);
free_cq_db(hr_dev, hr_cq, udata);
free_cqc(hr_dev, hr_cq);
+ return 0;
}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
#define DRV_NAME "hns_roce"
-/* hip08 is a pci device */
#define PCI_REVISION_ID_HIP08 0x21
+#define PCI_REVISION_ID_HIP09 0x30
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
-#define HNS_ROCE_MAX_SGE_NUM 2
#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
#define HNS_ROCE_CEQ 0
#define HNS_ROCE_AEQ 1
-#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
-#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
+#define HNS_ROCE_CEQE_SIZE 0x4
+#define HNS_ROCE_AEQE_SIZE 0x10
-#define HNS_ROCE_SL_SHIFT 28
-#define HNS_ROCE_TCLASS_SHIFT 20
-#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff
+#define HNS_ROCE_V3_EQE_SIZE 0x40
+
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
+#define HNS_ROCE_V2_QPC_SZ 256
+#define HNS_ROCE_V3_QPC_SZ 512
#define HNS_ROCE_MAX_PORTS 6
-#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
#define HNS_ROCE_SGE_SIZE 16
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
-#define HNS_ROCE_PCI_BAR_NUM 2
-
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
void __iomem *cq_db_l;
u16 *tptr_addr;
int arm_sn;
+ int cqe_size;
unsigned long cqn;
u32 vector;
atomic_t refcount;
};
struct hns_roce_av {
- u8 port;
- u8 gid_index;
- u8 stat_rate;
- u8 hop_limit;
- u32 flowlabel;
- u8 sl;
- u8 tclass;
- u8 dgid[HNS_ROCE_GID_SIZE];
- u8 mac[ETH_ALEN];
- u16 vlan_id;
- bool vlan_en;
+ u8 port;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ u32 flowlabel;
+ u16 udp_sport;
+ u8 sl;
+ u8 tclass;
+ u8 dgid[HNS_ROCE_GID_SIZE];
+ u8 mac[ETH_ALEN];
+ u16 vlan_id;
+ bool vlan_en;
};
struct hns_roce_ah {
struct hns_roce_sge sge;
u32 next_sge;
+ enum ib_mtu path_mtu;
+ u32 max_inline_data;
/* 0: flush needed, 1: unneeded */
unsigned long flush_flag;
};
struct hns_roce_ceqe {
- __le32 comp;
+ __le32 comp;
+ __le32 rsv[15];
};
struct hns_roce_aeqe {
u8 rsv0;
} __packed cmd;
} event;
+ __le32 rsv[12];
};
struct hns_roce_eq {
int num_pds;
int reserved_pds;
u32 mtt_entry_sz;
- u32 cq_entry_sz;
+ u32 cqe_sz;
u32 page_size_cap;
u32 reserved_lkey;
int mtpt_entry_sz;
- int qpc_entry_sz;
+ int qpc_sz;
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
- int sccc_entry_sz;
+ int sccc_sz;
int qpc_timer_entry_sz;
int cqc_timer_entry_sz;
int srqc_entry_sz;
u32 pbl_hop_num;
int aeqe_depth;
int ceqe_depth;
+ u32 aeqe_size;
+ u32 ceqe_size;
enum ib_mtu max_mtu;
u32 qpc_bt_num;
u32 qpc_timer_bt_num;
int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
struct ib_udata *udata);
- void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
+ int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
- struct ib_udata *udata);
+int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct hns_roce_db *db);
void __iomem *bt_cmd;
__le32 bt_cmd_val[2];
__le32 bt_cmd_h = 0;
- __le32 bt_cmd_l = 0;
- u64 bt_ba = 0;
+ __le32 bt_cmd_l;
+ u64 bt_ba;
int ret = 0;
/* Find the HEM(Hardware Entry Memory) entry */
if (hr_dev->caps.cqc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->cqc_timer_table);
- if (hr_dev->caps.sccc_entry_sz)
+ if (hr_dev->caps.sccc_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
if (hr_dev->caps.trrl_entry_sz)
{
const struct hns_roce_buf_region *r;
int ofs, end;
- int ret = 0;
+ int ret;
int unit;
int i;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_sq_db sq_db = {};
- int ps_opcode = 0, i = 0;
+ int ps_opcode, i;
unsigned long flags = 0;
void *wqe = NULL;
__le32 doorbell[2];
- u32 wqe_idx = 0;
- int nreq = 0;
int ret = 0;
- u8 *smac;
int loopback;
+ u32 wqe_idx;
+ int nreq;
+ u8 *smac;
if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_RC)) {
ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
break;
case IB_WR_LOCAL_INV:
- break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_LSO:
u32 odb_ext_mod;
u32 sdb_evt_mod;
u32 odb_evt_mod;
- int ret = 0;
+ int ret;
memset(db, 0, sizeof(*db));
struct hns_roce_v1_priv *priv = hr_dev->priv;
struct hns_roce_raq_table *raq = &priv->raq_table;
struct device *dev = &hr_dev->pdev->dev;
- int raq_shift = 0;
dma_addr_t addr;
+ int raq_shift;
__le32 tmp;
u32 val;
int ret;
struct hns_roce_v1_priv *priv = hr_dev->priv;
struct hns_roce_free_mr *free_mr = &priv->free_mr;
struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
+ int ret;
free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
if (!free_mr->free_mr_wq) {
static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
{
- int i = 0;
struct hns_roce_caps *caps = &hr_dev->caps;
+ int i;
hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA;
caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ;
caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE;
+ caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE;
caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE;
caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE;
caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE;
caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE;
- caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE;
+ caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE;
caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
caps->reserved_lkey = 0;
caps->reserved_pds = 0;
unsigned long timeout)
{
u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG;
- unsigned long end = 0;
+ unsigned long end;
u32 status = 0;
end = msecs_to_jiffies(timeout) + jiffies;
{
unsigned long flags;
u32 *p = NULL;
- u8 gid_idx = 0;
+ u8 gid_idx;
gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(hr_cq->mtr.kmem,
- n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE);
}
static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
struct hns_roce_cmd_mailbox *mailbox;
struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
+ int ret;
if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
new_state >= HNS_ROCE_QP_NUM_STATE ||
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_qp_context *context;
- int tmp_qp_state = 0;
+ int tmp_qp_state;
int ret = 0;
int state;
return 0;
}
-static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
}
wait_time++;
}
+ return 0;
}
static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE;
+ unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE;
return (struct hns_roce_aeqe *)((u8 *)
(eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE;
+ unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE;
return (struct hns_roce_ceqe *)((u8 *)
(eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work = 0;
+ int int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* CEQ irq routine, CEQ is pulse irq, not clear */
void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
struct device *dev = &hr_dev->pdev->dev;
dma_addr_t tmp_dma_addr;
- u32 eqconsindx_val = 0;
u32 eqcuridx_val = 0;
- u32 eqshift_val = 0;
+ u32 eqconsindx_val;
+ u32 eqshift_val;
__le32 tmp2 = 0;
__le32 tmp1 = 0;
__le32 tmp = 0;
CEQ_REG_OFFSET * i;
eq->entries = hr_dev->caps.ceqe_depth;
eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+ eq->eqe_size = HNS_ROCE_CEQE_SIZE;
} else {
/* AEQ */
eq_table->eqc_base[i] = hr_dev->reg_base +
ROCEE_CAEP_AEQE_CONS_IDX_REG;
eq->entries = hr_dev->caps.aeqe_depth;
eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+ eq->eqe_size = HNS_ROCE_AEQE_SIZE;
}
}
#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000
#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400
-#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256
+#define HNS_ROCE_V1_QPC_SIZE 256
#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64
#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64
#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64
-#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32
+#define HNS_ROCE_V1_CQE_SIZE 32
#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000
#define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17)
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
}
+static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ unsigned int *sge_idx, u32 msg_len)
+{
+ struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+ unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg);
+ unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len;
+ unsigned int left_len_in_pg;
+ unsigned int idx = *sge_idx;
+ unsigned int i = 0;
+ unsigned int len;
+ void *addr;
+ void *dseg;
+
+ if (msg_len > ext_sge_sz) {
+ ibdev_err(ibdev,
+ "no enough extended sge space for inline data.\n");
+ return -EINVAL;
+ }
+
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg;
+ len = wr->sg_list[0].length;
+ addr = (void *)(unsigned long)(wr->sg_list[0].addr);
+
+ /* When copying data to extended sge space, the length left in the page
+ * may not be long enough for the current user's sge, so the data has to
+ * be split into several parts: one in the first page and the others in
+ * the subsequent pages.
+ */
+ while (1) {
+ if (len <= left_len_in_pg) {
+ memcpy(dseg, addr, len);
+
+ idx += len / dseg_len;
+
+ i++;
+ if (i >= wr->num_sge)
+ break;
+
+ left_len_in_pg -= len;
+ len = wr->sg_list[i].length;
+ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+ dseg += len;
+ } else {
+ memcpy(dseg, addr, left_len_in_pg);
+
+ len -= left_len_in_pg;
+ addr += left_len_in_pg;
+ idx += left_len_in_pg / dseg_len;
+ dseg = hns_roce_get_extend_sge(qp,
+ idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
+ }
+ }
+
+ *sge_idx = idx;
+
+ return 0;
+}
+
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind, unsigned int valid_num_sge)
{
*sge_ind = idx;
}
+static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ int mtu = ib_mtu_enum_to_int(qp->path_mtu);
+
+ if (len > qp->max_inline_data || len > mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
+ len, qp->max_inline_data, mtu);
+ return false;
+ }
+
+ return true;
+}
+
+static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_idx)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int curr_idx = *sge_idx;
+ void *dseg = rc_sq_wqe;
+ unsigned int i;
+ int ret;
+
+ if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
+ ibdev_err(ibdev, "invalid inline parameters!\n");
+ return -EINVAL;
+ }
+
+ if (!check_inl_data_len(qp, msg_len))
+ return -EINVAL;
+
+ dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1);
+
+ if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+ roce_set_bit(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dseg, ((void *)wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ dseg += wr->sg_list[i].length;
+ }
+ } else {
+ roce_set_bit(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
+
+ ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+ if (ret)
+ return ret;
+
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+ curr_idx - *sge_idx);
+ }
+
+ *sge_idx = curr_idx;
+
+ return 0;
+}
+
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
unsigned int *sge_ind,
unsigned int valid_num_sge)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_v2_wqe_data_seg *dseg =
(void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
- struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
- void *wqe = dseg;
int j = 0;
int i;
- if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
- if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) >
- hr_dev->caps.max_sq_inline)) {
- ibdev_err(ibdev, "inline len(1-%d)=%d, illegal",
- rc_sq_wqe->msg_len,
- hr_dev->caps.max_sq_inline);
- return -EINVAL;
- }
+ roce_set_field(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+ V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
- if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
- ibdev_err(ibdev, "Not support inline data!\n");
- return -EINVAL;
- }
+ if (wr->send_flags & IB_SEND_INLINE)
+ return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
+ if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *)wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
}
-
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
- 1);
} else {
- if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
- for (i = 0; i < wr->num_sge; i++) {
- if (likely(wr->sg_list[i].length)) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
+ for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ j++;
}
- } else {
- roce_set_field(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE;
- i++) {
- if (likely(wr->sg_list[i].length)) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- j++;
- }
- }
-
- set_extend_sge(qp, wr, sge_ind, valid_num_sge);
}
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+ set_extend_sge(qp, wr, sge_ind, valid_num_sge);
}
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+
return 0;
}
return valid_num;
}
+static __le32 get_immtdata(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+ default:
+ return 0;
+ }
+}
+
+static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ ud_sq_wqe->immtdata = get_immtdata(wr);
+
+ roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+
+ return 0;
+}
+
static inline int set_ud_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
u32 msg_len = 0;
bool loopback;
u8 *smac;
+ int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
+ ret = set_ud_opcode(ud_sq_wqe, wr);
+ if (WARN_ON(ret))
+ return ret;
+
roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
roce_set_bit(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
- roce_set_field(ud_sq_wqe->byte_4,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND);
-
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
- break;
- default:
- ud_sq_wqe->immtdata = 0;
- break;
- }
-
/* Set sig attr */
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
curr_idx & (qp->sge.sge_cnt - 1));
roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
+ V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
qp->qkey : ud_wr(wr)->remote_qkey);
roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
return 0;
}
+static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ rc_sq_wqe->immtdata = get_immtdata(wr);
+
+ switch (ib_op) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+ break;
+ case IB_WR_REG_MR:
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ break;
+ case IB_WR_LOCAL_INV:
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+ fallthrough;
+ case IB_WR_SEND_WITH_INV:
+ rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+
+ return 0;
+}
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
unsigned int curr_idx = *sge_idx;
unsigned int valid_num_sge;
u32 msg_len = 0;
- int ret = 0;
+ int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
- break;
- case IB_WR_SEND_WITH_INV:
- rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- default:
- rc_sq_wqe->immtdata = 0;
- break;
- }
+ ret = set_rc_opcode(rc_sq_wqe, wr);
+ if (WARN_ON(ret))
+ return ret;
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_LOCAL_INV:
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
- rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- case IB_WR_REG_MR:
- set_frmr_seg(rc_sq_wqe, reg_wr(wr));
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
- break;
- default:
- break;
- }
-
- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- to_hr_opcode(wr->opcode));
-
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ;
caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ;
+ caps->qpc_sz = HNS_ROCE_V2_QPC_SZ;
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ;
- caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
+ caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE;
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
caps->reserved_lkey = 0;
caps->reserved_pds = 0;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
caps->cqc_timer_buf_pg_sz = 0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+ caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ;
caps->sccc_ba_pg_sz = 0;
caps->sccc_buf_pg_sz = 0;
caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ }
}
static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
int *buf_page_size, int *bt_page_size, u32 hem_type)
{
u64 obj_per_chunk;
- int bt_chunk_size = 1 << PAGE_SHIFT;
- int buf_chunk_size = 1 << PAGE_SHIFT;
- int obj_per_chunk_default = buf_chunk_size / obj_size;
+ u64 bt_chunk_size = PAGE_SIZE;
+ u64 buf_chunk_size = PAGE_SIZE;
+ u64 obj_per_chunk_default = buf_chunk_size / obj_size;
*buf_page_size = 0;
*bt_page_size = 0;
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
caps->max_srq_desc_sz = resp_a->max_srq_desc_sz;
- caps->cq_entry_sz = resp_a->cq_entry_sz;
+ caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE;
caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
caps->irrl_entry_sz = resp_b->irrl_entry_sz;
caps->cqc_entry_sz = resp_b->cqc_entry_sz;
caps->srqc_entry_sz = resp_b->srqc_entry_sz;
caps->idx_entry_sz = resp_b->idx_entry_sz;
- caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz;
+ caps->sccc_sz = resp_b->sccc_sz;
caps->max_mtu = resp_b->max_mtu;
- caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz);
+ caps->qpc_sz = HNS_ROCE_V2_QPC_SZ;
caps->min_cqes = resp_b->min_cqes;
caps->min_wqes = resp_b->min_wqes;
caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap);
caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
caps->mtt_ba_pg_sz = 0;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
- calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num,
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+ }
+
+ calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
HEM_TYPE_QPC);
calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+ calc_pg_sz(caps->num_qps, caps->sccc_sz,
caps->sccc_hop_num, caps->sccc_bt_num,
&caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
HEM_TYPE_SCCC);
return 0;
}
+static int hns_roce_config_qpc_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_entry_size *cfg_size =
+ (struct hns_roce_cfg_entry_size *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_QPC_SIZE);
+ cfg_size->size = cpu_to_le32(hr_dev->caps.qpc_sz);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_sccc_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_entry_size *cfg_size =
+ (struct hns_roce_cfg_entry_size *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_SCCC_SIZE);
+ cfg_size->size = cpu_to_le32(hr_dev->caps.sccc_sz);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+
+ if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+ return 0;
+
+ ret = hns_roce_config_qpc_size(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_config_sccc_size(hr_dev);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret);
+
+ return ret;
+}
+
static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
{
struct hns_roce_caps *caps = &hr_dev->caps;
}
ret = hns_roce_v2_set_bt(hr_dev);
- if (ret)
- dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
- ret);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Configure bt attribute fail, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Configure the size of QPC, SCCC, etc. */
+ ret = hns_roce_config_entry_size(hr_dev);
return ret;
}
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(hr_cq->mtr.kmem,
- n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
}
static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
+ roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M,
+ V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
+ HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
+
cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
roce_set_field(cq_context->byte_16_hop_addr,
}
static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
- struct hns_roce_v2_cqe *cqe, struct ib_wc *wc)
+ struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+ struct ib_wc *wc)
{
static const struct {
u32 cqe_status;
ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
- sizeof(*cqe), false);
+ cq->cqe_size, false);
/*
* For hns ROCEE, GENERAL_ERR is an error type that is not defined in
++wq->tail;
}
- get_cqe_status(hr_dev, *cur_qp, cqe, wc);
+ get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc);
if (unlikely(wc->status != IB_WC_SUCCESS))
return 0;
static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_cmd_mailbox *mailbox;
+ int qpc_size;
int ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memcpy(mailbox->buf, context, sizeof(*context) * 2);
+ /* The qpc size of HIP08 is only 256B, which is half that of HIP09 */
+ qpc_size = hr_dev->caps.qpc_sz;
+ memcpy(mailbox->buf, context, qpc_size);
+ memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
HNS_ROCE_CMD_MODIFY_QPC,
V2_QPC_BYTE_76_SRQ_EN_S, 1);
}
- roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
- V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4);
-
roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
hr_qp->access_flags = attr->qp_access_flags;
dma_addr_t trrl_ba;
dma_addr_t irrl_ba;
enum ib_mtu mtu;
+ u8 lp_pktn_ini;
u8 port_num;
u64 *mtts;
u8 *dmac;
V2_QPC_BYTE_52_DMAC_S, 0);
mtu = get_mtu(ibqp, attr);
+ hr_qp->path_mtu = mtu;
if (attr_mask & IB_QP_PATH_MTU) {
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
}
#define MAX_LP_MSG_LEN 65536
- /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */
+ /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
+ lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu));
+
roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
- V2_QPC_BYTE_56_LP_PKTN_INI_S,
- ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)));
+ V2_QPC_BYTE_56_LP_PKTN_INI_S, lp_pktn_ini);
roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
+ /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */
+ roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
+ V2_QPC_BYTE_172_ACK_REQ_FREQ_S, lp_pktn_ini);
+ roce_set_field(qpc_mask->byte_172_sq_psn,
+ V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
+ V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0);
+
roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
return 0;
}
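A worked example of the arithmetic above, using only the constants shown and assuming ib_mtu_enum_to_int() returns the MTU in bytes: with MAX_LP_MSG_LEN = 65536 and a 4096-byte path MTU, lp_pktn_ini = ilog2(65536 / 4096) = 4, so MTU * 2^LP_PKTN_INI is exactly 64KB; programming ACK_REQ_FREQ to the same value satisfies the requirement that it be at least LP_PKTN_INI.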
+static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ return rdma_flow_label_to_udp_sport(fl);
+}
+
static int hns_roce_v2_set_path(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
V2_QPC_BYTE_52_UDPSPN_S,
- is_udp ? 0x12b7 : 0);
+ is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num,
+ attr->dest_qp_num) : 0);
roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
V2_QPC_BYTE_52_UDPSPN_S, 0);
V2_QPC_BYTE_28_FL_S, 0);
memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) {
+ ibdev_err(ibdev,
+ "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n",
+ hr_qp->sl, MAX_SERVICE_LEVEL);
+ return -EINVAL;
+ }
+
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
+ V2_QPC_BYTE_28_SL_S, hr_qp->sl);
roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
V2_QPC_BYTE_28_SL_S, 0);
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
return 0;
}
}
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- memset(qpc_mask, 0, sizeof(*qpc_mask));
+ memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- memset(context, 0, sizeof(*context));
- memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+ memset(context, 0, hr_dev->caps.qpc_sz);
+ memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
+
ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
new_state, context, qpc_mask);
if (ret)
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
- ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp);
+ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
if (ret) {
ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret);
goto out;
if (ret)
goto out;
- memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
+ memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz);
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
V2_QPC_BYTE_212_RETRY_CNT_M,
V2_QPC_BYTE_212_RETRY_CNT_S);
- qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer);
+ qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack,
+ V2_QPC_BYTE_244_RNR_CNT_M,
+ V2_QPC_BYTE_244_RNR_CNT_S);
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
}
qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits;
out:
mutex_unlock(&hr_qp->mutex);
struct hns_roce_cmd_mailbox *mailbox;
int ret;
+ /* Resizing SRQs is not supported yet */
+ if (srq_attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
if (srq_attr_mask & IB_SRQ_LIMIT) {
if (srq_attr->srq_limit >= srq->wqe_cnt)
return -EINVAL;
aeqe = hns_roce_buf_offset(eq->mtr.kmem,
(eq->cons_index & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE);
+ eq->eqe_size);
return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
ceqe = hns_roce_buf_offset(eq->mtr.kmem,
(eq->cons_index & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE);
+ eq->eqe_size);
+
return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work = 0;
+ int int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M,
HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX);
- /* set nex_eqe_ba[43:12] */
- roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
+ roce_set_field(eqc->byte_40, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12);
- /* set nex_eqe_ba[63:44] */
- roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
+ roce_set_field(eqc->byte_44, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44);
+ roce_set_field(eqc->byte_44, HNS_ROCE_EQC_EQE_SIZE_M,
+ HNS_ROCE_EQC_EQE_SIZE_S,
+ eq->eqe_size == HNS_ROCE_V3_EQE_SIZE ? 1 : 0);
+
return 0;
}
eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
eq->type_flag = HNS_ROCE_CEQ;
eq->entries = hr_dev->caps.ceqe_depth;
- eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+ eq->eqe_size = hr_dev->caps.ceqe_size;
eq->irq = hr_dev->irq[i + other_num + aeq_num];
eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
eq->type_flag = HNS_ROCE_AEQ;
eq->entries = hr_dev->caps.aeqe_depth;
- eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+ eq->eqe_size = hr_dev->caps.aeqe_size;
eq->irq = hr_dev->irq[i - comp_num + other_num];
eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
+#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
#define HNS_ROCE_V2_MAX_IRQ_NUM 65
#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64
#define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16
#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64
-#define HNS_ROCE_V2_QPC_ENTRY_SZ 256
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_IDX_ENTRY_SZ 4
-#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
-#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32
+
+#define HNS_ROCE_V2_SCCC_SZ 32
+#define HNS_ROCE_V3_SCCC_SZ 64
+
#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408,
+ HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
HNS_ROCE_OPC_POST_MB = 0x8504,
#define V2_CQC_BYTE_8_CQN_S 0
#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
+#define V2_CQC_BYTE_8_CQE_SIZE_S 27
+#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27)
+
#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)
__le32 byte_248_ack_psn;
__le32 byte_252_err_txcqn;
__le32 byte_256_sqflush_rqcqe;
+ __le32 ext[64];
};
#define V2_QPC_BYTE_4_TST_S 0
u8 smac[4];
__le32 byte_28;
__le32 byte_32;
+ __le32 rsv[8];
};
#define V2_CQE_BYTE_4_OPCODE_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
+
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
__le32 mode_buf_pg_sz;
__le32 vf_sgid_h;
__le32 vf_sgid_type_rsv;
};
+
+enum {
+ HNS_ROCE_CFG_QPC_SIZE = BIT(0),
+ HNS_ROCE_CFG_SCCC_SIZE = BIT(1),
+};
+
+struct hns_roce_cfg_entry_size {
+ __le32 type;
+ __le32 rsv[4];
+ __le32 size;
+};
+
#define CFG_SGID_TB_TABLE_IDX_S 0
#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
u8 max_sq_desc_sz;
u8 max_rq_desc_sz;
u8 max_srq_desc_sz;
- u8 cq_entry_sz;
+ u8 cqe_sz;
};
struct hns_roce_query_pf_caps_b {
u8 cqc_entry_sz;
u8 srqc_entry_sz;
u8 idx_entry_sz;
- u8 scc_ctx_entry_sz;
+ u8 sccc_sz;
u8 max_mtu;
- __le16 qpc_entry_sz;
+ __le16 qpc_sz;
__le16 qpc_timer_entry_sz;
__le16 cqc_timer_entry_sz;
u8 min_cqes;
__le32 byte_28;
__le32 byte_32;
__le32 byte_36;
- __le32 nxt_eqe_ba0;
- __le32 nxt_eqe_ba1;
+ __le32 byte_40;
+ __le32 byte_44;
__le32 rsv[5];
};
#define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0
#define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0)
+#define HNS_ROCE_EQC_EQE_SIZE_S 20
+#define HNS_ROCE_EQC_EQE_SIZE_M GENMASK(21, 20)
+
#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
+#define MAX_SERVICE_LEVEL 0x7
+
struct hns_roce_wqe_atomic_seg {
__le64 fetchadd_swap_data;
__le64 cmp_data;
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct hns_roce_ib_iboe *iboe = NULL;
struct hns_roce_dev *hr_dev = NULL;
- u8 port = 0;
- int ret = 0;
+ int ret;
+ u8 port;
hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
iboe = &hr_dev->iboe;
mutex_init(&context->page_mutex);
}
+ resp.cqe_size = hr_dev->caps.cqe_sz;
+
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (ret)
goto error_fail_copy_to_udata;
static const struct ib_device_ops hns_roce_dev_mw_ops = {
.alloc_mw = hns_roce_alloc_mw,
.dealloc_mw = hns_roce_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw),
};
static const struct ib_device_ops hns_roce_dev_frmr_ops = {
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
- HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz,
+ HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev, "Failed to init QP context memory, aborting.\n");
}
}
- if (hr_dev->caps.sccc_entry_sz) {
+ if (hr_dev->caps.sccc_sz) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table,
HEM_TYPE_SCCC,
- hr_dev->caps.sccc_entry_sz,
+ hr_dev->caps.sccc_sz,
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev,
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
err_unmap_ctx:
- if (hr_dev->caps.sccc_entry_sz)
+ if (hr_dev->caps.sccc_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
err_unmap_srq:
return ret;
}
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
- struct hns_roce_mw *mw;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
unsigned long index = 0;
int ret;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
-
/* Allocate a key for mw from bitmap */
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
if (ret)
- goto err_bitmap;
+ return ret;
mw->rkey = hw_index_to_key(index);
- mw->ibmw.rkey = mw->rkey;
- mw->ibmw.type = type;
- mw->pdn = to_hr_pd(ib_pd)->pdn;
+ ibmw->rkey = mw->rkey;
+ mw->pdn = to_hr_pd(ibmw->pd)->pdn;
mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
if (ret)
goto err_mw;
- return &mw->ibmw;
+ return 0;
err_mw:
hns_roce_mw_free(hr_dev, mw);
-
-err_bitmap:
- kfree(mw);
-
- return ERR_PTR(ret);
+ return ret;
}
int hns_roce_dealloc_mw(struct ib_mw *ibmw)
struct hns_roce_mw *mw = to_hr_mw(ibmw);
hns_roce_mw_free(hr_dev, mw);
- kfree(mw);
-
return 0;
}
return size;
}
-static inline int mtr_umem_page_count(struct ib_umem *umem,
- unsigned int page_shift)
-{
- int count = ib_umem_page_count(umem);
-
- if (page_shift >= PAGE_SHIFT)
- count >>= page_shift - PAGE_SHIFT;
- else
- count <<= PAGE_SHIFT - page_shift;
-
- return count;
-}
-
static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
unsigned int page_shift)
{
struct ib_udata *udata, unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int max_pg_shift = buf_attr->page_shift;
- unsigned int best_pg_shift = 0;
+ unsigned int best_pg_shift;
int all_pg_count = 0;
size_t direct_size;
size_t total_size;
- unsigned long tmp;
- int ret = 0;
+ int ret;
total_size = mtr_bufs_size(buf_attr);
if (total_size < 1) {
}
if (udata) {
+ unsigned long pgsz_bitmap;
+ unsigned long page_size;
+
mtr->kmem = NULL;
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
PTR_ERR(mtr->umem));
return -ENOMEM;
}
- if (buf_attr->fixed_page) {
- best_pg_shift = max_pg_shift;
- } else {
- tmp = GENMASK(max_pg_shift, 0);
- ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr);
- best_pg_shift = (ret <= PAGE_SIZE) ?
- PAGE_SHIFT : ilog2(ret);
- }
- all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift);
+ if (buf_attr->fixed_page)
+ pgsz_bitmap = 1 << buf_attr->page_shift;
+ else
+ pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
+
+ page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
+ user_addr);
+ if (!page_size)
+ return -EINVAL;
+ best_pg_shift = order_base_2(page_size);
+ all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
ret = 0;
} else {
mtr->umem = NULL;
return -ENOMEM;
}
direct_size = mtr_kmem_direct_size(is_direct, total_size,
- max_pg_shift);
+ buf_attr->page_shift);
ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
- mtr->kmem, max_pg_shift);
+ mtr->kmem, buf_attr->page_shift);
if (ret) {
ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
goto err_alloc_mem;
- } else {
- best_pg_shift = max_pg_shift;
- all_pg_count = mtr->kmem->npages;
}
+ best_pg_shift = buf_attr->page_shift;
+ all_pg_count = mtr->kmem->npages;
}
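For illustration of the page-size selection above, assuming the documented behaviour of ib_umem_find_best_pgsz() (it returns the largest page size from the supplied bitmap that fits the umem, or 0 if none does): with fixed_page set and buf_attr->page_shift = 12, pgsz_bitmap is 1 << 12, so only 4KB DMA blocks are accepted; without fixed_page, GENMASK(buf_attr->page_shift, PAGE_SHIFT) permits any power-of-two size between the system page size and the requested maximum, and ib_umem_num_dma_blocks() then counts blocks of whichever size was chosen.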
/* must bigger than minimum hardware page shift */
unsigned int *buf_page_shift)
{
struct hns_roce_buf_region *r;
- unsigned int page_shift = 0;
+ unsigned int page_shift;
int page_cnt = 0;
size_t buf_size;
int region_cnt;
return 0;
}
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
+ return 0;
}
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
#include "hns_roce_hem.h"
#include <rdma/hns-abi.h>
-#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
-
static void flush_work_handle(struct work_struct *work)
{
struct hns_roce_work *flush_work = container_of(work,
}
}
- if (hr_dev->caps.sccc_entry_sz) {
+ if (hr_dev->caps.sccc_sz) {
/* Alloc memory for SCC CTX */
ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
int ret;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
- cap->max_send_sge > hr_dev->caps.max_sq_sg ||
- cap->max_inline_data > hr_dev->caps.max_sq_inline) {
+ cap->max_send_sge > hr_dev->caps.max_sq_sg) {
ibdev_err(ibdev,
- "failed to check SQ WR, SGE or inline num, ret = %d.\n",
+ "failed to check SQ WR or SGE num, ret = %d.\n",
-EINVAL);
return -EINVAL;
}
cap->max_send_wr = cnt;
cap->max_send_sge = hr_qp->sq.max_gs;
- /* We don't support inline sends for kernel QPs (yet) */
- cap->max_inline_data = 0;
-
return 0;
}
hr_qp->ibqp.qp_type = init_attr->qp_type;
+ if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
+ init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
+
+ hr_qp->max_inline_data = init_attr->cap.max_inline_data;
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
int ret;
switch (init_attr->qp_type) {
- case IB_QPT_RC: {
- hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
- if (!hr_qp)
- return ERR_PTR(-ENOMEM);
-
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
- hr_qp);
- if (ret) {
- ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n",
- hr_qp->qpn, ret);
- kfree(hr_qp);
- return ERR_PTR(ret);
- }
-
+ case IB_QPT_RC:
+ case IB_QPT_GSI:
break;
+ default:
+ ibdev_err(ibdev, "not support QP type %d\n",
+ init_attr->qp_type);
+ return ERR_PTR(-EOPNOTSUPP);
}
- case IB_QPT_GSI: {
- /* Userspace is not allowed to create special QPs: */
- if (udata) {
- ibdev_err(ibdev, "not support usr space GSI\n");
- return ERR_PTR(-EINVAL);
- }
- hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
- if (!hr_qp)
- return ERR_PTR(-ENOMEM);
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (!hr_qp)
+ return ERR_PTR(-ENOMEM);
+ if (init_attr->qp_type == IB_QPT_GSI) {
hr_qp->port = init_attr->port_num - 1;
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
-
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
- hr_qp);
- if (ret) {
- ibdev_err(ibdev, "Create GSI QP failed!\n");
- kfree(hr_qp);
- return ERR_PTR(ret);
- }
-
- break;
- }
- default:{
- ibdev_err(ibdev, "not support QP type %d\n",
- init_attr->qp_type);
- return ERR_PTR(-EOPNOTSUPP);
- }
}
+ ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n",
+ init_attr->qp_type, ret);
+ ibdev_err(ibdev, "Create GSI QP failed!\n");
+ kfree(hr_qp);
+ return ERR_PTR(ret);
+ }
return &hr_qp->ibqp;
}
mutex_lock(&hr_qp->mutex);
- cur_state = attr_mask & IB_QP_CUR_STATE ?
- attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
+ if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
+ goto out;
+
+ cur_state = hr_qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->uobject &&
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_srq ucmd = {};
- int ret = 0;
+ int ret;
u32 cqn;
/* Check the actual SRQ wqe and SRQ sge num */
return ret;
}
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
free_srq_idx(hr_dev, srq);
free_srq_wrid(srq);
free_srq_buf(hr_dev, srq);
+ return 0;
}
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
}
/* i40iw.c */
-void i40iw_add_ref(struct ib_qp *);
-void i40iw_rem_ref(struct ib_qp *);
+void i40iw_qp_add_ref(struct ib_qp *ibqp);
+void i40iw_qp_rem_ref(struct ib_qp *ibqp);
struct ib_qp *i40iw_get_qp(struct ib_device *, int);
void i40iw_flush_wqes(struct i40iw_device *iwdev,
bool wait);
void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num);
+void i40iw_free_qp_resources(struct i40iw_qp *iwqp);
+
enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
struct i40iw_dma_mem *memptr,
u32 size, u32 mask);
iwqp = cm_node->iwqp;
if (iwqp) {
iwqp->cm_node = NULL;
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
cm_node->iwqp = NULL;
} else if (cm_node->qhash_set) {
i40iw_get_addr_info(cm_node, &nfo);
kfree(work);
return;
}
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
work->iwqp = iwqp;
kfree(dwork);
i40iw_cm_disconn_true(iwqp);
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
}
/**
cm_node->lsmm_size = accept.size + conn_param->private_data_len;
i40iw_cm_init_tsa_conn(iwqp, cm_node);
cm_id->add_ref(cm_id);
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
attr.qp_state = IB_QPS_RTS;
cm_node->qhash_set = false;
iwqp->cm_node = cm_node;
cm_node->iwqp = iwqp;
iwqp->cm_id = cm_id;
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
cm_node->state = I40IW_CM_STATE_SYN_SENT;
__func__, info->qp_cq_id);
continue;
}
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
qp = &iwqp->sc_qp;
spin_lock_irqsave(&iwqp->lock, flags);
break;
}
if (info->qp)
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
} while (1);
if (aeqcnt)
* i40iw_dpc - tasklet for aeq and ceq 0
* @data: iwarp device
*/
-static void i40iw_dpc(unsigned long data)
+static void i40iw_dpc(struct tasklet_struct *t)
{
- struct i40iw_device *iwdev = (struct i40iw_device *)data;
+ struct i40iw_device *iwdev = from_tasklet(iwdev, t, dpc_tasklet);
if (iwdev->msix_shared)
i40iw_process_ceq(iwdev, iwdev->ceqlist);
* i40iw_ceq_dpc - dpc handler for CEQ
* @data: data points to CEQ
*/
-static void i40iw_ceq_dpc(unsigned long data)
+static void i40iw_ceq_dpc(struct tasklet_struct *t)
{
- struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+ struct i40iw_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet);
struct i40iw_device *iwdev = iwceq->iwdev;
i40iw_process_ceq(iwdev, iwceq);
enum i40iw_status_code status;
if (iwdev->msix_shared && !ceq_id) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+ tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc);
status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
} else {
- tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
+ tasklet_setup(&iwceq->dpc_tasklet, i40iw_ceq_dpc);
status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
}
u32 ret = 0;
if (!iwdev->msix_shared) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+ tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc);
ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
}
if (ret) {
status = i40iw_save_msix_info(iwdev, ldev);
if (status)
return status;
- iwdev->hw.dev_context = (void *)ldev->pcidev;
+ iwdev->hw.pcidev = ldev->pcidev;
iwdev->hw.hw_addr = ldev->hw_addr;
status = i40iw_allocate_dma_mem(&iwdev->hw,
&iwdev->obj_mem, 8192, 4096);
*/
static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
int i;
if (!chunk->pg_cnt)
struct i40iw_chunk *chunk,
int pg_cnt)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
struct page *page;
u8 *addr;
u32 size;
struct i40iw_priv_qp_ops;
struct i40iw_priv_cq_ops;
struct i40iw_hmc_ops;
+struct pci_dev;
enum i40iw_page_size {
I40IW_PAGE_SIZE_4K,
struct i40iw_hw {
u8 __iomem *hw_addr;
- void *dev_context;
+ struct pci_dev *pcidev;
struct i40iw_hmc_info hmc;
};
}
}
-/**
- * i40iw_free_qp - callback after destroy cqp completes
- * @cqp_request: cqp request for destroy qp
- * @num: not used
- */
-static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
-{
- struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
- struct i40iw_device *iwdev;
- u32 qp_num = iwqp->ibqp.qp_num;
-
- iwdev = iwqp->iwdev;
-
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
-}
-
/**
* i40iw_wait_event - wait for completion
* @iwdev: iwarp device
}
/**
- * i40iw_add_ref - add refcount for qp
+ * i40iw_qp_add_ref - add refcount for qp
* @ibqp: iqarp qp
*/
-void i40iw_add_ref(struct ib_qp *ibqp)
+void i40iw_qp_add_ref(struct ib_qp *ibqp)
{
struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
- atomic_inc(&iwqp->refcount);
+ refcount_inc(&iwqp->refcount);
}
/**
- * i40iw_rem_ref - rem refcount for qp and free if 0
+ * i40iw_qp_rem_ref - rem refcount for qp and free if 0
* @ibqp: iqarp qp
*/
-void i40iw_rem_ref(struct ib_qp *ibqp)
+void i40iw_qp_rem_ref(struct ib_qp *ibqp)
{
struct i40iw_qp *iwqp;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
struct i40iw_device *iwdev;
u32 qp_num;
unsigned long flags;
iwqp = to_iwqp(ibqp);
iwdev = iwqp->iwdev;
spin_lock_irqsave(&iwdev->qptable_lock, flags);
- if (!atomic_dec_and_test(&iwqp->refcount)) {
+ if (!refcount_dec_and_test(&iwqp->refcount)) {
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
return;
}
qp_num = iwqp->ibqp.qp_num;
iwdev->qp_table[qp_num] = NULL;
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_request->callback_fcn = i40iw_free_qp;
- cqp_request->param = (void *)&iwqp->sc_qp;
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_QP_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
- cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.qp_destroy.remove_hash_idx = true;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- return;
+ complete(&iwqp->free_qp);
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
}
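/*
 * Editorial aside, not from the patch: the i40iw changes above move QP
 * teardown from a CQP callback into the destroy path, using a refcount_t
 * plus a completion. The last reference dropper only signals free_qp;
 * the destroyer waits for it and then releases everything in one place.
 * A minimal sketch with a hypothetical "struct my_qp":
 */
#include <linux/refcount.h>
#include <linux/completion.h>

struct my_qp {
	refcount_t refcount;
	struct completion free_qp;
};

static void my_qp_init(struct my_qp *qp)
{
	refcount_set(&qp->refcount, 1);	/* creation reference */
	init_completion(&qp->free_qp);
}

static void my_qp_add_ref(struct my_qp *qp)
{
	refcount_inc(&qp->refcount);
}

static void my_qp_rem_ref(struct my_qp *qp)
{
	if (refcount_dec_and_test(&qp->refcount))
		complete(&qp->free_qp);	/* wake the destroyer, do not free here */
}

static void my_qp_destroy(struct my_qp *qp)
{
	my_qp_rem_ref(qp);		/* drop the creation reference */
	wait_for_completion(&qp->free_qp);
	/* all users are gone; release HW and memory resources here */
}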
/**
u64 size,
u32 alignment)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
if (!mem)
return I40IW_ERR_PARAM;
*/
void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
if (!mem || !mem->va)
return;
struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
i40iw_terminate_done(qp, 1);
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
}
/**
struct i40iw_qp *iwqp;
iwqp = (struct i40iw_qp *)qp->back_qp;
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0);
iwqp->terminate_timer.expires = jiffies + HZ;
add_timer(&iwqp->terminate_timer);
iwqp = (struct i40iw_qp *)qp->back_qp;
if (del_timer(&iwqp->terminate_timer))
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
}
/**
* @ibpd: ptr of pd to be deallocated
* @udata: user data or null for kernel object
*/
-static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct i40iw_pd *iwpd = to_iwpd(ibpd);
struct i40iw_device *iwdev = to_iwdev(ibpd->device);
i40iw_rem_pdusecount(iwpd, iwdev);
+ return 0;
}
/**
* @iwqp: qp ptr (user or kernel)
* @qp_num: qp number assigned
*/
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num)
+void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
{
struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
+ struct i40iw_device *iwdev = iwqp->iwdev;
+ u32 qp_num = iwqp->ibqp.qp_num;
i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
kfree(iwqp->kqp.wrid_mem);
iwqp->kqp.wrid_mem = NULL;
- kfree(iwqp->allocated_buffer);
+ kfree(iwqp);
}
/**
static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct ib_qp_attr attr;
+ struct i40iw_device *iwdev = iwqp->iwdev;
+
+ memset(&attr, 0, sizeof(attr));
iwqp->destroyed = 1;
}
}
- i40iw_rem_ref(&iwqp->ibqp);
+ attr.qp_state = IB_QPS_ERR;
+ i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp);
+ i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+ i40iw_free_qp_resources(iwqp);
+ i40iw_rem_devusecount(iwdev);
+
return 0;
}
struct i40iw_create_qp_req req;
struct i40iw_create_qp_resp uresp;
u32 qp_num = 0;
- void *mem;
enum i40iw_status_code ret;
int err_code;
int sq_size;
init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
- mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
- if (!mem)
+ iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+ if (!iwqp)
return ERR_PTR(-ENOMEM);
- iwqp = (struct i40iw_qp *)mem;
- iwqp->allocated_buffer = mem;
qp = &iwqp->sc_qp;
qp->back_qp = (void *)iwqp;
qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+ iwqp->iwdev = iwdev;
iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
if (i40iw_allocate_dma_mem(dev->hw,
goto error;
}
- iwqp->iwdev = iwdev;
iwqp->iwpd = iwpd;
iwqp->ibqp.qp_num = qp_num;
qp = &iwqp->sc_qp;
goto error;
}
- i40iw_add_ref(&iwqp->ibqp);
+ refcount_set(&iwqp->refcount, 1);
spin_lock_init(&iwqp->lock);
iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
iwdev->qp_table[qp_num] = iwqp;
}
init_completion(&iwqp->sq_drained);
init_completion(&iwqp->rq_drained);
+ init_completion(&iwqp->free_qp);
return &iwqp->ibqp;
error:
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+ i40iw_free_qp_resources(iwqp);
return ERR_PTR(err_code);
}
* @ib_cq: cq pointer
* @udata: user data or NULL for kernel object
*/
-static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct i40iw_cq *iwcq;
struct i40iw_device *iwdev;
i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources(iwdev, iwcq);
i40iw_rem_devusecount(iwdev);
+ return 0;
}
/**
if (iwmr->type == IW_MEMREG_TYPE_QP)
iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
- rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap,
- iwmr->page_size) {
+ rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
*pbl = rdma_block_iter_dma_address(&biter);
pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
}
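/*
 * Editorial sketch, not part of the patch: the conversions above replace
 * hand-rolled page counting with ib_umem_find_best_pgsz(),
 * ib_umem_num_dma_blocks() and rdma_umem_for_each_dma_block(). Those
 * helpers are real core APIs; "fill_pbl" and the SZ_4K | SZ_2M mask (the
 * page sizes i40iw supports) frame a hypothetical example.
 */
#include <linux/sizes.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int fill_pbl(struct ib_umem *umem, u64 virt, u64 *pbl, u32 *nblocks)
{
	struct ib_block_iter biter;
	unsigned long pg_sz;

	/* largest supported page size that can cover the region */
	pg_sz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_2M, virt);
	if (!pg_sz)
		return -EINVAL;

	/* replaces the old shift/round-up arithmetic on region->length */
	*nblocks = ib_umem_num_dma_blocks(umem, pg_sz);

	/* one aligned DMA address per block, independent of the SGL layout */
	rdma_umem_for_each_dma_block(umem, &biter, pg_sz)
		*pbl++ = rdma_block_iter_dma_address(&biter);

	return 0;
}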
struct i40iw_mr *iwmr;
struct ib_umem *region;
struct i40iw_mem_reg_req req;
- u64 pbl_depth = 0;
u32 stag = 0;
u16 access;
- u64 region_length;
bool use_pbles = false;
unsigned long flags;
int err = -ENOSYS;
int ret;
- int pg_shift;
if (!udata)
return ERR_PTR(-EOPNOTSUPP);
if (req.reg_type == IW_MEMREG_TYPE_MEM)
iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M,
virt);
-
- region_length = region->length + (start & (iwmr->page_size - 1));
- pg_shift = ffs(iwmr->page_size) - 1;
- pbl_depth = region_length >> pg_shift;
- pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
iwmr->length = region->length;
iwpbl->user_base = virt;
palloc = &iwpbl->pble_alloc;
iwmr->type = req.reg_type;
- iwmr->page_cnt = (u32)pbl_depth;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
switch (req.reg_type) {
case IW_MEMREG_TYPE_QP:
.get_hw_stats = i40iw_get_hw_stats,
.get_port_immutable = i40iw_port_immutable,
.iw_accept = i40iw_accept,
- .iw_add_ref = i40iw_add_ref,
+ .iw_add_ref = i40iw_qp_add_ref,
.iw_connect = i40iw_connect,
.iw_create_listen = i40iw_create_listen,
.iw_destroy_listen = i40iw_destroy_listen,
.iw_get_qp = i40iw_get_qp,
.iw_reject = i40iw_reject,
- .iw_rem_ref = i40iw_rem_ref,
+ .iw_rem_ref = i40iw_qp_rem_ref,
.map_mr_sg = i40iw_map_mr_sg,
.mmap = i40iw_mmap,
.modify_qp = i40iw_modify_qp,
{
struct i40iw_ib_device *iwibdev;
struct net_device *netdev = iwdev->netdev;
- struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
+ struct pci_dev *pcidev = iwdev->hw.pcidev;
iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
if (!iwibdev) {
struct i40iw_qp_host_ctx_info ctx_info;
struct i40iwarp_offload_info iwarp_info;
void *allocated_buffer;
- atomic_t refcount;
+ refcount_t refcount;
struct iw_cm_id *cm_id;
void *cm_node;
struct ib_mr *lsmm_mr;
struct i40iw_dma_mem ietf_mem;
struct completion sq_drained;
struct completion rq_drained;
+ struct completion free_qp;
};
#endif
return 0;
}
-
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
struct delayed_work timeout;
};
+struct rej_tmout_entry {
+ int slave;
+ u32 rem_pv_cm_id;
+ struct delayed_work timeout;
+ struct xarray *xa_rej_tmout;
+};
+
struct cm_generic_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
+ unsigned char unused[2];
+ __be16 rej_reason;
};
struct cm_sidr_generic_msg {
if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ } else if (id->scheduled_delete) {
+ /* Adjust timeout if already scheduled */
+ mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
}
+#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason)
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad)
{
int pv_cm_id = -1;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_MRA_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
+ (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) {
sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
if (id)
}
if (!id) {
- pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
- slave_id, sl_cm_id);
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n",
+ slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -EINVAL;
}
return 0;
}
+static void rej_tmout_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
+ struct rej_tmout_entry *deleted;
+
+ deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
+
+ if (deleted != item)
+ pr_debug("deleted(%p) != item(%p)\n", deleted, item);
+
+ kfree(item);
+}
+
+static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
+{
+ struct rej_tmout_entry *item;
+ struct rej_tmout_entry *old;
+ int ret = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (item) {
+ if (xa_err(item))
+ ret = xa_err(item);
+ else
+ /* If a retry, adjust delayed work */
+ mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ goto err_or_exists;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
+ item->slave = slave;
+ item->rem_pv_cm_id = rem_pv_cm_id;
+ item->xa_rej_tmout = &sriov->xa_rej_tmout;
+
+ old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL);
+ if (old) {
+ pr_debug(
+ "Non-null old entry (%p) or error (%d) when inserting\n",
+ old, xa_err(old));
+ kfree(item);
+ return xa_err(old);
+ }
+
+ schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+
+ return 0;
+
+err_or_exists:
+ xa_unlock(&sriov->xa_rej_tmout);
+ return ret;
+}
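/*
 * Editorial sketch, not part of the patch: alloc_rej_tmout() above
 * follows an "insert or extend" pattern -- look the key up under
 * xa_lock() and, if it exists, just push its delayed work out; otherwise
 * insert with xa_cmpxchg() so a racing inserter loses cleanly.
 * "struct tmout_entry", track_timeout() and TMOUT_DELAY are hypothetical
 * names used only for illustration.
 */
#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

#define TMOUT_DELAY	(30 * HZ)

struct tmout_entry {
	struct delayed_work timeout;
	struct xarray *xa;
	unsigned long id;
};

static int track_timeout(struct xarray *xa, unsigned long id,
			 work_func_t expire_fn)
{
	struct tmout_entry *item, *old;

	xa_lock(xa);
	item = xa_load(xa, id);
	if (item) {
		/* already tracked (a retry): extend the expiry instead */
		mod_delayed_work(system_wq, &item->timeout, TMOUT_DELAY);
		xa_unlock(xa);
		return 0;
	}
	xa_unlock(xa);

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	INIT_DELAYED_WORK(&item->timeout, expire_fn);
	item->xa = xa;
	item->id = id;

	/* only succeeds if the slot is still empty */
	old = xa_cmpxchg(xa, id, NULL, item, GFP_KERNEL);
	if (old) {
		kfree(item);
		return xa_err(old);	/* 0 if another CPU simply won the race */
	}

	schedule_delayed_work(&item->timeout, TMOUT_DELAY);
	return 0;
}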
+
+static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
+{
+ struct rej_tmout_entry *item;
+ int slave;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (!item || xa_err(item)) {
+ pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
+ rem_pv_cm_id, xa_err(item));
+ slave = !item ? -ENOENT : xa_err(item);
+ } else {
+ slave = item->slave;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ return slave;
+}
+
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad)
{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ u32 rem_pv_cm_id = get_local_comm_id(mad);
u32 pv_cm_id;
struct id_map_entry *id;
+ int sts;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
be64_to_cpu(gid.global.interface_id));
return -ENOENT;
}
+
+ sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave);
+ if (sts)
+ /* Even if this fails, we pass on the REQ to the slave */
+ pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n",
+ rem_pv_cm_id, *slave, sts);
+
return 0;
}
id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
if (!id) {
- pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
+ REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) {
+ *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id);
+
+ return (*slave < 0) ? *slave : 0;
+ }
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n",
+ pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -ENOENT;
}
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
+ xa_init(&dev->sriov.xa_rej_tmout);
+}
+
+static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
+{
+ struct rej_tmout_entry *item;
+ bool flush_needed = false;
+ unsigned long id;
+ int cnt = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ xa_for_each(&sriov->xa_rej_tmout, id, item) {
+ if (slave < 0 || slave == item->slave) {
+ mod_delayed_work(system_wq, &item->timeout, 0);
+ flush_needed = true;
+ ++cnt;
+ }
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ if (flush_needed) {
+ flush_scheduled_work();
+ pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
+ cnt, slave);
+ }
+
+ if (slave < 0)
+ WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
}
/* slave = -1 ==> all slaves */
list_del(&map->list);
kfree(map);
}
+
+ rej_tmout_xa_cleanup(sriov, slave);
}
if (IS_ERR(*umem))
return PTR_ERR(*umem);
- n = ib_umem_page_count(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
return err;
}
-void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
struct mlx4_ib_cq *mcq = to_mcq(cq);
mlx4_db_free(dev->dev, &mcq->db);
}
ib_umem_release(mcq->umem);
+ return 0;
}
static void dump_cqe(void *cqe)
sgid, dgid);
}
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad)
u16 cached_pkey;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
- if (dest_qpt > IB_QPT_GSI)
+ if (dest_qpt > IB_QPT_GSI) {
+ pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt);
return -EINVAL;
+ }
tun_ctx = dev->sriov.demux[port-1].tun[slave];
if (dest_qpt) {
u16 pkey_ix;
ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s cached pkey for index %d, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ wc->pkey_index, ret);
return -EINVAL;
+ }
ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ cached_pkey, ret);
return -EINVAL;
+ }
tun_pkey_ix = pkey_ix;
} else
tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
int err;
struct ib_port_attr pattr;
- if (in_wc && in_wc->qp) {
- pr_debug("received MAD: port:%d slid:%d sqpn:%d "
- "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
- port_num,
- in_wc->slid, in_wc->src_qp,
- in_wc->dlid_path_bits,
- in_wc->qp->qp_num,
- in_wc->wc_flags,
- be64_to_cpu(in_mad->mad_hdr.tid),
- in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
- be16_to_cpu(in_mad->mad_hdr.attr_id));
- if (in_wc->wc_flags & IB_WC_GRH) {
- pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->sgid.global.subnet_prefix),
- be64_to_cpu(in_grh->sgid.global.interface_id));
- pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->dgid.global.subnet_prefix),
- be64_to_cpu(in_grh->dgid.global.interface_id));
- }
- }
-
slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
+static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wi_wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_demux_pv_qp *tun_qp,
int index)
return ret;
}
-static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
-{
- int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
-
- return (qpn >= proxy_start && qpn <= proxy_start + 1);
-}
-
-
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
- (MLX4_NUM_TUNNEL_BUFS - 1))
+ (MLX4_NUM_WIRE_BUFS - 1))
ret = -EAGAIN;
else
- wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
spin_unlock(&sqp->tx_lock);
if (ret)
goto out;
u16 vlan_id;
u8 qos;
u8 *dmac;
+ int sts;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
&vlan_id, &qos))
rdma_ah_set_sl(&ah_attr, qos);
- mlx4_ib_send_to_wire(dev, slave, ctx->port,
- is_proxy_qp0(dev, wc->src_qp, slave) ?
- IB_QPT_SMI : IB_QPT_GSI,
- be16_to_cpu(tunnel->hdr.pkey_index),
- be32_to_cpu(tunnel->hdr.remote_qpn),
- be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ sts = mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ if (sts)
+ pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ slave, sts);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
tun_qp = &ctx->qp[qp_type];
- tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->ring = kcalloc(nmbr_bufs,
sizeof(struct mlx4_ib_buf),
GFP_KERNEL);
if (!tun_qp->ring)
return -ENOMEM;
- tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->tx_ring = kcalloc(nmbr_bufs,
sizeof (struct mlx4_ib_tun_tx_buf),
GFP_KERNEL);
if (!tun_qp->tx_ring) {
tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
if (!tun_qp->ring[i].addr)
goto err;
}
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->tx_ring[i].buf.addr =
kmalloc(tx_buf_size, GFP_KERNEL);
if (!tun_qp->tx_ring[i].buf.addr)
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
}
- i = MLX4_NUM_TUNNEL_BUFS;
+ i = nmbr_bufs;
err:
while (i > 0) {
--i;
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return;
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
"buf:%lld\n", wc.wr_id);
break;
case IB_WC_SEND:
- pr_debug("received tunnel send completion:"
- "wrid=0x%llx, status=0x%x\n",
- wc.wr_id, wc.status);
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
struct ib_qp_attr attr;
int qp_attr_mask_INIT;
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
qp_init_attr.init_attr.send_cq = ctx->cq;
qp_init_attr.init_attr.recv_cq = ctx->cq;
qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
- qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
+ qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
qp_init_attr.init_attr.cap.max_send_sge = 1;
qp_init_attr.init_attr.cap.max_recv_sge = 1;
if (create_tun) {
goto err_qp;
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
if (ret) {
pr_err(" mlx4_ib_post_pv_buf error"
switch (wc.opcode) {
case IB_WC_SEND:
kfree(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
case IB_WC_RECV:
mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
grh = &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)))
+ (MLX4_NUM_WIRE_BUFS - 1)))
pr_err("Failed reposting SQP "
"buf:%lld\n", wc.wr_id);
break;
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
kfree(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
{
int ret, cq_size;
struct ib_cq_init_attr cq_attr = {};
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (ctx->state != DEMUX_PV_STATE_DOWN)
return -EEXIST;
goto err_out_qp0;
}
- cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ cq_size = 2 * nmbr_bufs;
if (ctx->has_smi)
cq_size *= 2;
cq_attr.cqe = cq_size;
- ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ ctx->cq = ib_create_cq(ctx->ib_dev,
+ create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler,
NULL, ctx, &cq_attr);
if (IS_ERR(ctx->cq)) {
ret = PTR_ERR(ctx->cq);
INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+ ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq;
ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
if (ret) {
goto err_mcg;
}
- snprintf(name, sizeof name, "mlx4_ibt%d", port);
+ snprintf(name, sizeof(name), "mlx4_ibt%d", port);
ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!ctx->wq) {
pr_err("Failed to create tunnelling WQ for port %d\n", port);
goto err_wq;
}
- snprintf(name, sizeof name, "mlx4_ibud%d", port);
+ snprintf(name, sizeof(name), "mlx4_ibwi%d", port);
+ ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ if (!ctx->wi_wq) {
+ pr_err("Failed to create wire WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wiwq;
+ }
+
+ snprintf(name, sizeof(name), "mlx4_ibud%d", port);
ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!ctx->ud_wq) {
pr_err("Failed to create up/down WQ for port %d\n", port);
return 0;
err_udwq:
+ destroy_workqueue(ctx->wi_wq);
+ ctx->wi_wq = NULL;
+
+err_wiwq:
destroy_workqueue(ctx->wq);
ctx->wq = NULL;
ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
}
flush_workqueue(ctx->wq);
+ flush_workqueue(ctx->wi_wq);
for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
free_pv_object(dev, i, ctx->port);
}
kfree(ctx->tun);
destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wi_wq);
destroy_workqueue(ctx->wq);
}
}
return 0;
}
-static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
+ return 0;
}
static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
return err;
}
-static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+ return 0;
}
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
int default_flow;
- static const u16 __mlx4_domain[] = {
- [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
- [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
- [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
- [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
- };
-
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
pr_err("Invalid priority value %d\n", flow_attr->priority);
return -EINVAL;
}
- if (domain >= IB_FLOW_DOMAIN_NUM) {
- pr_err("Invalid domain value %d\n", domain);
- return -EINVAL;
- }
-
if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
return -EINVAL;
return PTR_ERR(mailbox);
ctrl = mailbox->buf;
- ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
- flow_attr->priority);
+ ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
ctrl->port = flow_attr->port;
ctrl->qpn = cpu_to_be32(qp->qp_num);
}
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata)
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
}
while (i < ARRAY_SIZE(type) && type[i]) {
- err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i].id);
+ err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
+ type[i], &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
if (is_bonded) {
*/
flow_attr->port = 2;
err = __mlx4_ib_create_flow(qp, flow_attr,
- domain, type[j],
+ MLX4_DOMAIN_UVERBS, type[j],
&mflow->reg_id[j].mirror);
flow_attr->port = 1;
if (err)
.destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
.destroy_wq = mlx4_ib_destroy_wq,
.modify_wq = mlx4_ib_modify_wq,
+
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
};
static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
.alloc_mw = mlx4_ib_alloc_mw,
.dealloc_mw = mlx4_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
};
static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
/* Add an empty rule for IB L2 */
memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
- err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
- IB_FLOW_DOMAIN_NIC,
- MLX4_FS_REGULAR,
- &mqp->reg_id);
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
+ MLX4_FS_REGULAR, &mqp->reg_id);
} else {
err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
}
};
enum {
- MLX4_NUM_TUNNEL_BUFS = 256,
+ MLX4_NUM_TUNNEL_BUFS = 512,
+ MLX4_NUM_WIRE_BUFS = 2048,
};
struct mlx4_ib_tunnel_header {
u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
};
+enum {
+ /*
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
+ */
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
+};
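+/*
+ * Editorial worked example, not part of the patch: the 82-byte bound in
+ * the relocated comment above comes from the RoCE/Ethernet case:
+ *
+ *	18 (Ethernet + 802.1Q tag) + 40 (GRH) + 12 (BTH)
+ *	  + 8 (DETH) + 4 (immediate data) = 82 bytes
+ *
+ * The native IB case swaps the 18-byte Ethernet header for an 8-byte
+ * LRH, giving 72 bytes, so Ethernet is indeed the larger bound.
+ */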
+
+struct mlx4_ib_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
+};
+
struct mlx4_ib_qp {
union {
struct ib_qp ibqp;
struct mlx4_wqn_range *wqn_range;
/* Number of RSS QP parents that uses this WQ */
u32 rss_usecnt;
- struct mlx4_ib_rss *rss_ctx;
+ union {
+ struct mlx4_ib_rss *rss_ctx;
+ struct mlx4_ib_sqp *sqp;
+ };
};
struct mlx4_ib_srq {
union mlx4_ext_av av;
};
+struct mlx4_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_rwq_ind_tbl;
+};
+
/****************************************/
/* alias guid support */
/****************************************/
struct ib_pd *pd;
struct work_struct work;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct mlx4_ib_demux_pv_qp qp[2];
};
struct ib_device *ib_dev;
int port;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
atomic64_t subnet_prefix;
spinlock_t id_map_lock;
struct rb_root sl_id_map;
struct list_head cm_list;
+ struct xarray xa_rej_tmout;
};
struct gid_cache_context {
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+static inline int
+mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
+{
+ return 0;
+}
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts);
u64 total_len = 0;
int i;
+ *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
/*
* Initialization - save the first chunk start as the
goto err_free;
}
- n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
mmr->umem = NULL;
goto release_mpt_entry;
}
- n = ib_umem_page_count(mmr->umem);
+ n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
shift = PAGE_SHIFT;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
return 0;
}
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_mw *mw;
+ struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
+ struct mlx4_ib_mw *mw = to_mmw(ibmw);
int err;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
- to_mlx4_type(type), &mw->mmw);
+ err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
+ to_mlx4_type(ibmw->type), &mw->mmw);
if (err)
- goto err_free;
+ return err;
err = mlx4_mw_enable(dev->dev, &mw->mmw);
if (err)
goto err_mw;
- mw->ibmw.rkey = mw->mmw.key;
-
- return &mw->ibmw;
+ ibmw->rkey = mw->mmw.key;
+ return 0;
err_mw:
mlx4_mw_free(dev->dev, &mw->mmw);
-
-err_free:
- kfree(mw);
-
- return ERR_PTR(err);
+ return err;
}
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
struct mlx4_ib_mw *mw = to_mmw(ibmw);
mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
- kfree(mw);
-
return 0;
}
MLX4_IB_LINK_TYPE_ETH = 1
};
-enum {
- /*
- * Largest possible UD header: send with GRH and immediate
- * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
- * tag. (LRH would only use 8 bytes, so Ethernet is the
- * biggest case)
- */
- MLX4_IB_UD_HEADER_SIZE = 82,
- MLX4_IB_LSO_HEADER_SPARE = 128,
-};
-
-struct mlx4_ib_sqp {
- struct mlx4_ib_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
- struct ib_qp *roce_v2_gsi;
-};
-
enum {
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
MLX4_IB_RWQ_SRC = 1,
};
-static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
-{
- return container_of(mqp, struct mlx4_ib_sqp, qp);
-}
-
static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
if (!mlx4_is_master(dev->dev))
if (err)
goto err_qpn;
- mutex_init(&qp->mutex);
-
INIT_LIST_HEAD(&qp->gid_list);
INIT_LIST_HEAD(&qp->steering_rules);
return err;
}
-static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp;
struct mlx4_ib_create_qp_rss ucmd = {};
size_t required_cmd_sz;
int err;
if (!udata) {
pr_debug("RSS QP with NULL udata\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (udata->outlen)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
sizeof(ucmd.reserved1);
if (udata->inlen < required_cmd_sz) {
pr_debug("invalid inlen\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
pr_debug("copy failed\n");
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (ucmd.comp_mask || ucmd.reserved1)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
udata->inlen - sizeof(ucmd))) {
pr_debug("inlen is not supported\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
pr_debug("RSS QP with unsupported QP type %d\n",
init_attr->qp_type);
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->create_flags) {
pr_debug("RSS QP doesn't support create flags\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->send_cq || init_attr->cap.max_send_wr) {
pr_debug("RSS QP with unsupported send attributes\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
-
- return &qp->ibqp;
+ return 0;
}
/*
qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
goto err;
}
- n = ib_umem_page_count(qp->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn,
- struct mlx4_ib_qp **caller_qp)
+ struct mlx4_ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
int qpn;
int err;
- struct mlx4_ib_sqp *sqp = NULL;
- struct mlx4_ib_qp *qp;
struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
sqpn = qpn;
}
- if (!*caller_qp) {
- if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
- (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
- MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
- if (!sqp)
- return -ENOMEM;
- qp = &sqp->qp;
- } else {
- qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
- }
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
- } else
- qp = *caller_qp;
+ if (init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI ||
+ qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
+ if (!qp->sqp)
+ return -ENOMEM;
+ }
qp->mlx4_ib_qp_type = qp_type;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
goto err;
}
- n = ib_umem_page_count(qp->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
qp->mqp.event = mlx4_ib_qp_event;
- if (!*caller_qp)
- *caller_qp = qp;
-
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq));
mlx4_db_free(dev->dev, &qp->db);
err:
- if (!sqp && !*caller_qp)
- kfree(qp);
- kfree(sqp);
-
+ kfree(qp->sqp);
return err;
}
mlx4_qp_free(dev->dev, &qp->mqp);
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
del_gid_entries(qp);
- kfree(qp->rss_ctx);
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy;
}
-static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp = NULL;
int err;
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
if (init_attr->rwq_ind_tbl)
- return _mlx4_ib_create_qp_rss(pd, init_attr, udata);
+ return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata);
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
MLX4_IB_QP_CREATE_ROCE_V2_GSI))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (init_attr->qp_type != IB_QPT_UD)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (init_attr->create_flags) {
if (udata && init_attr->create_flags & ~(sup_u_create_flags))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_CREATE_ROCE_V2_GSI |
init_attr->qp_type > IB_QPT_GSI) ||
(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
init_attr->qp_type != IB_QPT_GSI))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
switch (init_attr->qp_type) {
fallthrough;
case IB_QPT_XRC_INI:
if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
+ return -ENOSYS;
init_attr->recv_cq = init_attr->send_cq;
fallthrough;
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ case IB_QPT_UD:
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- fallthrough;
- case IB_QPT_UD:
- {
- err = create_qp_common(pd, init_attr, udata, 0, &qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ err = create_qp_common(pd, init_attr, udata, 0, qp);
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
qp->xrcdn = xrcdn;
-
break;
- }
case IB_QPT_SMI:
case IB_QPT_GSI:
{
int sqpn;
- /* Userspace is not allowed to create special QPs: */
- if (udata)
- return ERR_PTR(-EINVAL);
if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
1, 1, &sqpn, 0,
MLX4_RES_USAGE_DRIVER);
if (res)
- return ERR_PTR(res);
+ return res;
} else {
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(pd, init_attr, udata, sqpn, &qp);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ err = create_qp_common(pd, init_attr, udata, sqpn, qp);
if (err)
- return ERR_PTR(err);
+ return err;
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
}
default:
/* Don't support raw QPs */
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
-
- return &qp->ibqp;
+ return 0;
}
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata) {
struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
- struct ib_qp *ibqp;
struct mlx4_ib_dev *dev = to_mdev(device);
+ struct mlx4_ib_qp *qp;
+ int ret;
- ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
- if (!IS_ERR(ibqp) &&
- (init_attr->qp_type == IB_QPT_GSI) &&
+ mutex_init(&qp->mutex);
+ ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata);
+ if (ret) {
+ kfree(qp);
+ return ERR_PTR(ret);
+ }
+
+ if (init_attr->qp_type == IB_QPT_GSI &&
!(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
- struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ struct mlx4_ib_sqp *sqp = qp->sqp;
int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
if (is_eth &&
pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
sqp->roce_v2_gsi = NULL;
} else {
- sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
- sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ to_mqp(sqp->roce_v2_gsi)->flags |=
+ MLX4_IB_ROCE_V2_GSI_QP;
}
init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
}
}
- return ibqp;
+ return &qp->ibqp;
}
static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata);
}
- if (is_sqp(dev, mqp))
- kfree(to_msqp(mqp));
- else
- kfree(mqp);
+ kfree(mqp->sqp);
+ kfree(mqp);
return 0;
}
struct mlx4_ib_qp *mqp = to_mqp(qp);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
if (sqp->roce_v2_gsi)
ib_destroy_qp(sqp->roce_v2_gsi);
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+ store_sqp_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
int err = 0;
if (sqp->roce_v2_gsi)
return -EINVAL;
}
-static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+static int build_sriov_qp0_header(struct mlx4_ib_qp *qp,
const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
- struct ib_device *ib_dev = &mdev->ib_dev;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->ah);
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
sqp->ud_header.lrh.destination_lid =
sqp->ud_header.lrh.virtual_lane = 0;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey);
if (err)
return err;
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
- cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel);
+ cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
if (mlx4_is_master(mdev->dev)) {
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
} else {
- if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
}
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn);
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
}
#define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
for (i = 0; i < wr->wr.num_sge; ++i)
send_size += wr->wr.sg_list[i].length;
- is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
+ is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
enum ib_gid_type gid_type;
if (err)
return err;
} else {
- err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
- ah->av.ib.gid_index,
- &sgid, &gid_type);
+ err = fill_gid_by_hw_index(ibdev, qp->port,
+ ah->av.ib.gid_index, &sgid,
+ &gid_type);
if (!err) {
is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
* indexes don't necessarily match the hw ones, so
* we must use our own cache
*/
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
- demux[sqp->qp.port - 1].
- subnet_prefix)));
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
+ sqp->ud_header.grh.source_gid.global
+ .subnet_prefix =
+ cpu_to_be64(atomic64_read(
+ &(to_mdev(ib_dev)
+ ->sriov
+ .demux[qp->port - 1]
+ .subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global
+ .interface_id =
+ to_mdev(ib_dev)
+ ->sriov.demux[qp->port - 1]
+ .guid_cache[ah->av.ib.gid_index];
} else {
sqp->ud_header.grh.source_gid =
ah->ibah.sgid_attr->gid;
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid ==
- IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
+ mlx->flags |=
+ cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ?
+ MLX4_WQE_MLX_SLR :
+ 0) |
+ (sqp->ud_header.lrh.service_level << 8));
if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 :
- sl_to_vl(to_mdev(ib_dev),
- sqp->ud_header.lrh.service_level,
- sqp->qp.port);
- if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
+ sqp->ud_header.lrh.virtual_lane =
+ !qp->ibqp.qp_num ?
+ 15 :
+ sl_to_vl(to_mdev(ib_dev),
+ sqp->ud_header.lrh.service_level,
+ qp->port);
+ if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
return -EINVAL;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
}
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index,
+ if (!qp->ibqp.qp_num)
+ err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index,
&pkey);
else
- err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index,
+ err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index,
&pkey);
if (err)
return err;
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(qp);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
enum ib_gid_type gid_type;
union ib_gid gid;
- if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+ if (!fill_gid_by_hw_index(mdev, qp->port,
ah->av.ib.gid_index,
&gid, &gid_type))
qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
- &seglen);
+ err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
if (!qp)
return ERR_PTR(-ENOMEM);
+ mutex_init(&qp->mutex);
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
return err;
}
-void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata);
kfree(qp);
+ return 0;
}
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct ib_rwq_ind_table *rwq_ind_table;
struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+ struct ib_device *device = rwq_ind_table->device;
unsigned int base_wqn;
size_t min_resp_len;
- int i;
- int err;
+ int i, err = 0;
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (ind_tbl_size >
device->attrs.rss_caps.max_rwq_indirection_table_size) {
pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
ind_tbl_size,
device->attrs.rss_caps.max_rwq_indirection_table_size);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
base_wqn = init_attr->ind_tbl[0]->wq_num;
if (base_wqn % ind_tbl_size) {
pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
base_wqn);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
for (i = 1; i < ind_tbl_size; i++) {
if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
pr_debug("indirection table's WQNs aren't consecutive\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
}
- rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL);
- if (!rwq_ind_table)
- return ERR_PTR(-ENOMEM);
-
if (udata->outlen) {
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err)
- goto err;
}
- return rwq_ind_table;
-
-err:
- kfree(rwq_ind_table);
- return ERR_PTR(err);
-}
-
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
-{
- kfree(ib_rwq_ind_tbl);
- return 0;
+ return err;
}
struct mlx4_ib_drain_cqe {
if (IS_ERR(srq->umem))
return PTR_ERR(srq->umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- PAGE_SHIFT, &srq->mtt);
+ err = mlx4_mtt_init(
+ dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE),
+ PAGE_SHIFT, &srq->mtt);
if (err)
goto err_buf;
return 0;
}
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
mlx4_db_free(dev->dev, &msrq->db);
}
ib_umem_release(msrq->umem);
+ return 0;
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
int err;
struct mlx5_ib_create_ah_resp resp = {};
- u32 min_resp_len = offsetof(typeof(resp), dmac) +
- sizeof(resp.dmac);
+ u32 min_resp_len =
+ offsetofend(struct mlx5_ib_create_ah_resp, dmac);
if (udata->outlen < min_resp_len)
return -EINVAL;
return 0;
}
-
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
mlx5_cmd_exec_in(dev, destroy_tis, in);
}
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
MLX5_SET(destroy_rqt_in, in, uid, uid);
- mlx5_cmd_exec_in(dev, destroy_rqt, in);
+ return mlx5_cmd_exec_in(dev, destroy_rqt, in);
}
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
}
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
MLX5_SET(dealloc_pd_in, in, uid, uid);
- mlx5_cmd_exec_in(dev, dealloc_pd, in);
+ return mlx5_cmd_exec_in(dev, dealloc_pd, in);
}
int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
u16 uid);
void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
return ret;
}
-static void mlx5_ib_destroy_counters(struct ib_counters *counters)
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
if (mcounters->hw_cntrs_hndl)
mlx5_fc_destroy(to_mdev(counters->device)->mdev,
mcounters->hw_cntrs_hndl);
+ return 0;
}
static int mlx5_ib_create_counters(struct ib_counters *counters,
cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
}
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
if (!cnts->names)
return -ENOMEM;
cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
+ sizeof(*cnts->offsets), GFP_KERNEL);
if (!cnts->offsets)
goto err_names;
{
enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
- struct mlx5_ib_srq *srq;
+ struct mlx5_ib_srq *srq = NULL;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
u8 roce_packet_type;
if (qp->ibqp.xrcd) {
msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
- srq = to_mibsrq(msrq);
+ if (msrq)
+ srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
}
switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
- wc->network_hdr_type = RDMA_NETWORK_IB;
+ wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
break;
case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
wc->network_hdr_type = RDMA_NETWORK_IPV6;
return err;
}
-void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
+ int ret;
+
+ ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+ if (ret)
+ return ret;
- mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
if (udata)
destroy_cq_user(mcq, udata);
else
destroy_cq_kernel(dev, mcq);
+ return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
#define LAST_COUNTERS_FIELD counters
/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
+#define FIELDS_NOT_SUPPORTED(filter, field) \
+ memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \
+ sizeof(filter) - offsetofend(typeof(filter), field))
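The reworked macro still asks one question: is any byte set beyond the last supported filter field? A standalone sketch of that check, with a hypothetical struct and an open-coded stand-in for the kernel's memchr_inv():

	#include <stddef.h>
	#include <stdio.h>

	/* Hypothetical filter; 'field' is the last member this code supports. */
	struct filter {
		int	supported;
		int	field;
		int	newer_stuff;	/* anything set past 'field' must be rejected */
	};

	int main(void)
	{
		struct filter f = { .supported = 1, .field = 2, .newer_stuff = 3 };
		size_t end = offsetof(struct filter, field) + sizeof(f.field);
		const unsigned char *p = (const unsigned char *)&f + end;
		size_t i;
		int unsupported = 0;

		/* Open-coded memchr_inv(): is any byte past 'field' non-zero? */
		for (i = 0; i < sizeof(f) - end; i++)
			if (p[i])
				unsupported = 1;

		printf("unsupported fields set: %s\n", unsupported ? "yes" : "no");
		return 0;
	}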
int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
bool is_egress,
{
bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_namespace *ns = NULL;
+ enum mlx5_flow_namespace_type fn_type;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
int max_table_size;
log_max_ft_size));
esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ if (flow_is_multicast_only(flow_attr) && !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
+ reformat_l3_tunnel_to_l2))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
+ dev->mdev, log_max_ft_size));
fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
prio = &dev->flow_db->egress_prios[priority];
if (!dev->is_rep && !esw_encap &&
ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
+ build_leftovers_ft_param(&priority, &num_entries, &num_groups);
prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
return ERR_PTR(-EOPNOTSUPP);
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+ ns = mlx5_get_flow_namespace(
+ dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
+ break;
+ default:
+ break;
}
if (!ns)
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
- if (dev->is_rep) {
+ if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
struct mlx5_eswitch_rep *rep;
rep = dev->port[flow_attr->port - 1].rep;
int err;
static const struct ib_flow_attr flow_attr = {
.num_of_specs = 0,
+ .type = IB_FLOW_ATTR_SNIFFER,
.size = sizeof(flow_attr)
};
return ERR_PTR(err);
}
-
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
int underlay_qpn;
if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
+ min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
if (udata->inlen < min_ucmd_sz)
return ERR_PTR(-EOPNOTSUPP);
goto free_ucmd;
}
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
+ if (flow_attr->port > dev->num_ports ||
+ (flow_attr->flags &
+ ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) {
err = -EINVAL;
goto free_ucmd;
}
dst->tir_num = mqp->raw_packet_qp.rq.tirn;
}
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
mqp->underlay_qpn :
0;
handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
underlay_qpn, ucmd);
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
+ break;
+ default:
err = -EINVAL;
goto destroy_ft;
}
esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
+ switch (fs_matcher->ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
reformat_l3_tunnel_to_l2) &&
!esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
+ !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB:
max_table_size = BIT(
MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
priority = FDB_BYPASS_PATH;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
- log_max_ft_size));
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
priority = fs_matcher->priority;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- log_max_ft_size));
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
priority = fs_matcher->priority;
+ break;
+ default:
+ break;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ switch (fs_matcher->ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB:
prio = &dev->flow_db->fdb;
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
prio = &dev->flow_db->rdma_rx[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
prio = &dev->flow_db->rdma_tx[priority];
+ break;
+ default: return ERR_PTR(-EINVAL);
+ }
if (!prio)
return ERR_PTR(-EINVAL);
goto unlock;
}
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+ switch (dest_type) {
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
dst[dst_num].type = dest_type;
dst[dst_num++].tir_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
dst[dst_num++].ft_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ break;
+ default:
+ break;
}
-
if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dst[dst_num].counter_id = counter_id;
struct mlx5_ib_gsi_wr {
struct ib_cqe cqe;
struct ib_wc wc;
- int send_flags;
bool completed:1;
};
-struct mlx5_ib_gsi_qp {
- struct ib_qp ibqp;
- struct ib_qp *rx_qp;
- u8 port_num;
- struct ib_qp_cap cap;
- enum ib_sig_type sq_sig_type;
- /* Serialize qp state modifications */
- struct mutex mutex;
- struct ib_cq *cq;
- struct mlx5_ib_gsi_wr *outstanding_wrs;
- u32 outstanding_pi, outstanding_ci;
- int num_qps;
- /* Protects access to the tx_qps. Post send operations synchronize
- * with tx_qp creation in setup_qp(). Also protects the
- * outstanding_wrs array and indices.
- */
- spinlock_t lock;
- struct ib_qp **tx_qps;
-};
-
-static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
-{
- return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
-}
-
static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
/* Call with gsi->lock locked */
-static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+static void generate_completions(struct mlx5_ib_qp *mqp)
{
- struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+ struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
struct mlx5_ib_gsi_wr *wr;
u32 index;
if (!wr->completed)
break;
- if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
- wr->send_flags & IB_SEND_SIGNALED)
- WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
-
+ WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
wr->completed = false;
}
struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
struct mlx5_ib_gsi_wr *wr =
container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+ struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
u64 wr_id;
unsigned long flags;
wr_id = wr->wc.wr_id;
wr->wc = *wc;
wr->wc.wr_id = wr_id;
- wr->wc.qp = &gsi->ibqp;
+ wr->wc.qp = &mqp->ibqp;
- generate_completions(gsi);
+ generate_completions(mqp);
spin_unlock_irqrestore(&gsi->lock, flags);
}
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_gsi_qp *gsi;
- struct ib_qp_init_attr hw_init_attr = *init_attr;
- const u8 port_num = init_attr->port_num;
+ struct ib_qp_init_attr hw_init_attr = *attr;
+ const u8 port_num = attr->port_num;
int num_qps = 0;
int ret;
num_qps = MLX5_MAX_PORTS;
}
- gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
- if (!gsi)
- return ERR_PTR(-ENOMEM);
-
+ gsi = &mqp->gsi;
gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
- if (!gsi->tx_qps) {
- ret = -ENOMEM;
- goto err_free;
- }
+ if (!gsi->tx_qps)
+ return -ENOMEM;
- gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
- sizeof(*gsi->outstanding_wrs),
- GFP_KERNEL);
+ gsi->outstanding_wrs =
+ kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
+ GFP_KERNEL);
if (!gsi->outstanding_wrs) {
ret = -ENOMEM;
goto err_free_tx;
}
- mutex_init(&gsi->mutex);
-
mutex_lock(&dev->devr.mutex);
if (dev->devr.ports[port_num - 1].gsi) {
gsi->num_qps = num_qps;
spin_lock_init(&gsi->lock);
- gsi->cap = init_attr->cap;
- gsi->sq_sig_type = init_attr->sq_sig_type;
- gsi->ibqp.qp_num = 1;
+ gsi->cap = attr->cap;
gsi->port_num = port_num;
- gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+ gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
IB_POLL_SOFTIRQ);
if (IS_ERR(gsi->cq)) {
mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
hw_init_attr.cap.max_send_sge = 0;
hw_init_attr.cap.max_inline_data = 0;
}
- gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
+
+ gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL);
if (IS_ERR(gsi->rx_qp)) {
mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
PTR_ERR(gsi->rx_qp));
ret = PTR_ERR(gsi->rx_qp);
goto err_destroy_cq;
}
+ gsi->rx_qp->device = pd->device;
+ gsi->rx_qp->pd = pd;
+ gsi->rx_qp->real_qp = gsi->rx_qp;
+
+ gsi->rx_qp->qp_type = hw_init_attr.qp_type;
+ gsi->rx_qp->send_cq = hw_init_attr.send_cq;
+ gsi->rx_qp->recv_cq = hw_init_attr.recv_cq;
+ gsi->rx_qp->event_handler = hw_init_attr.event_handler;
+ spin_lock_init(&gsi->rx_qp->mr_lock);
+ INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs);
+ INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs);
- dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
+ dev->devr.ports[attr->port_num - 1].gsi = gsi;
mutex_unlock(&dev->devr.mutex);
- return &gsi->ibqp;
+ return 0;
err_destroy_cq:
ib_free_cq(gsi->cq);
kfree(gsi->outstanding_wrs);
err_free_tx:
kfree(gsi->tx_qps);
-err_free:
- kfree(gsi);
- return ERR_PTR(ret);
+ return ret;
}
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
const int port_num = gsi->port_num;
int qp_index;
int ret;
- mlx5_ib_dbg(dev, "destroying GSI QP\n");
-
mutex_lock(&dev->devr.mutex);
- ret = ib_destroy_qp(gsi->rx_qp);
+ ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL);
if (ret) {
mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
ret);
kfree(gsi->outstanding_wrs);
kfree(gsi->tx_qps);
- kfree(gsi);
+ kfree(mqp);
return 0;
}
.max_send_sge = gsi->cap.max_send_sge,
.max_inline_data = gsi->cap.max_inline_data,
},
- .sq_sig_type = gsi->sq_sig_type,
.qp_type = IB_QPT_UD,
.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
};
static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
{
+ struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
u16 qp_index;
+ mutex_lock(&dev->devr.mutex);
for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
setup_qp(gsi, qp_index);
+ mutex_unlock(&dev->devr.mutex);
}
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
int ret;
mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
- mutex_lock(&gsi->mutex);
ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
if (ret) {
mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
- goto unlock;
+ return ret;
}
if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
setup_qps(gsi);
-
-unlock:
- mutex_unlock(&gsi->mutex);
-
- return ret;
+ return 0;
}
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
int ret;
- mutex_lock(&gsi->mutex);
ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
qp_init_attr->cap = gsi->cap;
- mutex_unlock(&gsi->mutex);
-
return ret;
}
/* Call with gsi->lock locked */
-static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
struct ib_ud_wr *wr, struct ib_wc *wc)
{
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
struct mlx5_ib_gsi_wr *gsi_wr;
}
/* Call with gsi->lock locked */
-static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
- struct ib_ud_wr *wr)
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
{
struct ib_wc wc = {
{ .wr_id = wr->wr.wr_id },
.status = IB_WC_SUCCESS,
.opcode = IB_WC_SEND,
- .qp = &gsi->ibqp,
+ .qp = &mqp->ibqp,
};
int ret;
- ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+ ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
if (ret)
return ret;
- generate_completions(gsi);
+ generate_completions(mqp);
return 0;
}
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct ib_qp *tx_qp;
unsigned long flags;
int ret;
spin_lock_irqsave(&gsi->lock, flags);
tx_qp = get_tx_qp(gsi, &cur_wr);
if (!tx_qp) {
- ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+ ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
if (ret)
goto err;
spin_unlock_irqrestore(&gsi->lock, flags);
continue;
}
- ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+ ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
if (ret)
goto err;
int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}
if (!gsi)
return;
- mutex_lock(&gsi->mutex);
setup_qps(gsi);
- mutex_unlock(&gsi->mutex);
}
spin_unlock(&port->mp.mpi_lock);
}
-static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
- u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper,
+ u16 *active_speed, u8 *active_width)
{
switch (eth_proto_oper) {
case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
return 0;
}
-static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
u8 *active_width)
{
switch (eth_proto_oper) {
return 0;
}
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
u8 *active_width, bool ext)
{
return ext ?
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
u16 vlan_id = 0xffff;
u8 roce_version = 0;
u8 roce_l3_type = 0;
}
switch (gid_type) {
- case IB_GID_TYPE_IB:
+ case IB_GID_TYPE_ROCE:
roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
/* We support 'Gappy' memory registration too */
props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
}
- props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ /* IB_WR_REG_MR always requires changing the entity size with UMR */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
return 0;
}
-enum mlx5_ib_width {
- MLX5_IB_WIDTH_1X = 1 << 0,
- MLX5_IB_WIDTH_2X = 1 << 1,
- MLX5_IB_WIDTH_4X = 1 << 2,
- MLX5_IB_WIDTH_8X = 1 << 3,
- MLX5_IB_WIDTH_12X = 1 << 4
-};
-
-static void translate_active_width(struct ib_device *ibdev, u8 active_width,
- u8 *ib_width)
+static void translate_active_width(struct ib_device *ibdev, u16 active_width,
+ u8 *ib_width)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- if (active_width & MLX5_IB_WIDTH_1X)
+ if (active_width & MLX5_PTYS_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
- else if (active_width & MLX5_IB_WIDTH_2X)
+ else if (active_width & MLX5_PTYS_WIDTH_2X)
*ib_width = IB_WIDTH_2X;
- else if (active_width & MLX5_IB_WIDTH_4X)
+ else if (active_width & MLX5_PTYS_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
- else if (active_width & MLX5_IB_WIDTH_8X)
+ else if (active_width & MLX5_PTYS_WIDTH_8X)
*ib_width = IB_WIDTH_8X;
- else if (active_width & MLX5_IB_WIDTH_12X)
+ else if (active_width & MLX5_PTYS_WIDTH_12X)
*ib_width = IB_WIDTH_12X;
else {
mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
- (int)active_width);
+ active_width);
*ib_width = IB_WIDTH_4X;
}
u16 max_mtu;
u16 oper_mtu;
int err;
- u8 ib_link_width_oper;
+ u16 ib_link_width_oper;
u8 vl_hw_cap;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
props->port_cap_flags2 = rep->cap_mask2;
- err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
+ err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
+ &props->active_speed, port);
if (err)
goto out;
translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
- err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
- if (err)
- goto out;
-
mlx5_query_port_max_mtu(mdev, &max_mtu, port);
props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
return -EPERM;
if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2)))
return -EOPNOTSUPP;
break;
}
return 0;
}
-static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
+ return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
container_of(work, struct mlx5_ib_port_resources,
pkey_change_work);
- mutex_lock(&ports->devr->mutex);
mlx5_ib_gsi_pkey_change(ports->gsi);
- mutex_unlock(&ports->devr->mutex);
}
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s1->usecnt, 0);
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
- devr->ports[port].devr = devr;
- }
return 0;
static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
.alloc_mw = mlx5_ib_alloc_mw,
.dealloc_mw = mlx5_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw),
};
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
.destroy_wq = mlx5_ib_destroy_wq,
.get_netdev = mlx5_ib_get_netdev,
.modify_wq = mlx5_ib_modify_wq,
+
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
};
static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
int page_shift, __be64 *pas, int access_flags)
{
return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
- ib_umem_num_pages(umem), pas,
- access_flags);
+ ib_umem_num_dma_blocks(umem, PAGE_SIZE),
+ pas, access_flags);
}
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
{
u32 *in;
};
+struct mlx5_ib_gsi_qp {
+ struct ib_qp *rx_qp;
+ u8 port_num;
+ struct ib_qp_cap cap;
+ struct ib_cq *cq;
+ struct mlx5_ib_gsi_wr *outstanding_wrs;
+ u32 outstanding_pi, outstanding_ci;
+ int num_qps;
+ /* Protects access to the tx_qps. Post send operations synchronize
+ * with tx_qp creation in setup_qp(). Also protects the
+ * outstanding_wrs array and indices.
+ */
+ spinlock_t lock;
+ struct ib_qp **tx_qps;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
+ struct mlx5_ib_gsi_qp gsi;
};
struct mlx5_frag_buf buf;
unsigned long last_add;
};
-struct mlx5_ib_gsi_qp;
-
struct mlx5_ib_port_resources {
- struct mlx5_ib_resources *devr;
struct mlx5_ib_gsi_qp *gsi;
struct work_struct pkey_change_work;
};
int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
size_t buflen, size_t *bc);
int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
struct ib_sge *sg_list,
u32 num_sge,
struct uverbs_attr_bundle *attrs);
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- unsigned int entry);
+ unsigned int entry, int access_flags);
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
struct ib_ucontext *context,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
{
return -EOPNOTSUPP;
}
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
/* GSI QP helper functions */
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr);
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr);
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp);
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask);
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
static inline int is_qp1(enum ib_qp_type qp_type)
{
- return qp_type == MLX5_IB_QPT_HW_GSI;
+ return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
#define MLX5_MAX_UMR_SHIFT 16
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
-static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
- bool do_modify_atomic, int access_flags)
+static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
+ size_t length)
{
+ /*
+ * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
+ * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
+ * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
+ * can never be enabled without this capability. Simplify this weird
+ * can never be enabled without this capability. Simplify handling of this
+ * quirky hardware by just saying it can't use PAS lists with UMR at
+ */
if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return false;
- if (do_modify_atomic &&
+ /*
+ * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
+ * used.
+ */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
+ return false;
+ return true;
+}
+
+/*
+ * true if an existing MR can be reconfigured to new access_flags using UMR.
+ * Older HW cannot use UMR to update certain elements of the MKC. See
+ * umr_check_mkey_mask() and get_umr_update_access_mask().
+ */
+static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
MLX5_CAP_GEN(dev->mdev, atomic) &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return false;
- if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return false;
- if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
- !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
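The XOR of current and target access flags isolates exactly the bits that change, so only those need the UMR capability checks above. A trivial sketch with illustrative flag values (not the real IB_ACCESS_* encoding):

	#include <stdio.h>

	/* Illustrative stand-ins for the access bits checked above. */
	#define ACC_REMOTE_ATOMIC	(1u << 0)
	#define ACC_RELAXED_ORDERING	(1u << 1)

	int main(void)
	{
		unsigned int cur    = ACC_REMOTE_ATOMIC;
		unsigned int target = ACC_REMOTE_ATOMIC | ACC_RELAXED_ORDERING;
		unsigned int diffs  = cur ^ target;

		/* Only bits that actually change require a UMR capability check. */
		printf("atomic changes: %d, relaxed ordering changes: %d\n",
		       !!(diffs & ACC_REMOTE_ATOMIC),
		       !!(diffs & ACC_RELAXED_ORDERING));
		return 0;
	}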
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write,
+ !!(acc & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read,
+ !!(acc & IB_ACCESS_RELAXED_ORDERING));
+
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
static void
assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
u32 *in)
return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
+ u64 length)
{
return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
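The capacity test above compares the cached MR's size, (1 << order) adapter pages, against the requested length plus the offset of start within its first adapter page. A standalone sketch with an illustrative page size:

	#include <stdint.h>
	#include <stdio.h>

	#define ADAPTER_PAGE_SIZE 4096ULL	/* illustrative value, not the driver constant */

	/* Can an MR holding (1 << order) adapter pages cover [start, start + length)? */
	static int pas_fits(unsigned int order, uint64_t start, uint64_t length)
	{
		return ((uint64_t)1 << order) * ADAPTER_PAGE_SIZE >=
		       length + (start & (ADAPTER_PAGE_SIZE - 1));
	}

	int main(void)
	{
		/* 8 pages of capacity; 7 pages requested, misaligned by 1 KiB: fits. */
		printf("fits: %d\n", pas_fits(3, 0x1400, 7 * ADAPTER_PAGE_SIZE));
		/* A full 8-page request with the same misalignment does not fit. */
		printf("fits: %d\n", pas_fits(3, 0x1400, 8 * ADAPTER_PAGE_SIZE));
		return 0;
	}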
mr->cache_ent = ent;
mr->dev = ent->dev;
+ set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);
return mr;
/* Allocate a special entry from the cache */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- unsigned int entry)
+ unsigned int entry, int access_flags)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
entry >= ARRAY_SIZE(cache->ent)))
return ERR_PTR(-EINVAL);
+ /* Matches access in alloc_cache_mr() */
+ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
ent = &cache->ent[entry];
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
}
+ mr->access_flags = access_flags;
return mr;
}
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- !dev->is_rep &&
- mlx5_core_is_pf(dev->mdev))
+ !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
+ mlx5_ib_can_load_pas_with_umr(dev, 0))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
ent->limit = 0;
return 0;
}
-static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
- struct ib_pd *pd)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
-
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
- MLX5_SET(mkc, mkc, relaxed_ordering_write,
- !!(acc & IB_ACCESS_RELAXED_ORDERING));
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
- MLX5_SET(mkc, mkc, relaxed_ordering_read,
- !!(acc & IB_ACCESS_RELAXED_ORDERING));
-
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, start_addr);
-}
-
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
if (!ent)
return ERR_PTR(-E2BIG);
+
+ /* Matches access in alloc_cache_mr() */
+ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
+ if (populate) {
+ if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
+ err = -EINVAL;
+ goto err_2;
+ }
mlx5_ib_populate_pas(dev, umem, page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ }
/* The pg_access bit allows setting the access flags
* in the page list submitted with the command. */
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
+ populate ? pd : dev->umrc.pd);
MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
- MLX5_SET(mkc, mkc, relaxed_ordering_write,
- !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
- MLX5_SET(mkc, mkc, relaxed_ordering_read,
- !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
- MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET64(mkc, mkc, start_addr, virt_addr);
MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
get_octo_len(virt_addr, length, page_shift));
MLX5_SET(mkc, mkc, log_page_size, page_shift);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
get_octo_len(virt_addr, length, page_shift));
struct uverbs_attr_bundle *attrs)
{
if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
- advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
return -EOPNOTSUPP;
return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
- bool use_umr;
+ bool xlt_with_umr;
struct ib_umem *umem;
int page_shift;
int npages;
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+ xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
+ /* ODP requires xlt update via umr to work. */
+ if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
+ return ERR_PTR(-EINVAL);
+
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
length == U64_MAX) {
if (virt_addr != start)
if (err < 0)
return ERR_PTR(err);
- use_umr = mlx5_ib_can_use_umr(dev, true, access_flags);
-
- if (order <= mr_cache_max_order(dev) && use_umr) {
+ if (xlt_with_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
page_shift, order, access_flags);
- if (PTR_ERR(mr) == -EAGAIN) {
- mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
+ if (IS_ERR(mr))
mr = NULL;
- }
- } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- err = -EINVAL;
- pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
- goto error;
- }
- use_umr = false;
}
if (!mr) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !use_umr);
+ page_shift, access_flags, !xlt_with_umr);
mutex_unlock(&dev->slow_path_mutex);
}
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
- if (use_umr) {
+ if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+ /*
+ * If the MR was created with reg_create then it will be
+ * configured properly but left disabled. It is safe to go ahead
+ * and configure it again via UMR while enabling it.
+ */
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
- if (access_flags & IB_ACCESS_ON_DEMAND)
- update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
-
err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
update_xlt_flags);
-
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
dereg_mr(dev, mr);
return ERR_PTR(err);
}
+
+ err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
+ if (err) {
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+ }
}
return &mr->ibmr;
goto err;
}
- if (!mlx5_ib_can_use_umr(dev, true, access_flags) ||
- (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
+ if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
+ access_flags) ||
+ !mlx5_ib_can_load_pas_with_umr(dev, len) ||
+ (flags & IB_MR_REREG_TRANS &&
+ !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
/*
* UMR can't be used - MKey needs to be replaced.
*/
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ /* This is only used from the kernel, so setting the PD is OK. */
+ set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
max_num_meta_sg);
}
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mw *mw = NULL;
+ struct mlx5_ib_mw *mw = to_mmw(ibmw);
u32 *in = NULL;
void *mkc;
int ndescs;
err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
- return ERR_PTR(err);
+ return err;
if (req.comp_mask || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(req) &&
!ib_is_udata_cleared(udata, sizeof(req),
udata->inlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
in = kzalloc(inlen, GFP_KERNEL);
- if (!mw || !in) {
+ if (!in) {
err = -ENOMEM;
goto free;
}
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
- MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+ MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
goto free;
mw->mmkey.type = MLX5_MKEY_MW;
- mw->ibmw.rkey = mw->mmkey.key;
+ ibmw->rkey = mw->mmkey.key;
mw->ndescs = ndescs;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
+ resp.response_length =
+ min(offsetofend(typeof(resp), response_length), udata->outlen);
if (resp.response_length) {
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err) {
- mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
- goto free;
- }
+ if (err)
+ goto free_mkey;
}
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
}
kfree(in);
- return &mw->ibmw;
+ return 0;
free_mkey:
mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
- kfree(mw);
kfree(in);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
- int err;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
synchronize_srcu(&dev->odp_srcu);
}
- err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
- if (err)
- return err;
- kfree(mmw);
- return 0;
+ return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
memset(caps, 0, sizeof(*caps));
if (!MLX5_CAP_GEN(dev->mdev, pg) ||
- !mlx5_ib_can_use_umr(dev, true, 0))
+ !mlx5_ib_can_load_pas_with_umr(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
if (IS_ERR(odp))
return ERR_CAST(odp);
- ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY);
+ ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY,
+ imr->access_flags);
if (IS_ERR(mr))
goto out_umem;
mr->ibmr.pd = imr->ibmr.pd;
- mr->access_flags = imr->access_flags;
mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
- imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY);
+ imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags);
if (IS_ERR(imr)) {
err = PTR_ERR(imr);
goto out_umem;
}
imr->ibmr.pd = &pd->ibpd;
- imr->access_flags = access_flags;
imr->mmkey.iova = 0;
imr->umem = &umem_odp->umem;
imr->ibmr.lkey = imr->mmkey.key;
}
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
+#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
+#define MLX5_PF_FLAGS_ENABLE BIT(3)
static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
u64 user_va, size_t bcnt, u32 *bytes_mapped,
u32 flags)
{
int page_shift, ret, np;
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
- unsigned long current_seq;
u64 access_mask;
u64 start_idx;
+ bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
+ u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
+
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
page_shift = odp->page_shift;
start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
if (odp->umem.writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
- current_seq = mmu_interval_read_begin(&odp->notifier);
-
- np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
- current_seq);
+ np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
if (np < 0)
return np;
- mutex_lock(&odp->umem_mutex);
- if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- page_shift, MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
- }
+ /*
+ * No need to check whether the MTTs really belong to this MR, since
+ * ib_umem_odp_map_dma_and_lock already checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
flags);
}
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+{
+ u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
+ int ret;
+
+ if (enable)
+ flags |= MLX5_PF_FLAGS_ENABLE;
+
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
+ mr->umem->address, mr->umem->length, NULL,
+ flags);
+ return ret >= 0 ? 0 : ret;
+}
+
struct pf_frame {
struct pf_frame *next;
u32 key;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+ pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;
+
if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
num_sge);
resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
resp->tirn = rq->tirn;
resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
resp->tir_icm_addr = MLX5_GET(
create_tir_out, out, icm_address_31_0);
resp->tir_icm_addr |=
if (mucontext->devx_uid) {
params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
params->resp.tirn = qp->rss_qp.tirn;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
params->resp.tir_icm_addr =
MLX5_GET(create_tir_out, out, icm_address_31_0);
params->resp.tir_icm_addr |=
u32 uidx = params->uidx;
void *dctc;
+ if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
+ return -EOPNOTSUPP;
+
qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
if (!qp->dct.in)
return -ENOMEM;
return -EINVAL;
}
- switch (attr->qp_type) {
- case IB_QPT_SMI:
- case MLX5_IB_QPT_HW_GSI:
- case MLX5_IB_QPT_REG_UMR:
- case IB_QPT_GSI:
- mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n",
- attr->qp_type);
- return -EINVAL;
- default:
- break;
- }
-
/*
* We don't need to see this warning, it means that kernel code
* missing ib_pd. Placed here to catch developer's mistakes.
goto out;
}
- if (qp->type == MLX5_IB_QPT_DCT) {
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
err = create_dct(dev, pd, qp, params);
- goto out;
- }
-
- if (qp->type == IB_QPT_XRC_TGT) {
+ break;
+ case IB_QPT_XRC_TGT:
err = create_xrc_tgt_qp(dev, qp, params);
- goto out;
+ break;
+ case IB_QPT_GSI:
+ err = mlx5_ib_create_gsi(pd, qp, params->attr);
+ break;
+ default:
+ if (params->udata)
+ err = create_user_qp(dev, pd, qp, params);
+ else
+ err = create_kernel_qp(dev, pd, qp, params);
}
- if (params->udata)
- err = create_user_qp(dev, pd, qp, params);
- else
- err = create_kernel_qp(dev, pd, qp, params);
-
out:
if (err) {
mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
if (err)
return ERR_PTR(err);
- if (attr->qp_type == IB_QPT_GSI)
- return mlx5_ib_gsi_create_qp(pd, attr);
-
params.udata = udata;
params.uidx = MLX5_IB_DEFAULT_UIDX;
params.attr = attr;
return &qp->ibqp;
destroy_qp:
- if (qp->type == MLX5_IB_QPT_DCT) {
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
mlx5_ib_destroy_dct(qp);
- } else {
+ break;
+ case IB_QPT_GSI:
+ mlx5_ib_destroy_gsi(qp);
+ break;
+ default:
/*
* These lines below are a temporary solution until QP allocation
* is moved under IB/core responsibility.
struct mlx5_ib_qp *mqp = to_mqp(qp);
if (unlikely(qp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_destroy_qp(qp);
+ return mlx5_ib_destroy_gsi(mqp);
if (mqp->type == MLX5_IB_QPT_DCT)
return mlx5_ib_destroy_dct(mqp);
MLX5_PATH_FLAG_COUNTER = 1 << 2,
};
+static int ib_to_mlx5_rate_map(u8 rate)
+{
+ switch (rate) {
+ case IB_RATE_PORT_CURRENT:
+ return 0;
+ case IB_RATE_56_GBPS:
+ return 1;
+ case IB_RATE_25_GBPS:
+ return 2;
+ case IB_RATE_100_GBPS:
+ return 3;
+ case IB_RATE_200_GBPS:
+ return 4;
+ case IB_RATE_50_GBPS:
+ return 5;
+ default:
+ return rate + MLX5_STAT_RATE_OFFSET;
+ }
+
+ return 0;
+}
+
static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
+ u32 stat_rate_support;
+
if (rate == IB_RATE_PORT_CURRENT)
return 0;
if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
return -EINVAL;
+ stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
while (rate != IB_RATE_PORT_CURRENT &&
- !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
+ !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
--rate;
- return rate ? rate + MLX5_STAT_RATE_OFFSET : rate;
+ return ib_to_mlx5_rate_map(rate);
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
MLX5_MAX_PORTS + 1;
}
-static bool qp_supports_affinity(struct ib_qp *qp)
+static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
{
- if ((qp->qp_type == IB_QPT_RC) ||
- (qp->qp_type == IB_QPT_UD) ||
- (qp->qp_type == IB_QPT_UC) ||
- (qp->qp_type == IB_QPT_RAW_PACKET) ||
- (qp->qp_type == IB_QPT_XRC_INI) ||
- (qp->qp_type == IB_QPT_XRC_TGT))
+ if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
+ (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
+ (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI))
return true;
return false;
}
unsigned int tx_affinity;
if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
- qp_supports_affinity(qp)))
+ qp_supports_affinity(mqp)))
return 0;
if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
MLX5_SET(dctc, dctc, rae, 1);
}
MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
- MLX5_SET(dctc, dctc, port, attr->port_num);
+ if (mlx5_lag_is_active(dev->mdev))
+ MLX5_SET(dctc, dctc, port,
+ get_tx_affinity_rr(dev, udata));
+ else
+ MLX5_SET(dctc, dctc, port, attr->port_num);
set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
MLX5_SET(dctc, dctc, counter_set_id, set_id);
return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
-void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
+ return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
int err;
size_t required_cmd_sz;
- required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes)
- + sizeof(ucmd.single_stride_log_num_of_bytes);
+ required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
+ single_stride_log_num_of_bytes);
if (udata->inlen < required_cmd_sz) {
mlx5_ib_dbg(dev, "invalid inlen\n");
return -EINVAL;
if (!udata)
return ERR_PTR(-ENOSYS);
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
rwq->ibwq.wq_num = rwq->core_qp.qpn;
rwq->ibwq.state = IB_WQS_RESET;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = offsetofend(
+ struct mlx5_ib_create_wq_resp, response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
return ERR_PTR(err);
}
-void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+ int ret;
- mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ if (ret)
+ return ret;
destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
+ return 0;
}
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
+ to_mrwq_ind_table(ib_rwq_ind_table);
+ struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
int sz = 1 << init_attr->log_ind_tbl_size;
struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
size_t min_resp_len;
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (init_attr->log_ind_tbl_size >
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
init_attr->log_ind_tbl_size,
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
-
- rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
- if (!rwq_ind_tbl)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- goto err;
- }
+ if (!in)
+ return -ENOMEM;
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
-
if (err)
- goto err;
+ return err;
rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
+ response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
}
- return &rwq_ind_tbl->ib_rwq_ind_tbl;
+ return 0;
err_copy:
mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-err:
- kfree(rwq_ind_tbl);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-
- kfree(rwq_ind_tbl);
- return 0;
+ return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
}
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
void *rqc;
void *in;
- required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
if (udata->inlen < required_cmd_sz)
return -EINVAL;
int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec);
-void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
- struct mlx5_core_qp *rq);
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq);
int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *sq);
void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
return err;
}
-void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
- struct mlx5_core_qp *rq)
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq)
{
destroy_resource_common(dev, rq);
destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ return 0;
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
return ret;
}
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
+ int ret;
+
+ ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq);
+ if (ret)
+ return ret;
- mlx5_cmd_destroy_srq(dev, &msrq->msrq);
-
- if (srq->uobject) {
- mlx5_ib_db_unmap_user(
- rdma_udata_to_drv_context(
- udata,
- struct mlx5_ib_ucontext,
- ibucontext),
- &msrq->db);
- ib_umem_release(msrq->umem);
- } else {
+ if (udata)
+ destroy_srq_user(srq->pd, msrq, udata);
+ else
destroy_srq_kernel(dev, msrq);
- }
+ return 0;
}
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in);
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out);
int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
return err;
}
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *tmp;
int err;
- tmp = xa_erase_irq(&table->array, srq->srqn);
- if (!tmp || tmp != srq)
- return;
+ /* Delete entry, but leave index occupied */
+ tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0);
+ if (WARN_ON(tmp != srq))
+ return xa_err(tmp) ?: -EINVAL;
err = destroy_srq_split(dev, srq);
- if (err)
- return;
+ if (err) {
+ /*
+ * We don't need to check the returned result for an error,
+ * because we are restoring the entry into pre-allocated xarray
+ * space and it can't fail at this stage.
+ */
+ xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
+ return err;
+ }
+ xa_erase_irq(&table->array, srq->srqn);
mlx5_core_res_put(&srq->common);
wait_for_completion(&srq->common.free);
+ return 0;
}
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
seg->status = MLX5_MKEY_STATUS_FREE;
}
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
+ struct mlx5_mkey_seg *seg,
const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
- MLX5_SET(mkc, seg, relaxed_ordering_write,
- !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
- MLX5_SET(mkc, seg, relaxed_ordering_read,
- !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ MLX5_SET(mkc, seg, relaxed_ordering_read,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
if (umrwr->pd)
MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
u8 flags = 0;
- if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
- mlx5_ib_warn(to_mdev(qp->ibqp.device),
- "Fast update of %s for MR is disabled\n",
- (MLX5_CAP_GEN(dev->mdev,
- umr_modify_entity_size_disabled)) ?
- "entity size" :
- "atomic access");
+ /* Matches access in mlx5_set_umr_free_mkey() */
+ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
+ mlx5_ib_warn(
+ to_mdev(qp->ibqp.device),
+ "Fast update for MR access flags is not possible\n");
return -EINVAL;
}
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_reg_mkey_segment(*seg, wr);
+ set_reg_mkey_segment(dev, *seg, wr);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp,
+ struct mthca_qp *qp,
struct ib_udata *udata);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
return 0;
}
-static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
+ return 0;
}
static int mthca_ah_create(struct ib_ah *ibah,
init_attr->ah_attr, ah);
}
-static void mthca_ah_destroy(struct ib_ah *ah, u32 flags)
+static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
+ return 0;
}
static int mthca_create_srq(struct ib_srq *ibsrq,
return 0;
}
-static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
if (udata) {
struct mthca_ucontext *context =
}
mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
+ return 0;
}
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
- /* Don't allow userspace to create special QPs */
- if (udata)
- return ERR_PTR(-EINVAL);
-
- qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
+ qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+ if (!qp->sqp) {
+ kfree(qp);
+ return ERR_PTR(-ENOMEM);
+ }
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
- to_msqp(qp), udata);
+ qp, udata);
break;
}
default:
}
if (err) {
+ kfree(qp->sqp);
kfree(qp);
return ERR_PTR(err);
}
to_mqp(qp)->rq.db_index);
}
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
- kfree(qp);
+ kfree(to_mqp(qp)->sqp);
+ kfree(to_mqp(qp));
return 0;
}
return ret;
}
-static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
if (udata) {
struct mthca_ucontext *context =
to_mcq(cq)->set_ci_db_index);
}
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
+ return 0;
}
static inline u32 convert_access(int acc)
u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct mthca_ucontext *context = rdma_udata_to_drv_context(
udata, struct mthca_ucontext, ibucontext);
struct mthca_mr *mr;
goto err;
}
- n = ib_umem_num_pages(mr->umem);
+ n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
- pages[i++] = sg_page_iter_dma_address(&sg_iter);
+ rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) {
+ pages[i++] = rdma_block_iter_dma_address(&biter);
/*
* Be friendly to write_mtt and pass it chunks
__be32 *db;
};
+struct mthca_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ int header_buf_size;
+ void *header_buf;
+ dma_addr_t header_dma;
+};
+
struct mthca_qp {
struct ib_qp ibqp;
int refcount;
wait_queue_head_t wait;
struct mutex mutex;
-};
-
-struct mthca_sqp {
- struct mthca_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- int header_buf_size;
- void *header_buf;
- dma_addr_t header_dma;
+ struct mthca_sqp *sqp;
};
static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
return container_of(ibqp, struct mthca_qp, ibqp);
}
-static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
-{
- return container_of(qp, struct mthca_sqp, qp);
-}
-
#endif /* MTHCA_PROVIDER_H */
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_attrs(to_msqp(qp), attr, attr_mask);
+ store_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp,
+ struct mthca_qp *qp,
struct ib_udata *udata)
{
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
- sqp->qp.transport = MLX;
- err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
+ qp->transport = MLX;
+ err = mthca_set_qp_size(dev, cap, pd, qp);
if (err)
return err;
- sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
- sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
- &sqp->header_dma, GFP_KERNEL);
- if (!sqp->header_buf)
+ qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE;
+ qp->sqp->header_buf =
+ dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ &qp->sqp->header_dma, GFP_KERNEL);
+ if (!qp->sqp->header_buf)
return -ENOMEM;
spin_lock_irq(&dev->qp_table.lock);
if (mthca_array_get(&dev->qp_table.qp, mqpn))
err = -EBUSY;
else
- mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
+ mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
spin_unlock_irq(&dev->qp_table.lock);
if (err)
goto err_out;
- sqp->qp.port = port;
- sqp->qp.qpn = mqpn;
- sqp->qp.transport = MLX;
+ qp->port = port;
+ qp->qpn = mqpn;
+ qp->transport = MLX;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, &sqp->qp, udata);
+ send_policy, qp, udata);
if (err)
goto err_out_free;
mthca_unlock_cqs(send_cq, recv_cq);
- err_out:
- dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
- sqp->header_buf, sqp->header_dma);
-
+err_out:
+ dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ qp->sqp->header_buf, qp->sqp->header_dma);
return err;
}
if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
- dma_free_coherent(&dev->pdev->dev,
- to_msqp(qp)->header_buf_size,
- to_msqp(qp)->header_buf,
- to_msqp(qp)->header_dma);
+ dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ qp->sqp->header_buf, qp->sqp->header_dma);
} else
mthca_free(&dev->qp_table.alloc, qp->qpn);
}
/* Create UD header for an MLX send and build a data segment for it */
-static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
- int ind, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind,
+ const struct ib_ud_wr *wr,
struct mthca_mlx_seg *mlx,
struct mthca_data_seg *data)
{
+ struct mthca_sqp *sqp = qp->sqp;
int header_size;
int err;
u16 pkey;
if (err)
return err;
mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
+ mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
return -EINVAL;
}
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ sqp->ud_header.lrh.virtual_lane = !qp->ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- sqp->pkey_index, &pkey);
+ if (!qp->ibqp.qp_num)
+ ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index,
+ &pkey);
else
- ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- wr->pkey_index, &pkey);
+ ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index,
+ &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header,
sqp->header_buf +
ind * MTHCA_UD_HEADER_SIZE);
data->byte_count = cpu_to_be32(header_size);
- data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
+ data->lkey = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey);
data->addr = cpu_to_be64(sqp->header_dma +
ind * MTHCA_UD_HEADER_SIZE);
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
- wqe - sizeof (struct mthca_next_seg),
- wqe);
+ err = build_mlx_header(
+ dev, qp, ind, ud_wr(wr),
+ wqe - sizeof(struct mthca_next_seg), wqe);
if (err) {
*bad_wr = wr;
goto out;
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
- wqe - sizeof (struct mthca_next_seg),
- wqe);
+ err = build_mlx_header(
+ dev, qp, ind, ud_wr(wr),
+ wqe - sizeof(struct mthca_next_seg), wqe);
if (err) {
*bad_wr = wr;
goto out;
u32 num_pbes;
u32 pbl_size;
u32 pbe_size;
- u64 fbo;
u64 va;
};
return status;
}
-void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
ocrdma_free_av(dev, ah);
+ return 0;
}
int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags,
int i;
struct ocrdma_reg_nsmr *cmd;
struct ocrdma_reg_nsmr_rsp *rsp;
+ u64 fbo = hwmr->va & (hwmr->pbe_size - 1);
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
if (!cmd)
OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
cmd->totlen_low = hwmr->len;
cmd->totlen_high = upper_32_bits(hwmr->len);
- cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
- cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+ cmd->fbo_low = lower_32_bits(fbo);
+ cmd->fbo_high = upper_32_bits(fbo);
cmd->va_loaddr = (u32) hwmr->va;
cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
}
static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
- u8 *ib_speed, u8 *ib_width)
+ u16 *ib_speed, u8 *ib_width)
{
int status;
u8 speed;
return status;
}
-void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
if (is_ucontext_pd(uctx, pd)) {
ocrdma_release_ucontext_pd(uctx);
- return;
+ return 0;
}
}
_ocrdma_dealloc_pd(dev, pd);
+ return 0;
}
static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
return status;
}
-static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
- u32 num_pbes)
+static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
{
struct ocrdma_pbe *pbe;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
- struct ib_umem *umem = mr->umem;
- int pbe_cnt, total_num_pbes = 0;
+ int pbe_cnt;
u64 pg_addr;
if (!mr->hwmr.num_pbes)
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
- for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+ rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
/* store the page address in pbe */
- pg_addr = sg_page_iter_dma_address(&sg_iter);
+ pg_addr = rdma_block_iter_dma_address(&biter);
pbe->pa_lo = cpu_to_le32(pg_addr);
pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
pbe_cnt += 1;
- total_num_pbes += 1;
pbe++;
- /* if done building pbes, issue the mbx cmd. */
- if (total_num_pbes == num_pbes)
- return;
-
/* if the given pbl is full storing the pbes,
* move to next pbl.
*/
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_mr *mr;
struct ocrdma_pd *pd;
- u32 num_pbes;
pd = get_ocrdma_pd(ibpd);
status = -EFAULT;
goto umem_err;
}
- num_pbes = ib_umem_page_count(mr->umem);
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+ status = ocrdma_get_pbl_info(
+ dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
if (status)
goto umem_err;
mr->hwmr.pbe_size = PAGE_SIZE;
- mr->hwmr.fbo = ib_umem_offset(mr->umem);
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
if (status)
goto umem_err;
- build_user_pbes(dev, mr, num_pbes);
+ build_user_pbes(dev, mr);
status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
if (status)
goto mbx_err;
spin_unlock_irqrestore(&cq->cq_lock, flags);
}
-void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_eq *eq = NULL;
ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
+ return 0;
}
static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
return status;
}
-void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct ocrdma_srq *srq;
struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
kfree(srq->idx_bit_fields);
kfree(srq->rqe_wr_id_tbl);
+ return 0;
}
/* unprivileged verbs and their support functions. */
int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
struct ib_qp *ocrdma_create_qp(struct ib_pd *,
struct ib_qp_init_attr *attrs,
int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
enum ib_srq_attr_mask, struct ib_udata *);
int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
-void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
const struct ib_recv_wr **bad_recv_wr);
}
static const struct ib_device_ops qedr_roce_dev_ops = {
+ .alloc_xrcd = qedr_alloc_xrcd,
+ .dealloc_xrcd = qedr_dealloc_xrcd,
.get_port_immutable = qedr_roce_port_immutable,
.query_pkey = qedr_query_pkey,
};
dev->ibdev.node_type = RDMA_NODE_IB_CA;
ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
+
+ dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) |
+ QEDR_UVERBS(CLOSE_XRCD) |
+ QEDR_UVERBS(CREATE_XSRQ);
}
static const struct ib_device_ops qedr_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
};
qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx);
/* Part 2 - check capabilities */
- page_size = ~dev->attr.page_size_caps + 1;
+ page_size = ~qed_attr->page_size_caps + 1;
if (page_size > PAGE_SIZE) {
DP_ERR(dev,
"Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n",
event.event = IB_EVENT_SRQ_ERR;
event_type = EVENT_TYPE_SRQ;
break;
+ case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR:
+ event.event = IB_EVENT_CQ_ERR;
+ event_type = EVENT_TYPE_CQ;
+ break;
default:
DP_ERR(dev, "unsupported event %d on handle=%llx\n",
e_code, roce_handle64);
case QEDE_CHANGE_ADDR:
qedr_mac_address_change(dev);
break;
+ case QEDE_CHANGE_MTU:
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ if (dev->ndev->mtu != dev->iwarp_max_mtu)
+ DP_NOTICE(dev,
+ "Mtu was changed from %d to %d. This will not take affect for iWARP until qedr is reloaded\n",
+ dev->iwarp_max_mtu, dev->ndev->mtu);
+ break;
default:
pr_err("Event not supported\n");
}
struct qedr_ucontext *uctx;
};
+struct qedr_xrcd {
+ struct ib_xrcd ibxrcd;
+ u16 xrcd_id;
+};
+
struct qedr_qp_hwq_info {
/* WQE Elements */
struct qed_chain pbl;
struct ib_umem *prod_umem;
u16 srq_id;
u32 srq_limit;
+ bool is_xrc;
/* lock to protect srq recv post */
spinlock_t lock;
};
return container_of(ibpd, struct qedr_pd, ibpd);
}
+static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct qedr_xrcd, ibxrcd);
+}
+
static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct qedr_cq, ibcq);
return container_of(ibsrq, struct qedr_srq, ibsrq);
}
+static inline bool qedr_qp_has_srq(struct qedr_qp *qp)
+{
+ return qp->srq;
+}
+
+static inline bool qedr_qp_has_sq(struct qedr_qp *qp)
+{
+ if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT)
+ return false;
+
+ return true;
+}
+
+static inline bool qedr_qp_has_rq(struct qedr_qp *qp)
+{
+ if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp))
+ return false;
+
+ return true;
+}
+
static inline struct qedr_user_mmap_entry *
get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
{
struct qedr_dev *dev = ep->dev;
struct qedr_qp *qp;
struct qed_iwarp_accept_in params;
- int rc = 0;
+ int rc;
DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
params.ord = conn_param->ord;
if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
- &qp->iwarp_cm_flags))
+ &qp->iwarp_cm_flags)) {
+ rc = -EINVAL;
goto err; /* QP already destroyed */
+ }
rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms);
if (rc) {
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (!rdma_protocol_iwarp(&dev->ibdev, 1))
+ attr->device_cap_flags |= IB_DEVICE_XRC;
attr->max_send_sge = qattr->max_sge;
attr->max_recv_sge = qattr->max_sge;
attr->max_sge_rd = qattr->max_sge;
attr->local_ca_ack_delay = qattr->dev_ack_delay;
attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
- attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+ attr->max_pkeys = qattr->max_pkey;
attr->max_ah = qattr->max_ah;
return 0;
}
-static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
u8 *ib_width)
{
switch (speed) {
attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
attr->max_mtu = IB_MTU_4096;
- attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
attr->lid = 0;
attr->lmc = 0;
attr->sm_lid = 0;
attr->sm_sl = 0;
attr->ip_gids = true;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
attr->gid_tbl_len = 1;
} else {
+ attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
attr->gid_tbl_len = QEDR_MAX_SGID;
attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
}
return 0;
}
-void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_pd *pd = get_qedr_pd(ibpd);
DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+ return 0;
+}
+
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
+
+ return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
}
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+ u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
+
+ dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
+ return 0;
+}
static void qedr_free_pbl(struct qedr_dev *dev,
struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
int pbe_cnt, total_num_pbes = 0;
- u32 fw_pg_cnt, fw_pg_per_umem_pg;
struct qedr_pbl *pbl_tbl;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct regpair *pbe;
- u64 pg_addr;
if (!pbl_info->num_pbes)
return;
pbe_cnt = 0;
- fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
+ rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
+ u64 pg_addr = rdma_block_iter_dma_address(&biter);
- for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- pg_addr = sg_page_iter_dma_address(&sg_iter);
- for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
- pbe->lo = cpu_to_le32(pg_addr);
- pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+ pbe->lo = cpu_to_le32(pg_addr);
+ pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
- pg_addr += BIT(pg_shift);
- pbe_cnt++;
- total_num_pbes++;
- pbe++;
+ pbe_cnt++;
+ total_num_pbes++;
+ pbe++;
- if (total_num_pbes == pbl_info->num_pbes)
- return;
+ if (total_num_pbes == pbl_info->num_pbes)
+ return;
- /* If the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct regpair *)pbl_tbl->va;
- pbe_cnt = 0;
- }
-
- fw_pg_cnt++;
+ /* If the given pbl is full, move to the next pbl. */
+ if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct regpair *)pbl_tbl->va;
+ pbe_cnt = 0;
}
}
}
return PTR_ERR(q->umem);
}
- fw_pages = ib_umem_page_count(q->umem) <<
- (PAGE_SHIFT - FW_PAGE_SHIFT);
-
+ fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
if (rc)
goto err0;
/* Generate doorbell address. */
cq->db.data.icid = cq->icid;
cq->db_addr = dev->db_addr + db_offset;
- cq->db.data.params = DB_AGG_CMD_SET <<
+ cq->db.data.params = DB_AGG_CMD_MAX <<
RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
/* point to the very last element, passing it we will toggle */
#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
#define QEDR_DESTROY_CQ_ITER_DURATION (10)
-void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
/* GSIs CQs are handled by driver, so they don't exist in the FW */
if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
- return;
+ return 0;
}
iparams.icid = cq->icid;
* Since the destroy CQ ramrod has also been received on the EQ we can
* be certain that there's no event handler in process.
*/
+ return 0;
}
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
SET_FIELD(qp_params->modify_flags,
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
break;
- case RDMA_NETWORK_IB:
+ case RDMA_NETWORK_ROCE_V1:
memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
qp_params->roce_mode = ROCE_V2_IPV4;
break;
+ default:
+ return -EINVAL;
}
for (i = 0; i < 4; i++) {
struct qedr_device_attr *qattr = &dev->attr;
/* QP0... attrs->qp_type == IB_QPT_GSI */
- if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
+ if (attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_GSI &&
+ attrs->qp_type != IB_QPT_XRC_INI &&
+ attrs->qp_type != IB_QPT_XRC_TGT) {
DP_DEBUG(dev, QEDR_MSG_QP,
"create qp: unsupported qp type=0x%x requested\n",
attrs->qp_type);
return -EINVAL;
}
- /* Unprivileged user space cannot create special QP */
- if (udata && attrs->qp_type == IB_QPT_GSI) {
- DP_ERR(dev,
- "create qp: userspace can't create special QPs of type=0x%x\n",
- attrs->qp_type);
- return -EINVAL;
+ /* verify consumer QPs are not trying to use GSI QP's CQ.
+ * TGT QP isn't associated with RQ/SQ
+ */
+ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
+ (attrs->qp_type != IB_QPT_XRC_TGT)) {
+ struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
+ struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
+
+ if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
+ (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
+ DP_ERR(dev,
+ "create qp: consumer QP cannot use GSI CQs.\n");
+ return -EINVAL;
+ }
}
return 0;
}
static void qedr_copy_rq_uresp(struct qedr_dev *dev,
- struct qedr_create_qp_uresp *uresp,
- struct qedr_qp *qp)
+ struct qedr_create_qp_uresp *uresp,
+ struct qedr_qp *qp)
{
/* iWARP requires two doorbells per RQ. */
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
int rc;
memset(uresp, 0, sizeof(*uresp));
- qedr_copy_sq_uresp(dev, uresp, qp);
- qedr_copy_rq_uresp(dev, uresp, qp);
+
+ if (qedr_qp_has_sq(qp))
+ qedr_copy_sq_uresp(dev, uresp, qp);
+
+ if (qedr_qp_has_rq(qp))
+ qedr_copy_rq_uresp(dev, uresp, qp);
uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
uresp->qp_id = qp->qp_id;
kref_init(&qp->refcnt);
init_completion(&qp->iwarp_cm_comp);
}
+
qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
- qp->sq.max_sges = attrs->cap.max_send_sge;
qp->state = QED_ROCE_QP_STATE_RESET;
qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
- qp->sq_cq = get_qedr_cq(attrs->send_cq);
qp->dev = dev;
+ if (qedr_qp_has_sq(qp)) {
+ qp->sq.max_sges = attrs->cap.max_send_sge;
+ qp->sq_cq = get_qedr_cq(attrs->send_cq);
+ DP_DEBUG(dev, QEDR_MSG_QP,
+ "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
+ qp->sq.max_sges, qp->sq_cq->icid);
+ }
- if (attrs->srq) {
+ if (attrs->srq)
qp->srq = get_qedr_srq(attrs->srq);
- } else {
+
+ if (qedr_qp_has_rq(qp)) {
qp->rq_cq = get_qedr_cq(attrs->recv_cq);
qp->rq.max_sges = attrs->cap.max_recv_sge;
DP_DEBUG(dev, QEDR_MSG_QP,
static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
- int rc;
+ int rc = 0;
- qp->sq.db = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
- qp->sq.db_data.data.icid = qp->icid + 1;
- rc = qedr_db_recovery_add(dev, qp->sq.db,
- &qp->sq.db_data,
- DB_REC_WIDTH_32B,
- DB_REC_KERNEL);
- if (rc)
- return rc;
+ if (qedr_qp_has_sq(qp)) {
+ qp->sq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+ qp->sq.db_data.data.icid = qp->icid + 1;
+ rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc)
+ return rc;
+ }
- if (!qp->srq) {
+ if (qedr_qp_has_rq(qp)) {
qp->rq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
qp->rq.db_data.data.icid = qp->icid;
-
- rc = qedr_db_recovery_add(dev, qp->rq.db,
- &qp->rq.db_data,
- DB_REC_WIDTH_32B,
- DB_REC_KERNEL);
- if (rc)
- qedr_db_recovery_del(dev, qp->sq.db,
- &qp->sq.db_data);
+ rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc && qedr_qp_has_sq(qp))
+ qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
}
return rc;
DP_ERR(dev,
"create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
attrs->attr.max_sge, qattr->max_sge);
+ }
+
+ if (!udata && attrs->srq_type == IB_SRQT_XRC) {
+ DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
return -EINVAL;
}
return -EINVAL;
srq->dev = dev;
+ srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
hw_srq = &srq->hw_srq;
spin_lock_init(&srq->lock);
in_params.prod_pair_addr = phy_prod_pair_addr;
in_params.num_pages = page_cnt;
in_params.page_size = page_size;
+ if (srq->is_xrc) {
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
+ struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
+
+ in_params.is_xrc = 1;
+ in_params.xrcd_id = xrcd->xrcd_id;
+ in_params.cq_cid = cq->icid;
+ }
rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
if (rc)
return -EFAULT;
}
-void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct qed_rdma_destroy_srq_in_params in_params = {};
struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
xa_erase_irq(&dev->srqs, srq->srq_id);
in_params.srq_id = srq->srq_id;
+ in_params.is_xrc = srq->is_xrc;
dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
if (ibsrq->uobject)
DP_DEBUG(dev, QEDR_MSG_SRQ,
"destroy srq: destroyed srq with srq_id=0x%0x\n",
srq->srq_id);
+ return 0;
}
int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return 0;
}
+static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
+{
+ switch (ib_qp_type) {
+ case IB_QPT_RC:
+ return QED_RDMA_QP_TYPE_RC;
+ case IB_QPT_XRC_INI:
+ return QED_RDMA_QP_TYPE_XRC_INI;
+ case IB_QPT_XRC_TGT:
+ return QED_RDMA_QP_TYPE_XRC_TGT;
+ default:
+ return QED_RDMA_QP_TYPE_INVAL;
+ }
+}
+
static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
struct qedr_pd *pd,
params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
- params->pd = pd->pd_id;
- params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
- params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+ params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
params->stats_queue = 0;
- params->srq_id = 0;
- params->use_srq = false;
- if (!qp->srq) {
+ if (pd) {
+ params->pd = pd->pd_id;
+ params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+ }
+
+ if (qedr_qp_has_sq(qp))
+ params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+
+ if (qedr_qp_has_rq(qp))
params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
- } else {
+ if (qedr_qp_has_srq(qp)) {
params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
params->srq_id = qp->srq->srq_id;
params->use_srq = true;
+ } else {
+ params->srq_id = 0;
+ params->use_srq = false;
}
}
"rq_len=%zd"
"\n",
qp,
- qp->usq.buf_addr,
- qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
+ qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
+ qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
+ qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
+ qedr_qp_has_rq(qp) ? qp->urq.buf_len : 0);
}
static inline void
struct qedr_ucontext *ctx,
struct qedr_qp *qp)
{
- ib_umem_release(qp->usq.umem);
- qp->usq.umem = NULL;
+ if (qedr_qp_has_sq(qp)) {
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
+ }
- ib_umem_release(qp->urq.umem);
- qp->urq.umem = NULL;
+ if (qedr_qp_has_rq(qp)) {
+ ib_umem_release(qp->urq.umem);
+ qp->urq.umem = NULL;
+ }
if (rdma_protocol_roce(&dev->ibdev, 1)) {
qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
{
struct qed_rdma_create_qp_in_params in_params;
struct qed_rdma_create_qp_out_params out_params;
- struct qedr_pd *pd = get_qedr_pd(ibpd);
- struct qedr_create_qp_uresp uresp;
- struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
- struct qedr_create_qp_ureq ureq;
+ struct qedr_create_qp_uresp uresp = {};
+ struct qedr_create_qp_ureq ureq = {};
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
- int rc = -EINVAL;
+ struct qedr_ucontext *ctx = NULL;
+ struct qedr_pd *pd = NULL;
+ int rc = 0;
qp->create_type = QEDR_QP_CREATE_USER;
- memset(&ureq, 0, sizeof(ureq));
- rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
- if (rc) {
- DP_ERR(dev, "Problem copying data from user space\n");
- return rc;
+
+ if (ibpd) {
+ pd = get_qedr_pd(ibpd);
+ ctx = pd->uctx;
}
- /* SQ - read access only (0) */
- rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
- ureq.sq_len, true, 0, alloc_and_init);
- if (rc)
- return rc;
+ if (udata) {
+ rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return rc;
+ }
+ }
- if (!qp->srq) {
+ if (qedr_qp_has_sq(qp)) {
+ /* SQ - read access only (0) */
+ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
+ ureq.sq_len, true, 0, alloc_and_init);
+ if (rc)
+ return rc;
+ }
+
+ if (qedr_qp_has_rq(qp)) {
/* RQ - read access only (0) */
rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
ureq.rq_len, true, 0, alloc_and_init);
qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
in_params.qp_handle_lo = ureq.qp_handle_lo;
in_params.qp_handle_hi = ureq.qp_handle_hi;
- in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
- in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
- if (!qp->srq) {
+
+ if (qp->qp_type == IB_QPT_XRC_TGT) {
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
+
+ in_params.xrcd_id = xrcd->xrcd_id;
+ in_params.qp_handle_lo = qp->qp_id;
+ in_params.use_srq = 1;
+ }
+
+ if (qedr_qp_has_sq(qp)) {
+ in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
+ in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+ }
+
+ if (qedr_qp_has_rq(qp)) {
in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
}
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
- if (rc)
- goto err;
+ if (udata) {
+ rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
+ if (rc)
+ goto err;
+ }
/* db offset was calculated in copy_qp_uresp, now set in the user q */
- ctx = pd->uctx;
- qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
- qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
-
- if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
- qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
-
- /* calculate the db_rec_db2 data since it is constant so no
- * need to reflect from user
- */
- qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
- qp->urq.db_rec_db2_data.data.value =
- cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+ if (qedr_qp_has_sq(qp)) {
+ qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+ rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
}
- rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
- &qp->usq.db_rec_data->db_data,
- DB_REC_WIDTH_32B,
- DB_REC_USER);
- if (rc)
- goto err;
-
- rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
- &qp->urq.db_rec_data->db_data,
- DB_REC_WIDTH_32B,
- DB_REC_USER);
- if (rc)
- goto err;
+ if (qedr_qp_has_rq(qp)) {
+ qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+ rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
goto err;
}
qedr_qp_user_print(dev, qp);
-
return rc;
err:
rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
return rc;
}
+static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct ib_udata *udata)
+{
+ struct qedr_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+ int rc;
+
+ if (qp->qp_type != IB_QPT_GSI) {
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ return rc;
+ }
+
+ if (qp->create_type == QEDR_QP_CREATE_USER)
+ qedr_cleanup_user(dev, ctx, qp);
+ else
+ qedr_cleanup_kernel(dev, qp);
+
+ return 0;
+}
+
struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *attrs,
struct ib_udata *udata)
{
- struct qedr_dev *dev = get_qedr_dev(ibpd->device);
- struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct qedr_xrcd *xrcd = NULL;
+ struct qedr_pd *pd = NULL;
+ struct qedr_dev *dev;
struct qedr_qp *qp;
struct ib_qp *ibqp;
int rc = 0;
+ if (attrs->qp_type == IB_QPT_XRC_TGT) {
+ xrcd = get_qedr_xrcd(attrs->xrcd);
+ dev = get_qedr_dev(xrcd->ibxrcd.device);
+ } else {
+ pd = get_qedr_pd(ibpd);
+ dev = get_qedr_dev(ibpd->device);
+ }
+
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
udata ? "user library" : "kernel", pd);
return ibqp;
}
- if (udata)
+ if (udata || xrcd)
rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
else
rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
if (rc)
- goto err;
+ goto out_free_qp;
qp->ibqp.qp_num = qp->qp_id;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
if (rc)
- goto err;
+ goto out_free_qp_resources;
}
return &qp->ibqp;
-err:
+out_free_qp_resources:
+ qedr_free_qp_resources(dev, qp, udata);
+out_free_qp:
kfree(qp);
return ERR_PTR(-EFAULT);
qp_attr->cap.max_recv_wr = qp->rq.max_wr;
qp_attr->cap.max_send_sge = qp->sq.max_sges;
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
- qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
+ qp_attr->cap.max_inline_data = dev->attr.max_inline;
qp_init_attr->cap = qp_attr->cap;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
return rc;
}
-static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
- struct ib_udata *udata)
-{
- struct qedr_ucontext *ctx =
- rdma_udata_to_drv_context(udata, struct qedr_ucontext,
- ibucontext);
- int rc;
-
- if (qp->qp_type != IB_QPT_GSI) {
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- return rc;
- }
-
- if (qp->create_type == QEDR_QP_CREATE_USER)
- qedr_cleanup_user(dev, ctx, qp);
- else
- qedr_cleanup_kernel(dev, qp);
-
- return 0;
-}
-
int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
if (rdma_protocol_iwarp(&dev->ibdev, 1))
qedr_iw_qp_rem_ref(&qp->ibqp);
+ else
+ kfree(qp);
return 0;
}
return 0;
}
-void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
rdma_destroy_ah_attr(&ah->attr);
+ return 0;
}
static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
goto err0;
}
- rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
+ rc = init_mr_info(dev, &mr->info,
+ ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
if (rc)
goto err1;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
mr->hw_mr.page_size_log = PAGE_SHIFT;
- mr->hw_mr.fbo = ib_umem_offset(mr->umem);
mr->hw_mr.length = len;
mr->hw_mr.vaddr = usr_addr;
- mr->hw_mr.zbva = false;
mr->hw_mr.phy_mr = false;
mr->hw_mr.dma_mr = false;
mr->hw_mr.pbl_ptr = 0;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.fbo = 0;
mr->hw_mr.length = 0;
mr->hw_mr.vaddr = 0;
- mr->hw_mr.zbva = false;
mr->hw_mr.phy_mr = true;
mr->hw_mr.dma_mr = false;
* in first 4 bytes and need to update WQE producer in
* next 4 bytes.
*/
- srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
+ srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
/* Make sure sge producer is updated first */
dma_wmb();
- srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
+ srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
wr = wr->next;
}
int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-
+int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
struct ib_udata *);
int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_recv_wr);
int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
/* LID mask control */
u8 lmc;
u8 link_width_supported;
- u8 link_speed_supported;
+ u16 link_speed_supported;
u8 link_width_enabled;
- u8 link_speed_enabled;
+ u16 link_speed_enabled;
u8 link_width_active;
- u8 link_speed_active;
+ u16 link_speed_active;
u8 vls_supported;
u8 vls_operational;
/* Rx Polarity inversion (compensate for ~tx on partner) */
return;
}
-static void qib_error_tasklet(unsigned long data)
+static void qib_error_tasklet(struct tasklet_struct *t)
{
- struct qib_devdata *dd = (struct qib_devdata *)data;
+ struct qib_devdata *dd = from_tasklet(dd, t, error_tasklet);
handle_7322_errors(dd);
qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
for (i = 0; i < ARRAY_SIZE(redirect); i++)
qib_write_kreg(dd, kr_intredirect + i, redirect[i]);
dd->cspec->main_int_mask = mask;
- tasklet_init(&dd->error_tasklet, qib_error_tasklet,
- (unsigned long)dd);
+ tasklet_setup(&dd->error_tasklet, qib_error_tasklet);
}
/**
struct ib_mad *out_mad)
{
struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
- int ret;
-
*out_mad = *in_mad;
if (ccp->class_version != 2) {
ccp->status |= IB_SMP_UNSUP_VERSION;
- ret = reply((struct ib_smp *)ccp);
- goto bail;
+ return reply((struct ib_smp *)ccp);
}
switch (ccp->method) {
case IB_MGMT_METHOD_GET:
switch (ccp->attr_id) {
case IB_CC_ATTR_CLASSPORTINFO:
- ret = cc_get_classportinfo(ccp, ibdev);
- goto bail;
-
+ return cc_get_classportinfo(ccp, ibdev);
case IB_CC_ATTR_CONGESTION_INFO:
- ret = cc_get_congestion_info(ccp, ibdev, port);
- goto bail;
-
+ return cc_get_congestion_info(ccp, ibdev, port);
case IB_CC_ATTR_CA_CONGESTION_SETTING:
- ret = cc_get_congestion_setting(ccp, ibdev, port);
- goto bail;
-
+ return cc_get_congestion_setting(ccp, ibdev, port);
case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
- ret = cc_get_congestion_control_table(ccp, ibdev, port);
- goto bail;
-
- fallthrough;
+ return cc_get_congestion_control_table(ccp, ibdev, port);
default:
ccp->status |= IB_SMP_UNSUP_METH_ATTR;
- ret = reply((struct ib_smp *) ccp);
- goto bail;
+ return reply((struct ib_smp *) ccp);
}
-
case IB_MGMT_METHOD_SET:
switch (ccp->attr_id) {
case IB_CC_ATTR_CA_CONGESTION_SETTING:
- ret = cc_set_congestion_setting(ccp, ibdev, port);
- goto bail;
-
+ return cc_set_congestion_setting(ccp, ibdev, port);
case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
- ret = cc_set_congestion_control_table(ccp, ibdev, port);
- goto bail;
-
- fallthrough;
+ return cc_set_congestion_control_table(ccp, ibdev, port);
default:
ccp->status |= IB_SMP_UNSUP_METH_ATTR;
- ret = reply((struct ib_smp *) ccp);
- goto bail;
+ return reply((struct ib_smp *) ccp);
}
-
case IB_MGMT_METHOD_GET_RESP:
/*
* The ib_mad module will call us to process responses
* before checking for other consumers.
* Just tell the caller to process it normally.
*/
- ret = IB_MAD_RESULT_SUCCESS;
- goto bail;
-
- case IB_MGMT_METHOD_TRAP:
- default:
- ccp->status |= IB_SMP_UNSUP_METHOD;
- ret = reply((struct ib_smp *) ccp);
+ return IB_MAD_RESULT_SUCCESS;
}
-bail:
- return ret;
+ /* method is unsupported */
+ ccp->status |= IB_SMP_UNSUP_METHOD;
+ return reply((struct ib_smp *) ccp);
}
/**
static void sdma_put(struct qib_sdma_state *);
static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states);
static void sdma_start_sw_clean_up(struct qib_pportdata *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void unmap_desc(struct qib_pportdata *, unsigned);
static void sdma_get(struct qib_sdma_state *ss)
}
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct qib_pportdata *ppd = (struct qib_pportdata *) opaque;
+ struct qib_pportdata *ppd = from_tasklet(ppd, t,
+ sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&ppd->sdma_lock, flags);
INIT_LIST_HEAD(&ppd->sdma_activelist);
- tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)ppd);
+ tasklet_setup(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task);
ret = dd->f_init_sdma_regs(ppd);
if (ret)
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
.modify_qp = usnic_ib_modify_qp,
.query_device = usnic_ib_query_device,
.query_gid = usnic_ib_query_gid,
- .query_pkey = usnic_ib_query_pkey,
.query_port = usnic_ib_query_port,
.query_qp = usnic_ib_query_qp,
.reg_user_mr = usnic_ib_reg_mr,
props->port_cap_flags = 0;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
props->qkey_viol_cntr = 0;
props->max_mtu = IB_MTU_4096;
return 0;
}
-int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- if (index > 0)
- return -EINVAL;
-
- *pkey = 0xffff;
- return 0;
-}
-
int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct usnic_ib_pd *pd = to_upd(ibpd);
return 0;
}
-void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+ return 0;
}
struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
return 0;
}
-void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
- return;
+ return 0;
}
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_qp_init_attr *qp_init_attr);
int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid);
-int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey);
int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int attr_mask, struct ib_udata *udata);
int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
goto err_cq;
}
- npages = ib_umem_page_count(cq->umem);
+ npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
} else {
/* One extra page for shared ring state */
npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
* @cq: the completion queue to destroy.
* @udata: user data or null for kernel object
*/
-void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct pvrdma_cq *vcq = to_vcq(cq);
union pvrdma_cmd_req req;
pvrdma_free_cq(dev, vcq);
atomic_dec(&dev->num_cqs);
+ return 0;
}
static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
* pvrdma_poll_cq - poll for work completion queue entries
* @ibcq: completion queue
* @num_entries: the maximum number of entries
- * @entry: pointer to work completion array
+ * @wc: pointer to work completion array
*
* @return: number of polled completion entries
*/
int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
struct ib_umem *umem, u64 offset)
{
+ struct ib_block_iter biter;
u64 i = offset;
int ret = 0;
- struct sg_dma_page_iter sg_iter;
if (offset >= pdir->npages)
return -EINVAL;
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- dma_addr_t addr = sg_page_iter_dma_address(&sg_iter);
-
- ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
+ rdma_umem_for_each_dma_block (umem, &biter, PAGE_SIZE) {
+ ret = pvrdma_page_dir_insert_dma(
+ pdir, i, rdma_block_iter_dma_address(&biter));
if (ret)
goto exit;
return ERR_CAST(umem);
}
- npages = ib_umem_num_pages(umem);
+ npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
npages);
/**
* pvrdma_dereg_mr - deregister a memory region
* @ibmr: memory region
+ * @udata: pointer to user data
*
* @return: 0 on success.
*/
switch (init_attr->qp_type) {
case IB_QPT_GSI:
if (init_attr->port_num == 0 ||
- init_attr->port_num > pd->device->phys_port_cnt ||
- udata) {
+ init_attr->port_num > pd->device->phys_port_cnt) {
dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
ret = -EINVAL;
goto err_qp;
goto err_qp;
}
- qp->npages_send = ib_umem_page_count(qp->sumem);
+ qp->npages_send =
+ ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
if (!is_srq)
- qp->npages_recv = ib_umem_page_count(qp->rumem);
+ qp->npages_recv = ib_umem_num_dma_blocks(
+ qp->rumem, PAGE_SIZE);
else
qp->npages_recv = 0;
qp->npages = qp->npages_send + qp->npages_recv;
/**
* pvrdma_create_srq - create shared receive queue
- * @pd: protection domain
+ * @ibsrq: the IB shared receive queue
* @init_attr: shared receive queue attributes
* @udata: user data
*
goto err_srq;
}
- srq->npages = ib_umem_page_count(srq->umem);
+ srq->npages = ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE);
if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
dev_warn(&dev->pdev->dev,
*
* @return: 0 for success.
*/
-void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct pvrdma_srq *vsrq = to_vsrq(srq);
union pvrdma_cmd_req req;
ret);
pvrdma_free_srq(dev, vsrq);
+ return 0;
}
/**
* @pd: the protection domain to be released
* @udata: user data or null for kernel object
*
- * @return: 0 on success, otherwise errno.
+ * @return: Always 0
*/
-void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
union pvrdma_cmd_req req = {};
ret);
atomic_dec(&dev->num_pds);
+ return 0;
}
/**
* pvrdma_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- * @udata: user data blob
- * @flags: create address handle flags (see enum rdma_create_ah_flags)
+ * @ibah: the IB address handle
+ * @init_attr: the attributes of the AH
+ * @udata: pointer to user data
*
* @return: 0 on success, otherwise errno.
*/
* @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
*
*/
-void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
struct pvrdma_dev *dev = to_vdev(ah->device);
atomic_dec(&dev->num_ahs);
+ return 0;
}
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
u8 reserved[2];
};
int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
void pvrdma_dealloc_ucontext(struct ib_ucontext *context);
int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
int sg_nents, unsigned int *sg_offset);
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
*
* Return: 0 on success
*/
-void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
{
struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
struct rvt_ah *ah = ibah_to_rvtah(ibah);
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
rdma_destroy_ah_attr(&ah->attr);
+ return 0;
}
/**
int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
*
* Called by ib_destroy_cq() in the generic verbs code.
*/
-void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
struct rvt_dev_info *rdi = cq->rdi;
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->kqueue);
+ return 0;
}
/**
int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
*
* Return: always 0
*/
-void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
spin_lock(&dev->n_pds_lock);
dev->n_pds_allocated--;
spin_unlock(&dev->n_pds_lock);
+ return 0;
}
#include <rdma/rdma_vt.h>
int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
#endif /* DEF_RDMAVTPD_H */
* @ibsrq: srq object to destroy
*
*/
-void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
if (srq->ip)
kref_put(&srq->ip->ref, rvt_release_mmap_info);
kvfree(srq->rq.kwq);
+ return 0;
}
enum ib_srq_attr_mask attr_mask,
struct ib_udata *udata);
int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
#endif /* DEF_RVTSRQ_H */
if (!rdi)
return rdi;
- rdi->ports = kcalloc(nports,
- sizeof(struct rvt_ibport **),
- GFP_KERNEL);
+ rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL);
if (!rdi->ports)
ib_dealloc_device(&rdi->ibdev);
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <rdma/rdma_netlink.h>
struct rxe_dev *exists;
int err = 0;
+ if (is_vlan_dev(ndev)) {
+ pr_err("rxe creation allowed on top of a real device only\n");
+ err = -EPERM;
+ goto err;
+ }
+
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
{
int err;
- /* initialize slab caches for managed objects */
- err = rxe_cache_init();
- if (err) {
- pr_err("unable to init object pools\n");
- return err;
- }
-
err = rxe_net_init();
if (err)
return err;
rdma_link_unregister(&rxe_link_ops);
ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
- rxe_cache_exit();
rxe_initialized = false;
pr_info("unloaded\n");
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted)) {
+ if (!wqe || (wqe->state == wqe_state_posted))
goto exit;
- }
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/vmalloc.h>
#include "rxe.h"
return -EINVAL;
}
-static void rxe_send_complete(unsigned long data)
+static void rxe_send_complete(struct tasklet_struct *t)
{
- struct rxe_cq *cq = (struct rxe_cq *)data;
+ struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
unsigned long flags;
spin_lock_irqsave(&cq->cq_lock, flags);
cq->is_dying = false;
- tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
+ tasklet_setup(&cq->comp_task, rxe_send_complete);
spin_lock_init(&cq->cq_lock);
cq->ibcq.cqe = cqe;
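
A self-contained sketch of the tasklet API conversion these hunks apply (struct demo_cq and demo_send_complete are illustrative names, not from the driver): the callback now receives the tasklet_struct itself and recovers its container with from_tasklet(), instead of being handed an opaque unsigned long:

#include <linux/interrupt.h>

struct demo_cq {
	struct tasklet_struct comp_task;
	/* ... completion state ... */
};

static void demo_send_complete(struct tasklet_struct *t)
{
	/* container_of() wrapper: map the tasklet back to its demo_cq */
	struct demo_cq *cq = from_tasklet(cq, t, comp_task);

	/* ... deliver completions for cq ... */
}

static void demo_cq_init(struct demo_cq *cq)
{
	/* no more (unsigned long) cast of the context pointer */
	tasklet_setup(&cq->comp_task, demo_send_complete);
}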
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_HDR_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_HW_COUNTERS_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_LOC_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/module.h>
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
u32 lkey = mem->pelem.index << 8 | rxe_get_key();
u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
- if (mem->pelem.pool->type == RXE_TYPE_MR) {
- mem->ibmr.lkey = lkey;
- mem->ibmr.rkey = rkey;
- }
-
- mem->lkey = lkey;
- mem->rkey = rkey;
+ mem->ibmr.lkey = lkey;
+ mem->ibmr.rkey = rkey;
mem->state = RXE_MEM_STATE_INVALID;
mem->type = RXE_MEM_TYPE_NONE;
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
{
rxe_mem_init(access, mem);
- mem->pd = pd;
+ mem->ibmr.pd = &pd->ibpd;
mem->access = access;
mem->state = RXE_MEM_STATE_VALID;
mem->type = RXE_MEM_TYPE_DMA;
}
}
- mem->pd = pd;
+ mem->ibmr.pd = &pd->ibpd;
mem->umem = umem;
mem->access = access;
mem->length = length;
if (err)
goto err1;
- mem->pd = pd;
+ mem->ibmr.pd = &pd->ibpd;
mem->max_buf = max_pages;
mem->state = RXE_MEM_STATE_FREE;
mem->type = RXE_MEM_TYPE_MR;
memcpy(dest, src, length);
if (crcp)
- *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ *crcp = rxe_crc32(to_rdev(mem->ibmr.device),
*crcp, dest, length);
return 0;
memcpy(dest, src, bytes);
if (crcp)
- crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ crc = rxe_crc32(to_rdev(mem->ibmr.device),
crc, dest, bytes);
length -= bytes;
if (!mem)
return NULL;
- if (unlikely((type == lookup_local && mem->lkey != key) ||
- (type == lookup_remote && mem->rkey != key) ||
- mem->pd != pd ||
+ if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
+ (type == lookup_remote && mr_rkey(mem) != key) ||
+ mr_pd(mem) != pd ||
(access && !(access & mem->access)) ||
mem->state != RXE_MEM_STATE_VALID)) {
rxe_drop_ref(mem);
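
The mr_pd()/mr_lkey()/mr_rkey() helpers these hunks start using are defined in the driver's headers rather than shown here. A plausible shape, given that the fields now live in the embedded struct ib_mr, would be the following; treat this as an assumption about rxe_verbs.h, not the patch's exact text:

static inline struct rxe_pd *mr_pd(struct rxe_mem *mr)
{
	return to_rpd(mr->ibmr.pd);
}

static inline u32 mr_lkey(struct rxe_mem *mr)
{
	return mr->ibmr.lkey;
}

static inline u32 mr_rkey(struct rxe_mem *mr)
{
	return mr->ibmr.rkey;
}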
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
recv_sockets.sk6->sk, &fl6,
NULL);
- if (unlikely(IS_ERR(ndst))) {
+ if (IS_ERR(ndst)) {
pr_err_ratelimited("no route to %pI6\n", daddr);
return NULL;
}
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_NET_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <rdma/ib_pack.h>
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_OPCODE_H
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_PARAM_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
return rxe_type_info[pool->type].name;
}
-static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
-{
- return rxe_type_info[pool->type].cache;
-}
-
-static void rxe_cache_clean(size_t cnt)
-{
- int i;
- struct rxe_type_info *type;
-
- for (i = 0; i < cnt; i++) {
- type = &rxe_type_info[i];
- if (!(type->flags & RXE_POOL_NO_ALLOC)) {
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
- }
-}
-
-int rxe_cache_init(void)
-{
- int err;
- int i;
- size_t size;
- struct rxe_type_info *type;
-
- for (i = 0; i < RXE_NUM_TYPES; i++) {
- type = &rxe_type_info[i];
- size = ALIGN(type->size, RXE_POOL_ALIGN);
- if (!(type->flags & RXE_POOL_NO_ALLOC)) {
- type->cache =
- kmem_cache_create(type->name, size,
- RXE_POOL_ALIGN,
- RXE_POOL_CACHE_FLAGS, NULL);
- if (!type->cache) {
- pr_err("Unable to init kmem cache for %s\n",
- type->name);
- err = -ENOMEM;
- goto err1;
- }
- }
- }
-
- return 0;
-
-err1:
- rxe_cache_clean(i);
-
- return err;
-}
-
-void rxe_cache_exit(void)
-{
- rxe_cache_clean(RXE_NUM_TYPES);
-}
-
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
{
int err = 0;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
goto out_cnt;
- elem = kmem_cache_zalloc(pool_cache(pool),
+ elem = kzalloc(rxe_type_info[pool->type].size,
(pool->flags & RXE_POOL_ATOMIC) ?
GFP_ATOMIC : GFP_KERNEL);
if (!elem)
pool->cleanup(elem);
if (!(pool->flags & RXE_POOL_NO_ALLOC))
- kmem_cache_free(pool_cache(pool), elem);
+ kfree(elem);
atomic_dec(&pool->num_elem);
ib_device_put(&pool->rxe->ib_dev);
rxe_pool_put(pool);
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_POOL_H
u32 min_index;
size_t key_offset;
size_t key_size;
- struct kmem_cache *cache;
};
extern struct rxe_type_info rxe_type_info[];
size_t key_size;
};
-/* initialize slab caches for managed objects */
-int rxe_cache_init(void);
-
-/* cleanup slab caches for managed objects */
-void rxe_cache_exit(void);
-
/* initialize a pool of objects with given limit on
* number of elements. gets parameters from rxe_type_info
* pool elements will be allocated out of a slab cache
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
if (mask & IB_QP_QKEY)
qp->attr.qkey = attr->qkey;
- if (mask & IB_QP_AV) {
+ if (mask & IB_QP_AV)
rxe_init_av(&attr->ah_attr, &qp->pri_av);
- }
if (mask & IB_QP_ALT_PATH) {
rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/vmalloc.h>
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_QUEUE_H
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
+ struct sk_buff *per_qp_skb;
+ struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
if (err)
continue;
- /* if *not* the last qp in the list
- * increase the users of the skb then post to the next qp
+ /* for all but the last qp create a new clone of the
+ * skb and pass to the qp.
*/
if (mce->qp_list.next != &mcg->qp_list)
- skb_get(skb);
+ per_qp_skb = skb_clone(skb, GFP_ATOMIC);
+ else
+ per_qp_skb = skb;
- pkt->qp = qp;
+ per_qp_pkt = SKB_TO_PKT(per_qp_skb);
+ per_qp_pkt->qp = qp;
rxe_add_ref(qp);
- rxe_rcv_pkt(pkt, skb);
+ rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
}
spin_unlock_bh(&mcg->mcg_lock);
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
+ return;
+
err1:
kfree_skb(skb);
}
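
The change above switches multicast delivery from skb_get(), which only bumps the refcount so every QP would be handed the same skb as it is being consumed, to a per-QP clone. A reduced sketch of the idiom; demo_recipient and demo_deliver() are illustrative stand-ins for the driver's rxe_mc_elem list and rxe_rcv_pkt(), the cloning pattern is the point:

#include <linux/skbuff.h>
#include <linux/list.h>

struct demo_recipient {
	struct list_head list;
};

static void demo_deliver(struct demo_recipient *r, struct sk_buff *skb)
{
	/* ... hand the packet to one recipient, consuming skb ... */
	kfree_skb(skb);
}

static void demo_mcast_deliver(struct list_head *recipients,
			       struct sk_buff *skb)
{
	struct demo_recipient *r;

	list_for_each_entry(r, recipients, list) {
		/* all but the last recipient get their own clone; the
		 * last one consumes the original skb
		 */
		struct sk_buff *s = list_is_last(&r->list, recipients) ?
					skb : skb_clone(skb, GFP_ATOMIC);

		if (!s)
			continue;	/* clone failed under memory pressure */

		demo_deliver(r, s);
	}
}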
-static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
+/**
+ * rxe_chk_dgid - validate destination IP address
+ * @rxe: rxe device that received packet
+ * @skb: the received packet buffer
+ *
+ * Accept any loopback packets
+ * Extract IP address from packet and
+ * Accept if multicast packet
+ * Accept if matches an SGID table entry
+ */
+static int rxe_chk_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
const struct ib_gid_attr *gid_attr;
pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
}
+ if (rdma_is_multicast_addr((struct in6_addr *)pdgid))
+ return 0;
+
gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid,
IB_GID_TYPE_ROCE_UDP_ENCAP,
1, skb->dev);
if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
goto drop;
- if (rxe_match_dgid(rxe, skb) < 0) {
- pr_warn_ratelimited("failed matching dgid\n");
+ if (rxe_chk_dgid(rxe, skb) < 0) {
+ pr_warn_ratelimited("failed checking dgid\n");
goto drop;
}
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
rmr->state = RXE_MEM_STATE_VALID;
rmr->access = wqe->wr.wr.reg.access;
- rmr->lkey = wqe->wr.wr.reg.key;
- rmr->rkey = wqe->wr.wr.reg.key;
+ rmr->ibmr.lkey = wqe->wr.wr.reg.key;
+ rmr->ibmr.rkey = wqe->wr.wr.reg.key;
rmr->iova = wqe->wr.wr.reg.mr->iova;
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/vmalloc.h>
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
return -EINVAL;
}
+ if (is_vlan_dev(ndev)) {
+ pr_err("rxe creation allowed on top of a real device only\n");
+ err = -EPERM;
+ goto err;
+ }
+
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/kernel.h>
* a second caller finds the task already running
* but looks just after the last call to func
*/
-void rxe_do_task(unsigned long data)
+void rxe_do_task(struct tasklet_struct *t)
{
int cont;
int ret;
unsigned long flags;
- struct rxe_task *task = (struct rxe_task *)data;
+ struct rxe_task *task = from_tasklet(task, t, tasklet);
spin_lock_irqsave(&task->state_lock, flags);
switch (task->state) {
snprintf(task->name, sizeof(task->name), "%s", name);
task->destroyed = false;
- tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
+ tasklet_setup(&task->tasklet, rxe_do_task);
task->state = TASK_STATE_START;
spin_lock_init(&task->state_lock);
if (sched)
tasklet_schedule(&task->tasklet);
else
- rxe_do_task((unsigned long)task);
+ rxe_do_task(&task->tasklet);
}
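/*
 * Illustrative sketch (not part of this patch): the tasklet_setup() /
 * from_tasklet() conversion used above. from_tasklet() is a container_of()
 * style helper, so the callback recovers its wrapping structure from the
 * embedded struct tasklet_struct instead of casting an unsigned long cookie.
 * The struct and function names below are hypothetical; assumes
 * <linux/interrupt.h>.
 */
#include <linux/interrupt.h>

struct example_task {
	struct tasklet_struct tasklet;
	int pending;
};

static void example_do_task(struct tasklet_struct *t)
{
	/* resolves to container_of(t, struct example_task, tasklet) */
	struct example_task *task = from_tasklet(task, t, tasklet);

	task->pending = 0;
}

static void example_init(struct example_task *task)
{
	/* registers example_do_task as the callback; no data cast needed */
	tasklet_setup(&task->tasklet, example_do_task);
}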
void rxe_disable_task(struct rxe_task *task)
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_TASK_H
/*
* init rxe_task structure
* arg => parameter to pass to fcn
- * fcn => function to call until it returns != 0
+ * func => function to call until it returns != 0
*/
int rxe_init_task(void *obj, struct rxe_task *task,
void *arg, int (*func)(void *), char *name);
 * work to do, someone must reschedule the task before
* leaving
*/
-void rxe_do_task(unsigned long data);
+void rxe_do_task(struct tasklet_struct *t);
/* run a task, else schedule it to run as a tasklet. The decision
 * to run or to schedule the tasklet is based on the parameter sched.
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/dma-mapping.h>
return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
}
-static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
rxe_drop_ref(pd);
+ return 0;
}
static int rxe_create_ah(struct ib_ah *ibah,
return 0;
}
-static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
+static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
rxe_drop_ref(ah);
+ return 0;
}
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
return 0;
}
-static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
rxe_drop_ref(srq->pd);
rxe_drop_ref(srq);
+ return 0;
}
static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
}
-static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
rxe_cq_disable(cq);
rxe_drop_ref(cq);
+ return 0;
}
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
struct rxe_mem *mr = to_rmr(ibmr);
mr->state = RXE_MEM_STATE_ZOMBIE;
- rxe_drop_ref(mr->pd);
+ rxe_drop_ref(mr_pd(mr));
rxe_drop_index(mr);
rxe_drop_ref(mr);
return 0;
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_VERBS_H
struct ib_mw ibmw;
};
- struct rxe_pd *pd;
struct ib_umem *umem;
- u32 lkey;
- u32 rkey;
-
enum rxe_mem_state state;
enum rxe_mem_type type;
u64 va;
return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
}
+static inline struct rxe_pd *mr_pd(struct rxe_mem *mr)
+{
+ return to_rpd(mr->ibmr.pd);
+}
+
+static inline u32 mr_lkey(struct rxe_mem *mr)
+{
+ return mr->ibmr.lkey;
+}
+
+static inline u32 mr_rkey(struct rxe_mem *mr)
+{
+ return mr->ibmr.rkey;
+}
+
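/* rxe keeps the PD and the lkey/rkey only in the core ib_mr; the mr_pd(),
 * mr_lkey() and mr_rkey() helpers above are the single access path to them,
 * which is why struct rxe_mem no longer carries its own copies.
 */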
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
return 0;
}
-void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct siw_device *sdev = to_siw_dev(pd->device);
siw_dbg_pd(pd, "free PD\n");
atomic_dec(&sdev->num_pd);
+ return 0;
}
void siw_qp_get_ref(struct ib_qp *base_qp)
return rv > 0 ? 0 : rv;
}
-void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
+int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
{
struct siw_cq *cq = to_siw_cq(base_cq);
struct siw_device *sdev = to_siw_dev(base_cq->device);
atomic_dec(&sdev->num_cq);
vfree(cq->queue);
+ return 0;
}
/*
* QP anymore - the code trusts the RDMA core environment to keep track
* of QP references.
*/
-void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
+int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
{
struct siw_srq *srq = to_siw_srq(base_srq);
struct siw_device *sdev = to_siw_dev(base_srq->device);
rdma_user_mmap_entry_remove(srq->srq_entry);
vfree(srq->recvq);
atomic_dec(&sdev->num_srq);
+ return 0;
}
/*
int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
union ib_gid *gid);
int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
-void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
+int siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
struct ib_qp *siw_create_qp(struct ib_pd *base_pd,
struct ib_qp_init_attr *attr,
struct ib_udata *udata);
const struct ib_send_wr **bad_wr);
int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
+int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc);
int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags);
struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len,
int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask mask, struct ib_udata *udata);
int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr);
-void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
+int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- int ret;
if (!priv->cm.srq)
return;
ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
- ret = ib_destroy_srq(priv->cm.srq);
- if (ret)
- ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);
-
+ ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
if (!priv->cm.srq_ring)
return;
return 0;
}
-static const struct seq_operations ipoib_mcg_seq_ops = {
+static const struct seq_operations ipoib_mcg_sops = {
.start = ipoib_mcg_seq_start,
.next = ipoib_mcg_seq_next,
.stop = ipoib_mcg_seq_stop,
.show = ipoib_mcg_seq_show,
};
-static int ipoib_mcg_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int ret;
-
- ret = seq_open(file, &ipoib_mcg_seq_ops);
- if (ret)
- return ret;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-static const struct file_operations ipoib_mcg_fops = {
- .owner = THIS_MODULE,
- .open = ipoib_mcg_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
+DEFINE_SEQ_ATTRIBUTE(ipoib_mcg);
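/*
 * Illustrative note (not part of this patch): DEFINE_SEQ_ATTRIBUTE(ipoib_mcg)
 * generates roughly the open helper and file_operations removed above, wiring
 * ipoib_mcg_sops into seq_open() and propagating inode->i_private into
 * seq_file->private. The expansion below is a simplified approximation of the
 * macro in <linux/seq_file.h>:
 *
 *	static int ipoib_mcg_open(struct inode *inode, struct file *file)
 *	{
 *		int ret = seq_open(file, &ipoib_mcg_sops);
 *
 *		if (!ret && inode->i_private) {
 *			struct seq_file *seq = file->private_data;
 *
 *			seq->private = inode->i_private;
 *		}
 *		return ret;
 *	}
 *
 *	static const struct file_operations ipoib_mcg_fops = {
 *		.owner   = THIS_MODULE,
 *		.open    = ipoib_mcg_open,
 *		.read    = seq_read,
 *		.llseek  = seq_lseek,
 *		.release = seq_release,
 *	};
 */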
static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos)
{
return 0;
}
-static const struct seq_operations ipoib_path_seq_ops = {
+static const struct seq_operations ipoib_path_sops = {
.start = ipoib_path_seq_start,
.next = ipoib_path_seq_next,
.stop = ipoib_path_seq_stop,
.show = ipoib_path_seq_show,
};
-static int ipoib_path_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int ret;
-
- ret = seq_open(file, &ipoib_path_seq_ops);
- if (ret)
- return ret;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-static const struct file_operations ipoib_path_fops = {
- .owner = THIS_MODULE,
- .open = ipoib_path_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
+DEFINE_SEQ_ATTRIBUTE(ipoib_path);
void ipoib_create_debug_files(struct net_device *dev)
{
/* call event handler to ensure pkey in sync */
queue_work(ipoib_workqueue, &priv->flush_heavy);
+ ndev->rtnl_link_ops = ipoib_get_link_ops();
+
result = register_netdev(ndev);
if (result) {
pr_warn("%s: couldn't register ipoib port %d; error %d\n",
return 0;
}
+static void ipoib_del_child_link(struct net_device *dev, struct list_head *head)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if (!priv->parent)
+ return;
+
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ipoib_get_size(const struct net_device *dev)
{
return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
.priv_size = sizeof(struct ipoib_dev_priv),
.setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
+ .dellink = ipoib_del_child_link,
.changelink = ipoib_changelink,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
}
priv = ipoib_priv(ndev);
+ ndev->rtnl_link_ops = ipoib_get_link_ops();
+
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
if (result && ndev->reg_state == NETREG_UNINITIALIZED)
* multiple data-outs on the same command can arrive -
* so post the buffer before hand
*/
- rc = isert_post_recv(isert_conn, rx_desc);
- if (rc) {
- isert_err("ib_post_recv failed with %d\n", rc);
- return rc;
- }
- return 0;
+ return isert_post_recv(isert_conn, rx_desc);
}
static int
int ret;
ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
- if (ret) {
- isert_err("ib_post_recv failed with %d\n", ret);
+ if (ret)
return ret;
- }
ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL);
if (ret) {
&isert_cmd->tx_desc.send_wr);
rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
- if (rc) {
- isert_err("ib_post_recv failed with %d\n", rc);
+ if (rc)
return rc;
- }
chain_wr = &isert_cmd->tx_desc.send_wr;
}
NULL
};
-static struct attribute_group rtrs_clt_stats_attr_group = {
+static const struct attribute_group rtrs_clt_stats_attr_group = {
.attrs = rtrs_clt_stats_attrs,
};
NULL,
};
-static struct attribute_group rtrs_clt_sess_attr_group = {
+static const struct attribute_group rtrs_clt_sess_attr_group = {
.attrs = rtrs_clt_sess_attrs,
};
NULL,
};
-static struct attribute_group rtrs_clt_attr_group = {
+static const struct attribute_group rtrs_clt_attr_group = {
.attrs = rtrs_clt_attrs,
};
/* rtrs information unit */
struct rtrs_iu {
- struct list_head list;
struct ib_cqe cqe;
dma_addr_t dma_addr;
void *buf;
NULL,
};
-static struct attribute_group rtrs_srv_sess_attr_group = {
+static const struct attribute_group rtrs_srv_sess_attr_group = {
.attrs = rtrs_srv_sess_attrs,
};
NULL,
};
-static struct attribute_group rtrs_srv_stats_attr_group = {
+static const struct attribute_group rtrs_srv_stats_attr_group = {
.attrs = rtrs_srv_stats_attrs,
};
#include "rtrs-srv.h"
#include "rtrs-log.h"
#include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
MODULE_DESCRIPTION("RDMA Transport Server");
MODULE_LICENSE("GPL");
static struct rtrs_rdma_dev_pd dev_pd;
static mempool_t *chunk_pool;
struct class *rtrs_dev_class;
+static struct rtrs_srv_ib_ctx ib_ctx;
static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
kfree(ctx);
}
+static int rtrs_srv_add_one(struct ib_device *device)
+{
+ struct rtrs_srv_ctx *ctx;
+ int ret = 0;
+
+ mutex_lock(&ib_ctx.ib_dev_mutex);
+ if (ib_ctx.ib_dev_count)
+ goto out;
+
+ /*
+ * Since our CM IDs are NOT bound to any ib device, we create them
+ * only once.
+ */
+ ctx = ib_ctx.srv_ctx;
+ ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
+ if (ret) {
+ /*
+ * We failed here. According to the ib code, the error code is
+ * ignored and no further calls to our ops are made.
+ */
+ pr_err("Failed to initialize RDMA connection\n");
+ goto err_out;
+ }
+
+out:
+ /*
+ * Keep track of the number of ib devices added
+ */
+ ib_ctx.ib_dev_count++;
+
+err_out:
+ mutex_unlock(&ib_ctx.ib_dev_mutex);
+ return ret;
+}
+
+static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
+{
+ struct rtrs_srv_ctx *ctx;
+
+ mutex_lock(&ib_ctx.ib_dev_mutex);
+ ib_ctx.ib_dev_count--;
+
+ if (ib_ctx.ib_dev_count)
+ goto out;
+
+ /*
+ * Since our CM IDs are NOT bound to any ib device, we remove them
+ * only once, when the last device is removed.
+ */
+ ctx = ib_ctx.srv_ctx;
+ rdma_destroy_id(ctx->cm_id_ip);
+ rdma_destroy_id(ctx->cm_id_ib);
+
+out:
+ mutex_unlock(&ib_ctx.ib_dev_mutex);
+}
+
+static struct ib_client rtrs_srv_client = {
+ .name = "rtrs_server",
+ .add = rtrs_srv_add_one,
+ .remove = rtrs_srv_remove_one
+};
+
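/*
 * Illustrative note (not part of this patch): ib_register_client() invokes
 * ->add() for every IB device already present and for each device added
 * later, and ->remove() as devices go away or the client unregisters.
 * ib_dev_count therefore acts as a reference count: the first ->add()
 * creates the shared CM listeners via rtrs_srv_rdma_init(), and the last
 * ->remove() destroys them.
 */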
/**
* rtrs_srv_open() - open RTRS server context
* @ops: callback functions
if (!ctx)
return ERR_PTR(-ENOMEM);
- err = rtrs_srv_rdma_init(ctx, port);
+ mutex_init(&ib_ctx.ib_dev_mutex);
+ ib_ctx.srv_ctx = ctx;
+ ib_ctx.port = port;
+
+ err = ib_register_client(&rtrs_srv_client);
if (err) {
free_srv_ctx(ctx);
return ERR_PTR(err);
*/
void rtrs_srv_close(struct rtrs_srv_ctx *ctx)
{
- rdma_destroy_id(ctx->cm_id_ip);
- rdma_destroy_id(ctx->cm_id_ib);
+ ib_unregister_client(&rtrs_srv_client);
+ mutex_destroy(&ib_ctx.ib_dev_mutex);
close_ctx(ctx);
free_srv_ctx(ctx);
}
struct list_head srv_list;
};
+struct rtrs_srv_ib_ctx {
+ struct rtrs_srv_ctx *srv_ctx;
+ u16 port;
+ struct mutex ib_dev_mutex;
+ int ib_dev_count;
+};
+
extern struct class *rtrs_dev_class;
void close_sess(struct rtrs_srv_sess *sess);
return mlx5e_ethtool_flash_device(priv, flash);
}
-enum mlx5_ptys_width {
- MLX5_PTYS_WIDTH_1X = 1 << 0,
- MLX5_PTYS_WIDTH_2X = 1 << 1,
- MLX5_PTYS_WIDTH_4X = 1 << 2,
- MLX5_PTYS_WIDTH_8X = 1 << 3,
- MLX5_PTYS_WIDTH_12X = 1 << 4,
-};
-
static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
{
switch (width) {
}
}
-static int mlx5i_get_port_settings(struct net_device *netdev,
- u16 *ib_link_width_oper, u16 *ib_proto_oper)
-{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
- int ret;
-
- ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
- if (ret)
- return ret;
-
- *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
- *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
-
- return 0;
-}
-
static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
{
int rate, width;
static int mlx5i_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *link_ksettings)
{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
u16 ib_link_width_oper;
u16 ib_proto_oper;
int speed, ret;
- ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper,
+ 1);
if (ret)
return ret;
sizeof(out), MLX5_REG_MLCR, 0, 1);
}
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- u8 *link_width_oper, u8 local_port)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
- if (err)
- return err;
-
- *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
-
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
- u8 *proto_oper, u8 local_port)
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port)
{
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
int err;
if (err)
return err;
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
*proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
return 0;
}
-EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+EXPORT_SYMBOL(mlx5_query_ib_port_oper);
/* This function should be used after setting a port register only */
void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
dev->max_mw = 0;
dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8);
dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE;
- dev->max_pkey = QED_RDMA_MAX_P_KEY;
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+ dev->max_pkey = QED_RDMA_MAX_P_KEY;
dev->max_srq = p_hwfn->p_rdma_info->num_srqs;
dev->max_srq_wr = QED_RDMA_MAX_SRQ_WQE_ELEM;
params->pbl_two_level);
SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED,
- params->zbva);
+ false);
SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr);
p_ramrod->pd = cpu_to_le16(params->pd);
p_ramrod->length_hi = (u8)(params->length >> 32);
p_ramrod->length_lo = DMA_LO_LE(params->length);
- if (params->zbva) {
- /* Lower 32 bits of the registered MR address.
- * In case of zero based MR, will hold FBO
- */
- p_ramrod->va.hi = 0;
- p_ramrod->va.lo = cpu_to_le32(params->fbo);
- } else {
- DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
- }
+ DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr);
/* DIF */
args.u.mtu = new_mtu;
args.func = &qede_update_mtu;
qede_reload(edev, &args, false);
-
+#if IS_ENABLED(CONFIG_QED_RDMA)
+ qede_rdma_event_change_mtu(edev);
+#endif
edev->ops->common->update_mtu(edev->cdev, new_mtu);
return 0;
qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
}
+static void qede_rdma_change_mtu(struct qede_dev *edev)
+{
+ if (qede_rdma_supported(edev)) {
+ if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+ qedr_drv->notify(edev->rdma_info.qedr_dev,
+ QEDE_CHANGE_MTU);
+ }
+}
+
static struct qede_rdma_event_work *
qede_rdma_get_free_event_node(struct qede_dev *edev)
{
case QEDE_CHANGE_ADDR:
qede_rdma_changeaddr(edev);
break;
+ case QEDE_CHANGE_MTU:
+ qede_rdma_change_mtu(edev);
+ break;
default:
DP_NOTICE(edev, "Invalid rdma event %d", event);
}
{
qede_rdma_add_event(edev, QEDE_CHANGE_ADDR);
}
+
+void qede_rdma_event_change_mtu(struct qede_dev *edev)
+{
+ qede_rdma_add_event(edev, QEDE_CHANGE_MTU);
+}
u8 reserved_at_1a[0x2];
u8 ipsec_encrypt[0x1];
u8 ipsec_decrypt[0x1];
- u8 reserved_at_1e[0x2];
+ u8 sw_owner_v2[0x1];
+ u8 reserved_at_1f[0x1];
u8 termination_table_raw_traffic[0x1];
u8 reserved_at_21[0x1];
u8 log_bf_reg_size[0x5];
- u8 reserved_at_270[0x8];
+ u8 reserved_at_270[0x6];
+ u8 lag_dct[0x2];
u8 lag_tx_port_affinity[0x1];
u8 reserved_at_279[0x2];
u8 lag_master[0x1];
MLX5E_CONNECTOR_TYPE_NUMBER,
};
+enum mlx5_ptys_width {
+ MLX5_PTYS_WIDTH_1X = 1 << 0,
+ MLX5_PTYS_WIDTH_2X = 1 << 1,
+ MLX5_PTYS_WIDTH_4X = 1 << 2,
+ MLX5_PTYS_WIDTH_8X = 1 << 3,
+ MLX5_PTYS_WIDTH_12X = 1 << 4,
+};
+
#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \
(ext ? MLX5_GET(reg, out, ext_##field) : \
int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
int ptys_size, int proto_mask, u8 local_port);
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- u8 *link_width_oper, u8 local_port);
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
- u8 *proto_oper, u8 local_port);
+
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port);
void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
enum mlx5_port_status status);
#define __LINUX_OVERFLOW_H
#include <linux/compiler.h>
+#include <linux/limits.h>
/*
* In the fallback code below, we need to compute the minimum and
bool pbl_two_level;
u8 pbl_page_size_log;
u8 page_size_log;
- u32 fbo;
u64 length;
u64 vaddr;
- bool zbva;
bool phy_mr;
bool dma_mr;
QEDE_UP,
QEDE_DOWN,
QEDE_CHANGE_ADDR,
- QEDE_CLOSE
+ QEDE_CLOSE,
+ QEDE_CHANGE_MTU,
};
struct qede_rdma_event_work {
void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
void qede_rdma_event_changeaddr(struct qede_dev *edr);
+void qede_rdma_event_change_mtu(struct qede_dev *edev);
#else
static inline int qede_rdma_dev_add(struct qede_dev *dev,
u8 port_num, int index);
void rdma_put_gid_attr(const struct ib_gid_attr *attr);
void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries);
#endif /* _IB_CACHE_H */
#include <rdma/ib_sa.h>
#include <rdma/rdma_cm.h>
-/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
-extern struct class cm_class;
-
enum ib_cm_state {
IB_CM_IDLE,
IB_CM_LISTEN,
struct ib_umem {
struct ib_device *ibdev;
struct mm_struct *owning_mm;
+ u64 iova;
size_t length;
unsigned long address;
u32 writable : 1;
return umem->address & ~PAGE_MASK;
}
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+ ALIGN_DOWN(umem->iova, pgsz))) /
+ pgsz;
+}
+
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
- ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
- PAGE_SHIFT;
+ return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+}
+
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
+ struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz);
}
+/**
+ * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
+ * @umem: umem to iterate over
+ * @pgsz: Page size to split the list into
+ *
+ * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
+ * returned DMA blocks will be aligned to pgsz and span the range:
+ * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
+ */
+#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
+ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
+ __rdma_block_iter_next(biter);)
+
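/*
 * Illustrative sketch (not part of this patch): sizing and walking a device
 * page table with the helpers above. The fill_pte() callback and the function
 * name are hypothetical; rdma_block_iter_dma_address() comes from
 * <rdma/ib_verbs.h>.
 *
 * Worked example for ib_umem_num_dma_blocks(): with iova = 0x13000,
 * length = 0x3000 and pgsz = 0x4000 (16K), the aligned range is
 * [0x10000, 0x18000), i.e. two 16K blocks, so the function returns 2.
 */
static void example_fill_page_table(struct ib_umem *umem, unsigned long pgsz,
				    void (*fill_pte)(dma_addr_t addr))
{
	struct ib_block_iter biter;
	size_t nblocks = ib_umem_num_dma_blocks(umem, pgsz);

	(void)nblocks;	/* a real driver would allocate nblocks entries here */

	/* visits each pgsz-aligned DMA block exactly once */
	rdma_umem_for_each_dma_block(umem, &biter, pgsz)
		fill_pte(rdma_block_iter_dma_address(&biter));
}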
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access);
void ib_umem_release(struct ib_umem *umem);
-int ib_umem_page_count(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
-static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length) {
return -EINVAL;
}
-static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
- unsigned long pgsz_bitmap,
- unsigned long virt) {
- return -EINVAL;
+static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt)
+{
+ return 0;
}
#endif /* CONFIG_INFINIBAND_USER_MEM */
struct mmu_interval_notifier notifier;
struct pid *tgid;
+ /* An array of the pfns included in the on-demand paging umem. */
+ unsigned long *pfn_list;
+
/*
- * An array of the pages included in the on-demand paging umem.
- * Indices of pages that are currently not mapped into the device will
- * contain NULL.
- */
- struct page **page_list;
- /*
- * An array of the same size as page_list, with DMA addresses mapped
- * for pages the pages in page_list. The lower two bits designate
- * access permissions. See ODP_READ_ALLOWED_BIT and
- * ODP_WRITE_ALLOWED_BIT.
+ * An array with DMA addresses mapped for pfns in pfn_list.
+ * The lower two bits designate access permissions.
+ * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
*/
dma_addr_t *dma_list;
/*
const struct mmu_interval_notifier_ops *ops);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq);
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask, bool fault);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
extern union ib_gid zgid;
enum ib_gid_type {
- /* If link layer is Ethernet, this is RoCE V1 */
- IB_GID_TYPE_IB = 0,
- IB_GID_TYPE_ROCE = 0,
- IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+ IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
IB_GID_TYPE_SIZE
};
enum rdma_network_type {
RDMA_NETWORK_IB,
- RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1,
RDMA_NETWORK_IPV4,
RDMA_NETWORK_IPV6
};
if (network_type == RDMA_NETWORK_IPV4 ||
network_type == RDMA_NETWORK_IPV6)
return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
- return IB_GID_TYPE_IB;
+ else if (network_type == RDMA_NETWORK_ROCE_V1)
+ return IB_GID_TYPE_ROCE;
+ else
+ return IB_GID_TYPE_IB;
}
static inline enum rdma_network_type
if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
+ if (attr->gid_type == IB_GID_TYPE_ROCE)
+ return RDMA_NETWORK_ROCE_V1;
+
if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
else
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32,
- IB_SPEED_HDR = 64
+ IB_SPEED_HDR = 64,
+ IB_SPEED_NDR = 128,
};
/**
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
u16 port_cap_flags2;
};
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
enum ib_wc_opcode {
- IB_WC_SEND,
- IB_WC_RDMA_WRITE,
- IB_WC_RDMA_READ,
- IB_WC_COMP_SWAP,
- IB_WC_FETCH_ADD,
- IB_WC_LSO,
- IB_WC_LOCAL_INV,
+ IB_WC_SEND = IB_UVERBS_WC_SEND,
+ IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+ IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+ IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+ IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+ IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+ IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+ IB_WC_LSO = IB_UVERBS_WC_TSO,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
IB_WR_LSO = IB_UVERBS_WR_TSO,
IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
RDMA_REMOVE_DRIVER_REMOVE,
/* uobj is being cleaned-up before being committed */
RDMA_REMOVE_ABORT,
- /*
- * uobj has been fully created, with the uobj->object set, but is being
- * cleaned up before being comitted
- */
- RDMA_REMOVE_ABORT_HWOBJ,
};
struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- /*
- * 'closing' can be read by the driver only during a destroy callback,
- * it is set when we are closing the file descriptor and indicates
- * that mm_sem may be locked.
- */
- bool closing;
bool cleanup_retryable;
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
-/* Flow steering rule priority is set according to it's domain.
- * Lower domain value means higher priority.
- */
-enum ib_flow_domain {
- IB_FLOW_DOMAIN_USER,
- IB_FLOW_DOMAIN_ETHTOOL,
- IB_FLOW_DOMAIN_RFS,
- IB_FLOW_DOMAIN_NIC,
- IB_FLOW_DOMAIN_NUM /* Must be last */
-};
-
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
void (*mmap_free)(struct rdma_user_mmap_entry *entry);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
- void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
- void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
int (*create_srq)(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
- void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+ int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
struct ib_qp *(*create_qp)(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
- void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
unsigned int *sg_offset);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
- struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
int (*dealloc_mw)(struct ib_mw *mw);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
- void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
struct ib_flow *(*create_flow)(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata);
+ struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
struct ib_flow_action *(*create_flow_action_esp)(
struct ib_device *device,
struct ib_wq *(*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask, struct ib_udata *udata);
- struct ib_rwq_ind_table *(*create_rwq_ind_table)(
- struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dm *(*alloc_dm)(struct ib_device *device,
struct ib_ucontext *context,
struct uverbs_attr_bundle *attrs);
int (*create_counters)(struct ib_counters *counters,
struct uverbs_attr_bundle *attrs);
- void (*destroy_counters)(struct ib_counters *counters);
+ int (*destroy_counters)(struct ib_counters *counters);
int (*read_counters)(struct ib_counters *counters,
struct ib_counters_read_attr *counters_read_attr,
struct uverbs_attr_bundle *attrs);
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
return rdma_protocol_iwarp(dev, port_num);
}
-/**
- * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
- *
- * @addr: address
- * @pgsz_bitmap: bitmap of HW supported page sizes
- */
-static inline unsigned int rdma_find_pg_bit(unsigned long addr,
- unsigned long pgsz_bitmap)
-{
- unsigned long align;
- unsigned long pgsz;
-
- align = addr & -addr;
-
- /* Find page bit such that addr is aligned to the highest supported
- * HW page size
- */
- pgsz = pgsz_bitmap & ~(-align << 1);
- if (!pgsz)
- return __ffs(pgsz_bitmap);
-
- return __fls(pgsz);
-}
-
/**
* rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
* @device: Device
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-/**
- * ib_dealloc_pd_user - Deallocate kernel/user PD
- * @pd: The protection domain
- * @udata: Valid user data or NULL for kernel objects
- */
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
/**
* ib_dealloc_pd - Deallocate kernel PD
*/
static inline void ib_dealloc_pd(struct ib_pd *pd)
{
- ib_dealloc_pd_user(pd, NULL);
+ int ret = ib_dealloc_pd_user(pd, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
}
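/*
 * The same conversion pattern repeats below for AHs, SRQs and CQs: the
 * user-aware destroy helpers now return int, while the kernel-only wrappers
 * swallow the return value and WARN_ONCE(), since destroying a kernel-owned
 * object is not expected to fail.
 */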
enum rdma_create_ah_flags {
*
* NOTE: for user ah use rdma_destroy_ah_user with valid udata!
*/
-static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
- return rdma_destroy_ah_user(ah, flags, NULL);
+ int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
}
struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
*
* NOTE: for user srq use ib_destroy_srq_user with valid udata!
*/
-static inline int ib_destroy_srq(struct ib_srq *srq)
+static inline void ib_destroy_srq(struct ib_srq *srq)
{
- return ib_destroy_srq_user(srq, NULL);
+ int ret = ib_destroy_srq_user(srq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
}
/**
return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata);
-
-/**
- * ib_alloc_cq_user: Allocate kernel/user CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- * @udata: Valid user data or NULL for kernel objects
- */
-static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
- void *private, int nr_cqe,
- int comp_vector,
- enum ib_poll_context poll_ctx,
- struct ib_udata *udata)
-{
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- KBUILD_MODNAME, udata);
-}
-
-/**
- * ib_alloc_cq: Allocate kernel CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- *
- * NOTE: for user cq use ib_alloc_cq_user with valid udata!
- */
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller);
static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx)
{
- return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME);
}
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
KBUILD_MODNAME);
}
-/**
- * ib_free_cq_user - Free kernel/user CQ
- * @cq: The CQ to free
- * @udata: Valid user data or NULL for kernel objects
- *
- * NOTE: This function shouldn't be called on shared CQs.
- */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
-
-/**
- * ib_free_cq - Free kernel CQ
- * @cq: The CQ to free
- *
- * NOTE: for user cq use ib_free_cq_user with valid udata!
- */
-static inline void ib_free_cq(struct ib_cq *cq)
-{
- ib_free_cq_user(cq, NULL);
-}
-
+void ib_free_cq(struct ib_cq *cq);
int ib_process_cq_direct(struct ib_cq *cq, int budget);
/**
*/
static inline void ib_destroy_cq(struct ib_cq *cq)
{
- ib_destroy_cq_user(cq, NULL);
+ int ret = ib_destroy_cq_user(cq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
}
/**
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask);
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width);
static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
{
const struct net *net);
#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
/**
u8 port_num;
};
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type,
- const char *caller);
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller);
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type);
/**
* rdma_create_id - Create an RDMA identifier.
* The event handler callback serializes on the id's mutex and is
* allowed to sleep.
*/
-#define rdma_create_id(net, event_handler, context, ps, qp_type) \
- __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
- KBUILD_MODNAME)
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+ __rdma_create_kernel_id(net, event_handler, context, ps, qp_type, \
+ KBUILD_MODNAME)
/**
* rdma_destroy_id - Destroys an RDMA identifier.
*/
int rdma_listen(struct rdma_cm_id *id, int backlog);
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller);
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller, struct rdma_ucm_ece *ece);
-
-/**
- * rdma_accept - Called to accept a connection request or response.
- * @id: Connection identifier associated with the request.
- * @conn_param: Information needed to establish the connection. This must be
- * provided if accepting a connection request. If accepting a connection
- * response, this parameter must be NULL.
- *
- * Typically, this routine is only called by the listener to accept a connection
- * request. It must also be called on the active side of a connection if the
- * user is performing their own QP transitions.
- *
- * In the case of error, a reject message is sent to the remote side and the
- * state of the qp associated with the id is modified to error, such that any
- * previously posted receive buffers would be flushed.
- */
-#define rdma_accept(id, conn_param) \
- __rdma_accept((id), (conn_param), KBUILD_MODNAME)
+void rdma_lock_handler(struct rdma_cm_id *id);
+void rdma_unlock_handler(struct rdma_cm_id *id);
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece);
/**
* rdma_notify - Notifies the RDMA CM of an asynchronous event that has
int rdma_restrack_count(struct ib_device *dev,
enum rdma_restrack_type type);
-
-void rdma_restrack_kadd(struct rdma_restrack_entry *res);
-void rdma_restrack_uadd(struct rdma_restrack_entry *res);
-
-/**
- * rdma_restrack_del() - delete object from the reource tracking database
- * @res: resource entry
- * @type: actual type of object to operate
- */
-void rdma_restrack_del(struct rdma_restrack_entry *res);
-
/**
* rdma_is_kernel_res() - check the owner of resource
* @res: resource entry
*/
-static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
+static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res)
{
return !res->user;
}
*/
int rdma_restrack_put(struct rdma_restrack_entry *res);
-/**
- * rdma_restrack_set_task() - set the task for this resource
- * @res: resource entry
- * @caller: kernel name, the current task will be used if the caller is NULL.
- */
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller);
-
/*
* Helper functions for rdma drivers when filling out
* nldev driver attributes.
/*
* enum ib_event_type, from include/rdma/ib_verbs.h
*/
-
#define IB_EVENT_LIST \
ib_event(CQ_ERR) \
ib_event(QP_FATAL) \
#define rdma_show_wc_status(x) \
__print_symbolic(x, IB_WC_STATUS_LIST)
+/*
+ * enum ib_cm_event_type, from include/rdma/ib_cm.h
+ */
+#define IB_CM_EVENT_LIST \
+ ib_cm_event(REQ_ERROR) \
+ ib_cm_event(REQ_RECEIVED) \
+ ib_cm_event(REP_ERROR) \
+ ib_cm_event(REP_RECEIVED) \
+ ib_cm_event(RTU_RECEIVED) \
+ ib_cm_event(USER_ESTABLISHED) \
+ ib_cm_event(DREQ_ERROR) \
+ ib_cm_event(DREQ_RECEIVED) \
+ ib_cm_event(DREP_RECEIVED) \
+ ib_cm_event(TIMEWAIT_EXIT) \
+ ib_cm_event(MRA_RECEIVED) \
+ ib_cm_event(REJ_RECEIVED) \
+ ib_cm_event(LAP_ERROR) \
+ ib_cm_event(LAP_RECEIVED) \
+ ib_cm_event(APR_RECEIVED) \
+ ib_cm_event(SIDR_REQ_ERROR) \
+ ib_cm_event(SIDR_REQ_RECEIVED) \
+ ib_cm_event_end(SIDR_REP_RECEIVED)
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_EVENT_LIST
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) { IB_CM_##x, #x },
+#define ib_cm_event_end(x) { IB_CM_##x, #x }
+
+#define rdma_show_ib_cm_event(x) \
+ __print_symbolic(x, IB_CM_EVENT_LIST)
+
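/*
 * Illustrative note (not part of this patch): IB_CM_EVENT_LIST is expanded
 * twice with different definitions of ib_cm_event()/ib_cm_event_end().
 * The first expansion emits TRACE_DEFINE_ENUM() so trace tooling can map the
 * numeric values back to names; the second turns the list into the
 * { value, "name" } pairs that __print_symbolic() needs, which is what
 * rdma_show_ib_cm_event() uses when formatting trace output.
 */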
/*
* enum rdma_cm_event_type, from include/rdma/rdma_cm.h
*/
#include <linux/scatterlist.h>
#include <linux/sunrpc/rpc_rdma_cid.h>
#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
#include <trace/events/rdma.h>
/**
enum {
EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0,
+ EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
};
struct efa_ibv_ex_query_device_resp {
struct hns_roce_ib_create_cq {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
+ __u32 cqe_size;
+ __u32 reserved;
};
struct hns_roce_ib_create_cq_resp {
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
- __u32 reserved;
+ __u32 cqe_size;
};
struct hns_roce_ib_alloc_pd_resp {
UVERBS_METHOD_QUERY_PORT,
UVERBS_METHOD_GET_CONTEXT,
UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_METHOD_QUERY_GID_ENTRY,
};
enum uverbs_attrs_invoke_write_cmd_attr_ids {
UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
};
+enum uverbs_attrs_query_gid_table_cmd_attr_ids {
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+};
+
+enum uverbs_attrs_query_gid_entry_cmd_attr_ids {
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS,
+ UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+};
+
#endif
enum ib_uverbs_advise_mr_advice {
IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH,
IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+ IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT,
};
enum ib_uverbs_advise_mr_flag {
RDMA_DRIVER_SIW,
};
+enum ib_uverbs_gid_type {
+ IB_UVERBS_GID_TYPE_IB,
+ IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_UVERBS_GID_TYPE_ROCE_V2,
+};
+
+struct ib_uverbs_gid_entry {
+ __aligned_u64 gid[2];
+ __u32 gid_index;
+ __u32 port_num;
+ __u32 gid_type;
+ __u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */
+};
+
#endif
__u32 ne;
};
+enum ib_uverbs_wc_opcode {
+ IB_UVERBS_WC_SEND = 0,
+ IB_UVERBS_WC_RDMA_WRITE = 1,
+ IB_UVERBS_WC_RDMA_READ = 2,
+ IB_UVERBS_WC_COMP_SWAP = 3,
+ IB_UVERBS_WC_FETCH_ADD = 4,
+ IB_UVERBS_WC_BIND_MW = 5,
+ IB_UVERBS_WC_LOCAL_INV = 6,
+ IB_UVERBS_WC_TSO = 7,
+};
+
struct ib_uverbs_wc {
__aligned_u64 wr_id;
__u32 status;
struct ib_mr *mr;
__aligned_u64 reserved;
};
- __u32 key;
- __u32 access;
+ __u32 key;
+ __u32 access;
} reg;
} wr;
};
};
struct mminfo {
- __aligned_u64 offset;
+ __aligned_u64 offset;
__u32 size;
__u32 pad;
};