RDMA/counter: Add "auto" configuration mode support
author Mark Zhang <markz@mellanox.com>
Tue, 2 Jul 2019 10:02:35 +0000 (13:02 +0300)
committer Jason Gunthorpe <jgg@mellanox.com>
Fri, 5 Jul 2019 13:22:54 +0000 (10:22 -0300)
In auto mode, all QPs that belong to the same category are bound
automatically to a single counter set. Currently the only supported
category is "qp type".

In this mode the qp counter is set during the RST2INIT modification, and
when a qp is destroyed it is unbound from the counter.

Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/core/counters.c
drivers/infiniband/core/device.c
drivers/infiniband/core/verbs.c
include/rdma/ib_verbs.h
include/rdma/rdma_counter.h

index 6167914..615ee73 100644 (file)
@@ -54,6 +54,227 @@ out:
        return ret;
 }
 
+/*
+ * Allocate a zeroed counter object for @port of @dev in the given @mode.
+ *
+ * Returns NULL when the driver does not provide a counter_dealloc op or
+ * when the allocation fails. On success the counter has its kref and mutex
+ * initialized and its restrack type set to RDMA_RESTRACK_COUNTER.
+ */
+static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
+                                              enum rdma_nl_counter_mode mode)
+{
+       struct rdma_counter *cnt = NULL;
+
+       /* Without a dealloc op the counter could never be released. */
+       if (dev->ops.counter_dealloc)
+               cnt = kzalloc(sizeof(*cnt), GFP_KERNEL);
+       if (!cnt)
+               return NULL;
+
+       kref_init(&cnt->kref);
+       mutex_init(&cnt->lock);
+       cnt->mode.mode = mode;
+       cnt->res.type = RDMA_RESTRACK_COUNTER;
+       cnt->port = port;
+       cnt->device = dev;
+
+       return cnt;
+}
+
+/* Remove @counter from resource tracking and release its memory. */
+static void rdma_counter_free(struct rdma_counter *counter)
+{
+       struct rdma_restrack_entry *entry = &counter->res;
+
+       rdma_restrack_del(entry);
+       kfree(counter);
+}
+
+/*
+ * Put @counter into auto mode with @new_mask and record the properties of
+ * @qp that the mask selects (only the QP type is handled).
+ */
+static void auto_mode_init_counter(struct rdma_counter *counter,
+                                  const struct ib_qp *qp,
+                                  enum rdma_nl_counter_mask new_mask)
+{
+       struct rdma_counter_mode *cmode = &counter->mode;
+
+       cmode->mode = RDMA_COUNTER_MODE_AUTO;
+       cmode->mask = new_mask;
+
+       if (!(new_mask & RDMA_COUNTER_MASK_QP_TYPE))
+               return;
+
+       cmode->param.qp_type = qp->qp_type;
+}
+
+/*
+ * Check whether @qp may share @counter under the auto-mode rules given in
+ * @auto_mask.
+ *
+ * A kernel QP only matches a kernel counter and a user QP only matches a
+ * user counter. A user counter must additionally be owned by the calling
+ * process. The remaining criteria come from @auto_mask; only the QP type
+ * is handled.
+ *
+ * Return: true if @qp can be bound to @counter.
+ */
+static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
+                           enum rdma_nl_counter_mask auto_mask)
+{
+       struct auto_mode_param *param = &counter->mode.param;
+       bool match = true;
+
+       if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res))
+               return false;
+
+       /*
+        * Ensure that the counter belongs to the right PID. Both ids are
+        * read with task_pid_nr() so they are expressed in the same (global)
+        * PID space; task_pid_vnr() returns the id as seen from the caller's
+        * PID namespace and must not be compared with a global pid. The
+        * check above guarantees that the QP is a user resource whenever
+        * the counter is one.
+        */
+       if (!rdma_is_kernel_res(&counter->res) &&
+           (task_pid_nr(counter->res.task) != task_pid_nr(current)))
+               return false;
+
+       if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
+               match &= (param->qp_type == qp->qp_type);
+
+       return match;
+}
+
+/*
+ * Ask the driver to bind @qp to @counter. The driver callback runs with the
+ * counter's mutex held.
+ *
+ * Return: -EINVAL if @qp already has a counter, -EOPNOTSUPP if the driver
+ * has no counter_bind_qp op, otherwise the value returned by the driver.
+ */
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+                                 struct ib_qp *qp)
+{
+       const struct ib_device_ops *ops = &qp->device->ops;
+       int err;
+
+       if (qp->counter)
+               return -EINVAL;
+       if (!ops->counter_bind_qp)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&counter->lock);
+       err = ops->counter_bind_qp(counter, qp);
+       mutex_unlock(&counter->lock);
+
+       return err;
+}
+
+/*
+ * Ask the driver to detach @qp from the counter it is currently bound to.
+ * The driver callback runs with that counter's mutex held.
+ *
+ * Return: -EOPNOTSUPP if the driver has no counter_unbind_qp op, otherwise
+ * the value returned by the driver.
+ */
+static int __rdma_counter_unbind_qp(struct ib_qp *qp)
+{
+       const struct ib_device_ops *ops = &qp->device->ops;
+       /* Captured before the driver call so the same lock is released. */
+       struct rdma_counter *bound = qp->counter;
+       int err;
+
+       if (!ops->counter_unbind_qp)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&bound->lock);
+       err = ops->counter_unbind_qp(qp);
+       mutex_unlock(&bound->lock);
+
+       return err;
+}
+
+/**
+ * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
+ *     with in auto mode
+ * @qp: The QP looking for a counter
+ * @port: The port whose counters are searched
+ *
+ * Walks the device's RDMA_RESTRACK_COUNTER entries with the xarray lock
+ * held. Entries for which rdma_is_visible_in_pid_ns() is false, or whose
+ * counter belongs to another device or port, are skipped. The walk stops at
+ * the first counter accepted by auto_mode_match() for the port's configured
+ * mask. A match on which kref_get_unless_zero() fails is treated as not
+ * found.
+ *
+ * Return: The counter (with ref-count increased) if found, NULL otherwise
+ */
+static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
+                                                      u8 port)
+{
+       struct rdma_port_counter *port_counter;
+       struct rdma_counter *counter = NULL;
+       struct ib_device *dev = qp->device;
+       struct rdma_restrack_entry *res;
+       struct rdma_restrack_root *rt;
+       unsigned long id = 0;
+
+       port_counter = &dev->port_data[port].port_counter;
+       rt = &dev->res[RDMA_RESTRACK_COUNTER];
+       xa_lock(&rt->xa);
+       xa_for_each(&rt->xa, id, res) {
+               if (!rdma_is_visible_in_pid_ns(res))
+                       continue;
+
+               counter = container_of(res, struct rdma_counter, res);
+               if ((counter->device != qp->device) || (counter->port != port))
+                       goto next;
+
+               if (auto_mode_match(qp, counter, port_counter->mode.mask))
+                       break;
+next:
+               counter = NULL;
+       }
+
+       if (counter && !kref_get_unless_zero(&counter->kref))
+               counter = NULL;
+
+       xa_unlock(&rt->xa);
+       return counter;
+}
+
+/*
+ * Register @counter with resource tracking, giving it the same owner as
+ * @qp: the QP's task for a user QP, or the QP's kernel name otherwise.
+ */
+static void rdma_counter_res_add(struct rdma_counter *counter,
+                                struct ib_qp *qp)
+{
+       struct rdma_restrack_entry *entry = &counter->res;
+
+       if (!rdma_is_kernel_res(&qp->res)) {
+               rdma_restrack_attach_task(entry, qp->res.task);
+               rdma_restrack_uadd(entry);
+               return;
+       }
+
+       rdma_restrack_set_task(entry, qp->res.kern_name);
+       rdma_restrack_kadd(entry);
+}
+
+/*
+ * kref release callback: have the driver de-allocate the counter, then
+ * free the counter object itself.
+ */
+static void counter_release(struct kref *kref)
+{
+       struct rdma_counter *counter =
+               container_of(kref, struct rdma_counter, kref);
+       struct ib_device *dev = counter->device;
+
+       dev->ops.counter_dealloc(counter);
+       rdma_counter_free(counter);
+}
+
+/**
+ * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
+ *   the auto-mode rule
+ * @qp: The QP to bind
+ * @port: The port number whose counter mode is consulted
+ *
+ * Does nothing when @port is not configured in RDMA_COUNTER_MODE_AUTO.
+ * Otherwise the QP is bound to the counter returned by
+ * rdma_get_counter_auto_mode(), keeping the reference taken by that lookup;
+ * the reference is dropped again if the bind fails. When no counter is
+ * found, a new one is allocated, initialized from @qp and the port's mask,
+ * bound, and only then added to resource tracking.
+ *
+ * Return: 0 on success or when the port is not in auto mode, -EINVAL for an
+ * invalid port, -ENOMEM when rdma_counter_alloc() returns NULL, or the error
+ * from the bind operation.
+ *
+ * NOTE(review): rdma_counter_alloc() also returns NULL when the driver has
+ * no counter_dealloc op, so -ENOMEM is reported in that case as well.
+ */
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
+{
+       struct rdma_port_counter *port_counter;
+       struct ib_device *dev = qp->device;
+       struct rdma_counter *counter;
+       int ret;
+
+       if (!rdma_is_port_valid(dev, port))
+               return -EINVAL;
+
+       port_counter = &dev->port_data[port].port_counter;
+       if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
+               return 0;
+
+       counter = rdma_get_counter_auto_mode(qp, port);
+       if (counter) {
+               ret = __rdma_counter_bind_qp(counter, qp);
+               if (ret) {
+                       kref_put(&counter->kref, counter_release);
+                       return ret;
+               }
+       } else {
+               counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
+               if (!counter)
+                       return -ENOMEM;
+
+               auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+
+               ret = __rdma_counter_bind_qp(counter, qp);
+               if (ret) {
+                       rdma_counter_free(counter);
+                       return ret;
+               }
+
+               rdma_counter_res_add(counter, qp);
+       }
+
+       return 0;
+}
+
+/**
+ * rdma_counter_unbind_qp - Unbind a qp from a counter
+ * @qp: The QP to detach from its counter
+ * @force:
+ *   true - Drop the counter reference even if the unbind operation fails
+ *   (e.g., qp destroy)
+ *
+ * Return: -EINVAL if @qp has no counter, the unbind error if it fails and
+ * @force is false, 0 otherwise.
+ */
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
+{
+       struct rdma_counter *bound = qp->counter;
+       int err;
+
+       if (!bound)
+               return -EINVAL;
+
+       err = __rdma_counter_unbind_qp(qp);
+       if (!err || force) {
+               kref_put(&bound->kref, counter_release);
+               return 0;
+       }
+
+       return err;
+}
+
 void rdma_counter_init(struct ib_device *dev)
 {
        struct rdma_port_counter *port_counter;
index 6579865..f3181b7 100644 (file)
@@ -2471,6 +2471,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, alloc_xrcd);
        SET_DEVICE_OP(dev_ops, attach_mcast);
        SET_DEVICE_OP(dev_ops, check_mr_status);
+       SET_DEVICE_OP(dev_ops, counter_bind_qp);
+       SET_DEVICE_OP(dev_ops, counter_dealloc);
+       SET_DEVICE_OP(dev_ops, counter_unbind_qp);
        SET_DEVICE_OP(dev_ops, create_ah);
        SET_DEVICE_OP(dev_ops, create_counters);
        SET_DEVICE_OP(dev_ops, create_cq);
index 4a04e94..92349bf 100644 (file)
@@ -1690,6 +1690,14 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
                }
        }
 
+       /*
+        * Bind this qp to a counter automatically based on the rdma counter
+        * rules. This is only done in RST2INIT with the port specified.
+        */
+       if (!qp->counter && (attr_mask & IB_QP_PORT) &&
+           ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
+               rdma_counter_bind_qp_auto(qp, attr->port_num);
+
        ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
        if (ret)
                goto out;
@@ -1885,6 +1893,7 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
        if (!qp->uobject)
                rdma_rw_cleanup_mrs(qp);
 
+       rdma_counter_unbind_qp(qp, true);
        rdma_restrack_del(&qp->res);
        ret = qp->device->ops.destroy_qp(qp, udata);
        if (!ret) {
index 3d19c05..0205472 100644 (file)
@@ -1698,6 +1698,9 @@ struct ib_qp {
         * Implementation details of the RDMA core, don't use in drivers:
         */
        struct rdma_restrack_entry     res;
+
+       /* The counter the qp is bound to */
+       struct rdma_counter    *counter;
 };
 
 struct ib_dm {
@@ -2485,6 +2488,21 @@ struct ib_device_ops {
                         u8 pdata_len);
        int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
        int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+       /**
+        * counter_bind_qp - Bind a QP to a counter.
+        * @counter - The counter to be bound. If counter->id is zero then
+        *   the driver needs to allocate a new counter and set counter->id
+        */
+       int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+       /**
+        * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+        *   counter and bind it onto the default one
+        */
+       int (*counter_unbind_qp)(struct ib_qp *qp);
+       /**
+        * counter_dealloc - De-allocate the hw counter
+        */
+       int (*counter_dealloc)(struct rdma_counter *counter);
 
        DECLARE_RDMA_OBJ_SIZE(ib_ah);
        DECLARE_RDMA_OBJ_SIZE(ib_cq);
index 8dd2619..9f93a24 100644 (file)
@@ -7,11 +7,14 @@
 #define _RDMA_COUNTER_H_
 
 #include <linux/mutex.h>
+#include <linux/pid_namespace.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/restrack.h>
 #include <rdma/rdma_netlink.h>
 
+struct ib_qp;
+
 /* QP properties a counter records for auto-mode matching. */
 struct auto_mode_param {
        int qp_type;
 };
@@ -31,6 +34,9 @@ struct rdma_counter {
        struct rdma_restrack_entry      res;
        struct ib_device                *device;
        uint32_t                        id;
+       struct kref                     kref;
+       struct rdma_counter_mode        mode;
+       struct mutex                    lock;
        u8                              port;
 };
 
@@ -38,5 +44,7 @@ void rdma_counter_init(struct ib_device *dev);
 void rdma_counter_release(struct ib_device *dev);
 int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
                               bool on, enum rdma_nl_counter_mask mask);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
 
 #endif /* _RDMA_COUNTER_H_ */