Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
[linux-2.6-microblaze.git] / drivers / infiniband / core / nldev.c
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44
/*
 * Validation policy for attributes of incoming nldev netlink requests,
 * consumed by nlmsg_parse_deprecated() in the doit/dumpit handlers below.
 * Entries without an explicit .type (the address attributes) only carry a
 * length constraint on the raw payload.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]      = { .type = NLA_NUL_STRING,
					    .len = IB_DEVICE_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_PORT_INDEX]    = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FW_VERSION]    = { .type = NLA_NUL_STRING,
					    .len = IB_FW_VERSION_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_NODE_GUID]     = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_LID]           = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID]        = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LMC]           = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]    = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]   = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]     = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
					     .len = 16 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_QP]                = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]          = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LQPN]              = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]              = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]            = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]            = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_TYPE]              = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_STATE]             = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PID]               = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]         = { .type = NLA_NUL_STRING,
						    .len = TASK_COMM_LEN },
	[RDMA_NLDEV_ATTR_RES_CM_ID]             = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]       = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PS]                = { .type = NLA_U32 },
	/* Raw sockaddr payloads; bounded by the kernel sockaddr storage size. */
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]  = {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]  = {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_CQ]                = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]          = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]               = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]            = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]          = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_MR]                = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]          = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RKEY]              = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LKEY]              = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_IOVA]              = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRLEN]             = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_PD]                = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]          = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]    = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]            = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]             = { .type = NLA_NUL_STRING,
						    .len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_DRIVER]                = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]          = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_STRING]         = { .type = NLA_NUL_STRING,
				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]     = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_S32]            = { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_U32]            = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64]            = { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U64]            = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_PDN]               = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN]               = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CTXN]              = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE]             = { .type = NLA_NUL_STRING,
				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]        = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]          = { .type = NLA_NUL_STRING,
				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
	[RDMA_NLDEV_NET_NS_FD]                  = { .type = NLA_U32 },
};
124
125 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
126                                       enum rdma_nldev_print_type print_type)
127 {
128         if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
129                 return -EMSGSIZE;
130         if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
131             nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
132                 return -EMSGSIZE;
133
134         return 0;
135 }
136
137 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
138                                    enum rdma_nldev_print_type print_type,
139                                    u32 value)
140 {
141         if (put_driver_name_print_type(msg, name, print_type))
142                 return -EMSGSIZE;
143         if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
144                 return -EMSGSIZE;
145
146         return 0;
147 }
148
149 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
150                                    enum rdma_nldev_print_type print_type,
151                                    u64 value)
152 {
153         if (put_driver_name_print_type(msg, name, print_type))
154                 return -EMSGSIZE;
155         if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
156                               RDMA_NLDEV_ATTR_PAD))
157                 return -EMSGSIZE;
158
159         return 0;
160 }
161
/* Exported helper: emit a named u32 driver attribute in decimal form. */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);
168
/* Exported helper: emit a named u32 driver attribute flagged for hex display. */
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
176
/* Exported helper: emit a named u64 driver attribute in decimal form. */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);
183
/* Exported helper: emit a named u64 driver attribute flagged for hex display. */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
190
191 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
192 {
193         if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
194                 return -EMSGSIZE;
195         if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
196                            dev_name(&device->dev)))
197                 return -EMSGSIZE;
198
199         return 0;
200 }
201
/*
 * Fill @msg with the device-wide attributes of @device: handle
 * (index + name), port count, capability flags, optional FW version,
 * node/sys-image GUIDs, node type and a protocol string derived from
 * the first port.
 *
 * Returns 0 on success or -EMSGSIZE when @msg runs out of room.
 * Attribute order is part of the emitted message layout.
 */
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u8 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	/* rdma_end_port() doubles as the number of ports here. */
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	/* GUIDs are stored big-endian; convert for the u64 attribute. */
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;

	/*
	 * Link type is determined on first port and mlx4 device
	 * which can potentially have two different link type for the same
	 * IB device is considered as better to be avoided in the future,
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}
255
/*
 * Fill @msg with per-port attributes of @device/@port: the device handle,
 * port index, IB-specific attributes (cap flags, subnet prefix, LIDs, LMC)
 * when the port speaks IB, port state, and — when the associated netdev
 * lives in @net — its ifindex and name.
 *
 * Returns 0 on success, -EMSGSIZE when @msg is full, or the error from
 * ib_query_port().
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		/* Both 32-bit cap flag words are packed into one u64 attr. */
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	/*
	 * ib_device_get_netdev() hands back a referenced netdev; the
	 * reference is dropped via dev_put() below on every path.
	 * Netdev details are only exposed to callers in the same netns.
	 */
	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	if (netdev)
		dev_put(netdev);
	return ret;
}
313
314 static int fill_res_info_entry(struct sk_buff *msg,
315                                const char *name, u64 curr)
316 {
317         struct nlattr *entry_attr;
318
319         entry_attr = nla_nest_start_noflag(msg,
320                                            RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
321         if (!entry_attr)
322                 return -EMSGSIZE;
323
324         if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
325                 goto err;
326         if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
327                               RDMA_NLDEV_ATTR_PAD))
328                 goto err;
329
330         nla_nest_end(msg, entry_attr);
331         return 0;
332
333 err:
334         nla_nest_cancel(msg, entry_attr);
335         return -EMSGSIZE;
336 }
337
/*
 * Fill @msg with a nested table summarising how many restrack resources of
 * each named type currently exist on @device, counted within the calling
 * task's PID namespace.  Returns 0 or a negative errno; the whole nest is
 * cancelled on failure.
 */
static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
	};

	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		/* Skip restrack types without a user-visible name. */
		if (!names[i])
			continue;
		curr = rdma_restrack_count(device, i,
					   task_active_pid_ns(current));
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}
376
377 static int fill_res_name_pid(struct sk_buff *msg,
378                              struct rdma_restrack_entry *res)
379 {
380         /*
381          * For user resources, user is should read /proc/PID/comm to get the
382          * name of the task file.
383          */
384         if (rdma_is_kernel_res(res)) {
385                 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
386                     res->kern_name))
387                         return -EMSGSIZE;
388         } else {
389                 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
390                     task_pid_vnr(res->task)))
391                         return -EMSGSIZE;
392         }
393         return 0;
394 }
395
396 static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
397                            struct rdma_restrack_entry *res)
398 {
399         if (!dev->ops.fill_res_entry)
400                 return false;
401         return dev->ops.fill_res_entry(msg, res);
402 }
403
/*
 * Fill @msg with the attributes of one QP restrack entry.  When @port is
 * non-zero, QPs bound to a different port are skipped with -EAGAIN so the
 * dump loop can cancel this entry and continue.  Returns 0, -EMSGSIZE, or
 * the error from ib_query_qp().  @has_cap_net_admin is unused here but kept
 * for the common fill-callback signature.
 */
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	/* Caller asked for a specific port; skip QPs bound elsewhere. */
	if (port && port != qp_attr.port_num)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	/* Remote QPN/PSN only exist for connected QP types. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	/* PD number is only exposed for user-owned QPs. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
464
/*
 * Fill @msg with the attributes of one CM ID restrack entry: port, QPN and
 * QP type (once a QP is bound), port space, CM state, source/destination
 * addresses (when set) and the restrack id.  A non-matching @port filter is
 * skipped silently (returns 0, unlike the QP variant's -EAGAIN).  Returns
 * 0 or -EMSGSIZE.  @has_cap_net_admin is unused; kept for the common
 * fill-callback signature.
 */
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return 0;

	/* port_num may still be 0 before the id is bound to a device port. */
	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	/* ss_family == 0 means the address was never set; skip it then. */
	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err: return -EMSGSIZE;
}
517
/*
 * Fill @msg with the attributes of one CQ restrack entry: CQE count,
 * usage count, poll context (kernel CQs only), CQ number and — for
 * user CQs — the owning context number.  Returns 0 or -EMSGSIZE.
 * @has_cap_net_admin and @port are unused; kept for the common
 * fill-callback signature.
 */
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		goto err;
	/* Only user CQs have a uobject to derive the context number from. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
552
/*
 * Fill @msg with the attributes of one MR restrack entry.  The rkey/lkey
 * values are security sensitive and are only exposed to callers holding
 * CAP_NET_ADMIN.  Returns 0 or -EMSGSIZE.  @port is unused; kept for the
 * common fill-callback signature.
 */
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			goto err;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		goto err;

	/* PD number is only exposed for user-owned MRs. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
587
/*
 * Fill @msg with the attributes of one PD restrack entry.  The local DMA
 * lkey and (when enabled) the unsafe global rkey are only exposed to
 * callers holding CAP_NET_ADMIN.  Returns 0 or -EMSGSIZE.  @port is
 * unused; kept for the common fill-callback signature.
 */
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);
	struct ib_device *dev = pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		/* The global rkey only exists when the PD opted into it. */
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		goto err;

	/* Only user PDs have a uobject to derive the context number from. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
625
626 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
627                           struct netlink_ext_ack *extack)
628 {
629         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
630         struct ib_device *device;
631         struct sk_buff *msg;
632         u32 index;
633         int err;
634
635         err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
636                                      nldev_policy, extack);
637         if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
638                 return -EINVAL;
639
640         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
641
642         device = ib_device_get_by_index(sock_net(skb->sk), index);
643         if (!device)
644                 return -EINVAL;
645
646         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
647         if (!msg) {
648                 err = -ENOMEM;
649                 goto err;
650         }
651
652         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
653                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
654                         0, 0);
655
656         err = fill_dev_info(msg, device);
657         if (err)
658                 goto err_free;
659
660         nlmsg_end(msg, nlh);
661
662         ib_device_put(device);
663         return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
664
665 err_free:
666         nlmsg_free(msg);
667 err:
668         ib_device_put(device);
669         return err;
670 }
671
/*
 * Handle RDMA_NLDEV_CMD_SET: rename the device (DEV_NAME attribute) or
 * move it into another network namespace (NET_NS_FD attribute).  Only
 * one of the two actions is performed per request; with neither
 * attribute present this is a no-op returning 0.
 *
 * Reference handling is asymmetric on purpose: ib_device_set_netns_put()
 * consumes the device reference itself (hence the jump to put_done,
 * which skips the local ib_device_put()), while the rename path drops
 * the reference here via the done label.
 */
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
		char name[IB_DEVICE_NAME_MAX] = {};

		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
			    IB_DEVICE_NAME_MAX);
		err = ib_device_rename(device, name);
		goto done;
	}

	if (tb[RDMA_NLDEV_NET_NS_FD]) {
		u32 ns_fd;

		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
		err = ib_device_set_netns_put(skb, device, ns_fd);
		goto put_done;
	}

done:
	ib_device_put(device);
put_done:
	return err;
}
712
713 static int _nldev_get_dumpit(struct ib_device *device,
714                              struct sk_buff *skb,
715                              struct netlink_callback *cb,
716                              unsigned int idx)
717 {
718         int start = cb->args[0];
719         struct nlmsghdr *nlh;
720
721         if (idx < start)
722                 return 0;
723
724         nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
725                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
726                         0, NLM_F_MULTI);
727
728         if (fill_dev_info(skb, device)) {
729                 nlmsg_cancel(skb, nlh);
730                 goto out;
731         }
732
733         nlmsg_end(skb, nlh);
734
735         idx++;
736
737 out:    cb->args[0] = idx;
738         return skb->len;
739 }
740
/*
 * Dump handler for RDMA_NLDEV_CMD_GET: emit one message per registered
 * ib_device via _nldev_get_dumpit().
 */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's locking.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
749
750 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
751                                struct netlink_ext_ack *extack)
752 {
753         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
754         struct ib_device *device;
755         struct sk_buff *msg;
756         u32 index;
757         u32 port;
758         int err;
759
760         err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
761                                      nldev_policy, extack);
762         if (err ||
763             !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
764             !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
765                 return -EINVAL;
766
767         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
768         device = ib_device_get_by_index(sock_net(skb->sk), index);
769         if (!device)
770                 return -EINVAL;
771
772         port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
773         if (!rdma_is_port_valid(device, port)) {
774                 err = -EINVAL;
775                 goto err;
776         }
777
778         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
779         if (!msg) {
780                 err = -ENOMEM;
781                 goto err;
782         }
783
784         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
785                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
786                         0, 0);
787
788         err = fill_port_info(msg, device, port, sock_net(skb->sk));
789         if (err)
790                 goto err_free;
791
792         nlmsg_end(msg, nlh);
793         ib_device_put(device);
794
795         return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
796
797 err_free:
798         nlmsg_free(msg);
799 err:
800         ib_device_put(device);
801         return err;
802 }
803
804 static int nldev_port_get_dumpit(struct sk_buff *skb,
805                                  struct netlink_callback *cb)
806 {
807         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
808         struct ib_device *device;
809         int start = cb->args[0];
810         struct nlmsghdr *nlh;
811         u32 idx = 0;
812         u32 ifindex;
813         int err;
814         unsigned int p;
815
816         err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
817                                      nldev_policy, NULL);
818         if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
819                 return -EINVAL;
820
821         ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
822         device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
823         if (!device)
824                 return -EINVAL;
825
826         rdma_for_each_port (device, p) {
827                 /*
828                  * The dumpit function returns all information from specific
829                  * index. This specific index is taken from the netlink
830                  * messages request sent by user and it is available
831                  * in cb->args[0].
832                  *
833                  * Usually, the user doesn't fill this field and it causes
834                  * to return everything.
835                  *
836                  */
837                 if (idx < start) {
838                         idx++;
839                         continue;
840                 }
841
842                 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
843                                 cb->nlh->nlmsg_seq,
844                                 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
845                                                  RDMA_NLDEV_CMD_PORT_GET),
846                                 0, NLM_F_MULTI);
847
848                 if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
849                         nlmsg_cancel(skb, nlh);
850                         goto out;
851                 }
852                 idx++;
853                 nlmsg_end(skb, nlh);
854         }
855
856 out:
857         ib_device_put(device);
858         cb->args[0] = idx;
859         return skb->len;
860 }
861
862 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
863                               struct netlink_ext_ack *extack)
864 {
865         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
866         struct ib_device *device;
867         struct sk_buff *msg;
868         u32 index;
869         int ret;
870
871         ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
872                                      nldev_policy, extack);
873         if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
874                 return -EINVAL;
875
876         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
877         device = ib_device_get_by_index(sock_net(skb->sk), index);
878         if (!device)
879                 return -EINVAL;
880
881         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
882         if (!msg) {
883                 ret = -ENOMEM;
884                 goto err;
885         }
886
887         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
888                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
889                         0, 0);
890
891         ret = fill_res_info(msg, device);
892         if (ret)
893                 goto err_free;
894
895         nlmsg_end(msg, nlh);
896         ib_device_put(device);
897         return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
898
899 err_free:
900         nlmsg_free(msg);
901 err:
902         ib_device_put(device);
903         return ret;
904 }
905
906 static int _nldev_res_get_dumpit(struct ib_device *device,
907                                  struct sk_buff *skb,
908                                  struct netlink_callback *cb,
909                                  unsigned int idx)
910 {
911         int start = cb->args[0];
912         struct nlmsghdr *nlh;
913
914         if (idx < start)
915                 return 0;
916
917         nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
918                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
919                         0, NLM_F_MULTI);
920
921         if (fill_res_info(skb, device)) {
922                 nlmsg_cancel(skb, nlh);
923                 goto out;
924         }
925         nlmsg_end(skb, nlh);
926
927         idx++;
928
929 out:
930         cb->args[0] = idx;
931         return skb->len;
932 }
933
/* Dump a resource summary per device; iteration serialized by ib_core. */
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
939
/*
 * Per-restrack-type dispatch record consumed by res_get_common_doit()
 * and res_get_common_dumpit().
 */
struct nldev_fill_res_entry {
	/* Formats one restrack entry into the netlink message. */
	int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, u32 port);
	enum rdma_nldev_attr nldev_attr;	/* nest attr holding the table */
	enum rdma_nldev_command nldev_cmd;	/* netlink command for replies */
	u8 flags;				/* NLDEV_PER_DEV or 0 */
	u32 entry;				/* nest attr for a single entry */
	u32 id;					/* attr carrying the object id */
};
949
enum nldev_res_flags {
	/* Resource is tracked per device, so no PORT_INDEX may be given. */
	NLDEV_PER_DEV = 1 << 0,
};
953
/*
 * Dispatch table indexed by restrack type.  CQ, MR and PD are
 * per-device (NLDEV_PER_DEV); QP and CM_ID are addressed per port.
 */
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.fill_res_func = fill_res_qp_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.fill_res_func = fill_res_cm_id_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.fill_res_func = fill_res_cq_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.fill_res_func = fill_res_mr_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.fill_res_func = fill_res_pd_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
};
994
995 static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res)
996 {
997         /*
998          * 1. Kern resources should be visible in init name space only
999          * 2. Present only resources visible in the current namespace
1000          */
1001         if (rdma_is_kernel_res(res))
1002                 return task_active_pid_ns(current) == &init_pid_ns;
1003         return task_active_pid_ns(current) == task_active_pid_ns(res->task);
1004 }
1005
1006 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1007                                struct netlink_ext_ack *extack,
1008                                enum rdma_restrack_type res_type)
1009 {
1010         const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1011         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1012         struct rdma_restrack_entry *res;
1013         struct ib_device *device;
1014         u32 index, id, port = 0;
1015         bool has_cap_net_admin;
1016         struct sk_buff *msg;
1017         int ret;
1018
1019         ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1020                                      nldev_policy, extack);
1021         if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1022                 return -EINVAL;
1023
1024         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1025         device = ib_device_get_by_index(sock_net(skb->sk), index);
1026         if (!device)
1027                 return -EINVAL;
1028
1029         if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1030                 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1031                 if (!rdma_is_port_valid(device, port)) {
1032                         ret = -EINVAL;
1033                         goto err;
1034                 }
1035         }
1036
1037         if ((port && fe->flags & NLDEV_PER_DEV) ||
1038             (!port && ~fe->flags & NLDEV_PER_DEV)) {
1039                 ret = -EINVAL;
1040                 goto err;
1041         }
1042
1043         id = nla_get_u32(tb[fe->id]);
1044         res = rdma_restrack_get_byid(device, res_type, id);
1045         if (IS_ERR(res)) {
1046                 ret = PTR_ERR(res);
1047                 goto err;
1048         }
1049
1050         if (!is_visible_in_pid_ns(res)) {
1051                 ret = -ENOENT;
1052                 goto err_get;
1053         }
1054
1055         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1056         if (!msg) {
1057                 ret = -ENOMEM;
1058                 goto err;
1059         }
1060
1061         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1062                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1063                         0, 0);
1064
1065         if (fill_nldev_handle(msg, device)) {
1066                 ret = -EMSGSIZE;
1067                 goto err_free;
1068         }
1069
1070         has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1071         ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
1072         rdma_restrack_put(res);
1073         if (ret)
1074                 goto err_free;
1075
1076         nlmsg_end(msg, nlh);
1077         ib_device_put(device);
1078         return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1079
1080 err_free:
1081         nlmsg_free(msg);
1082 err_get:
1083         rdma_restrack_put(res);
1084 err:
1085         ib_device_put(device);
1086         return ret;
1087 }
1088
/*
 * Common dumpit for RES_<TYPE>_GET: walk the device's restrack xarray
 * for @res_type and nest one entry per visible object into the dump
 * skb, resuming from cb->args[0].  The xarray lock is dropped around
 * each fill (a restrack reference keeps the entry alive) and retaken
 * via the again:/next: labels.
 */
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	bool filled = false;

	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, NULL);
	/*
	 * Right now, we are expecting the device index to get res information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
	 * if it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	/*
	 * NOTE(review): nlmsg_put() can return NULL when the dump skb is
	 * full; that is not checked here and a NULL nlh would reach
	 * nlmsg_cancel() on the error paths — confirm against upstream.
	 */
	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if the skip ahead is something common this loop should
	 * use xas_for_each & xas_pause to optimize, we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		if (!is_visible_in_pid_ns(res))
			continue;

		/*
		 * Skip entries already dumped in a previous pass, and any
		 * entry whose reference cannot be taken (being destroyed).
		 */
		if (idx < start || !rdma_restrack_get(res))
			goto next;

		/* The reference taken above keeps res valid unlocked. */
		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			if (ret == -EMSGSIZE)
				goto msg_full;
			/* -EAGAIN: entry vanished under us; just move on. */
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

msg_full:
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}
1221
/*
 * Generate the per-type nldev_res_get_<name>_doit()/_dumpit() wrappers
 * that forward to the common implementations with the matching
 * restrack type.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type);                   \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type);            \
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1240
/* Registered soft-device link types (e.g. rxe, siw); guarded by the rwsem. */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1243
1244 static const struct rdma_link_ops *link_ops_get(const char *type)
1245 {
1246         const struct rdma_link_ops *ops;
1247
1248         list_for_each_entry(ops, &link_ops, list) {
1249                 if (!strcmp(ops->type, type))
1250                         goto out;
1251         }
1252         ops = NULL;
1253 out:
1254         return ops;
1255 }
1256
1257 void rdma_link_register(struct rdma_link_ops *ops)
1258 {
1259         down_write(&link_ops_rwsem);
1260         if (WARN_ON_ONCE(link_ops_get(ops->type)))
1261                 goto out;
1262         list_add(&ops->list, &link_ops);
1263 out:
1264         up_write(&link_ops_rwsem);
1265 }
1266 EXPORT_SYMBOL(rdma_link_register);
1267
/* Remove a link type; pairs with rdma_link_register(). */
void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
1275
/*
 * RDMA_NLDEV_CMD_NEWLINK handler: create a soft RDMA device named
 * DEV_NAME of LINK_TYPE on top of the netdev NDEV_NAME.
 */
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char ibdev_name[IB_DEVICE_NAME_MAX];
	const struct rdma_link_ops *ops;
	char ndev_name[IFNAMSIZ];
	struct net_device *ndev;
	char type[IFNAMSIZ];
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
		return -EINVAL;

	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
		    sizeof(ibdev_name));
	/* Reject '%' so the name cannot act as an allocation template. */
	if (strchr(ibdev_name, '%'))
		return -EINVAL;

	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
		    sizeof(ndev_name));

	/* NOTE(review): the netdev is resolved in init_net, not the
	 * sender's net namespace — confirm this is intentional. */
	ndev = dev_get_by_name(&init_net, ndev_name);
	if (!ndev)
		return -ENODEV;

	down_read(&link_ops_rwsem);
	ops = link_ops_get(type);
#ifdef CONFIG_MODULES
	/*
	 * Unknown type: try to autoload "rdma-link-<type>".  The rwsem is
	 * dropped around request_module() because the loaded module's init
	 * calls rdma_link_register(), which takes it for write.
	 */
	if (!ops) {
		up_read(&link_ops_rwsem);
		request_module("rdma-link-%s", type);
		down_read(&link_ops_rwsem);
		ops = link_ops_get(type);
	}
#endif
	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
	up_read(&link_ops_rwsem);
	dev_put(ndev);

	return err;
}
1322
1323 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1324                           struct netlink_ext_ack *extack)
1325 {
1326         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1327         struct ib_device *device;
1328         u32 index;
1329         int err;
1330
1331         err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1332                                      nldev_policy, extack);
1333         if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1334                 return -EINVAL;
1335
1336         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1337         device = ib_device_get_by_index(sock_net(skb->sk), index);
1338         if (!device)
1339                 return -EINVAL;
1340
1341         if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1342                 ib_device_put(device);
1343                 return -EINVAL;
1344         }
1345
1346         ib_unregister_device_and_put(device);
1347         return 0;
1348 }
1349
1350 static int nldev_get_sys_get_dumpit(struct sk_buff *skb,
1351                                     struct netlink_callback *cb)
1352 {
1353         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1354         struct nlmsghdr *nlh;
1355         int err;
1356
1357         err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1358                           nldev_policy, NULL);
1359         if (err)
1360                 return err;
1361
1362         nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1363                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1364                                          RDMA_NLDEV_CMD_SYS_GET),
1365                         0, 0);
1366
1367         err = nla_put_u8(skb, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1368                          (u8)ib_devices_shared_netns);
1369         if (err) {
1370                 nlmsg_cancel(skb, nlh);
1371                 return err;
1372         }
1373
1374         nlmsg_end(skb, nlh);
1375         return skb->len;
1376 }
1377
1378 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1379                                   struct netlink_ext_ack *extack)
1380 {
1381         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1382         u8 enable;
1383         int err;
1384
1385         err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1386                           nldev_policy, extack);
1387         if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1388                 return -EINVAL;
1389
1390         enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1391         /* Only 0 and 1 are supported */
1392         if (enable > 1)
1393                 return -EINVAL;
1394
1395         err = rdma_compatdev_set(enable);
1396         return err;
1397 }
1398
/*
 * Command dispatch table registered with the RDMA netlink core.
 * Mutating commands (SET, NEWLINK, DELLINK, SYS_SET) are gated by
 * RDMA_NL_ADMIN_PERM.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.dump = nldev_get_sys_get_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};
1452
/* Register the nldev command table with the RDMA netlink core. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
1457
/* Unregister the nldev netlink client on shutdown. */
void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
1462
/* Autoload this code when userspace talks to netlink client RDMA_NL_NLDEV (5). */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);