// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
 */

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
#include <linux/xarray.h>

#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>

static void dispatch_event_fd(struct list_head *fd_list, const void *data);

enum devx_obj_flags {
        DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
        DEVX_OBJ_FLAGS_DCT = 1 << 1,
        DEVX_OBJ_FLAGS_CQ = 1 << 2,
};

struct devx_async_data {
        struct mlx5_ib_dev *mdev;
        struct list_head list;
        struct devx_async_cmd_event_file *ev_file;
        struct mlx5_async_work cb_work;
        u16 cmd_out_len;
        /* must be last field in this structure */
        struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};

struct devx_async_event_data {
        struct list_head list; /* headed in ev_file->event_list */
        struct mlx5_ib_uapi_devx_async_event_hdr hdr;
};

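/*
 * Event subscriptions are kept in a two-level xarray: the per-device
 * event_xa is keyed by event number and holds a struct devx_event, whose
 * object_ids xarray is in turn keyed by object id and holds a struct
 * devx_obj_event.  Subscriptions to unaffiliated events (not tied to a
 * specific object) hang off devx_event->unaffiliated_list instead.
 */
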
/* first level XA value data structure */
struct devx_event {
        struct xarray object_ids; /* second XA level, Key = object id */
        struct list_head unaffiliated_list;
};

/* second level XA value data structure */
struct devx_obj_event {
        struct rcu_head rcu;
        struct list_head obj_sub_list;
};

struct devx_event_subscription {
        struct list_head file_list; /* headed in ev_file->
                                     * subscribed_events_list
                                     */
        struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
                                   * devx_obj_event->obj_sub_list
                                   */
        struct list_head obj_list; /* headed in devx_object */
        struct list_head event_list; /* headed in ev_file->event_list or in
                                      * temp list via subscription
                                      */

        u8 is_cleaned:1;
        u32 xa_key_level1;
        u32 xa_key_level2;
        struct rcu_head rcu;
        u64 cookie;
        struct devx_async_event_file *ev_file;
        struct eventfd_ctx *eventfd;
};

struct devx_async_event_file {
        struct ib_uobject uobj;
        /* Head of events that are subscribed to this FD */
        struct list_head subscribed_events_list;
        spinlock_t lock;
        wait_queue_head_t poll_wait;
        struct list_head event_list;
        struct mlx5_ib_dev *dev;
        u8 omit_data:1;
        u8 is_overflow_err:1;
        u8 is_destroyed:1;
};

struct devx_umem {
        struct mlx5_core_dev            *mdev;
        struct ib_umem                  *umem;
        u32                             page_offset;
        int                             page_shift;
        int                             ncont;
        u32                             dinlen;
        u32                             dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
};

struct devx_umem_reg_cmd {
        void                            *in;
        u32                             inlen;
        u32                             out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
};

static struct mlx5_ib_ucontext *
devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
        return to_mucontext(ib_uverbs_get_ucontext(attrs));
}

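/*
 * Allocate a firmware user context (uid) for this verbs context.
 * Returns the allocated uid on success or a negative errno; the extra
 * capabilities are granted only to suitably privileged user contexts.
 */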
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
        u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
        void *uctx;
        int err;
        u16 uid;
        u32 cap = 0;

        /* 0 means not supported */
        if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
                return -EINVAL;

        uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
        if (is_user && capable(CAP_NET_RAW) &&
            (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
                cap |= MLX5_UCTX_CAP_RAW_TX;
        if (is_user && capable(CAP_SYS_RAWIO) &&
            (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
             MLX5_UCTX_CAP_INTERNAL_DEV_RES))
                cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;

        MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
        MLX5_SET(uctx, uctx, cap, cap);

        err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;

        uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        return uid;
}

void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
        u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};

        MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
        MLX5_SET(destroy_uctx_in, in, uid, uid);

        mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}

static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
        switch (event_num) {
        case MLX5_EVENT_TYPE_PORT_CHANGE:
                return true;
        default:
                return false;
        }
}

static bool is_legacy_obj_event_num(u16 event_num)
{
        switch (event_num) {
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_CQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_COMP:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                return true;
        default:
                return false;
        }
}

static u16 get_legacy_obj_type(u16 opcode)
{
        switch (opcode) {
        case MLX5_CMD_OP_CREATE_RQ:
                return MLX5_EVENT_QUEUE_TYPE_RQ;
        case MLX5_CMD_OP_CREATE_QP:
                return MLX5_EVENT_QUEUE_TYPE_QP;
        case MLX5_CMD_OP_CREATE_SQ:
                return MLX5_EVENT_QUEUE_TYPE_SQ;
        case MLX5_CMD_OP_CREATE_DCT:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return 0;
        }
}

static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
{
        u16 opcode;

        opcode = (obj->obj_id >> 32) & 0xffff;

        if (is_legacy_obj_event_num(event_num))
                return get_legacy_obj_type(opcode);

        switch (opcode) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                return (obj->obj_id >> 48);
        case MLX5_CMD_OP_CREATE_RQ:
                return MLX5_OBJ_TYPE_RQ;
        case MLX5_CMD_OP_CREATE_QP:
                return MLX5_OBJ_TYPE_QP;
        case MLX5_CMD_OP_CREATE_SQ:
                return MLX5_OBJ_TYPE_SQ;
        case MLX5_CMD_OP_CREATE_DCT:
                return MLX5_OBJ_TYPE_DCT;
        case MLX5_CMD_OP_CREATE_TIR:
                return MLX5_OBJ_TYPE_TIR;
        case MLX5_CMD_OP_CREATE_TIS:
                return MLX5_OBJ_TYPE_TIS;
        case MLX5_CMD_OP_CREATE_PSV:
                return MLX5_OBJ_TYPE_PSV;
        case MLX5_CMD_OP_CREATE_MKEY:
                return MLX5_OBJ_TYPE_MKEY;
        case MLX5_CMD_OP_CREATE_RMP:
                return MLX5_OBJ_TYPE_RMP;
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
                return MLX5_OBJ_TYPE_XRC_SRQ;
        case MLX5_CMD_OP_CREATE_XRQ:
                return MLX5_OBJ_TYPE_XRQ;
        case MLX5_CMD_OP_CREATE_RQT:
                return MLX5_OBJ_TYPE_RQT;
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
                return MLX5_OBJ_TYPE_FLOW_COUNTER;
        case MLX5_CMD_OP_CREATE_CQ:
                return MLX5_OBJ_TYPE_CQ;
        default:
                return 0;
        }
}

static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
{
        switch (event_type) {
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
                return eqe->data.qp_srq.type;
        case MLX5_EVENT_TYPE_CQ_ERROR:
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                return 0;
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
        }
}

static u32 get_dec_obj_id(u64 obj_id)
{
        return (obj_id & 0xffffffff);
}

/*
 * As the obj_id in the firmware is not globally unique, the object type
 * must be considered when checking for a valid object id.
 * For that, the opcode of the creator command is encoded as part of the
 * obj_id.
 */
static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
{
        return ((u64)opcode << 32) | obj_id;
}
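
/*
 * For example, a CQ is keyed as get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, cqn),
 * so a CQ and a QP that happen to share the same 32-bit firmware id still
 * map to distinct 64-bit keys.
 */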

static u64 devx_get_obj_id(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
        u64 obj_id;

        switch (opcode) {
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT |
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_type) << 16,
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_id));
                break;
        case MLX5_CMD_OP_QUERY_MKEY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
                                        MLX5_GET(query_mkey_in, in,
                                                 mkey_index));
                break;
        case MLX5_CMD_OP_QUERY_CQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                        MLX5_GET(query_cq_in, in, cqn));
                break;
        case MLX5_CMD_OP_MODIFY_CQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                        MLX5_GET(modify_cq_in, in, cqn));
                break;
        case MLX5_CMD_OP_QUERY_SQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                        MLX5_GET(query_sq_in, in, sqn));
                break;
        case MLX5_CMD_OP_MODIFY_SQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                        MLX5_GET(modify_sq_in, in, sqn));
                break;
        case MLX5_CMD_OP_QUERY_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(query_rq_in, in, rqn));
                break;
        case MLX5_CMD_OP_MODIFY_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(modify_rq_in, in, rqn));
                break;
        case MLX5_CMD_OP_QUERY_RMP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
                                        MLX5_GET(query_rmp_in, in, rmpn));
                break;
        case MLX5_CMD_OP_MODIFY_RMP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
                                        MLX5_GET(modify_rmp_in, in, rmpn));
                break;
        case MLX5_CMD_OP_QUERY_RQT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                        MLX5_GET(query_rqt_in, in, rqtn));
                break;
        case MLX5_CMD_OP_MODIFY_RQT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                        MLX5_GET(modify_rqt_in, in, rqtn));
                break;
        case MLX5_CMD_OP_QUERY_TIR:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                        MLX5_GET(query_tir_in, in, tirn));
                break;
        case MLX5_CMD_OP_MODIFY_TIR:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                        MLX5_GET(modify_tir_in, in, tirn));
                break;
        case MLX5_CMD_OP_QUERY_TIS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                        MLX5_GET(query_tis_in, in, tisn));
                break;
        case MLX5_CMD_OP_MODIFY_TIS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                        MLX5_GET(modify_tis_in, in, tisn));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
                                        MLX5_GET(query_flow_table_in, in,
                                                 table_id));
                break;
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
                                        MLX5_GET(modify_flow_table_in, in,
                                                 table_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
                                        MLX5_GET(query_flow_group_in, in,
                                                 group_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
                                        MLX5_GET(query_fte_in, in,
                                                 flow_index));
                break;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
                                        MLX5_GET(set_fte_in, in, flow_index));
                break;
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
                                        MLX5_GET(query_q_counter_in, in,
                                                 counter_set_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
                                        MLX5_GET(query_flow_counter_in, in,
                                                 flow_counter_id));
                break;
        case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_id));
                break;
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
                                        MLX5_GET(query_scheduling_element_in,
                                                 in, scheduling_element_id));
                break;
        case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
                                        MLX5_GET(modify_scheduling_element_in,
                                                 in, scheduling_element_id));
                break;
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
                                        MLX5_GET(add_vxlan_udp_dport_in, in,
                                                 vxlan_udp_port));
                break;
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
                                        MLX5_GET(query_l2_table_entry_in, in,
                                                 table_index));
                break;
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
                                        MLX5_GET(set_l2_table_entry_in, in,
                                                 table_index));
                break;
        case MLX5_CMD_OP_QUERY_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(query_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RST2INIT_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rst2init_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_INIT2INIT_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(init2init_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(init2rtr_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rtr2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RTS2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rts2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_SQERR2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(sqerr2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_2ERR_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(qp_2err_in, in, qpn));
                break;
        case MLX5_CMD_OP_2RST_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(qp_2rst_in, in, qpn));
                break;
        case MLX5_CMD_OP_QUERY_DCT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                        MLX5_GET(query_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_QUERY_XRQ:
        case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(query_xrq_in, in, xrqn));
                break;
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
                                        MLX5_GET(query_xrc_srq_in, in,
                                                 xrc_srqn));
                break;
        case MLX5_CMD_OP_ARM_XRC_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
                                        MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
                break;
        case MLX5_CMD_OP_QUERY_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
                                        MLX5_GET(query_srq_in, in, srqn));
                break;
        case MLX5_CMD_OP_ARM_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(arm_rq_in, in, srq_number));
                break;
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                        MLX5_GET(drain_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
        case MLX5_CMD_OP_MODIFY_XRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(arm_xrq_in, in, xrqn));
                break;
        case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                obj_id = get_enc_obj_id
                                (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
                                 MLX5_GET(query_packet_reformat_context_in,
                                          in, packet_reformat_id));
                break;
        default:
                obj_id = 0;
        }

        return obj_id;
}

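/*
 * Verify that the object id carried in a modify/query mailbox matches the
 * uobject the method was invoked on, so one handle cannot be used to reach
 * another object.
 */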
static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
                                 struct ib_uobject *uobj, const void *in)
{
        struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
        u64 obj_id = devx_get_obj_id(in);

        if (!obj_id)
                return false;

        switch (uobj_get_object_id(uobj)) {
        case UVERBS_OBJECT_CQ:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                      to_mcq(uobj->object)->mcq.cqn) ==
                                      obj_id;

        case UVERBS_OBJECT_SRQ:
        {
                struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
                u16 opcode;

                switch (srq->common.res) {
                case MLX5_RES_XSRQ:
                        opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
                        break;
                case MLX5_RES_XRQ:
                        opcode = MLX5_CMD_OP_CREATE_XRQ;
                        break;
                default:
                        if (!dev->mdev->issi)
                                opcode = MLX5_CMD_OP_CREATE_SRQ;
                        else
                                opcode = MLX5_CMD_OP_CREATE_RMP;
                }

                return get_enc_obj_id(opcode,
                                      to_msrq(uobj->object)->msrq.srqn) ==
                                      obj_id;
        }

        case UVERBS_OBJECT_QP:
        {
                struct mlx5_ib_qp *qp = to_mqp(uobj->object);
                enum ib_qp_type qp_type = qp->ibqp.qp_type;

                if (qp_type == IB_QPT_RAW_PACKET ||
                    (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
                        struct mlx5_ib_raw_packet_qp *raw_packet_qp =
                                                         &qp->raw_packet_qp;
                        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
                        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;

                        return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                               rq->base.mqp.qpn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                               sq->base.mqp.qpn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                               rq->tirn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                               sq->tisn) == obj_id);
                }

                if (qp_type == MLX5_IB_QPT_DCT)
                        return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                              qp->dct.mdct.mqp.qpn) == obj_id;

                return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                      qp->ibqp.qp_num) == obj_id;
        }

        case UVERBS_OBJECT_WQ:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                      to_mrwq(uobj->object)->core_qp.qpn) ==
                                      obj_id;

        case UVERBS_OBJECT_RWQ_IND_TBL:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                      to_mrwq_ind_table(uobj->object)->rqtn) ==
                                      obj_id;

        case MLX5_IB_OBJECT_DEVX_OBJ:
                return ((struct devx_obj *)uobj->object)->obj_id == obj_id;

        default:
                return false;
        }
}

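/*
 * For create/modify commands whose mailboxes may reference DEVX umems,
 * force the relevant *_umem_valid bits so the firmware interprets the
 * buffer and doorbell record fields as umem ids rather than raw
 * addresses.
 */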
static void devx_set_umem_valid(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
        {
                void *cqc;

                MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
                cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
                MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
                break;
        }
        case MLX5_CMD_OP_CREATE_QP:
        {
                void *qpc;

                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
                MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_RQ:
        {
                void *rqc, *wq;

                rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
                wq  = MLX5_ADDR_OF(rqc, rqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_SQ:
        {
                void *sqc, *wq;

                sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
                wq = MLX5_ADDR_OF(sqc, sqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_MODIFY_CQ:
                MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
                break;

        case MLX5_CMD_OP_CREATE_RMP:
        {
                void *rmpc, *wq;

                rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
                wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_XRQ:
        {
                void *xrqc, *wq;

                xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
                wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        {
                void *xrc_srqc;

                MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
                xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
                                        xrc_srq_context_entry);
                MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
                break;
        }

        default:
                return;
        }
}

static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
{
        *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (*opcode) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
        case MLX5_CMD_OP_CREATE_MKEY:
        case MLX5_CMD_OP_CREATE_CQ:
        case MLX5_CMD_OP_ALLOC_PD:
        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
        case MLX5_CMD_OP_CREATE_RMP:
        case MLX5_CMD_OP_CREATE_SQ:
        case MLX5_CMD_OP_CREATE_RQ:
        case MLX5_CMD_OP_CREATE_RQT:
        case MLX5_CMD_OP_CREATE_TIR:
        case MLX5_CMD_OP_CREATE_TIS:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
        case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_CREATE_QP:
        case MLX5_CMD_OP_CREATE_SRQ:
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        case MLX5_CMD_OP_CREATE_DCT:
        case MLX5_CMD_OP_CREATE_XRQ:
        case MLX5_CMD_OP_ATTACH_TO_MCG:
        case MLX5_CMD_OP_ALLOC_XRCD:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
                u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);

                if (op_mod == 0)
                        return true;
                return false;
        }
        case MLX5_CMD_OP_CREATE_PSV:
        {
                u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);

                if (num_psv == 1)
                        return true;
                return false;
        }
        default:
                return false;
        }
}

static bool devx_is_obj_modify_cmd(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_MODIFY_CQ:
        case MLX5_CMD_OP_MODIFY_RMP:
        case MLX5_CMD_OP_MODIFY_SQ:
        case MLX5_CMD_OP_MODIFY_RQ:
        case MLX5_CMD_OP_MODIFY_RQT:
        case MLX5_CMD_OP_MODIFY_TIR:
        case MLX5_CMD_OP_MODIFY_TIS:
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
        case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_RST2INIT_QP:
        case MLX5_CMD_OP_INIT2RTR_QP:
        case MLX5_CMD_OP_INIT2INIT_QP:
        case MLX5_CMD_OP_RTR2RTS_QP:
        case MLX5_CMD_OP_RTS2RTS_QP:
        case MLX5_CMD_OP_SQERR2RTS_QP:
        case MLX5_CMD_OP_2ERR_QP:
        case MLX5_CMD_OP_2RST_QP:
        case MLX5_CMD_OP_ARM_XRC_SRQ:
        case MLX5_CMD_OP_ARM_RQ:
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
        case MLX5_CMD_OP_MODIFY_XRQ:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
                u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);

                if (op_mod == 1)
                        return true;
                return false;
        }
        default:
                return false;
        }
}

static bool devx_is_obj_query_cmd(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_MKEY:
        case MLX5_CMD_OP_QUERY_CQ:
        case MLX5_CMD_OP_QUERY_RMP:
        case MLX5_CMD_OP_QUERY_SQ:
        case MLX5_CMD_OP_QUERY_RQ:
        case MLX5_CMD_OP_QUERY_RQT:
        case MLX5_CMD_OP_QUERY_TIR:
        case MLX5_CMD_OP_QUERY_TIS:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_QP:
        case MLX5_CMD_OP_QUERY_SRQ:
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
        case MLX5_CMD_OP_QUERY_DCT:
        case MLX5_CMD_OP_QUERY_XRQ:
        case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
        case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                return true;
        default:
                return false;
        }
}

static bool devx_is_whitelist_cmd(void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
                return true;
        default:
                return false;
        }
}

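/*
 * Pick the uid to stamp into a command: whitelisted (read-only) commands
 * may fall back to the device-wide whitelist uid when the context has no
 * devx uid of its own; all other commands require the context's devx uid.
 */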
static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
{
        if (devx_is_whitelist_cmd(cmd_in)) {
                struct mlx5_ib_dev *dev;

                if (c->devx_uid)
                        return c->devx_uid;

                dev = to_mdev(c->ibucontext.device);
                if (dev->devx_whitelist_uid)
                        return dev->devx_whitelist_uid;

                return -EOPNOTSUPP;
        }

        if (!c->devx_uid)
                return -EINVAL;

        return c->devx_uid;
}

static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
        if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
             MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
            (opcode >= MLX5_CMD_OP_GENERAL_START &&
             opcode < MLX5_CMD_OP_GENERAL_END))
                return true;

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_VPORT_STATE:
        case MLX5_CMD_OP_QUERY_ADAPTER:
        case MLX5_CMD_OP_QUERY_ISSI:
        case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
        case MLX5_CMD_OP_QUERY_VNIC_ENV:
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
        case MLX5_CMD_OP_NOP:
        case MLX5_CMD_OP_QUERY_CONG_STATUS:
        case MLX5_CMD_OP_QUERY_CONG_PARAMS:
        case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
        case MLX5_CMD_OP_QUERY_LAG:
                return true;
        default:
                return false;
        }
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        int user_vector;
        int dev_eqn;
        unsigned int irqn;
        int err;

        if (uverbs_copy_from(&user_vector, attrs,
                             MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
                return -EFAULT;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
        if (err < 0)
                return err;

        if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
                           &dev_eqn, sizeof(dev_eqn)))
                return -EFAULT;

        return 0;
}

/*
 * Security note:
 * The hardware protection mechanism works like this: each device object that
 * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
 * the device specification manual) upon its creation. Then upon doorbell,
 * hardware fetches the object context for which the doorbell was rung, and
 * validates that the UAR through which the DB was rung matches the UAR ID
 * of the object.
 * If there is no match, the doorbell is silently ignored by the hardware. Of
 * course, the user cannot ring a doorbell on a UAR that was not mapped to it.
 * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
 * mailboxes (except tagging them with UID), we expose to the user its UAR
 * ID, so it can embed it in these objects in the expected specification
 * format. So the only thing the user can do is hurt itself by creating a
 * QP/SQ/CQ with a UAR ID other than its own, and then in this case other
 * users may ring a doorbell on its objects.
 * The consequence of that will be that another user can schedule a QP/SQ
 * of the buggy user for execution (just insert it to the hardware schedule
 * queue or arm its CQ for event generation); no further harm is expected.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        u32 user_idx;
        s32 dev_idx;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        if (uverbs_copy_from(&user_idx, attrs,
                             MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
                return -EFAULT;

        dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
        if (dev_idx < 0)
                return dev_idx;

        if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
                           &dev_idx, sizeof(dev_idx)))
                return -EFAULT;

        return 0;
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        void *cmd_in = uverbs_attr_get_alloced_ptr(
                attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
                                        MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
        void *cmd_out;
        int err;
        int uid;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;
        /* Only a whitelist of general HCA commands is allowed for this method. */
        if (!devx_is_general_cmd(cmd_in, dev))
                return -EINVAL;

        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
        if (IS_ERR(cmd_out))
                return PTR_ERR(cmd_out);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        err = mlx5_cmd_exec(dev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
                return err;

        return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
                              cmd_out_len);
}

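/*
 * Derive the destroy command that matches a just-created object and
 * prebuild it into din/dinlen.  The mailbox is stashed in the devx_obj
 * so that cleanup (including context teardown) can destroy the object
 * with a single mlx5_cmd_exec() and no command parsing.
 */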
static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
                                       u32 *dinlen,
                                       u32 *obj_id)
{
        u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
        u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);

        *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
        MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);

        switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
                MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
                break;

        case MLX5_CMD_OP_CREATE_UMEM:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_UMEM);
                break;
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
                break;
        case MLX5_CMD_OP_ALLOC_PD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
                break;
        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
                break;
        case MLX5_CMD_OP_CREATE_RMP:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
                break;
        case MLX5_CMD_OP_CREATE_SQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
                break;
        case MLX5_CMD_OP_CREATE_RQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
                break;
        case MLX5_CMD_OP_CREATE_RQT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
                break;
        case MLX5_CMD_OP_CREATE_TIR:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
                break;
        case MLX5_CMD_OP_CREATE_TIS:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
                break;
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_Q_COUNTER);
                break;
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
                *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
                MLX5_SET(destroy_flow_table_in, din, other_vport,
                         MLX5_GET(create_flow_table_in, in, other_vport));
                MLX5_SET(destroy_flow_table_in, din, vport_number,
                         MLX5_GET(create_flow_table_in, in, vport_number));
                MLX5_SET(destroy_flow_table_in, din, table_type,
                         MLX5_GET(create_flow_table_in, in, table_type));
                MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_FLOW_TABLE);
                break;
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
                *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
                MLX5_SET(destroy_flow_group_in, din, other_vport,
                         MLX5_GET(create_flow_group_in, in, other_vport));
                MLX5_SET(destroy_flow_group_in, din, vport_number,
                         MLX5_GET(create_flow_group_in, in, vport_number));
                MLX5_SET(destroy_flow_group_in, din, table_type,
                         MLX5_GET(create_flow_group_in, in, table_type));
                MLX5_SET(destroy_flow_group_in, din, table_id,
                         MLX5_GET(create_flow_group_in, in, table_id));
                MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_FLOW_GROUP);
                break;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
                *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
                *obj_id = MLX5_GET(set_fte_in, in, flow_index);
                MLX5_SET(delete_fte_in, din, other_vport,
                         MLX5_GET(set_fte_in, in, other_vport));
                MLX5_SET(delete_fte_in, din, vport_number,
                         MLX5_GET(set_fte_in, in, vport_number));
                MLX5_SET(delete_fte_in, din, table_type,
                         MLX5_GET(set_fte_in, in, table_type));
                MLX5_SET(delete_fte_in, din, table_id,
                         MLX5_GET(set_fte_in, in, table_id));
                MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
                break;
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
                break;
        case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
                break;
        case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
                break;
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
                *obj_id = MLX5_GET(create_scheduling_element_out, out,
                                   scheduling_element_id);
                MLX5_SET(destroy_scheduling_element_in, din,
                         scheduling_hierarchy,
                         MLX5_GET(create_scheduling_element_in, in,
                                  scheduling_hierarchy));
                MLX5_SET(destroy_scheduling_element_in, din,
                         scheduling_element_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
                break;
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
                *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
                *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
                MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
                break;
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
                *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
                *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
                MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
                break;
        case MLX5_CMD_OP_CREATE_QP:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
                break;
        case MLX5_CMD_OP_CREATE_SRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
                break;
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_XRC_SRQ);
                break;
        case MLX5_CMD_OP_CREATE_DCT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
                break;
        case MLX5_CMD_OP_CREATE_XRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
                break;
        case MLX5_CMD_OP_ATTACH_TO_MCG:
                *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
                MLX5_SET(detach_from_mcg_in, din, qpn,
                         MLX5_GET(attach_to_mcg_in, in, qpn));
                memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
                       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
                       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
                break;
        case MLX5_CMD_OP_ALLOC_XRCD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
                break;
        case MLX5_CMD_OP_CREATE_PSV:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_PSV);
                MLX5_SET(destroy_psv_in, din, psvn,
                         MLX5_GET(create_psv_out, out, psv0_index));
                break;
        default:
                /* The entry must match one of the devx_is_obj_create_cmd() opcodes */
                WARN_ON(true);
                break;
        }
}

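/*
 * Indirect (KLM/KSM) mkeys created through DEVX are registered in
 * dev->odp_mkeys so that the ODP page-fault path can resolve them; the
 * matching xa_erase() is done in devx_obj_cleanup().
 */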
static int devx_handle_mkey_indirect(struct devx_obj *obj,
                                     struct mlx5_ib_dev *dev,
                                     void *in, void *out)
{
        struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
        struct mlx5_core_mkey *mkey;
        void *mkc;
        u8 key;

        mkey = &devx_mr->mmkey;
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        key = MLX5_GET(mkc, mkc, mkey_7_0);
        mkey->key = mlx5_idx_to_mkey(
                        MLX5_GET(create_mkey_out, out, mkey_index)) | key;
        mkey->type = MLX5_MKEY_INDIRECT_DEVX;
        mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
        mkey->size = MLX5_GET64(mkc, mkc, len);
        mkey->pd = MLX5_GET(mkc, mkc, pd);
        devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);

        return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
                               GFP_KERNEL));
}

static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
                                   struct devx_obj *obj,
                                   void *in, int in_len)
{
        int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
                        MLX5_FLD_SZ_BYTES(create_mkey_in,
                        memory_key_mkey_entry);
        void *mkc;
        u8 access_mode;

        if (in_len < min_len)
                return -EINVAL;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

        access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
        access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;

        if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
            access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
                if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
                        obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
                return 0;
        }

        MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
        return 0;
}

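/*
 * Called with devx_event_table->event_xa_lock held (see
 * devx_obj_cleanup()).  Readers walk the subscription lists under RCU,
 * hence the list_del_rcu()/kfree_rcu() pairing.
 */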
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
                                      struct devx_event_subscription *sub)
{
        struct devx_event *event;
        struct devx_obj_event *xa_val_level2;

        if (sub->is_cleaned)
                return;

        sub->is_cleaned = 1;
        list_del_rcu(&sub->xa_list);

        if (list_empty(&sub->obj_list))
                return;

        list_del_rcu(&sub->obj_list);
        /* free the level-2 XA entry if its subscription list is now empty */
1272         event = xa_load(&dev->devx_event_table.event_xa,
1273                         sub->xa_key_level1);
1274         WARN_ON(!event);
1275
1276         xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
1277         if (list_empty(&xa_val_level2->obj_sub_list)) {
1278                 xa_erase(&event->object_ids,
1279                          sub->xa_key_level2);
1280                 kfree_rcu(xa_val_level2, rcu);
1281         }
1282 }
1283
1284 static int devx_obj_cleanup(struct ib_uobject *uobject,
1285                             enum rdma_remove_reason why,
1286                             struct uverbs_attr_bundle *attrs)
1287 {
1288         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1289         struct mlx5_devx_event_table *devx_event_table;
1290         struct devx_obj *obj = uobject->object;
1291         struct devx_event_subscription *sub_entry, *tmp;
1292         struct mlx5_ib_dev *dev;
1293         int ret;
1294
1295         dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1296         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
1297                 /*
1298                  * The pagefault_single_data_segment() does commands against
1299                  * the mmkey, we must wait for that to stop before freeing the
1300                  * mkey, as another allocation could get the same mkey #.
1301                  */
1302                 xa_erase(&obj->ib_dev->odp_mkeys,
1303                          mlx5_base_mkey(obj->devx_mr.mmkey.key));
1304                 synchronize_srcu(&dev->odp_srcu);
1305         }
1306
1307         if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1308                 ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
1309         else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1310                 ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1311         else
1312                 ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
1313                                     obj->dinlen, out, sizeof(out));
1314         if (ib_is_destroy_retryable(ret, why, uobject))
1315                 return ret;
1316
1317         devx_event_table = &dev->devx_event_table;
1318
1319         mutex_lock(&devx_event_table->event_xa_lock);
1320         list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
1321                 devx_cleanup_subscription(dev, sub_entry);
1322         mutex_unlock(&devx_event_table->event_xa_lock);
1323
1324         kfree(obj);
1325         return ret;
1326 }
1327
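     /*
      * Completion handler for DEVX-created CQs: completions are dispatched
      * through the same two-level XA used for event subscriptions, keyed by
      * MLX5_EVENT_TYPE_COMP at level 1 and by the CQN at level 2.
      */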
1328 static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
1329 {
1330         struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
1331         struct mlx5_devx_event_table *table;
1332         struct devx_event *event;
1333         struct devx_obj_event *obj_event;
1334         u32 obj_id = mcq->cqn;
1335
1336         table = &obj->ib_dev->devx_event_table;
1337         rcu_read_lock();
1338         event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
1339         if (!event)
1340                 goto out;
1341
1342         obj_event = xa_load(&event->object_ids, obj_id);
1343         if (!obj_event)
1344                 goto out;
1345
1346         dispatch_event_fd(&obj_event->obj_sub_list, eqe);
1347 out:
1348         rcu_read_unlock();
1349 }
1350
1351 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
1352         struct uverbs_attr_bundle *attrs)
1353 {
1354         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1355         int cmd_out_len = uverbs_attr_get_len(attrs,
1356                                         MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
1357         int cmd_in_len = uverbs_attr_get_len(attrs,
1358                                         MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1359         void *cmd_out;
1360         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1361                 attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
1362         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1363                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1364         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1365         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1366         struct devx_obj *obj;
1367         u16 obj_type = 0;
1368         int err;
1369         int uid;
1370         u32 obj_id;
1371         u16 opcode;
1372
1373         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1374                 return -EINVAL;
1375
1376         uid = devx_get_uid(c, cmd_in);
1377         if (uid < 0)
1378                 return uid;
1379
1380         if (!devx_is_obj_create_cmd(cmd_in, &opcode))
1381                 return -EINVAL;
1382
1383         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1384         if (IS_ERR(cmd_out))
1385                 return PTR_ERR(cmd_out);
1386
1387         obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
1388         if (!obj)
1389                 return -ENOMEM;
1390
1391         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1392         if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
1393                 err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
1394                 if (err)
1395                         goto obj_free;
1396         } else {
1397                 devx_set_umem_valid(cmd_in);
1398         }
1399
1400         if (opcode == MLX5_CMD_OP_CREATE_DCT) {
1401                 obj->flags |= DEVX_OBJ_FLAGS_DCT;
1402                 err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
1403                                            cmd_in_len, cmd_out, cmd_out_len);
1404         } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
1405                 obj->flags |= DEVX_OBJ_FLAGS_CQ;
1406                 obj->core_cq.comp = devx_cq_comp;
1407                 err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
1408                                           cmd_in, cmd_in_len, cmd_out,
1409                                           cmd_out_len);
1410         } else {
1411                 err = mlx5_cmd_exec(dev->mdev, cmd_in,
1412                                     cmd_in_len,
1413                                     cmd_out, cmd_out_len);
1414         }
1415
1416         if (err)
1417                 goto obj_free;
1418
1419         if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
1420                 u8 bulk = MLX5_GET(alloc_flow_counter_in,
1421                                    cmd_in,
1422                                    flow_counter_bulk);
1423                 obj->flow_counter_bulk_size = 128UL * bulk;
1424         }
1425
1426         uobj->object = obj;
1427         INIT_LIST_HEAD(&obj->event_sub);
1428         obj->ib_dev = dev;
1429         devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
1430                                    &obj_id);
1431         WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
1432
1433         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
1434         if (err)
1435                 goto obj_destroy;
1436
1437         if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
1438                 obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
1439         obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
1440
1441         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
1442                 err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
1443                 if (err)
1444                         goto obj_destroy;
1445         }
1446         return 0;
1447
1448 obj_destroy:
1449         if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1450                 mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
1451         else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1452                 mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1453         else
1454                 mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
1455                               sizeof(out));
1456 obj_free:
1457         kfree(obj);
1458         return err;
1459 }
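
     /*
      * Userspace sketch for the create handler above (illustrative only;
      * mlx5dv_devx_obj_create() is rdma-core's entry point for this method,
      * and any PRM command layout may be passed through):
      *
      *	u32 in[MLX5_ST_SZ_DW(create_cq_in)] = {};
      *	u32 out[MLX5_ST_SZ_DW(create_cq_out)] = {};
      *	struct mlx5dv_devx_obj *obj;
      *
      *	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
      *	... fill the CQ context, UAR page, doorbell umem, etc ...
      *	obj = mlx5dv_devx_obj_create(ctx, in, sizeof(in), out, sizeof(out));
      */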
1460
1461 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
1462         struct uverbs_attr_bundle *attrs)
1463 {
1464         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
1465         int cmd_out_len = uverbs_attr_get_len(attrs,
1466                                         MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
1467         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1468                                                           MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
1469         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1470                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1471         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1472         void *cmd_out;
1473         int err;
1474         int uid;
1475
1476         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1477                 return -EINVAL;
1478
1479         uid = devx_get_uid(c, cmd_in);
1480         if (uid < 0)
1481                 return uid;
1482
1483         if (!devx_is_obj_modify_cmd(cmd_in))
1484                 return -EINVAL;
1485
1486         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1487                 return -EINVAL;
1488
1489         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1490         if (IS_ERR(cmd_out))
1491                 return PTR_ERR(cmd_out);
1492
1493         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1494         devx_set_umem_valid(cmd_in);
1495
1496         err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1497                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
1498                             cmd_out, cmd_out_len);
1499         if (err)
1500                 return err;
1501
1502         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
1503                               cmd_out, cmd_out_len);
1504 }
1505
1506 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
1507         struct uverbs_attr_bundle *attrs)
1508 {
1509         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
1510         int cmd_out_len = uverbs_attr_get_len(attrs,
1511                                               MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
1512         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1513                                                           MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
1514         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1515                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1516         void *cmd_out;
1517         int err;
1518         int uid;
1519         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1520
1521         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1522                 return -EINVAL;
1523
1524         uid = devx_get_uid(c, cmd_in);
1525         if (uid < 0)
1526                 return uid;
1527
1528         if (!devx_is_obj_query_cmd(cmd_in))
1529                 return -EINVAL;
1530
1531         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1532                 return -EINVAL;
1533
1534         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1535         if (IS_ERR(cmd_out))
1536                 return PTR_ERR(cmd_out);
1537
1538         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1539         err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1540                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
1541                             cmd_out, cmd_out_len);
1542         if (err)
1543                 return err;
1544
1545         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
1546                               cmd_out, cmd_out_len);
1547 }
1548
1549 struct devx_async_event_queue {
1550         spinlock_t              lock;
1551         wait_queue_head_t       poll_wait;
1552         struct list_head        event_list;
1553         atomic_t                bytes_in_use;
1554         u8                      is_destroyed:1;
1555 };
1556
1557 struct devx_async_cmd_event_file {
1558         struct ib_uobject               uobj;
1559         struct devx_async_event_queue   ev_queue;
1560         struct mlx5_async_ctx           async_ctx;
1561 };
1562
1563 static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
1564 {
1565         spin_lock_init(&ev_queue->lock);
1566         INIT_LIST_HEAD(&ev_queue->event_list);
1567         init_waitqueue_head(&ev_queue->poll_wait);
1568         atomic_set(&ev_queue->bytes_in_use, 0);
1569         ev_queue->is_destroyed = 0;
1570 }
1571
1572 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
1573         struct uverbs_attr_bundle *attrs)
1574 {
1575         struct devx_async_cmd_event_file *ev_file;
1576
1577         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1578                 attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
1579         struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
1580
1581         ev_file = container_of(uobj, struct devx_async_cmd_event_file,
1582                                uobj);
1583         devx_init_event_queue(&ev_file->ev_queue);
1584         mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
1585         return 0;
1586 }
1587
1588 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
1589         struct uverbs_attr_bundle *attrs)
1590 {
1591         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1592                 attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
1593         struct devx_async_event_file *ev_file;
1594         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1595                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1596         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1597         u32 flags;
1598         int err;
1599
1600         err = uverbs_get_flags32(&flags, attrs,
1601                 MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
1602                 MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
1603
1604         if (err)
1605                 return err;
1606
1607         ev_file = container_of(uobj, struct devx_async_event_file,
1608                                uobj);
1609         spin_lock_init(&ev_file->lock);
1610         INIT_LIST_HEAD(&ev_file->event_list);
1611         init_waitqueue_head(&ev_file->poll_wait);
1612         if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
1613                 ev_file->omit_data = 1;
1614         INIT_LIST_HEAD(&ev_file->subscribed_events_list);
1615         ev_file->dev = dev;
1616         get_device(&dev->ib_dev.dev);
1617         return 0;
1618 }
1619
1620 static void devx_query_callback(int status, struct mlx5_async_work *context)
1621 {
1622         struct devx_async_data *async_data =
1623                 container_of(context, struct devx_async_data, cb_work);
1624         struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
1625         struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
1626         unsigned long flags;
1627
1628         /*
1629          * Note that if the struct devx_async_cmd_event_file uobj begins to be
1630          * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
1631          * routine returns, ensuring that it always remains valid here.
1632          */
1633         spin_lock_irqsave(&ev_queue->lock, flags);
1634         list_add_tail(&async_data->list, &ev_queue->event_list);
1635         spin_unlock_irqrestore(&ev_queue->lock, flags);
1636
1637         wake_up_interruptible(&ev_queue->poll_wait);
1638 }
1639
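     /*
      * Flow control for async queries: ev_queue->bytes_in_use accounts the
      * output bytes of every query still queued or in flight, and new
      * queries get -EAGAIN once the cap below is exceeded; e.g. with
      * 256-byte outputs up to 4096 queries may be outstanding at once.
      */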
1640 #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
1641
1642 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
1643         struct uverbs_attr_bundle *attrs)
1644 {
1645         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
1646                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
1647         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1648                                 attrs,
1649                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
1650         u16 cmd_out_len;
1651         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1652                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1653         struct ib_uobject *fd_uobj;
1654         int err;
1655         int uid;
1656         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1657         struct devx_async_cmd_event_file *ev_file;
1658         struct devx_async_data *async_data;
1659
1660         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1661                 return -EINVAL;
1662
1663         uid = devx_get_uid(c, cmd_in);
1664         if (uid < 0)
1665                 return uid;
1666
1667         if (!devx_is_obj_query_cmd(cmd_in))
1668                 return -EINVAL;
1669
1670         err = uverbs_get_const(&cmd_out_len, attrs,
1671                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
1672         if (err)
1673                 return err;
1674
1675         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1676                 return -EINVAL;
1677
1678         fd_uobj = uverbs_attr_get_uobject(attrs,
1679                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
1680         if (IS_ERR(fd_uobj))
1681                 return PTR_ERR(fd_uobj);
1682
1683         ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
1684                                uobj);
1685
1686         if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
1687                         MAX_ASYNC_BYTES_IN_USE) {
1688                 atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1689                 return -EAGAIN;
1690         }
1691
1692         async_data = kvzalloc(struct_size(async_data, hdr.out_data,
1693                                           cmd_out_len), GFP_KERNEL);
1694         if (!async_data) {
1695                 err = -ENOMEM;
1696                 goto sub_bytes;
1697         }
1698
1699         err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
1700                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
1701         if (err)
1702                 goto free_async;
1703
1704         async_data->cmd_out_len = cmd_out_len;
1705         async_data->mdev = mdev;
1706         async_data->ev_file = ev_file;
1707
1708         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1709         err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
1710                     uverbs_attr_get_len(attrs,
1711                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
1712                     async_data->hdr.out_data,
1713                     async_data->cmd_out_len,
1714                     devx_query_callback, &async_data->cb_work);
1715
1716         if (err)
1717                 goto free_async;
1718
1719         return 0;
1720
1721 free_async:
1722         kvfree(async_data);
1723 sub_bytes:
1724         atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1725         return err;
1726 }
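
     /*
      * Reader-side sketch for the async command FD (illustrative only, not
      * part of this driver; handle_query() is a hypothetical helper): each
      * read() returns one async cmd header followed by the command output
      * captured by devx_query_callback().
      *
      *	struct {
      *		struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
      *		__u8 out[256];
      *	} resp;
      *
      *	ssize_t n = read(cmd_fd, &resp, sizeof(resp));
      *	if (n > 0)
      *		handle_query(resp.hdr.wr_id, resp.out);
      */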
1727
1728 static void
1729 subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
1730                            u32 key_level1,
1731                            bool is_level2,
1732                            u32 key_level2)
1733 {
1734         struct devx_event *event;
1735         struct devx_obj_event *xa_val_level2;
1736
1737         /* Level 1 is valid for future use, no need to free */
1738         if (!is_level2)
1739                 return;
1740
1741         event = xa_load(&devx_event_table->event_xa, key_level1);
1742         WARN_ON(!event);
1743
1744         xa_val_level2 = xa_load(&event->object_ids,
1745                                 key_level2);
1746         if (list_empty(&xa_val_level2->obj_sub_list)) {
1747                 xa_erase(&event->object_ids,
1748                          key_level2);
1749                 kfree_rcu(xa_val_level2, rcu);
1750         }
1751 }
1752
1753 static int
1754 subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
1755                          u32 key_level1,
1756                          bool is_level2,
1757                          u32 key_level2)
1758 {
1759         struct devx_obj_event *obj_event;
1760         struct devx_event *event;
1761         int err;
1762
1763         event = xa_load(&devx_event_table->event_xa, key_level1);
1764         if (!event) {
1765                 event = kzalloc(sizeof(*event), GFP_KERNEL);
1766                 if (!event)
1767                         return -ENOMEM;
1768
1769                 INIT_LIST_HEAD(&event->unaffiliated_list);
1770                 xa_init(&event->object_ids);
1771
1772                 err = xa_insert(&devx_event_table->event_xa,
1773                                 key_level1,
1774                                 event,
1775                                 GFP_KERNEL);
1776                 if (err) {
1777                         kfree(event);
1778                         return err;
1779                 }
1780         }
1781
1782         if (!is_level2)
1783                 return 0;
1784
1785         obj_event = xa_load(&event->object_ids, key_level2);
1786         if (!obj_event) {
1787                 obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
1788                 if (!obj_event)
1789                         /* Level 1 is valid for future use, no need to free */
1790                         return -ENOMEM;
1791
1792                 err = xa_insert(&event->object_ids,
1793                                 key_level2,
1794                                 obj_event,
1795                                 GFP_KERNEL);
1796                 if (err) {
                             kfree(obj_event);
1797                         return err;
                     }
1798                 INIT_LIST_HEAD(&obj_event->obj_sub_list);
1799         }
1800
1801         return 0;
1802 }
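
     /*
      * Keying recap for the two-level XA built above: level 1 is
      * event_type | (obj_type << 16); affiliated events add a level-2 entry
      * keyed by the object number, with subscriptions on its obj_sub_list,
      * while unaffiliated events stop at level 1 and use unaffiliated_list.
      */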
1803
1804 static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
1805                                    struct devx_obj *obj)
1806 {
1807         int i;
1808
1809         for (i = 0; i < num_events; i++) {
1810                 if (obj) {
1811                         if (!is_legacy_obj_event_num(event_type_num_list[i]))
1812                                 return false;
1813                 } else if (!is_legacy_unaffiliated_event_num(
1814                                 event_type_num_list[i])) {
1815                         return false;
1816                 }
1817         }
1818
1819         return true;
1820 }
1821
1822 #define MAX_SUPP_EVENT_NUM 255
1823 static bool is_valid_events(struct mlx5_core_dev *dev,
1824                             int num_events, u16 *event_type_num_list,
1825                             struct devx_obj *obj)
1826 {
1827         __be64 *aff_events;
1828         __be64 *unaff_events;
1829         int mask_entry;
1830         int mask_bit;
1831         int i;
1832
1833         if (MLX5_CAP_GEN(dev, event_cap)) {
1834                 aff_events = MLX5_CAP_DEV_EVENT(dev,
1835                                                 user_affiliated_events);
1836                 unaff_events = MLX5_CAP_DEV_EVENT(dev,
1837                                                   user_unaffiliated_events);
1838         } else {
1839                 return is_valid_events_legacy(num_events, event_type_num_list,
1840                                               obj);
1841         }
1842
1843         for (i = 0; i < num_events; i++) {
1844                 if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
1845                         return false;
1846
1847                 mask_entry = event_type_num_list[i] / 64;
1848                 mask_bit = event_type_num_list[i] % 64;
1849
1850                 if (obj) {
1851                         /* CQ completion */
1852                         if (event_type_num_list[i] == 0)
1853                                 continue;
1854
1855                         if (!(be64_to_cpu(aff_events[mask_entry]) &
1856                                         (1ull << mask_bit)))
1857                                 return false;
1858
1859                         continue;
1860                 }
1861
1862                 if (!(be64_to_cpu(unaff_events[mask_entry]) &
1863                                 (1ull << mask_bit)))
1864                         return false;
1865         }
1866
1867         return true;
1868 }
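
     /*
      * The device reports supported user events as 256-bit masks (four
      * __be64 words), hence MAX_SUPP_EVENT_NUM above: for event number n,
      * n / 64 selects the word and n % 64 the bit, so e.g. event 133 is
      * bit 5 of word 2.
      */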
1869
1870 #define MAX_NUM_EVENTS 16
1871 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
1872         struct uverbs_attr_bundle *attrs)
1873 {
1874         struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
1875                                 attrs,
1876                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
1877         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1878                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1879         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1880         struct ib_uobject *fd_uobj;
1881         struct devx_obj *obj = NULL;
1882         struct devx_async_event_file *ev_file;
1883         struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
1884         u16 *event_type_num_list;
1885         struct devx_event_subscription *event_sub, *tmp_sub;
1886         struct list_head sub_list;
1887         int redirect_fd;
1888         bool use_eventfd = false;
1889         int num_events;
1890         int num_alloc_xa_entries = 0;
1891         u16 obj_type = 0;
1892         u64 cookie = 0;
1893         u32 obj_id = 0;
1894         int err;
1895         int i;
1896
1897         if (!c->devx_uid)
1898                 return -EINVAL;
1899
1900         if (!IS_ERR(devx_uobj)) {
1901                 obj = (struct devx_obj *)devx_uobj->object;
1902                 if (obj)
1903                         obj_id = get_dec_obj_id(obj->obj_id);
1904         }
1905
1906         fd_uobj = uverbs_attr_get_uobject(attrs,
1907                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
1908         if (IS_ERR(fd_uobj))
1909                 return PTR_ERR(fd_uobj);
1910
1911         ev_file = container_of(fd_uobj, struct devx_async_event_file,
1912                                uobj);
1913
1914         if (uverbs_attr_is_valid(attrs,
1915                                  MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
1916                 err = uverbs_copy_from(&redirect_fd, attrs,
1917                                MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
1918                 if (err)
1919                         return err;
1920
1921                 use_eventfd = true;
1922         }
1923
1924         if (uverbs_attr_is_valid(attrs,
1925                                  MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
1926                 if (use_eventfd)
1927                         return -EINVAL;
1928
1929                 err = uverbs_copy_from(&cookie, attrs,
1930                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
1931                 if (err)
1932                         return err;
1933         }
1934
1935         num_events = uverbs_attr_ptr_get_array_size(
1936                 attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
1937                 sizeof(u16));
1938
1939         if (num_events < 0)
1940                 return num_events;
1941
1942         if (num_events > MAX_NUM_EVENTS)
1943                 return -EINVAL;
1944
1945         event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
1946                         MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
1947
1948         if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
1949                 return -EINVAL;
1950
1951         INIT_LIST_HEAD(&sub_list);
1952
1953         /* Serialize with concurrent subscriptions to the same XA entries
1954          * so that all of them can succeed.
1955          */
1956         mutex_lock(&devx_event_table->event_xa_lock);
1957         for (i = 0; i < num_events; i++) {
1958                 u32 key_level1;
1959
1960                 if (obj)
1961                         obj_type = get_dec_obj_type(obj,
1962                                                     event_type_num_list[i]);
1963                 key_level1 = event_type_num_list[i] | obj_type << 16;
1964
1965                 err = subscribe_event_xa_alloc(devx_event_table,
1966                                                key_level1,
1967                                                obj,
1968                                                obj_id);
1969                 if (err)
1970                         goto err;
1971
1972                 num_alloc_xa_entries++;
1973                 event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
1974                 if (!event_sub) {
                             err = -ENOMEM;
1975                         goto err;
                     }
1976
1977                 list_add_tail(&event_sub->event_list, &sub_list);
1978                 uverbs_uobject_get(&ev_file->uobj);
1979                 if (use_eventfd) {
1980                         event_sub->eventfd =
1981                                 eventfd_ctx_fdget(redirect_fd);
1982
1983                         if (IS_ERR(event_sub->eventfd)) {
1984                                 err = PTR_ERR(event_sub->eventfd);
1985                                 event_sub->eventfd = NULL;
1986                                 goto err;
1987                         }
1988                 }
1989
1990                 event_sub->cookie = cookie;
1991                 event_sub->ev_file = ev_file;
1992                 /* May be needed upon cleanup of the devx object/subscription */
1993                 event_sub->xa_key_level1 = key_level1;
1994                 event_sub->xa_key_level2 = obj_id;
1995                 INIT_LIST_HEAD(&event_sub->obj_list);
1996         }
1997
1998         /* Once all the allocations and the XA data insertions have
1999          * succeeded, we can add all the subscriptions to the relevant
2000          * lists with no possibility of failure.
2001          */
2002         list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2003                 struct devx_event *event;
2004                 struct devx_obj_event *obj_event;
2005
2006                 list_del_init(&event_sub->event_list);
2007
2008                 spin_lock_irq(&ev_file->lock);
2009                 list_add_tail_rcu(&event_sub->file_list,
2010                                   &ev_file->subscribed_events_list);
2011                 spin_unlock_irq(&ev_file->lock);
2012
2013                 event = xa_load(&devx_event_table->event_xa,
2014                                 event_sub->xa_key_level1);
2015                 WARN_ON(!event);
2016
2017                 if (!obj) {
2018                         list_add_tail_rcu(&event_sub->xa_list,
2019                                           &event->unaffiliated_list);
2020                         continue;
2021                 }
2022
2023                 obj_event = xa_load(&event->object_ids, obj_id);
2024                 WARN_ON(!obj_event);
2025                 list_add_tail_rcu(&event_sub->xa_list,
2026                                   &obj_event->obj_sub_list);
2027                 list_add_tail_rcu(&event_sub->obj_list,
2028                                   &obj->event_sub);
2029         }
2030
2031         mutex_unlock(&devx_event_table->event_xa_lock);
2032         return 0;
2033
2034 err:
2035         list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2036                 list_del(&event_sub->event_list);
2037
2038                 subscribe_event_xa_dealloc(devx_event_table,
2039                                            event_sub->xa_key_level1,
2040                                            obj,
2041                                            obj_id);
2042
2043                 if (event_sub->eventfd)
2044                         eventfd_ctx_put(event_sub->eventfd);
2045                 uverbs_uobject_put(&event_sub->ev_file->uobj);
2046                 kfree(event_sub);
2047         }
2048
2049         mutex_unlock(&devx_event_table->event_xa_lock);
2050         return err;
2051 }
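
     /*
      * Subscription sketch (illustrative only): when an eventfd is passed
      * via MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM, delivery reduces to a
      * counter bump on that eventfd and nothing is queued on the channel
      * itself; the cookie attribute is rejected in this mode, as enforced
      * above.
      *
      *	int efd = eventfd(0, EFD_CLOEXEC);
      *	... subscribe with FD_NUM = efd, then poll()/read() efd ...
      */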
2052
2053 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
2054                          struct uverbs_attr_bundle *attrs,
2055                          struct devx_umem *obj)
2056 {
2057         u64 addr;
2058         size_t size;
2059         u32 access;
2060         int npages;
2061         int err;
2062         u32 page_mask;
2063
2064         if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
2065             uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
2066                 return -EFAULT;
2067
2068         err = uverbs_get_flags32(&access, attrs,
2069                                  MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2070                                  IB_ACCESS_LOCAL_WRITE |
2071                                  IB_ACCESS_REMOTE_WRITE |
2072                                  IB_ACCESS_REMOTE_READ);
2073         if (err)
2074                 return err;
2075
2076         err = ib_check_mr_access(access);
2077         if (err)
2078                 return err;
2079
2080         obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
2081         if (IS_ERR(obj->umem))
2082                 return PTR_ERR(obj->umem);
2083
2084         mlx5_ib_cont_pages(obj->umem, obj->umem->address,
2085                            MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
2086                            &obj->page_shift, &obj->ncont, NULL);
2087
2088         if (!npages) {
2089                 ib_umem_release(obj->umem);
2090                 return -EINVAL;
2091         }
2092
2093         page_mask = (1 << obj->page_shift) - 1;
2094         obj->page_offset = obj->umem->address & page_mask;
2095
2096         return 0;
2097 }
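
     /*
      * Page math example for devx_umem_get(): with a user address of
      * 0x201a80 and a computed page_shift of 12, page_mask is 0xfff and
      * page_offset is 0xa80; the MTTs built below then cover ncont aligned
      * 4K pages.
      */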
2098
2099 static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
2100                                    struct devx_umem *obj,
2101                                    struct devx_umem_reg_cmd *cmd)
2102 {
2103         cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
2104                     (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
2105         cmd->in = uverbs_zalloc(attrs, cmd->inlen);
2106         return PTR_ERR_OR_ZERO(cmd->in);
2107 }
2108
2109 static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
2110                                     struct devx_umem *obj,
2111                                     struct devx_umem_reg_cmd *cmd)
2112 {
2113         void *umem;
2114         __be64 *mtt;
2115
2116         umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
2117         mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
2118
2119         MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
2120         MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
2121         MLX5_SET(umem, umem, log_page_size, obj->page_shift -
2122                                             MLX5_ADAPTER_PAGE_SHIFT);
2123         MLX5_SET(umem, umem, page_offset, obj->page_offset);
2124         mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
2125                              (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
2126                              MLX5_IB_MTT_READ);
2127 }
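
     /*
      * Device page sizes are encoded relative to MLX5_ADAPTER_PAGE_SHIFT
      * (4K), so e.g. a umem mapped with page_shift 21 (2M pages) is
      * reported to firmware as log_page_size 9.
      */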
2128
2129 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
2130         struct uverbs_attr_bundle *attrs)
2131 {
2132         struct devx_umem_reg_cmd cmd;
2133         struct devx_umem *obj;
2134         struct ib_uobject *uobj = uverbs_attr_get_uobject(
2135                 attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2136         u32 obj_id;
2137         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
2138                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2139         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
2140         int err;
2141
2142         if (!c->devx_uid)
2143                 return -EINVAL;
2144
2145         obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
2146         if (!obj)
2147                 return -ENOMEM;
2148
2149         err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
2150         if (err)
2151                 goto err_obj_free;
2152
2153         err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
2154         if (err)
2155                 goto err_umem_release;
2156
2157         devx_umem_reg_cmd_build(dev, obj, &cmd);
2158
2159         MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
2160         err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
2161                             sizeof(cmd.out));
2162         if (err)
2163                 goto err_umem_release;
2164
2165         obj->mdev = dev->mdev;
2166         uobj->object = obj;
2167         devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
2168         uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2169
2170         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id,
2171                              sizeof(obj_id));
2172         return err;
2173
2174 err_umem_release:
2175         ib_umem_release(obj->umem);
2176 err_obj_free:
2177         kfree(obj);
2178         return err;
2179 }
2180
2181 static int devx_umem_cleanup(struct ib_uobject *uobject,
2182                              enum rdma_remove_reason why,
2183                              struct uverbs_attr_bundle *attrs)
2184 {
2185         struct devx_umem *obj = uobject->object;
2186         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
2187         int err;
2188
2189         err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
2190         if (ib_is_destroy_retryable(err, why, uobject))
2191                 return err;
2192
2193         ib_umem_release(obj->umem);
2194         kfree(obj);
2195         return 0;
2196 }
2197
2198 static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
2199                                   unsigned long event_type)
2200 {
2201         __be64 *unaff_events;
2202         int mask_entry;
2203         int mask_bit;
2204
2205         if (!MLX5_CAP_GEN(dev, event_cap))
2206                 return is_legacy_unaffiliated_event_num(event_type);
2207
2208         unaff_events = MLX5_CAP_DEV_EVENT(dev,
2209                                           user_unaffiliated_events);
2210         WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
2211
2212         mask_entry = event_type / 64;
2213         mask_bit = event_type % 64;
2214
2215         if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
2216                 return false;
2217
2218         return true;
2219 }
2220
2221 static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
2222 {
2223         struct mlx5_eqe *eqe = data;
2224         u32 obj_id = 0;
2225
2226         switch (event_type) {
2227         case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
2228         case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
2229         case MLX5_EVENT_TYPE_PATH_MIG:
2230         case MLX5_EVENT_TYPE_COMM_EST:
2231         case MLX5_EVENT_TYPE_SQ_DRAINED:
2232         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
2233         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
2234         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
2235         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
2236         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
2237                 obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
2238                 break;
2239         case MLX5_EVENT_TYPE_XRQ_ERROR:
2240                 obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
2241                 break;
2242         case MLX5_EVENT_TYPE_DCT_DRAINED:
2243         case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
2244                 obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
2245                 break;
2246         case MLX5_EVENT_TYPE_CQ_ERROR:
2247                 obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
2248                 break;
2249         default:
2250                 obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
2251                 break;
2252         }
2253
2254         return obj_id;
2255 }
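
     /*
      * Affiliated EQEs carry the object number in the low 24 bits of their
      * type-specific field, hence the 0xffffff masks above; event types
      * without a dedicated layout fall back to the generic
      * affiliated_event_header.
      */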
2256
2257 static int deliver_event(struct devx_event_subscription *event_sub,
2258                          const void *data)
2259 {
2260         struct devx_async_event_file *ev_file;
2261         struct devx_async_event_data *event_data;
2262         unsigned long flags;
2263
2264         ev_file = event_sub->ev_file;
2265
2266         if (ev_file->omit_data) {
2267                 spin_lock_irqsave(&ev_file->lock, flags);
2268                 if (!list_empty(&event_sub->event_list) ||
2269                     ev_file->is_destroyed) {
2270                         spin_unlock_irqrestore(&ev_file->lock, flags);
2271                         return 0;
2272                 }
2273
2274                 list_add_tail(&event_sub->event_list, &ev_file->event_list);
2275                 spin_unlock_irqrestore(&ev_file->lock, flags);
2276                 wake_up_interruptible(&ev_file->poll_wait);
2277                 return 0;
2278         }
2279
2280         event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
2281                              GFP_ATOMIC);
2282         if (!event_data) {
2283                 spin_lock_irqsave(&ev_file->lock, flags);
2284                 ev_file->is_overflow_err = 1;
2285                 spin_unlock_irqrestore(&ev_file->lock, flags);
2286                 return -ENOMEM;
2287         }
2288
2289         event_data->hdr.cookie = event_sub->cookie;
2290         memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
2291
2292         spin_lock_irqsave(&ev_file->lock, flags);
2293         if (!ev_file->is_destroyed)
2294                 list_add_tail(&event_data->list, &ev_file->event_list);
2295         else
2296                 kfree(event_data);
2297         spin_unlock_irqrestore(&ev_file->lock, flags);
2298         wake_up_interruptible(&ev_file->poll_wait);
2299
2300         return 0;
2301 }
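
     /*
      * In omit_data mode the subscription itself doubles as the queue
      * element: only the 8-byte cookie is reported, and since a
      * subscription can sit on the FD's event_list at most once, duplicate
      * events coalesce rather than allocate per-event data.
      */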
2302
2303 static void dispatch_event_fd(struct list_head *fd_list,
2304                               const void *data)
2305 {
2306         struct devx_event_subscription *item;
2307
2308         list_for_each_entry_rcu(item, fd_list, xa_list) {
2309                 if (item->eventfd)
2310                         eventfd_signal(item->eventfd, 1);
2311                 else
2312                         deliver_event(item, data);
2313         }
2314 }
2315
2316 static int devx_event_notifier(struct notifier_block *nb,
2317                                unsigned long event_type, void *data)
2318 {
2319         struct mlx5_devx_event_table *table;
2320         struct mlx5_ib_dev *dev;
2321         struct devx_event *event;
2322         struct devx_obj_event *obj_event;
2323         u16 obj_type = 0;
2324         bool is_unaffiliated;
2325         u32 obj_id;
2326
2327         /* Explicitly filter out kernel events that may occur frequently */
2328         if (event_type == MLX5_EVENT_TYPE_CMD ||
2329             event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
2330                 return NOTIFY_OK;
2331
2332         table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
2333         dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
2334         is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
2335
2336         if (!is_unaffiliated)
2337                 obj_type = get_event_obj_type(event_type, data);
2338
2339         rcu_read_lock();
2340         event = xa_load(&table->event_xa, event_type | (obj_type << 16));
2341         if (!event) {
2342                 rcu_read_unlock();
2343                 return NOTIFY_DONE;
2344         }
2345
2346         if (is_unaffiliated) {
2347                 dispatch_event_fd(&event->unaffiliated_list, data);
2348                 rcu_read_unlock();
2349                 return NOTIFY_OK;
2350         }
2351
2352         obj_id = devx_get_obj_id_from_event(event_type, data);
2353         obj_event = xa_load(&event->object_ids, obj_id);
2354         if (!obj_event) {
2355                 rcu_read_unlock();
2356                 return NOTIFY_DONE;
2357         }
2358
2359         dispatch_event_fd(&obj_event->obj_sub_list, data);
2360
2361         rcu_read_unlock();
2362         return NOTIFY_OK;
2363 }
2364
2365 int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
2366 {
2367         struct mlx5_devx_event_table *table = &dev->devx_event_table;
2368         int uid;
2369
2370         uid = mlx5_ib_devx_create(dev, false);
2371         if (uid > 0) {
2372                 dev->devx_whitelist_uid = uid;
2373                 xa_init(&table->event_xa);
2374                 mutex_init(&table->event_xa_lock);
2375                 MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
2376                 mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
2377         }
2378
2379         return 0;
2380 }
2381
2382 void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
2383 {
2384         struct mlx5_devx_event_table *table = &dev->devx_event_table;
2385         struct devx_event_subscription *sub, *tmp;
2386         struct devx_event *event;
2387         void *entry;
2388         unsigned long id;
2389
2390         if (dev->devx_whitelist_uid) {
2391                 mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
2392                 mutex_lock(&dev->devx_event_table.event_xa_lock);
2393                 xa_for_each(&table->event_xa, id, entry) {
2394                         event = entry;
2395                         list_for_each_entry_safe(
2396                                 sub, tmp, &event->unaffiliated_list, xa_list)
2397                                 devx_cleanup_subscription(dev, sub);
2398                         kfree(entry);
2399                 }
2400                 mutex_unlock(&dev->devx_event_table.event_xa_lock);
2401                 xa_destroy(&table->event_xa);
2402
2403                 mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
2404         }
2405 }
2406
2407 static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
2408                                          size_t count, loff_t *pos)
2409 {
2410         struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2411         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2412         struct devx_async_data *event;
2413         int ret = 0;
2414         size_t eventsz;
2415
2416         spin_lock_irq(&ev_queue->lock);
2417
2418         while (list_empty(&ev_queue->event_list)) {
2419                 spin_unlock_irq(&ev_queue->lock);
2420
2421                 if (filp->f_flags & O_NONBLOCK)
2422                         return -EAGAIN;
2423
2424                 if (wait_event_interruptible(
2425                             ev_queue->poll_wait,
2426                             (!list_empty(&ev_queue->event_list) ||
2427                              ev_queue->is_destroyed))) {
2428                         return -ERESTARTSYS;
2429                 }
2430
2431                 spin_lock_irq(&ev_queue->lock);
2432                 if (ev_queue->is_destroyed) {
2433                         spin_unlock_irq(&ev_queue->lock);
2434                         return -EIO;
2435                 }
2436         }
2437
2438         event = list_entry(ev_queue->event_list.next,
2439                            struct devx_async_data, list);
2440         eventsz = event->cmd_out_len +
2441                         sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
2442
2443         if (eventsz > count) {
2444                 spin_unlock_irq(&ev_queue->lock);
2445                 return -ENOSPC;
2446         }
2447
2448         list_del(ev_queue->event_list.next);
2449         spin_unlock_irq(&ev_queue->lock);
2450
2451         if (copy_to_user(buf, &event->hdr, eventsz))
2452                 ret = -EFAULT;
2453         else
2454                 ret = eventsz;
2455
2456         atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
2457         kvfree(event);
2458         return ret;
2459 }
2460
2461 static __poll_t devx_async_cmd_event_poll(struct file *filp,
2462                                               struct poll_table_struct *wait)
2463 {
2464         struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2465         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2466         __poll_t pollflags = 0;
2467
2468         poll_wait(filp, &ev_queue->poll_wait, wait);
2469
2470         spin_lock_irq(&ev_queue->lock);
2471         if (ev_queue->is_destroyed)
2472                 pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2473         else if (!list_empty(&ev_queue->event_list))
2474                 pollflags = EPOLLIN | EPOLLRDNORM;
2475         spin_unlock_irq(&ev_queue->lock);
2476
2477         return pollflags;
2478 }
2479
2480 static const struct file_operations devx_async_cmd_event_fops = {
2481         .owner   = THIS_MODULE,
2482         .read    = devx_async_cmd_event_read,
2483         .poll    = devx_async_cmd_event_poll,
2484         .release = uverbs_uobject_fd_release,
2485         .llseek  = no_llseek,
2486 };
2487
2488 static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
2489                                      size_t count, loff_t *pos)
2490 {
2491         struct devx_async_event_file *ev_file = filp->private_data;
2492         struct devx_event_subscription *event_sub;
2493         struct devx_async_event_data *event;
2494         int ret = 0;
2495         size_t eventsz;
2496         bool omit_data;
2497         void *event_data;
2498
2499         omit_data = ev_file->omit_data;
2500
2501         spin_lock_irq(&ev_file->lock);
2502
2503         if (ev_file->is_overflow_err) {
2504                 ev_file->is_overflow_err = 0;
2505                 spin_unlock_irq(&ev_file->lock);
2506                 return -EOVERFLOW;
2507         }
2508
2510         while (list_empty(&ev_file->event_list)) {
2511                 spin_unlock_irq(&ev_file->lock);
2512
2513                 if (filp->f_flags & O_NONBLOCK)
2514                         return -EAGAIN;
2515
2516                 if (wait_event_interruptible(ev_file->poll_wait,
2517                             (!list_empty(&ev_file->event_list) ||
2518                              ev_file->is_destroyed))) {
2519                         return -ERESTARTSYS;
2520                 }
2521
2522                 spin_lock_irq(&ev_file->lock);
2523                 if (ev_file->is_destroyed) {
2524                         spin_unlock_irq(&ev_file->lock);
2525                         return -EIO;
2526                 }
2527         }
2528
2529         if (omit_data) {
2530                 event_sub = list_first_entry(&ev_file->event_list,
2531                                         struct devx_event_subscription,
2532                                         event_list);
2533                 eventsz = sizeof(event_sub->cookie);
2534                 event_data = &event_sub->cookie;
2535         } else {
2536                 event = list_first_entry(&ev_file->event_list,
2537                                       struct devx_async_event_data, list);
2538                 eventsz = sizeof(struct mlx5_eqe) +
2539                         sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
2540                 event_data = &event->hdr;
2541         }
2542
2543         if (eventsz > count) {
2544                 spin_unlock_irq(&ev_file->lock);
2545                 return -EINVAL;
2546         }
2547
2548         if (omit_data)
2549                 list_del_init(&event_sub->event_list);
2550         else
2551                 list_del(&event->list);
2552
2553         spin_unlock_irq(&ev_file->lock);
2554
2555         if (copy_to_user(buf, event_data, eventsz))
2556                 /* This points to an application issue, not a kernel concern */
2557                 ret = -EFAULT;
2558         else
2559                 ret = eventsz;
2560
2561         if (!omit_data)
2562                 kfree(event);
2563         return ret;
2564 }
2565
2566 static __poll_t devx_async_event_poll(struct file *filp,
2567                                       struct poll_table_struct *wait)
2568 {
2569         struct devx_async_event_file *ev_file = filp->private_data;
2570         __poll_t pollflags = 0;
2571
2572         poll_wait(filp, &ev_file->poll_wait, wait);
2573
2574         spin_lock_irq(&ev_file->lock);
2575         if (ev_file->is_destroyed)
2576                 pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2577         else if (!list_empty(&ev_file->event_list))
2578                 pollflags = EPOLLIN | EPOLLRDNORM;
2579         spin_unlock_irq(&ev_file->lock);
2580
2581         return pollflags;
2582 }
2583
2584 static void devx_free_subscription(struct rcu_head *rcu)
2585 {
2586         struct devx_event_subscription *event_sub =
2587                 container_of(rcu, struct devx_event_subscription, rcu);
2588
2589         if (event_sub->eventfd)
2590                 eventfd_ctx_put(event_sub->eventfd);
2591         uverbs_uobject_put(&event_sub->ev_file->uobj);
2592         kfree(event_sub);
2593 }
2594
2595 static const struct file_operations devx_async_event_fops = {
2596         .owner   = THIS_MODULE,
2597         .read    = devx_async_event_read,
2598         .poll    = devx_async_event_poll,
2599         .release = uverbs_uobject_fd_release,
2600         .llseek  = no_llseek,
2601 };
2602
2603 static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
2604                                              enum rdma_remove_reason why)
2605 {
2606         struct devx_async_cmd_event_file *comp_ev_file =
2607                 container_of(uobj, struct devx_async_cmd_event_file,
2608                              uobj);
2609         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2610         struct devx_async_data *entry, *tmp;
2611
2612         spin_lock_irq(&ev_queue->lock);
2613         ev_queue->is_destroyed = 1;
2614         spin_unlock_irq(&ev_queue->lock);
2615         wake_up_interruptible(&ev_queue->poll_wait);
2616
2617         mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
2618
2619         spin_lock_irq(&comp_ev_file->ev_queue.lock);
2620         list_for_each_entry_safe(entry, tmp,
2621                                  &comp_ev_file->ev_queue.event_list, list) {
2622                 list_del(&entry->list);
2623                 kvfree(entry);
2624         }
2625         spin_unlock_irq(&comp_ev_file->ev_queue.lock);
2626         return 0;
2627 }
2628
2629 static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
2630                                          enum rdma_remove_reason why)
2631 {
2632         struct devx_async_event_file *ev_file =
2633                 container_of(uobj, struct devx_async_event_file,
2634                              uobj);
2635         struct devx_event_subscription *event_sub, *event_sub_tmp;
2636         struct mlx5_ib_dev *dev = ev_file->dev;
2637
2638         spin_lock_irq(&ev_file->lock);
2639         ev_file->is_destroyed = 1;
2640
2641         /* free the allocations for the pending events */
2642         if (ev_file->omit_data) {
2643                 struct devx_event_subscription *event_sub, *tmp;
2644
2645                 list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
2646                                          event_list)
2647                         list_del_init(&event_sub->event_list);
2648
2649         } else {
2650                 struct devx_async_event_data *entry, *tmp;
2651
2652                 list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
2653                                          list) {
2654                         list_del(&entry->list);
2655                         kfree(entry);
2656                 }
2657         }
2658
2659         spin_unlock_irq(&ev_file->lock);
2660         wake_up_interruptible(&ev_file->poll_wait);
2661
2662         mutex_lock(&dev->devx_event_table.event_xa_lock);
2663         /* delete the subscriptions which are related to this FD */
2664         list_for_each_entry_safe(event_sub, event_sub_tmp,
2665                                  &ev_file->subscribed_events_list, file_list) {
2666                 devx_cleanup_subscription(dev, event_sub);
2667                 list_del_rcu(&event_sub->file_list);
2668                 /* subscription may not be used by the read API any more */
2669                 call_rcu(&event_sub->rcu, devx_free_subscription);
2670         }
2671         mutex_unlock(&dev->devx_event_table.event_xa_lock);
2672
2673         put_device(&dev->ib_dev.dev);
2674         return 0;
2675 }
2676
2677 DECLARE_UVERBS_NAMED_METHOD(
2678         MLX5_IB_METHOD_DEVX_UMEM_REG,
2679         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
2680                         MLX5_IB_OBJECT_DEVX_UMEM,
2681                         UVERBS_ACCESS_NEW,
2682                         UA_MANDATORY),
2683         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
2684                            UVERBS_ATTR_TYPE(u64),
2685                            UA_MANDATORY),
2686         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
2687                            UVERBS_ATTR_TYPE(u64),
2688                            UA_MANDATORY),
2689         UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2690                              enum ib_access_flags),
2691         UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
2692                             UVERBS_ATTR_TYPE(u32),
2693                             UA_MANDATORY));
2694
2695 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2696         MLX5_IB_METHOD_DEVX_UMEM_DEREG,
2697         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
2698                         MLX5_IB_OBJECT_DEVX_UMEM,
2699                         UVERBS_ACCESS_DESTROY,
2700                         UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_QUERY_EQN,
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
			   UVERBS_ATTR_TYPE(u32),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));
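
/*
 * Usage sketch (illustrative; assumes rdma-core's wrapper for the
 * QUERY_EQN method above). The returned EQN is typically programmed into
 * a DEVX create_cq command (the CQ context's c_eqn field):
 *
 *	uint32_t eqn;
 *
 *	if (!mlx5dv_devx_query_eqn(ctx, vector, &eqn))
 *		... write eqn into the create_cq mailbox ...
 */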

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_QUERY_UAR,
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
			   UVERBS_ATTR_TYPE(u32),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));
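
/*
 * QUERY_UAR translates a UAR index as the caller knows it (user_idx) into
 * the device's hardware index (dev_idx), which is what PRM commands such
 * as create_qp/create_cq expect in their UAR page field; DEVX libraries
 * typically use it while building those commands.
 */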

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OTHER,
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));
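
/*
 * Usage sketch (illustrative; assumes rdma-core's mlx5dv_devx_general_cmd(),
 * the userspace side of the OTHER method above). The mailboxes carry raw
 * PRM commands; here only the general header is filled by hand:
 *
 *	uint8_t in[16] = {};
 *	uint8_t out[16 + 4096] = {};  - header plus the hca_cap union
 *
 *	in[0] = 0x01;  - MLX5_CMD_OP_QUERY_HCA_CAP (0x100); the opcode sits
 *	                 in bits 31:16 of the first big-endian dword
 *	in[7] = 0x01;  - op_mod: general device caps, current values
 *	if (mlx5dv_devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out)))
 *		... errno holds the failure ...
 */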

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
			MLX5_IB_OBJECT_DEVX_OBJ,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));
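
/*
 * Usage sketch (illustrative; assumes rdma-core's mlx5dv_devx_obj_create(),
 * which drives OBJ_CREATE above). The kernel validates the PRM opcode,
 * issues the command, and binds the created object ID to the handle:
 *
 *	uint8_t in[256] = {}, out[64] = {};  - sized per the chosen command
 *	struct mlx5dv_devx_obj *obj;
 *
 *	... fill in[] with e.g. a PRM create_cq command ...
 *	obj = mlx5dv_devx_obj_create(ctx, in, sizeof(in), out, sizeof(out));
 *	if (!obj)
 *		... errno holds the failure ...
 *	mlx5dv_devx_obj_destroy(obj);  - the DESTROY method below
 */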

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
			MLX5_IB_OBJECT_DEVX_OBJ,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
			UVERBS_IDR_ANY_OBJECT,
			UVERBS_ACCESS_WRITE,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
			UVERBS_IDR_ANY_OBJECT,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));
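
/*
 * Usage sketch (illustrative; assumes rdma-core's wrappers for the
 * MODIFY/QUERY methods above, which reuse the mailbox convention of
 * object creation, keyed by the object handle):
 *
 *	if (mlx5dv_devx_obj_query(obj, in, sizeof(in), out, sizeof(out)))
 *		...
 *	if (mlx5dv_devx_obj_modify(obj, in, sizeof(in), out, sizeof(out)))
 *		...
 *
 * Note that UVERBS_IDR_ANY_OBJECT lets these methods accept non-DEVX
 * uobjects as well (e.g. verbs-created QPs), subject to the handler's
 * object-ID validation.
 */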

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
			UVERBS_IDR_ANY_OBJECT,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
		u16, UA_MANDATORY),
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
		UVERBS_ACCESS_READ,
		UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
		UVERBS_ATTR_TYPE(u64),
		UA_MANDATORY));
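
/*
 * Usage sketch (illustrative; assumes rdma-core's async DEVX API, which
 * drives OBJ_ASYNC_QUERY above). The wr_id cookie ties the completion,
 * read back from the command FD, to the request:
 *
 *	struct { struct mlx5dv_devx_async_cmd_hdr hdr;
 *		 uint8_t out[256]; } resp;  - out sized as the out_len attr
 *
 *	mlx5dv_devx_obj_query_async(obj, in, sizeof(in), sizeof(resp.out),
 *				    my_wr_id, cmd_comp);
 *	... poll() cmd_comp->fd for readability ...
 *	mlx5dv_devx_get_async_cmd_comp(cmd_comp, &resp.hdr, sizeof(resp));
 *	- resp.hdr.wr_id now equals my_wr_id
 */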

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
		UVERBS_ACCESS_READ,
		UA_MANDATORY),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
		MLX5_IB_OBJECT_DEVX_OBJ,
		UVERBS_ACCESS_READ,
		UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
		UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
		UVERBS_ATTR_TYPE(u64),
		UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
		UVERBS_ATTR_TYPE(u32),
		UA_OPTIONAL));
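
/*
 * Usage sketch (illustrative; assumes rdma-core's event API, which drives
 * SUBSCRIBE_EVENT above). Events are named by PRM event type numbers and
 * the cookie is echoed back in every matching event:
 *
 *	uint16_t events[] = { 0x0 };  - e.g. the completion event type
 *
 *	if (mlx5dv_devx_subscribe_devx_event(ch, obj, sizeof(events),
 *					     events, my_cookie))
 *		...
 *
 * The optional FD_NUM attribute (mlx5dv_devx_subscribe_devx_event_fd())
 * redirects one event type to an eventfd instead of the channel itself.
 */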

DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));

DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
			    UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));

DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
			MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
			     devx_async_cmd_event_destroy_uobj,
			     &devx_async_cmd_event_fops, "[devx_async_cmd]",
			     O_RDONLY),
	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
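
/*
 * Usage sketch (illustrative; assumes rdma-core). The FD allocated by the
 * method above is wrapped by a command-completion channel:
 *
 *	struct mlx5dv_devx_cmd_comp *cmd_comp;
 *
 *	cmd_comp = mlx5dv_devx_create_cmd_comp(ctx);
 *	... issue async object queries against it, read completions ...
 *	mlx5dv_devx_destroy_cmd_comp(cmd_comp);
 */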

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
			MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
			enum mlx5_ib_uapi_devx_create_event_channel_flags,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
			     devx_async_event_destroy_uobj,
			     &devx_async_event_fops, "[devx_async_event]",
			     O_RDONLY),
	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
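
/*
 * Usage sketch (illustrative; assumes rdma-core). The flags attribute of
 * the method above maps to the channel-creation flags; OMIT_EV_DATA
 * selects the ev_file->omit_data mode handled earlier in this file:
 *
 *	struct mlx5dv_devx_event_channel *ch;
 *	struct { struct mlx5dv_devx_async_event_hdr hdr;
 *		 uint8_t data[64]; } ev;
 *
 *	ch = mlx5dv_devx_create_event_channel(ctx,
 *		MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
 *	while (mlx5dv_devx_get_event(ch, &ev.hdr, sizeof(ev)) > 0)
 *		... ev.hdr.cookie identifies the subscription ...
 */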

static bool devx_is_supported(struct ib_device *device)
{
	struct mlx5_ib_dev *dev = to_mdev(device);

	/* DEVX needs device contexts; a non-zero log_max_uctx capability
	 * means the firmware can create them.
	 */
	return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
}
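
/*
 * There is no explicit "is DEVX supported" query for userspace: when
 * log_max_uctx is zero the trees below are simply not registered, so DEVX
 * ioctls fail (typically with EPROTONOSUPPORT), and consumers probe by
 * attempting a harmless command.
 */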

const struct uapi_definition mlx5_ib_devx_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_OBJ,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_UMEM,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	{},
};