Merge branch 'hugepage-fallbacks' (hugepatch patches from David Rientjes)
[linux-2.6-microblaze.git] / drivers / infiniband / hw / mlx5 / devx.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4  */
5
6 #include <rdma/ib_user_verbs.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/uverbs_types.h>
9 #include <rdma/uverbs_ioctl.h>
10 #include <rdma/mlx5_user_ioctl_cmds.h>
11 #include <rdma/mlx5_user_ioctl_verbs.h>
12 #include <rdma/ib_umem.h>
13 #include <rdma/uverbs_std_types.h>
14 #include <linux/mlx5/driver.h>
15 #include <linux/mlx5/fs.h>
16 #include "mlx5_ib.h"
17 #include <linux/xarray.h>
18
19 #define UVERBS_MODULE_NAME mlx5_ib
20 #include <rdma/uverbs_named_ioctl.h>
21
/* Forward declaration; the definition is not part of this section of the file. */
static void dispatch_event_fd(struct list_head *fd_list, const void *data);

/*
 * Single-bit flags describing a DEVX object.
 * NOTE(review): presumably stored in devx_obj.flags - confirm against users.
 */
enum devx_obj_flags {
        DEVX_OBJ_FLAGS_INDIRECT_MKEY    = 1 << 0,
        DEVX_OBJ_FLAGS_DCT              = 1 << 1,
        DEVX_OBJ_FLAGS_CQ               = 1 << 2,
};
29
30 struct devx_async_data {
31         struct mlx5_ib_dev *mdev;
32         struct list_head list;
33         struct ib_uobject *fd_uobj;
34         struct mlx5_async_work cb_work;
35         u16 cmd_out_len;
36         /* must be last field in this structure */
37         struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
38 };
39
40 struct devx_async_event_data {
41         struct list_head list; /* headed in ev_file->event_list */
42         struct mlx5_ib_uapi_devx_async_event_hdr hdr;
43 };
44
45 /* first level XA value data structure */
46 struct devx_event {
47         struct xarray object_ids; /* second XA level, Key = object id */
48         struct list_head unaffiliated_list;
49 };
50
51 /* second level XA value data structure */
52 struct devx_obj_event {
53         struct rcu_head rcu;
54         struct list_head obj_sub_list;
55 };
56
57 struct devx_event_subscription {
58         struct list_head file_list; /* headed in ev_file->
59                                      * subscribed_events_list
60                                      */
61         struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
62                                    * devx_obj_event->obj_sub_list
63                                    */
64         struct list_head obj_list; /* headed in devx_object */
65         struct list_head event_list; /* headed in ev_file->event_list or in
66                                       * temp list via subscription
67                                       */
68
69         u8 is_cleaned:1;
70         u32 xa_key_level1;
71         u32 xa_key_level2;
72         struct rcu_head rcu;
73         u64 cookie;
74         struct devx_async_event_file *ev_file;
75         struct file *filp; /* Upon hot unplug we need a direct access to */
76         struct eventfd_ctx *eventfd;
77 };
78
79 struct devx_async_event_file {
80         struct ib_uobject uobj;
81         /* Head of events that are subscribed to this FD */
82         struct list_head subscribed_events_list;
83         spinlock_t lock;
84         wait_queue_head_t poll_wait;
85         struct list_head event_list;
86         struct mlx5_ib_dev *dev;
87         u8 omit_data:1;
88         u8 is_overflow_err:1;
89         u8 is_destroyed:1;
90 };
91
92 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
93 struct devx_obj {
94         struct mlx5_ib_dev      *ib_dev;
95         u64                     obj_id;
96         u32                     dinlen; /* destroy inbox length */
97         u32                     dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
98         u32                     flags;
99         union {
100                 struct mlx5_ib_devx_mr  devx_mr;
101                 struct mlx5_core_dct    core_dct;
102                 struct mlx5_core_cq     core_cq;
103         };
104         struct list_head event_sub; /* holds devx_event_subscription entries */
105 };
106
107 struct devx_umem {
108         struct mlx5_core_dev            *mdev;
109         struct ib_umem                  *umem;
110         u32                             page_offset;
111         int                             page_shift;
112         int                             ncont;
113         u32                             dinlen;
114         u32                             dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
115 };
116
117 struct devx_umem_reg_cmd {
118         void                            *in;
119         u32                             inlen;
120         u32                             out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
121 };
122
/*
 * Look up the ucontext for the attribute bundle and convert it to the
 * mlx5 ucontext type.
 */
static struct mlx5_ib_ucontext *
devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
        struct ib_ucontext *ibuctx = ib_uverbs_get_ucontext(attrs);

        return to_mucontext(ibuctx);
}
128
129 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
130 {
131         u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
132         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
133         void *uctx;
134         int err;
135         u16 uid;
136         u32 cap = 0;
137
138         /* 0 means not supported */
139         if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
140                 return -EINVAL;
141
142         uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
143         if (is_user && capable(CAP_NET_RAW) &&
144             (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
145                 cap |= MLX5_UCTX_CAP_RAW_TX;
146         if (is_user && capable(CAP_SYS_RAWIO) &&
147             (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
148              MLX5_UCTX_CAP_INTERNAL_DEV_RES))
149                 cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
150
151         MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
152         MLX5_SET(uctx, uctx, cap, cap);
153
154         err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
155         if (err)
156                 return err;
157
158         uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
159         return uid;
160 }
161
162 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
163 {
164         u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
165         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
166
167         MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
168         MLX5_SET(destroy_uctx_in, in, uid, uid);
169
170         mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
171 }
172
173 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
174 {
175         struct devx_obj *devx_obj = obj;
176         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
177
178         switch (opcode) {
179         case MLX5_CMD_OP_DESTROY_TIR:
180                 *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
181                 *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
182                                     obj_id);
183                 return true;
184
185         case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
186                 *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
187                 *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
188                                     table_id);
189                 return true;
190         default:
191                 return false;
192         }
193 }
194
195 bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
196 {
197         struct devx_obj *devx_obj = obj;
198         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
199
200         if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
201                 *counter_id = MLX5_GET(dealloc_flow_counter_in,
202                                        devx_obj->dinbox,
203                                        flow_counter_id);
204                 return true;
205         }
206
207         return false;
208 }
209
210 static bool is_legacy_unaffiliated_event_num(u16 event_num)
211 {
212         switch (event_num) {
213         case MLX5_EVENT_TYPE_PORT_CHANGE:
214                 return true;
215         default:
216                 return false;
217         }
218 }
219
220 static bool is_legacy_obj_event_num(u16 event_num)
221 {
222         switch (event_num) {
223         case MLX5_EVENT_TYPE_PATH_MIG:
224         case MLX5_EVENT_TYPE_COMM_EST:
225         case MLX5_EVENT_TYPE_SQ_DRAINED:
226         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
227         case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
228         case MLX5_EVENT_TYPE_CQ_ERROR:
229         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
230         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
231         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
232         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
233         case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
234         case MLX5_EVENT_TYPE_DCT_DRAINED:
235         case MLX5_EVENT_TYPE_COMP:
236         case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
237         case MLX5_EVENT_TYPE_XRQ_ERROR:
238                 return true;
239         default:
240                 return false;
241         }
242 }
243
244 static u16 get_legacy_obj_type(u16 opcode)
245 {
246         switch (opcode) {
247         case MLX5_CMD_OP_CREATE_RQ:
248                 return MLX5_EVENT_QUEUE_TYPE_RQ;
249         case MLX5_CMD_OP_CREATE_QP:
250                 return MLX5_EVENT_QUEUE_TYPE_QP;
251         case MLX5_CMD_OP_CREATE_SQ:
252                 return MLX5_EVENT_QUEUE_TYPE_SQ;
253         case MLX5_CMD_OP_CREATE_DCT:
254                 return MLX5_EVENT_QUEUE_TYPE_DCT;
255         default:
256                 return 0;
257         }
258 }
259
260 static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
261 {
262         u16 opcode;
263
264         opcode = (obj->obj_id >> 32) & 0xffff;
265
266         if (is_legacy_obj_event_num(event_num))
267                 return get_legacy_obj_type(opcode);
268
269         switch (opcode) {
270         case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
271                 return (obj->obj_id >> 48);
272         case MLX5_CMD_OP_CREATE_RQ:
273                 return MLX5_OBJ_TYPE_RQ;
274         case MLX5_CMD_OP_CREATE_QP:
275                 return MLX5_OBJ_TYPE_QP;
276         case MLX5_CMD_OP_CREATE_SQ:
277                 return MLX5_OBJ_TYPE_SQ;
278         case MLX5_CMD_OP_CREATE_DCT:
279                 return MLX5_OBJ_TYPE_DCT;
280         case MLX5_CMD_OP_CREATE_TIR:
281                 return MLX5_OBJ_TYPE_TIR;
282         case MLX5_CMD_OP_CREATE_TIS:
283                 return MLX5_OBJ_TYPE_TIS;
284         case MLX5_CMD_OP_CREATE_PSV:
285                 return MLX5_OBJ_TYPE_PSV;
286         case MLX5_OBJ_TYPE_MKEY:
287                 return MLX5_OBJ_TYPE_MKEY;
288         case MLX5_CMD_OP_CREATE_RMP:
289                 return MLX5_OBJ_TYPE_RMP;
290         case MLX5_CMD_OP_CREATE_XRC_SRQ:
291                 return MLX5_OBJ_TYPE_XRC_SRQ;
292         case MLX5_CMD_OP_CREATE_XRQ:
293                 return MLX5_OBJ_TYPE_XRQ;
294         case MLX5_CMD_OP_CREATE_RQT:
295                 return MLX5_OBJ_TYPE_RQT;
296         case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
297                 return MLX5_OBJ_TYPE_FLOW_COUNTER;
298         case MLX5_CMD_OP_CREATE_CQ:
299                 return MLX5_OBJ_TYPE_CQ;
300         default:
301                 return 0;
302         }
303 }
304
305 static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
306 {
307         switch (event_type) {
308         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
309         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
310         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
311         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
312         case MLX5_EVENT_TYPE_PATH_MIG:
313         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
314         case MLX5_EVENT_TYPE_COMM_EST:
315         case MLX5_EVENT_TYPE_SQ_DRAINED:
316         case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
317         case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
318                 return eqe->data.qp_srq.type;
319         case MLX5_EVENT_TYPE_CQ_ERROR:
320         case MLX5_EVENT_TYPE_XRQ_ERROR:
321                 return 0;
322         case MLX5_EVENT_TYPE_DCT_DRAINED:
323         case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
324                 return MLX5_EVENT_QUEUE_TYPE_DCT;
325         default:
326                 return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
327         }
328 }
329
330 static u32 get_dec_obj_id(u64 obj_id)
331 {
332         return (obj_id & 0xffffffff);
333 }
334
335 /*
336  * As the obj_id in the firmware is not globally unique the object type
337  * must be considered upon checking for a valid object id.
338  * For that the opcode of the creator command is encoded as part of the obj_id.
339  */
340 static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
341 {
342         return ((u64)opcode << 32) | obj_id;
343 }
344
345 static u64 devx_get_obj_id(const void *in)
346 {
347         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
348         u64 obj_id;
349
350         switch (opcode) {
351         case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
352         case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
353                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT |
354                                         MLX5_GET(general_obj_in_cmd_hdr, in,
355                                                  obj_type) << 16,
356                                         MLX5_GET(general_obj_in_cmd_hdr, in,
357                                                  obj_id));
358                 break;
359         case MLX5_CMD_OP_QUERY_MKEY:
360                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
361                                         MLX5_GET(query_mkey_in, in,
362                                                  mkey_index));
363                 break;
364         case MLX5_CMD_OP_QUERY_CQ:
365                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
366                                         MLX5_GET(query_cq_in, in, cqn));
367                 break;
368         case MLX5_CMD_OP_MODIFY_CQ:
369                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
370                                         MLX5_GET(modify_cq_in, in, cqn));
371                 break;
372         case MLX5_CMD_OP_QUERY_SQ:
373                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
374                                         MLX5_GET(query_sq_in, in, sqn));
375                 break;
376         case MLX5_CMD_OP_MODIFY_SQ:
377                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
378                                         MLX5_GET(modify_sq_in, in, sqn));
379                 break;
380         case MLX5_CMD_OP_QUERY_RQ:
381                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
382                                         MLX5_GET(query_rq_in, in, rqn));
383                 break;
384         case MLX5_CMD_OP_MODIFY_RQ:
385                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
386                                         MLX5_GET(modify_rq_in, in, rqn));
387                 break;
388         case MLX5_CMD_OP_QUERY_RMP:
389                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
390                                         MLX5_GET(query_rmp_in, in, rmpn));
391                 break;
392         case MLX5_CMD_OP_MODIFY_RMP:
393                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
394                                         MLX5_GET(modify_rmp_in, in, rmpn));
395                 break;
396         case MLX5_CMD_OP_QUERY_RQT:
397                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
398                                         MLX5_GET(query_rqt_in, in, rqtn));
399                 break;
400         case MLX5_CMD_OP_MODIFY_RQT:
401                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
402                                         MLX5_GET(modify_rqt_in, in, rqtn));
403                 break;
404         case MLX5_CMD_OP_QUERY_TIR:
405                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
406                                         MLX5_GET(query_tir_in, in, tirn));
407                 break;
408         case MLX5_CMD_OP_MODIFY_TIR:
409                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
410                                         MLX5_GET(modify_tir_in, in, tirn));
411                 break;
412         case MLX5_CMD_OP_QUERY_TIS:
413                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
414                                         MLX5_GET(query_tis_in, in, tisn));
415                 break;
416         case MLX5_CMD_OP_MODIFY_TIS:
417                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
418                                         MLX5_GET(modify_tis_in, in, tisn));
419                 break;
420         case MLX5_CMD_OP_QUERY_FLOW_TABLE:
421                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
422                                         MLX5_GET(query_flow_table_in, in,
423                                                  table_id));
424                 break;
425         case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
426                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
427                                         MLX5_GET(modify_flow_table_in, in,
428                                                  table_id));
429                 break;
430         case MLX5_CMD_OP_QUERY_FLOW_GROUP:
431                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
432                                         MLX5_GET(query_flow_group_in, in,
433                                                  group_id));
434                 break;
435         case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
436                 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
437                                         MLX5_GET(query_fte_in, in,
438                                                  flow_index));
439                 break;
440         case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
441                 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
442                                         MLX5_GET(set_fte_in, in, flow_index));
443                 break;
444         case MLX5_CMD_OP_QUERY_Q_COUNTER:
445                 obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
446                                         MLX5_GET(query_q_counter_in, in,
447                                                  counter_set_id));
448                 break;
449         case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
450                 obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
451                                         MLX5_GET(query_flow_counter_in, in,
452                                                  flow_counter_id));
453                 break;
454         case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
455                 obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
456                                         MLX5_GET(general_obj_in_cmd_hdr, in,
457                                                  obj_id));
458                 break;
459         case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
460                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
461                                         MLX5_GET(query_scheduling_element_in,
462                                                  in, scheduling_element_id));
463                 break;
464         case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
465                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
466                                         MLX5_GET(modify_scheduling_element_in,
467                                                  in, scheduling_element_id));
468                 break;
469         case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
470                 obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
471                                         MLX5_GET(add_vxlan_udp_dport_in, in,
472                                                  vxlan_udp_port));
473                 break;
474         case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
475                 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
476                                         MLX5_GET(query_l2_table_entry_in, in,
477                                                  table_index));
478                 break;
479         case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
480                 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
481                                         MLX5_GET(set_l2_table_entry_in, in,
482                                                  table_index));
483                 break;
484         case MLX5_CMD_OP_QUERY_QP:
485                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
486                                         MLX5_GET(query_qp_in, in, qpn));
487                 break;
488         case MLX5_CMD_OP_RST2INIT_QP:
489                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
490                                         MLX5_GET(rst2init_qp_in, in, qpn));
491                 break;
492         case MLX5_CMD_OP_INIT2RTR_QP:
493                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
494                                         MLX5_GET(init2rtr_qp_in, in, qpn));
495                 break;
496         case MLX5_CMD_OP_RTR2RTS_QP:
497                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
498                                         MLX5_GET(rtr2rts_qp_in, in, qpn));
499                 break;
500         case MLX5_CMD_OP_RTS2RTS_QP:
501                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
502                                         MLX5_GET(rts2rts_qp_in, in, qpn));
503                 break;
504         case MLX5_CMD_OP_SQERR2RTS_QP:
505                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
506                                         MLX5_GET(sqerr2rts_qp_in, in, qpn));
507                 break;
508         case MLX5_CMD_OP_2ERR_QP:
509                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
510                                         MLX5_GET(qp_2err_in, in, qpn));
511                 break;
512         case MLX5_CMD_OP_2RST_QP:
513                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
514                                         MLX5_GET(qp_2rst_in, in, qpn));
515                 break;
516         case MLX5_CMD_OP_QUERY_DCT:
517                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
518                                         MLX5_GET(query_dct_in, in, dctn));
519                 break;
520         case MLX5_CMD_OP_QUERY_XRQ:
521         case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
522         case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
523                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
524                                         MLX5_GET(query_xrq_in, in, xrqn));
525                 break;
526         case MLX5_CMD_OP_QUERY_XRC_SRQ:
527                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
528                                         MLX5_GET(query_xrc_srq_in, in,
529                                                  xrc_srqn));
530                 break;
531         case MLX5_CMD_OP_ARM_XRC_SRQ:
532                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
533                                         MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
534                 break;
535         case MLX5_CMD_OP_QUERY_SRQ:
536                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
537                                         MLX5_GET(query_srq_in, in, srqn));
538                 break;
539         case MLX5_CMD_OP_ARM_RQ:
540                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
541                                         MLX5_GET(arm_rq_in, in, srq_number));
542                 break;
543         case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
544                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
545                                         MLX5_GET(drain_dct_in, in, dctn));
546                 break;
547         case MLX5_CMD_OP_ARM_XRQ:
548         case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
549         case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
550         case MLX5_CMD_OP_MODIFY_XRQ:
551                 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
552                                         MLX5_GET(arm_xrq_in, in, xrqn));
553                 break;
554         case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
555                 obj_id = get_enc_obj_id
556                                 (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
557                                  MLX5_GET(query_packet_reformat_context_in,
558                                           in, packet_reformat_id));
559                 break;
560         default:
561                 obj_id = 0;
562         }
563
564         return obj_id;
565 }
566
567 static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
568                                  struct ib_uobject *uobj, const void *in)
569 {
570         struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
571         u64 obj_id = devx_get_obj_id(in);
572
573         if (!obj_id)
574                 return false;
575
576         switch (uobj_get_object_id(uobj)) {
577         case UVERBS_OBJECT_CQ:
578                 return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
579                                       to_mcq(uobj->object)->mcq.cqn) ==
580                                       obj_id;
581
582         case UVERBS_OBJECT_SRQ:
583         {
584                 struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
585                 u16 opcode;
586
587                 switch (srq->common.res) {
588                 case MLX5_RES_XSRQ:
589                         opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
590                         break;
591                 case MLX5_RES_XRQ:
592                         opcode = MLX5_CMD_OP_CREATE_XRQ;
593                         break;
594                 default:
595                         if (!dev->mdev->issi)
596                                 opcode = MLX5_CMD_OP_CREATE_SRQ;
597                         else
598                                 opcode = MLX5_CMD_OP_CREATE_RMP;
599                 }
600
601                 return get_enc_obj_id(opcode,
602                                       to_msrq(uobj->object)->msrq.srqn) ==
603                                       obj_id;
604         }
605
606         case UVERBS_OBJECT_QP:
607         {
608                 struct mlx5_ib_qp *qp = to_mqp(uobj->object);
609                 enum ib_qp_type qp_type = qp->ibqp.qp_type;
610
611                 if (qp_type == IB_QPT_RAW_PACKET ||
612                     (qp->flags & MLX5_IB_QP_UNDERLAY)) {
613                         struct mlx5_ib_raw_packet_qp *raw_packet_qp =
614                                                          &qp->raw_packet_qp;
615                         struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
616                         struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
617
618                         return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
619                                                rq->base.mqp.qpn) == obj_id ||
620                                 get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
621                                                sq->base.mqp.qpn) == obj_id ||
622                                 get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
623                                                rq->tirn) == obj_id ||
624                                 get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
625                                                sq->tisn) == obj_id);
626                 }
627
628                 if (qp_type == MLX5_IB_QPT_DCT)
629                         return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
630                                               qp->dct.mdct.mqp.qpn) == obj_id;
631
632                 return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
633                                       qp->ibqp.qp_num) == obj_id;
634         }
635
636         case UVERBS_OBJECT_WQ:
637                 return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
638                                       to_mrwq(uobj->object)->core_qp.qpn) ==
639                                       obj_id;
640
641         case UVERBS_OBJECT_RWQ_IND_TBL:
642                 return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
643                                       to_mrwq_ind_table(uobj->object)->rqtn) ==
644                                       obj_id;
645
646         case MLX5_IB_OBJECT_DEVX_OBJ:
647                 return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
648
649         default:
650                 return false;
651         }
652 }
653
654 static void devx_set_umem_valid(const void *in)
655 {
656         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
657
658         switch (opcode) {
659         case MLX5_CMD_OP_CREATE_MKEY:
660                 MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
661                 break;
662         case MLX5_CMD_OP_CREATE_CQ:
663         {
664                 void *cqc;
665
666                 MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
667                 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
668                 MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
669                 break;
670         }
671         case MLX5_CMD_OP_CREATE_QP:
672         {
673                 void *qpc;
674
675                 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
676                 MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
677                 MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
678                 break;
679         }
680
681         case MLX5_CMD_OP_CREATE_RQ:
682         {
683                 void *rqc, *wq;
684
685                 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
686                 wq  = MLX5_ADDR_OF(rqc, rqc, wq);
687                 MLX5_SET(wq, wq, dbr_umem_valid, 1);
688                 MLX5_SET(wq, wq, wq_umem_valid, 1);
689                 break;
690         }
691
692         case MLX5_CMD_OP_CREATE_SQ:
693         {
694                 void *sqc, *wq;
695
696                 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
697                 wq = MLX5_ADDR_OF(sqc, sqc, wq);
698                 MLX5_SET(wq, wq, dbr_umem_valid, 1);
699                 MLX5_SET(wq, wq, wq_umem_valid, 1);
700                 break;
701         }
702
703         case MLX5_CMD_OP_MODIFY_CQ:
704                 MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
705                 break;
706
707         case MLX5_CMD_OP_CREATE_RMP:
708         {
709                 void *rmpc, *wq;
710
711                 rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
712                 wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
713                 MLX5_SET(wq, wq, dbr_umem_valid, 1);
714                 MLX5_SET(wq, wq, wq_umem_valid, 1);
715                 break;
716         }
717
718         case MLX5_CMD_OP_CREATE_XRQ:
719         {
720                 void *xrqc, *wq;
721
722                 xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
723                 wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
724                 MLX5_SET(wq, wq, dbr_umem_valid, 1);
725                 MLX5_SET(wq, wq, wq_umem_valid, 1);
726                 break;
727         }
728
729         case MLX5_CMD_OP_CREATE_XRC_SRQ:
730         {
731                 void *xrc_srqc;
732
733                 MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
734                 xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
735                                         xrc_srq_context_entry);
736                 MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
737                 break;
738         }
739
740         default:
741                 return;
742         }
743 }
744
745 static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
746 {
747         *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
748
749         switch (*opcode) {
750         case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
751         case MLX5_CMD_OP_CREATE_MKEY:
752         case MLX5_CMD_OP_CREATE_CQ:
753         case MLX5_CMD_OP_ALLOC_PD:
754         case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
755         case MLX5_CMD_OP_CREATE_RMP:
756         case MLX5_CMD_OP_CREATE_SQ:
757         case MLX5_CMD_OP_CREATE_RQ:
758         case MLX5_CMD_OP_CREATE_RQT:
759         case MLX5_CMD_OP_CREATE_TIR:
760         case MLX5_CMD_OP_CREATE_TIS:
761         case MLX5_CMD_OP_ALLOC_Q_COUNTER:
762         case MLX5_CMD_OP_CREATE_FLOW_TABLE:
763         case MLX5_CMD_OP_CREATE_FLOW_GROUP:
764         case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
765         case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
766         case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
767         case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
768         case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
769         case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
770         case MLX5_CMD_OP_CREATE_QP:
771         case MLX5_CMD_OP_CREATE_SRQ:
772         case MLX5_CMD_OP_CREATE_XRC_SRQ:
773         case MLX5_CMD_OP_CREATE_DCT:
774         case MLX5_CMD_OP_CREATE_XRQ:
775         case MLX5_CMD_OP_ATTACH_TO_MCG:
776         case MLX5_CMD_OP_ALLOC_XRCD:
777                 return true;
778         case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
779         {
780                 u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
781                 if (op_mod == 0)
782                         return true;
783                 return false;
784         }
785         case MLX5_CMD_OP_CREATE_PSV:
786         {
787                 u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
788
789                 if (num_psv == 1)
790                         return true;
791                 return false;
792         }
793         default:
794                 return false;
795         }
796 }
797
798 static bool devx_is_obj_modify_cmd(const void *in)
799 {
800         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
801
802         switch (opcode) {
803         case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
804         case MLX5_CMD_OP_MODIFY_CQ:
805         case MLX5_CMD_OP_MODIFY_RMP:
806         case MLX5_CMD_OP_MODIFY_SQ:
807         case MLX5_CMD_OP_MODIFY_RQ:
808         case MLX5_CMD_OP_MODIFY_RQT:
809         case MLX5_CMD_OP_MODIFY_TIR:
810         case MLX5_CMD_OP_MODIFY_TIS:
811         case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
812         case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
813         case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
814         case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
815         case MLX5_CMD_OP_RST2INIT_QP:
816         case MLX5_CMD_OP_INIT2RTR_QP:
817         case MLX5_CMD_OP_RTR2RTS_QP:
818         case MLX5_CMD_OP_RTS2RTS_QP:
819         case MLX5_CMD_OP_SQERR2RTS_QP:
820         case MLX5_CMD_OP_2ERR_QP:
821         case MLX5_CMD_OP_2RST_QP:
822         case MLX5_CMD_OP_ARM_XRC_SRQ:
823         case MLX5_CMD_OP_ARM_RQ:
824         case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
825         case MLX5_CMD_OP_ARM_XRQ:
826         case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
827         case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
828         case MLX5_CMD_OP_MODIFY_XRQ:
829                 return true;
830         case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
831         {
832                 u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
833
834                 if (op_mod == 1)
835                         return true;
836                 return false;
837         }
838         default:
839                 return false;
840         }
841 }
842
843 static bool devx_is_obj_query_cmd(const void *in)
844 {
845         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
846
847         switch (opcode) {
848         case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
849         case MLX5_CMD_OP_QUERY_MKEY:
850         case MLX5_CMD_OP_QUERY_CQ:
851         case MLX5_CMD_OP_QUERY_RMP:
852         case MLX5_CMD_OP_QUERY_SQ:
853         case MLX5_CMD_OP_QUERY_RQ:
854         case MLX5_CMD_OP_QUERY_RQT:
855         case MLX5_CMD_OP_QUERY_TIR:
856         case MLX5_CMD_OP_QUERY_TIS:
857         case MLX5_CMD_OP_QUERY_Q_COUNTER:
858         case MLX5_CMD_OP_QUERY_FLOW_TABLE:
859         case MLX5_CMD_OP_QUERY_FLOW_GROUP:
860         case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
861         case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
862         case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
863         case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
864         case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
865         case MLX5_CMD_OP_QUERY_QP:
866         case MLX5_CMD_OP_QUERY_SRQ:
867         case MLX5_CMD_OP_QUERY_XRC_SRQ:
868         case MLX5_CMD_OP_QUERY_DCT:
869         case MLX5_CMD_OP_QUERY_XRQ:
870         case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
871         case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
872         case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
873                 return true;
874         default:
875                 return false;
876         }
877 }
878
879 static bool devx_is_whitelist_cmd(void *in)
880 {
881         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
882
883         switch (opcode) {
884         case MLX5_CMD_OP_QUERY_HCA_CAP:
885         case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
886         case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
887                 return true;
888         default:
889                 return false;
890         }
891 }
892
893 static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
894 {
895         if (devx_is_whitelist_cmd(cmd_in)) {
896                 struct mlx5_ib_dev *dev;
897
898                 if (c->devx_uid)
899                         return c->devx_uid;
900
901                 dev = to_mdev(c->ibucontext.device);
902                 if (dev->devx_whitelist_uid)
903                         return dev->devx_whitelist_uid;
904
905                 return -EOPNOTSUPP;
906         }
907
908         if (!c->devx_uid)
909                 return -EINVAL;
910
911         return c->devx_uid;
912 }
913
914 static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
915 {
916         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
917
918         /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
919         if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
920              MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
921             (opcode >= MLX5_CMD_OP_GENERAL_START &&
922              opcode < MLX5_CMD_OP_GENERAL_END))
923                 return true;
924
925         switch (opcode) {
926         case MLX5_CMD_OP_QUERY_HCA_CAP:
927         case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
928         case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
929         case MLX5_CMD_OP_QUERY_VPORT_STATE:
930         case MLX5_CMD_OP_QUERY_ADAPTER:
931         case MLX5_CMD_OP_QUERY_ISSI:
932         case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
933         case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
934         case MLX5_CMD_OP_QUERY_VNIC_ENV:
935         case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
936         case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
937         case MLX5_CMD_OP_NOP:
938         case MLX5_CMD_OP_QUERY_CONG_STATUS:
939         case MLX5_CMD_OP_QUERY_CONG_PARAMS:
940         case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
941         case MLX5_CMD_OP_QUERY_LAG:
942                 return true;
943         default:
944                 return false;
945         }
946 }
947
948 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
949         struct uverbs_attr_bundle *attrs)
950 {
951         struct mlx5_ib_ucontext *c;
952         struct mlx5_ib_dev *dev;
953         int user_vector;
954         int dev_eqn;
955         unsigned int irqn;
956         int err;
957
958         if (uverbs_copy_from(&user_vector, attrs,
959                              MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
960                 return -EFAULT;
961
962         c = devx_ufile2uctx(attrs);
963         if (IS_ERR(c))
964                 return PTR_ERR(c);
965         dev = to_mdev(c->ibucontext.device);
966
967         err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
968         if (err < 0)
969                 return err;
970
971         if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
972                            &dev_eqn, sizeof(dev_eqn)))
973                 return -EFAULT;
974
975         return 0;
976 }
977
978 /*
979  *Security note:
980  * The hardware protection mechanism works like this: Each device object that
981  * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
982  * the device specification manual) upon its creation. Then upon doorbell,
983  * hardware fetches the object context for which the doorbell was rang, and
984  * validates that the UAR through which the DB was rang matches the UAR ID
985  * of the object.
986  * If no match the doorbell is silently ignored by the hardware. Of course,
987  * the user cannot ring a doorbell on a UAR that was not mapped to it.
988  * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
989  * mailboxes (except tagging them with UID), we expose to the user its UAR
990  * ID, so it can embed it in these objects in the expected specification
991  * format. So the only thing the user can do is hurt itself by creating a
992  * QP/SQ/CQ with a UAR ID other than his, and then in this case other users
993  * may ring a doorbell on its objects.
994  * The consequence of that will be that another user can schedule a QP/SQ
995  * of the buggy user for execution (just insert it to the hardware schedule
996  * queue or arm its CQ for event generation), no further harm is expected.
997  */
998 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
999         struct uverbs_attr_bundle *attrs)
1000 {
1001         struct mlx5_ib_ucontext *c;
1002         struct mlx5_ib_dev *dev;
1003         u32 user_idx;
1004         s32 dev_idx;
1005
1006         c = devx_ufile2uctx(attrs);
1007         if (IS_ERR(c))
1008                 return PTR_ERR(c);
1009         dev = to_mdev(c->ibucontext.device);
1010
1011         if (uverbs_copy_from(&user_idx, attrs,
1012                              MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
1013                 return -EFAULT;
1014
1015         dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
1016         if (dev_idx < 0)
1017                 return dev_idx;
1018
1019         if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
1020                            &dev_idx, sizeof(dev_idx)))
1021                 return -EFAULT;
1022
1023         return 0;
1024 }
1025
1026 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
1027         struct uverbs_attr_bundle *attrs)
1028 {
1029         struct mlx5_ib_ucontext *c;
1030         struct mlx5_ib_dev *dev;
1031         void *cmd_in = uverbs_attr_get_alloced_ptr(
1032                 attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
1033         int cmd_out_len = uverbs_attr_get_len(attrs,
1034                                         MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
1035         void *cmd_out;
1036         int err;
1037         int uid;
1038
1039         c = devx_ufile2uctx(attrs);
1040         if (IS_ERR(c))
1041                 return PTR_ERR(c);
1042         dev = to_mdev(c->ibucontext.device);
1043
1044         uid = devx_get_uid(c, cmd_in);
1045         if (uid < 0)
1046                 return uid;
1047
1048         /* Only white list of some general HCA commands are allowed for this method. */
1049         if (!devx_is_general_cmd(cmd_in, dev))
1050                 return -EINVAL;
1051
1052         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1053         if (IS_ERR(cmd_out))
1054                 return PTR_ERR(cmd_out);
1055
1056         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1057         err = mlx5_cmd_exec(dev->mdev, cmd_in,
1058                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
1059                             cmd_out, cmd_out_len);
1060         if (err)
1061                 return err;
1062
1063         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
1064                               cmd_out_len);
1065 }
1066
1067 static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
1068                                        u32 *dinlen,
1069                                        u32 *obj_id)
1070 {
1071         u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
1072         u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
1073
1074         *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1075         *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
1076
1077         MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
1078         MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
1079
1080         switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
1081         case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
1082                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1083                 MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
1084                 break;
1085
1086         case MLX5_CMD_OP_CREATE_UMEM:
1087                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1088                          MLX5_CMD_OP_DESTROY_UMEM);
1089                 break;
1090         case MLX5_CMD_OP_CREATE_MKEY:
1091                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
1092                 break;
1093         case MLX5_CMD_OP_CREATE_CQ:
1094                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
1095                 break;
1096         case MLX5_CMD_OP_ALLOC_PD:
1097                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
1098                 break;
1099         case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
1100                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1101                          MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
1102                 break;
1103         case MLX5_CMD_OP_CREATE_RMP:
1104                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
1105                 break;
1106         case MLX5_CMD_OP_CREATE_SQ:
1107                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
1108                 break;
1109         case MLX5_CMD_OP_CREATE_RQ:
1110                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
1111                 break;
1112         case MLX5_CMD_OP_CREATE_RQT:
1113                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
1114                 break;
1115         case MLX5_CMD_OP_CREATE_TIR:
1116                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
1117                 break;
1118         case MLX5_CMD_OP_CREATE_TIS:
1119                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
1120                 break;
1121         case MLX5_CMD_OP_ALLOC_Q_COUNTER:
1122                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1123                          MLX5_CMD_OP_DEALLOC_Q_COUNTER);
1124                 break;
1125         case MLX5_CMD_OP_CREATE_FLOW_TABLE:
1126                 *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
1127                 *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
1128                 MLX5_SET(destroy_flow_table_in, din, other_vport,
1129                          MLX5_GET(create_flow_table_in,  in, other_vport));
1130                 MLX5_SET(destroy_flow_table_in, din, vport_number,
1131                          MLX5_GET(create_flow_table_in,  in, vport_number));
1132                 MLX5_SET(destroy_flow_table_in, din, table_type,
1133                          MLX5_GET(create_flow_table_in,  in, table_type));
1134                 MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
1135                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1136                          MLX5_CMD_OP_DESTROY_FLOW_TABLE);
1137                 break;
1138         case MLX5_CMD_OP_CREATE_FLOW_GROUP:
1139                 *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
1140                 *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
1141                 MLX5_SET(destroy_flow_group_in, din, other_vport,
1142                          MLX5_GET(create_flow_group_in, in, other_vport));
1143                 MLX5_SET(destroy_flow_group_in, din, vport_number,
1144                          MLX5_GET(create_flow_group_in, in, vport_number));
1145                 MLX5_SET(destroy_flow_group_in, din, table_type,
1146                          MLX5_GET(create_flow_group_in, in, table_type));
1147                 MLX5_SET(destroy_flow_group_in, din, table_id,
1148                          MLX5_GET(create_flow_group_in, in, table_id));
1149                 MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
1150                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1151                          MLX5_CMD_OP_DESTROY_FLOW_GROUP);
1152                 break;
1153         case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
1154                 *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
1155                 *obj_id = MLX5_GET(set_fte_in, in, flow_index);
1156                 MLX5_SET(delete_fte_in, din, other_vport,
1157                          MLX5_GET(set_fte_in,  in, other_vport));
1158                 MLX5_SET(delete_fte_in, din, vport_number,
1159                          MLX5_GET(set_fte_in, in, vport_number));
1160                 MLX5_SET(delete_fte_in, din, table_type,
1161                          MLX5_GET(set_fte_in, in, table_type));
1162                 MLX5_SET(delete_fte_in, din, table_id,
1163                          MLX5_GET(set_fte_in, in, table_id));
1164                 MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
1165                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1166                          MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
1167                 break;
1168         case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
1169                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1170                          MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
1171                 break;
1172         case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
1173                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1174                          MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
1175                 break;
1176         case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
1177                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1178                          MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
1179                 break;
1180         case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
1181                 *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
1182                 *obj_id = MLX5_GET(create_scheduling_element_out, out,
1183                                    scheduling_element_id);
1184                 MLX5_SET(destroy_scheduling_element_in, din,
1185                          scheduling_hierarchy,
1186                          MLX5_GET(create_scheduling_element_in, in,
1187                                   scheduling_hierarchy));
1188                 MLX5_SET(destroy_scheduling_element_in, din,
1189                          scheduling_element_id, *obj_id);
1190                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1191                          MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
1192                 break;
1193         case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
1194                 *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
1195                 *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
1196                 MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
1197                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1198                          MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
1199                 break;
1200         case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
1201                 *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
1202                 *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
1203                 MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
1204                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1205                          MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
1206                 break;
1207         case MLX5_CMD_OP_CREATE_QP:
1208                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
1209                 break;
1210         case MLX5_CMD_OP_CREATE_SRQ:
1211                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
1212                 break;
1213         case MLX5_CMD_OP_CREATE_XRC_SRQ:
1214                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1215                          MLX5_CMD_OP_DESTROY_XRC_SRQ);
1216                 break;
1217         case MLX5_CMD_OP_CREATE_DCT:
1218                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
1219                 break;
1220         case MLX5_CMD_OP_CREATE_XRQ:
1221                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
1222                 break;
1223         case MLX5_CMD_OP_ATTACH_TO_MCG:
1224                 *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
1225                 MLX5_SET(detach_from_mcg_in, din, qpn,
1226                          MLX5_GET(attach_to_mcg_in, in, qpn));
1227                 memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
1228                        MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
1229                        MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
1230                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
1231                 break;
1232         case MLX5_CMD_OP_ALLOC_XRCD:
1233                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
1234                 break;
1235         case MLX5_CMD_OP_CREATE_PSV:
1236                 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1237                          MLX5_CMD_OP_DESTROY_PSV);
1238                 MLX5_SET(destroy_psv_in, din, psvn,
1239                          MLX5_GET(create_psv_out, out, psv0_index));
1240                 break;
1241         default:
1242                 /* The entry must match to one of the devx_is_obj_create_cmd */
1243                 WARN_ON(true);
1244                 break;
1245         }
1246 }
1247
1248 static int devx_handle_mkey_indirect(struct devx_obj *obj,
1249                                      struct mlx5_ib_dev *dev,
1250                                      void *in, void *out)
1251 {
1252         struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
1253         struct mlx5_core_mkey *mkey;
1254         void *mkc;
1255         u8 key;
1256
1257         mkey = &devx_mr->mmkey;
1258         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1259         key = MLX5_GET(mkc, mkc, mkey_7_0);
1260         mkey->key = mlx5_idx_to_mkey(
1261                         MLX5_GET(create_mkey_out, out, mkey_index)) | key;
1262         mkey->type = MLX5_MKEY_INDIRECT_DEVX;
1263         mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
1264         mkey->size = MLX5_GET64(mkc, mkc, len);
1265         mkey->pd = MLX5_GET(mkc, mkc, pd);
1266         devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
1267
1268         return xa_err(xa_store(&dev->mdev->priv.mkey_table,
1269                                mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
1270 }
1271
1272 static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
1273                                    struct devx_obj *obj,
1274                                    void *in, int in_len)
1275 {
1276         int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
1277                         MLX5_FLD_SZ_BYTES(create_mkey_in,
1278                         memory_key_mkey_entry);
1279         void *mkc;
1280         u8 access_mode;
1281
1282         if (in_len < min_len)
1283                 return -EINVAL;
1284
1285         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1286
1287         access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
1288         access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
1289
1290         if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
1291                 access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
1292                 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1293                         obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
1294                 return 0;
1295         }
1296
1297         MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
1298         return 0;
1299 }
1300
1301 static void devx_free_indirect_mkey(struct rcu_head *rcu)
1302 {
1303         kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
1304 }
1305
1306 /* This function to delete from the radix tree needs to be called before
1307  * destroying the underlying mkey. Otherwise a race might occur in case that
1308  * other thread will get the same mkey before this one will be deleted,
1309  * in that case it will fail via inserting to the tree its own data.
1310  *
1311  * Note:
1312  * An error in the destroy is not expected unless there is some other indirect
1313  * mkey which points to this one. In a kernel cleanup flow it will be just
1314  * destroyed in the iterative destruction call. In a user flow, in case
1315  * the application didn't close in the expected order it's its own problem,
1316  * the mkey won't be part of the tree, in both cases the kernel is safe.
1317  */
1318 static void devx_cleanup_mkey(struct devx_obj *obj)
1319 {
1320         xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
1321                  mlx5_base_mkey(obj->devx_mr.mmkey.key));
1322 }
1323
1324 static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
1325                                       struct devx_event_subscription *sub)
1326 {
1327         struct devx_event *event;
1328         struct devx_obj_event *xa_val_level2;
1329
1330         if (sub->is_cleaned)
1331                 return;
1332
1333         sub->is_cleaned = 1;
1334         list_del_rcu(&sub->xa_list);
1335
1336         if (list_empty(&sub->obj_list))
1337                 return;
1338
1339         list_del_rcu(&sub->obj_list);
1340         /* check whether key level 1 for this obj_sub_list is empty */
1341         event = xa_load(&dev->devx_event_table.event_xa,
1342                         sub->xa_key_level1);
1343         WARN_ON(!event);
1344
1345         xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
1346         if (list_empty(&xa_val_level2->obj_sub_list)) {
1347                 xa_erase(&event->object_ids,
1348                          sub->xa_key_level2);
1349                 kfree_rcu(xa_val_level2, rcu);
1350         }
1351 }
1352
1353 static int devx_obj_cleanup(struct ib_uobject *uobject,
1354                             enum rdma_remove_reason why,
1355                             struct uverbs_attr_bundle *attrs)
1356 {
1357         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1358         struct mlx5_devx_event_table *devx_event_table;
1359         struct devx_obj *obj = uobject->object;
1360         struct devx_event_subscription *sub_entry, *tmp;
1361         struct mlx5_ib_dev *dev;
1362         int ret;
1363
1364         dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1365         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
1366                 devx_cleanup_mkey(obj);
1367
1368         if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1369                 ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
1370         else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1371                 ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1372         else
1373                 ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
1374                                     obj->dinlen, out, sizeof(out));
1375         if (ib_is_destroy_retryable(ret, why, uobject))
1376                 return ret;
1377
1378         devx_event_table = &dev->devx_event_table;
1379
1380         mutex_lock(&devx_event_table->event_xa_lock);
1381         list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
1382                 devx_cleanup_subscription(dev, sub_entry);
1383         mutex_unlock(&devx_event_table->event_xa_lock);
1384
1385         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
1386                 call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
1387                           devx_free_indirect_mkey);
1388                 return ret;
1389         }
1390
1391         kfree(obj);
1392         return ret;
1393 }
1394
1395 static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
1396 {
1397         struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
1398         struct mlx5_devx_event_table *table;
1399         struct devx_event *event;
1400         struct devx_obj_event *obj_event;
1401         u32 obj_id = mcq->cqn;
1402
1403         table = &obj->ib_dev->devx_event_table;
1404         rcu_read_lock();
1405         event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
1406         if (!event)
1407                 goto out;
1408
1409         obj_event = xa_load(&event->object_ids, obj_id);
1410         if (!obj_event)
1411                 goto out;
1412
1413         dispatch_event_fd(&obj_event->obj_sub_list, eqe);
1414 out:
1415         rcu_read_unlock();
1416 }
1417
1418 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
1419         struct uverbs_attr_bundle *attrs)
1420 {
1421         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1422         int cmd_out_len =  uverbs_attr_get_len(attrs,
1423                                         MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
1424         int cmd_in_len = uverbs_attr_get_len(attrs,
1425                                         MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1426         void *cmd_out;
1427         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1428                 attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
1429         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1430                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1431         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1432         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1433         struct devx_obj *obj;
1434         u16 obj_type = 0;
1435         int err;
1436         int uid;
1437         u32 obj_id;
1438         u16 opcode;
1439
1440         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1441                 return -EINVAL;
1442
1443         uid = devx_get_uid(c, cmd_in);
1444         if (uid < 0)
1445                 return uid;
1446
1447         if (!devx_is_obj_create_cmd(cmd_in, &opcode))
1448                 return -EINVAL;
1449
1450         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1451         if (IS_ERR(cmd_out))
1452                 return PTR_ERR(cmd_out);
1453
1454         obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
1455         if (!obj)
1456                 return -ENOMEM;
1457
1458         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1459         if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
1460                 err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
1461                 if (err)
1462                         goto obj_free;
1463         } else {
1464                 devx_set_umem_valid(cmd_in);
1465         }
1466
1467         if (opcode == MLX5_CMD_OP_CREATE_DCT) {
1468                 obj->flags |= DEVX_OBJ_FLAGS_DCT;
1469                 err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
1470                                            cmd_in, cmd_in_len,
1471                                            cmd_out, cmd_out_len);
1472         } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
1473                 obj->flags |= DEVX_OBJ_FLAGS_CQ;
1474                 obj->core_cq.comp = devx_cq_comp;
1475                 err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
1476                                           cmd_in, cmd_in_len, cmd_out,
1477                                           cmd_out_len);
1478         } else {
1479                 err = mlx5_cmd_exec(dev->mdev, cmd_in,
1480                                     cmd_in_len,
1481                                     cmd_out, cmd_out_len);
1482         }
1483
1484         if (err)
1485                 goto obj_free;
1486
1487         uobj->object = obj;
1488         INIT_LIST_HEAD(&obj->event_sub);
1489         obj->ib_dev = dev;
1490         devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
1491                                    &obj_id);
1492         WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
1493
1494         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
1495                 err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
1496                 if (err)
1497                         goto obj_destroy;
1498         }
1499
1500         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
1501         if (err)
1502                 goto err_copy;
1503
1504         if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
1505                 obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
1506
1507         obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
1508
1509         return 0;
1510
1511 err_copy:
1512         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
1513                 devx_cleanup_mkey(obj);
1514 obj_destroy:
1515         if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1516                 mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
1517         else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1518                 mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1519         else
1520                 mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
1521                               sizeof(out));
1522 obj_free:
1523         kfree(obj);
1524         return err;
1525 }
1526
1527 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
1528         struct uverbs_attr_bundle *attrs)
1529 {
1530         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
1531         int cmd_out_len = uverbs_attr_get_len(attrs,
1532                                         MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
1533         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1534                                                           MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
1535         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1536                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1537         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1538         void *cmd_out;
1539         int err;
1540         int uid;
1541
1542         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1543                 return -EINVAL;
1544
1545         uid = devx_get_uid(c, cmd_in);
1546         if (uid < 0)
1547                 return uid;
1548
1549         if (!devx_is_obj_modify_cmd(cmd_in))
1550                 return -EINVAL;
1551
1552         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1553                 return -EINVAL;
1554
1555         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1556         if (IS_ERR(cmd_out))
1557                 return PTR_ERR(cmd_out);
1558
1559         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1560         devx_set_umem_valid(cmd_in);
1561
1562         err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1563                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
1564                             cmd_out, cmd_out_len);
1565         if (err)
1566                 return err;
1567
1568         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
1569                               cmd_out, cmd_out_len);
1570 }
1571
1572 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
1573         struct uverbs_attr_bundle *attrs)
1574 {
1575         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
1576         int cmd_out_len = uverbs_attr_get_len(attrs,
1577                                               MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
1578         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1579                                                           MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
1580         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1581                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1582         void *cmd_out;
1583         int err;
1584         int uid;
1585         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1586
1587         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1588                 return -EINVAL;
1589
1590         uid = devx_get_uid(c, cmd_in);
1591         if (uid < 0)
1592                 return uid;
1593
1594         if (!devx_is_obj_query_cmd(cmd_in))
1595                 return -EINVAL;
1596
1597         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1598                 return -EINVAL;
1599
1600         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1601         if (IS_ERR(cmd_out))
1602                 return PTR_ERR(cmd_out);
1603
1604         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1605         err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1606                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
1607                             cmd_out, cmd_out_len);
1608         if (err)
1609                 return err;
1610
1611         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
1612                               cmd_out, cmd_out_len);
1613 }
1614
1615 struct devx_async_event_queue {
1616         spinlock_t              lock;
1617         wait_queue_head_t       poll_wait;
1618         struct list_head        event_list;
1619         atomic_t                bytes_in_use;
1620         u8                      is_destroyed:1;
1621 };
1622
1623 struct devx_async_cmd_event_file {
1624         struct ib_uobject               uobj;
1625         struct devx_async_event_queue   ev_queue;
1626         struct mlx5_async_ctx           async_ctx;
1627 };
1628
1629 static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
1630 {
1631         spin_lock_init(&ev_queue->lock);
1632         INIT_LIST_HEAD(&ev_queue->event_list);
1633         init_waitqueue_head(&ev_queue->poll_wait);
1634         atomic_set(&ev_queue->bytes_in_use, 0);
1635         ev_queue->is_destroyed = 0;
1636 }
1637
1638 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
1639         struct uverbs_attr_bundle *attrs)
1640 {
1641         struct devx_async_cmd_event_file *ev_file;
1642
1643         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1644                 attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
1645         struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
1646
1647         ev_file = container_of(uobj, struct devx_async_cmd_event_file,
1648                                uobj);
1649         devx_init_event_queue(&ev_file->ev_queue);
1650         mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
1651         return 0;
1652 }
1653
1654 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
1655         struct uverbs_attr_bundle *attrs)
1656 {
1657         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1658                 attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
1659         struct devx_async_event_file *ev_file;
1660         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1661                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1662         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1663         u32 flags;
1664         int err;
1665
1666         err = uverbs_get_flags32(&flags, attrs,
1667                 MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
1668                 MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
1669
1670         if (err)
1671                 return err;
1672
1673         ev_file = container_of(uobj, struct devx_async_event_file,
1674                                uobj);
1675         spin_lock_init(&ev_file->lock);
1676         INIT_LIST_HEAD(&ev_file->event_list);
1677         init_waitqueue_head(&ev_file->poll_wait);
1678         if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
1679                 ev_file->omit_data = 1;
1680         INIT_LIST_HEAD(&ev_file->subscribed_events_list);
1681         ev_file->dev = dev;
1682         get_device(&dev->ib_dev.dev);
1683         return 0;
1684 }
1685
1686 static void devx_query_callback(int status, struct mlx5_async_work *context)
1687 {
1688         struct devx_async_data *async_data =
1689                 container_of(context, struct devx_async_data, cb_work);
1690         struct ib_uobject *fd_uobj = async_data->fd_uobj;
1691         struct devx_async_cmd_event_file *ev_file;
1692         struct devx_async_event_queue *ev_queue;
1693         unsigned long flags;
1694
1695         ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
1696                                uobj);
1697         ev_queue = &ev_file->ev_queue;
1698
1699         spin_lock_irqsave(&ev_queue->lock, flags);
1700         list_add_tail(&async_data->list, &ev_queue->event_list);
1701         spin_unlock_irqrestore(&ev_queue->lock, flags);
1702
1703         wake_up_interruptible(&ev_queue->poll_wait);
1704         fput(fd_uobj->object);
1705 }
1706
1707 #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
1708
1709 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
1710         struct uverbs_attr_bundle *attrs)
1711 {
1712         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
1713                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
1714         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1715                                 attrs,
1716                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
1717         u16 cmd_out_len;
1718         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1719                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1720         struct ib_uobject *fd_uobj;
1721         int err;
1722         int uid;
1723         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1724         struct devx_async_cmd_event_file *ev_file;
1725         struct devx_async_data *async_data;
1726
1727         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1728                 return -EINVAL;
1729
1730         uid = devx_get_uid(c, cmd_in);
1731         if (uid < 0)
1732                 return uid;
1733
1734         if (!devx_is_obj_query_cmd(cmd_in))
1735                 return -EINVAL;
1736
1737         err = uverbs_get_const(&cmd_out_len, attrs,
1738                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
1739         if (err)
1740                 return err;
1741
1742         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1743                 return -EINVAL;
1744
1745         fd_uobj = uverbs_attr_get_uobject(attrs,
1746                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
1747         if (IS_ERR(fd_uobj))
1748                 return PTR_ERR(fd_uobj);
1749
1750         ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
1751                                uobj);
1752
1753         if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
1754                         MAX_ASYNC_BYTES_IN_USE) {
1755                 atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1756                 return -EAGAIN;
1757         }
1758
1759         async_data = kvzalloc(struct_size(async_data, hdr.out_data,
1760                                           cmd_out_len), GFP_KERNEL);
1761         if (!async_data) {
1762                 err = -ENOMEM;
1763                 goto sub_bytes;
1764         }
1765
1766         err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
1767                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
1768         if (err)
1769                 goto free_async;
1770
1771         async_data->cmd_out_len = cmd_out_len;
1772         async_data->mdev = mdev;
1773         async_data->fd_uobj = fd_uobj;
1774
1775         get_file(fd_uobj->object);
1776         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1777         err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
1778                     uverbs_attr_get_len(attrs,
1779                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
1780                     async_data->hdr.out_data,
1781                     async_data->cmd_out_len,
1782                     devx_query_callback, &async_data->cb_work);
1783
1784         if (err)
1785                 goto cb_err;
1786
1787         return 0;
1788
1789 cb_err:
1790         fput(fd_uobj->object);
1791 free_async:
1792         kvfree(async_data);
1793 sub_bytes:
1794         atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1795         return err;
1796 }
1797
1798 static void
1799 subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
1800                            u32 key_level1,
1801                            bool is_level2,
1802                            u32 key_level2)
1803 {
1804         struct devx_event *event;
1805         struct devx_obj_event *xa_val_level2;
1806
1807         /* Level 1 is valid for future use, no need to free */
1808         if (!is_level2)
1809                 return;
1810
1811         event = xa_load(&devx_event_table->event_xa, key_level1);
1812         WARN_ON(!event);
1813
1814         xa_val_level2 = xa_load(&event->object_ids,
1815                                 key_level2);
1816         if (list_empty(&xa_val_level2->obj_sub_list)) {
1817                 xa_erase(&event->object_ids,
1818                          key_level2);
1819                 kfree_rcu(xa_val_level2, rcu);
1820         }
1821 }
1822
1823 static int
1824 subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
1825                          u32 key_level1,
1826                          bool is_level2,
1827                          u32 key_level2)
1828 {
1829         struct devx_obj_event *obj_event;
1830         struct devx_event *event;
1831         int err;
1832
1833         event = xa_load(&devx_event_table->event_xa, key_level1);
1834         if (!event) {
1835                 event = kzalloc(sizeof(*event), GFP_KERNEL);
1836                 if (!event)
1837                         return -ENOMEM;
1838
1839                 INIT_LIST_HEAD(&event->unaffiliated_list);
1840                 xa_init(&event->object_ids);
1841
1842                 err = xa_insert(&devx_event_table->event_xa,
1843                                 key_level1,
1844                                 event,
1845                                 GFP_KERNEL);
1846                 if (err) {
1847                         kfree(event);
1848                         return err;
1849                 }
1850         }
1851
1852         if (!is_level2)
1853                 return 0;
1854
1855         obj_event = xa_load(&event->object_ids, key_level2);
1856         if (!obj_event) {
1857                 obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
1858                 if (!obj_event)
1859                         /* Level1 is valid for future use, no need to free */
1860                         return -ENOMEM;
1861
1862                 err = xa_insert(&event->object_ids,
1863                                 key_level2,
1864                                 obj_event,
1865                                 GFP_KERNEL);
1866                 if (err)
1867                         return err;
1868                 INIT_LIST_HEAD(&obj_event->obj_sub_list);
1869         }
1870
1871         return 0;
1872 }
1873
1874 static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
1875                                    struct devx_obj *obj)
1876 {
1877         int i;
1878
1879         for (i = 0; i < num_events; i++) {
1880                 if (obj) {
1881                         if (!is_legacy_obj_event_num(event_type_num_list[i]))
1882                                 return false;
1883                 } else if (!is_legacy_unaffiliated_event_num(
1884                                 event_type_num_list[i])) {
1885                         return false;
1886                 }
1887         }
1888
1889         return true;
1890 }
1891
1892 #define MAX_SUPP_EVENT_NUM 255
1893 static bool is_valid_events(struct mlx5_core_dev *dev,
1894                             int num_events, u16 *event_type_num_list,
1895                             struct devx_obj *obj)
1896 {
1897         __be64 *aff_events;
1898         __be64 *unaff_events;
1899         int mask_entry;
1900         int mask_bit;
1901         int i;
1902
1903         if (MLX5_CAP_GEN(dev, event_cap)) {
1904                 aff_events = MLX5_CAP_DEV_EVENT(dev,
1905                                                 user_affiliated_events);
1906                 unaff_events = MLX5_CAP_DEV_EVENT(dev,
1907                                                   user_unaffiliated_events);
1908         } else {
1909                 return is_valid_events_legacy(num_events, event_type_num_list,
1910                                               obj);
1911         }
1912
1913         for (i = 0; i < num_events; i++) {
1914                 if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
1915                         return false;
1916
1917                 mask_entry = event_type_num_list[i] / 64;
1918                 mask_bit = event_type_num_list[i] % 64;
1919
1920                 if (obj) {
1921                         /* CQ completion */
1922                         if (event_type_num_list[i] == 0)
1923                                 continue;
1924
1925                         if (!(be64_to_cpu(aff_events[mask_entry]) &
1926                                         (1ull << mask_bit)))
1927                                 return false;
1928
1929                         continue;
1930                 }
1931
1932                 if (!(be64_to_cpu(unaff_events[mask_entry]) &
1933                                 (1ull << mask_bit)))
1934                         return false;
1935         }
1936
1937         return true;
1938 }
1939
1940 #define MAX_NUM_EVENTS 16
1941 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
1942         struct uverbs_attr_bundle *attrs)
1943 {
1944         struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
1945                                 attrs,
1946                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
1947         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1948                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1949         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1950         struct ib_uobject *fd_uobj;
1951         struct devx_obj *obj = NULL;
1952         struct devx_async_event_file *ev_file;
1953         struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
1954         u16 *event_type_num_list;
1955         struct devx_event_subscription *event_sub, *tmp_sub;
1956         struct list_head sub_list;
1957         int redirect_fd;
1958         bool use_eventfd = false;
1959         int num_events;
1960         int num_alloc_xa_entries = 0;
1961         u16 obj_type = 0;
1962         u64 cookie = 0;
1963         u32 obj_id = 0;
1964         int err;
1965         int i;
1966
1967         if (!c->devx_uid)
1968                 return -EINVAL;
1969
1970         if (!IS_ERR(devx_uobj)) {
1971                 obj = (struct devx_obj *)devx_uobj->object;
1972                 if (obj)
1973                         obj_id = get_dec_obj_id(obj->obj_id);
1974         }
1975
1976         fd_uobj = uverbs_attr_get_uobject(attrs,
1977                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
1978         if (IS_ERR(fd_uobj))
1979                 return PTR_ERR(fd_uobj);
1980
1981         ev_file = container_of(fd_uobj, struct devx_async_event_file,
1982                                uobj);
1983
1984         if (uverbs_attr_is_valid(attrs,
1985                                  MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
1986                 err = uverbs_copy_from(&redirect_fd, attrs,
1987                                MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
1988                 if (err)
1989                         return err;
1990
1991                 use_eventfd = true;
1992         }
1993
1994         if (uverbs_attr_is_valid(attrs,
1995                                  MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
1996                 if (use_eventfd)
1997                         return -EINVAL;
1998
1999                 err = uverbs_copy_from(&cookie, attrs,
2000                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
2001                 if (err)
2002                         return err;
2003         }
2004
2005         num_events = uverbs_attr_ptr_get_array_size(
2006                 attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
2007                 sizeof(u16));
2008
2009         if (num_events < 0)
2010                 return num_events;
2011
2012         if (num_events > MAX_NUM_EVENTS)
2013                 return -EINVAL;
2014
2015         event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
2016                         MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
2017
2018         if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
2019                 return -EINVAL;
2020
2021         INIT_LIST_HEAD(&sub_list);
2022
2023         /* Protect from concurrent subscriptions to same XA entries to allow
2024          * both to succeed
2025          */
2026         mutex_lock(&devx_event_table->event_xa_lock);
2027         for (i = 0; i < num_events; i++) {
2028                 u32 key_level1;
2029
2030                 if (obj)
2031                         obj_type = get_dec_obj_type(obj,
2032                                                     event_type_num_list[i]);
2033                 key_level1 = event_type_num_list[i] | obj_type << 16;
2034
2035                 err = subscribe_event_xa_alloc(devx_event_table,
2036                                                key_level1,
2037                                                obj,
2038                                                obj_id);
2039                 if (err)
2040                         goto err;
2041
2042                 num_alloc_xa_entries++;
2043                 event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
2044                 if (!event_sub)
2045                         goto err;
2046
2047                 list_add_tail(&event_sub->event_list, &sub_list);
2048                 if (use_eventfd) {
2049                         event_sub->eventfd =
2050                                 eventfd_ctx_fdget(redirect_fd);
2051
2052                         if (IS_ERR(event_sub->eventfd)) {
2053                                 err = PTR_ERR(event_sub->eventfd);
2054                                 event_sub->eventfd = NULL;
2055                                 goto err;
2056                         }
2057                 }
2058
2059                 event_sub->cookie = cookie;
2060                 event_sub->ev_file = ev_file;
2061                 event_sub->filp = fd_uobj->object;
2062                 /* May be needed upon cleanup the devx object/subscription */
2063                 event_sub->xa_key_level1 = key_level1;
2064                 event_sub->xa_key_level2 = obj_id;
2065                 INIT_LIST_HEAD(&event_sub->obj_list);
2066         }
2067
2068         /* Once all the allocations and the XA data insertions were done we
2069          * can go ahead and add all the subscriptions to the relevant lists
2070          * without concern of a failure.
2071          */
2072         list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2073                 struct devx_event *event;
2074                 struct devx_obj_event *obj_event;
2075
2076                 list_del_init(&event_sub->event_list);
2077
2078                 spin_lock_irq(&ev_file->lock);
2079                 list_add_tail_rcu(&event_sub->file_list,
2080                                   &ev_file->subscribed_events_list);
2081                 spin_unlock_irq(&ev_file->lock);
2082
2083                 event = xa_load(&devx_event_table->event_xa,
2084                                 event_sub->xa_key_level1);
2085                 WARN_ON(!event);
2086
2087                 if (!obj) {
2088                         list_add_tail_rcu(&event_sub->xa_list,
2089                                           &event->unaffiliated_list);
2090                         continue;
2091                 }
2092
2093                 obj_event = xa_load(&event->object_ids, obj_id);
2094                 WARN_ON(!obj_event);
2095                 list_add_tail_rcu(&event_sub->xa_list,
2096                                   &obj_event->obj_sub_list);
2097                 list_add_tail_rcu(&event_sub->obj_list,
2098                                   &obj->event_sub);
2099         }
2100
2101         mutex_unlock(&devx_event_table->event_xa_lock);
2102         return 0;
2103
2104 err:
2105         list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2106                 list_del(&event_sub->event_list);
2107
2108                 subscribe_event_xa_dealloc(devx_event_table,
2109                                            event_sub->xa_key_level1,
2110                                            obj,
2111                                            obj_id);
2112
2113                 if (event_sub->eventfd)
2114                         eventfd_ctx_put(event_sub->eventfd);
2115
2116                 kfree(event_sub);
2117         }
2118
2119         mutex_unlock(&devx_event_table->event_xa_lock);
2120         return err;
2121 }
2122
2123 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
2124                          struct uverbs_attr_bundle *attrs,
2125                          struct devx_umem *obj)
2126 {
2127         u64 addr;
2128         size_t size;
2129         u32 access;
2130         int npages;
2131         int err;
2132         u32 page_mask;
2133
2134         if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
2135             uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
2136                 return -EFAULT;
2137
2138         err = uverbs_get_flags32(&access, attrs,
2139                                  MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2140                                  IB_ACCESS_LOCAL_WRITE |
2141                                  IB_ACCESS_REMOTE_WRITE |
2142                                  IB_ACCESS_REMOTE_READ);
2143         if (err)
2144                 return err;
2145
2146         err = ib_check_mr_access(access);
2147         if (err)
2148                 return err;
2149
2150         obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
2151         if (IS_ERR(obj->umem))
2152                 return PTR_ERR(obj->umem);
2153
2154         mlx5_ib_cont_pages(obj->umem, obj->umem->address,
2155                            MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
2156                            &obj->page_shift, &obj->ncont, NULL);
2157
2158         if (!npages) {
2159                 ib_umem_release(obj->umem);
2160                 return -EINVAL;
2161         }
2162
2163         page_mask = (1 << obj->page_shift) - 1;
2164         obj->page_offset = obj->umem->address & page_mask;
2165
2166         return 0;
2167 }
2168
2169 static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
2170                                    struct devx_umem *obj,
2171                                    struct devx_umem_reg_cmd *cmd)
2172 {
2173         cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
2174                     (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
2175         cmd->in = uverbs_zalloc(attrs, cmd->inlen);
2176         return PTR_ERR_OR_ZERO(cmd->in);
2177 }
2178
2179 static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
2180                                     struct devx_umem *obj,
2181                                     struct devx_umem_reg_cmd *cmd)
2182 {
2183         void *umem;
2184         __be64 *mtt;
2185
2186         umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
2187         mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
2188
2189         MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
2190         MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
2191         MLX5_SET(umem, umem, log_page_size, obj->page_shift -
2192                                             MLX5_ADAPTER_PAGE_SHIFT);
2193         MLX5_SET(umem, umem, page_offset, obj->page_offset);
2194         mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
2195                              (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
2196                              MLX5_IB_MTT_READ);
2197 }
2198
2199 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
2200         struct uverbs_attr_bundle *attrs)
2201 {
2202         struct devx_umem_reg_cmd cmd;
2203         struct devx_umem *obj;
2204         struct ib_uobject *uobj = uverbs_attr_get_uobject(
2205                 attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2206         u32 obj_id;
2207         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
2208                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2209         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
2210         int err;
2211
2212         if (!c->devx_uid)
2213                 return -EINVAL;
2214
2215         obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
2216         if (!obj)
2217                 return -ENOMEM;
2218
2219         err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
2220         if (err)
2221                 goto err_obj_free;
2222
2223         err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
2224         if (err)
2225                 goto err_umem_release;
2226
2227         devx_umem_reg_cmd_build(dev, obj, &cmd);
2228
2229         MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
2230         err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
2231                             sizeof(cmd.out));
2232         if (err)
2233                 goto err_umem_release;
2234
2235         obj->mdev = dev->mdev;
2236         uobj->object = obj;
2237         devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
2238         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
2239         if (err)
2240                 goto err_umem_destroy;
2241
2242         return 0;
2243
2244 err_umem_destroy:
2245         mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
2246 err_umem_release:
2247         ib_umem_release(obj->umem);
2248 err_obj_free:
2249         kfree(obj);
2250         return err;
2251 }
2252
2253 static int devx_umem_cleanup(struct ib_uobject *uobject,
2254                              enum rdma_remove_reason why,
2255                              struct uverbs_attr_bundle *attrs)
2256 {
2257         struct devx_umem *obj = uobject->object;
2258         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
2259         int err;
2260
2261         err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
2262         if (ib_is_destroy_retryable(err, why, uobject))
2263                 return err;
2264
2265         ib_umem_release(obj->umem);
2266         kfree(obj);
2267         return 0;
2268 }
2269
2270 static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
2271                                   unsigned long event_type)
2272 {
2273         __be64 *unaff_events;
2274         int mask_entry;
2275         int mask_bit;
2276
2277         if (!MLX5_CAP_GEN(dev, event_cap))
2278                 return is_legacy_unaffiliated_event_num(event_type);
2279
2280         unaff_events = MLX5_CAP_DEV_EVENT(dev,
2281                                           user_unaffiliated_events);
2282         WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
2283
2284         mask_entry = event_type / 64;
2285         mask_bit = event_type % 64;
2286
2287         if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
2288                 return false;
2289
2290         return true;
2291 }
2292
2293 static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
2294 {
2295         struct mlx5_eqe *eqe = data;
2296         u32 obj_id = 0;
2297
2298         switch (event_type) {
2299         case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
2300         case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
2301         case MLX5_EVENT_TYPE_PATH_MIG:
2302         case MLX5_EVENT_TYPE_COMM_EST:
2303         case MLX5_EVENT_TYPE_SQ_DRAINED:
2304         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
2305         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
2306         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
2307         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
2308         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
2309                 obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
2310                 break;
2311         case MLX5_EVENT_TYPE_XRQ_ERROR:
2312                 obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
2313                 break;
2314         case MLX5_EVENT_TYPE_DCT_DRAINED:
2315         case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
2316                 obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
2317                 break;
2318         case MLX5_EVENT_TYPE_CQ_ERROR:
2319                 obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
2320                 break;
2321         default:
2322                 obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
2323                 break;
2324         }
2325
2326         return obj_id;
2327 }
2328
2329 static int deliver_event(struct devx_event_subscription *event_sub,
2330                          const void *data)
2331 {
2332         struct devx_async_event_file *ev_file;
2333         struct devx_async_event_data *event_data;
2334         unsigned long flags;
2335
2336         ev_file = event_sub->ev_file;
2337
2338         if (ev_file->omit_data) {
2339                 spin_lock_irqsave(&ev_file->lock, flags);
2340                 if (!list_empty(&event_sub->event_list)) {
2341                         spin_unlock_irqrestore(&ev_file->lock, flags);
2342                         return 0;
2343                 }
2344
2345                 list_add_tail(&event_sub->event_list, &ev_file->event_list);
2346                 spin_unlock_irqrestore(&ev_file->lock, flags);
2347                 wake_up_interruptible(&ev_file->poll_wait);
2348                 return 0;
2349         }
2350
2351         event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
2352                              GFP_ATOMIC);
2353         if (!event_data) {
2354                 spin_lock_irqsave(&ev_file->lock, flags);
2355                 ev_file->is_overflow_err = 1;
2356                 spin_unlock_irqrestore(&ev_file->lock, flags);
2357                 return -ENOMEM;
2358         }
2359
2360         event_data->hdr.cookie = event_sub->cookie;
2361         memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
2362
2363         spin_lock_irqsave(&ev_file->lock, flags);
2364         list_add_tail(&event_data->list, &ev_file->event_list);
2365         spin_unlock_irqrestore(&ev_file->lock, flags);
2366         wake_up_interruptible(&ev_file->poll_wait);
2367
2368         return 0;
2369 }
2370
2371 static void dispatch_event_fd(struct list_head *fd_list,
2372                               const void *data)
2373 {
2374         struct devx_event_subscription *item;
2375
2376         list_for_each_entry_rcu(item, fd_list, xa_list) {
2377                 if (!get_file_rcu(item->filp))
2378                         continue;
2379
2380                 if (item->eventfd) {
2381                         eventfd_signal(item->eventfd, 1);
2382                         fput(item->filp);
2383                         continue;
2384                 }
2385
2386                 deliver_event(item, data);
2387                 fput(item->filp);
2388         }
2389 }
2390
2391 static int devx_event_notifier(struct notifier_block *nb,
2392                                unsigned long event_type, void *data)
2393 {
2394         struct mlx5_devx_event_table *table;
2395         struct mlx5_ib_dev *dev;
2396         struct devx_event *event;
2397         struct devx_obj_event *obj_event;
2398         u16 obj_type = 0;
2399         bool is_unaffiliated;
2400         u32 obj_id;
2401
2402         /* Explicit filtering to kernel events which may occur frequently */
2403         if (event_type == MLX5_EVENT_TYPE_CMD ||
2404             event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
2405                 return NOTIFY_OK;
2406
2407         table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
2408         dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
2409         is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
2410
2411         if (!is_unaffiliated)
2412                 obj_type = get_event_obj_type(event_type, data);
2413
2414         rcu_read_lock();
2415         event = xa_load(&table->event_xa, event_type | (obj_type << 16));
2416         if (!event) {
2417                 rcu_read_unlock();
2418                 return NOTIFY_DONE;
2419         }
2420
2421         if (is_unaffiliated) {
2422                 dispatch_event_fd(&event->unaffiliated_list, data);
2423                 rcu_read_unlock();
2424                 return NOTIFY_OK;
2425         }
2426
2427         obj_id = devx_get_obj_id_from_event(event_type, data);
2428         obj_event = xa_load(&event->object_ids, obj_id);
2429         if (!obj_event) {
2430                 rcu_read_unlock();
2431                 return NOTIFY_DONE;
2432         }
2433
2434         dispatch_event_fd(&obj_event->obj_sub_list, data);
2435
2436         rcu_read_unlock();
2437         return NOTIFY_OK;
2438 }
2439
2440 void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
2441 {
2442         struct mlx5_devx_event_table *table = &dev->devx_event_table;
2443
2444         xa_init(&table->event_xa);
2445         mutex_init(&table->event_xa_lock);
2446         MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
2447         mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
2448 }
2449
2450 void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
2451 {
2452         struct mlx5_devx_event_table *table = &dev->devx_event_table;
2453         struct devx_event_subscription *sub, *tmp;
2454         struct devx_event *event;
2455         void *entry;
2456         unsigned long id;
2457
2458         mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
2459         mutex_lock(&dev->devx_event_table.event_xa_lock);
2460         xa_for_each(&table->event_xa, id, entry) {
2461                 event = entry;
2462                 list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
2463                                          xa_list)
2464                         devx_cleanup_subscription(dev, sub);
2465                 kfree(entry);
2466         }
2467         mutex_unlock(&dev->devx_event_table.event_xa_lock);
2468         xa_destroy(&table->event_xa);
2469 }
2470
2471 static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
2472                                          size_t count, loff_t *pos)
2473 {
2474         struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2475         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2476         struct devx_async_data *event;
2477         int ret = 0;
2478         size_t eventsz;
2479
2480         spin_lock_irq(&ev_queue->lock);
2481
2482         while (list_empty(&ev_queue->event_list)) {
2483                 spin_unlock_irq(&ev_queue->lock);
2484
2485                 if (filp->f_flags & O_NONBLOCK)
2486                         return -EAGAIN;
2487
2488                 if (wait_event_interruptible(
2489                             ev_queue->poll_wait,
2490                             (!list_empty(&ev_queue->event_list) ||
2491                              ev_queue->is_destroyed))) {
2492                         return -ERESTARTSYS;
2493                 }
2494
2495                 if (list_empty(&ev_queue->event_list) &&
2496                     ev_queue->is_destroyed)
2497                         return -EIO;
2498
2499                 spin_lock_irq(&ev_queue->lock);
2500         }
2501
2502         event = list_entry(ev_queue->event_list.next,
2503                            struct devx_async_data, list);
2504         eventsz = event->cmd_out_len +
2505                         sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
2506
2507         if (eventsz > count) {
2508                 spin_unlock_irq(&ev_queue->lock);
2509                 return -ENOSPC;
2510         }
2511
2512         list_del(ev_queue->event_list.next);
2513         spin_unlock_irq(&ev_queue->lock);
2514
2515         if (copy_to_user(buf, &event->hdr, eventsz))
2516                 ret = -EFAULT;
2517         else
2518                 ret = eventsz;
2519
2520         atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
2521         kvfree(event);
2522         return ret;
2523 }
2524
2525 static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
2526 {
2527         struct ib_uobject *uobj = filp->private_data;
2528         struct devx_async_cmd_event_file *comp_ev_file = container_of(
2529                 uobj, struct devx_async_cmd_event_file, uobj);
2530         struct devx_async_data *entry, *tmp;
2531
2532         spin_lock_irq(&comp_ev_file->ev_queue.lock);
2533         list_for_each_entry_safe(entry, tmp,
2534                                  &comp_ev_file->ev_queue.event_list, list)
2535                 kvfree(entry);
2536         spin_unlock_irq(&comp_ev_file->ev_queue.lock);
2537
2538         uverbs_close_fd(filp);
2539         return 0;
2540 }
2541
2542 static __poll_t devx_async_cmd_event_poll(struct file *filp,
2543                                               struct poll_table_struct *wait)
2544 {
2545         struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2546         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2547         __poll_t pollflags = 0;
2548
2549         poll_wait(filp, &ev_queue->poll_wait, wait);
2550
2551         spin_lock_irq(&ev_queue->lock);
2552         if (ev_queue->is_destroyed)
2553                 pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2554         else if (!list_empty(&ev_queue->event_list))
2555                 pollflags = EPOLLIN | EPOLLRDNORM;
2556         spin_unlock_irq(&ev_queue->lock);
2557
2558         return pollflags;
2559 }
2560
2561 static const struct file_operations devx_async_cmd_event_fops = {
2562         .owner   = THIS_MODULE,
2563         .read    = devx_async_cmd_event_read,
2564         .poll    = devx_async_cmd_event_poll,
2565         .release = devx_async_cmd_event_close,
2566         .llseek  = no_llseek,
2567 };
2568
2569 static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
2570                                      size_t count, loff_t *pos)
2571 {
2572         struct devx_async_event_file *ev_file = filp->private_data;
2573         struct devx_event_subscription *event_sub;
2574         struct devx_async_event_data *uninitialized_var(event);
2575         int ret = 0;
2576         size_t eventsz;
2577         bool omit_data;
2578         void *event_data;
2579
2580         omit_data = ev_file->omit_data;
2581
2582         spin_lock_irq(&ev_file->lock);
2583
2584         if (ev_file->is_overflow_err) {
2585                 ev_file->is_overflow_err = 0;
2586                 spin_unlock_irq(&ev_file->lock);
2587                 return -EOVERFLOW;
2588         }
2589
2590         if (ev_file->is_destroyed) {
2591                 spin_unlock_irq(&ev_file->lock);
2592                 return -EIO;
2593         }
2594
2595         while (list_empty(&ev_file->event_list)) {
2596                 spin_unlock_irq(&ev_file->lock);
2597
2598                 if (filp->f_flags & O_NONBLOCK)
2599                         return -EAGAIN;
2600
2601                 if (wait_event_interruptible(ev_file->poll_wait,
2602                             (!list_empty(&ev_file->event_list) ||
2603                              ev_file->is_destroyed))) {
2604                         return -ERESTARTSYS;
2605                 }
2606
2607                 spin_lock_irq(&ev_file->lock);
2608                 if (ev_file->is_destroyed) {
2609                         spin_unlock_irq(&ev_file->lock);
2610                         return -EIO;
2611                 }
2612         }
2613
2614         if (omit_data) {
2615                 event_sub = list_first_entry(&ev_file->event_list,
2616                                         struct devx_event_subscription,
2617                                         event_list);
2618                 eventsz = sizeof(event_sub->cookie);
2619                 event_data = &event_sub->cookie;
2620         } else {
2621                 event = list_first_entry(&ev_file->event_list,
2622                                       struct devx_async_event_data, list);
2623                 eventsz = sizeof(struct mlx5_eqe) +
2624                         sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
2625                 event_data = &event->hdr;
2626         }
2627
2628         if (eventsz > count) {
2629                 spin_unlock_irq(&ev_file->lock);
2630                 return -EINVAL;
2631         }
2632
2633         if (omit_data)
2634                 list_del_init(&event_sub->event_list);
2635         else
2636                 list_del(&event->list);
2637
2638         spin_unlock_irq(&ev_file->lock);
2639
2640         if (copy_to_user(buf, event_data, eventsz))
2641                 /* This points to an application issue, not a kernel concern */
2642                 ret = -EFAULT;
2643         else
2644                 ret = eventsz;
2645
2646         if (!omit_data)
2647                 kfree(event);
2648         return ret;
2649 }
2650
2651 static __poll_t devx_async_event_poll(struct file *filp,
2652                                       struct poll_table_struct *wait)
2653 {
2654         struct devx_async_event_file *ev_file = filp->private_data;
2655         __poll_t pollflags = 0;
2656
2657         poll_wait(filp, &ev_file->poll_wait, wait);
2658
2659         spin_lock_irq(&ev_file->lock);
2660         if (ev_file->is_destroyed)
2661                 pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2662         else if (!list_empty(&ev_file->event_list))
2663                 pollflags = EPOLLIN | EPOLLRDNORM;
2664         spin_unlock_irq(&ev_file->lock);
2665
2666         return pollflags;
2667 }
2668
2669 static int devx_async_event_close(struct inode *inode, struct file *filp)
2670 {
2671         struct devx_async_event_file *ev_file = filp->private_data;
2672         struct devx_event_subscription *event_sub, *event_sub_tmp;
2673         struct devx_async_event_data *entry, *tmp;
2674         struct mlx5_ib_dev *dev = ev_file->dev;
2675
2676         mutex_lock(&dev->devx_event_table.event_xa_lock);
2677         /* delete the subscriptions which are related to this FD */
2678         list_for_each_entry_safe(event_sub, event_sub_tmp,
2679                                  &ev_file->subscribed_events_list, file_list) {
2680                 devx_cleanup_subscription(dev, event_sub);
2681                 if (event_sub->eventfd)
2682                         eventfd_ctx_put(event_sub->eventfd);
2683
2684                 list_del_rcu(&event_sub->file_list);
2685                 /* subscription may not be used by the read API any more */
2686                 kfree_rcu(event_sub, rcu);
2687         }
2688
2689         mutex_unlock(&dev->devx_event_table.event_xa_lock);
2690
2691         /* free the pending events allocation */
2692         if (!ev_file->omit_data) {
2693                 spin_lock_irq(&ev_file->lock);
2694                 list_for_each_entry_safe(entry, tmp,
2695                                          &ev_file->event_list, list)
2696                         kfree(entry); /* read can't come any more */
2697                 spin_unlock_irq(&ev_file->lock);
2698         }
2699
2700         uverbs_close_fd(filp);
2701         put_device(&dev->ib_dev.dev);
2702         return 0;
2703 }
2704
2705 static const struct file_operations devx_async_event_fops = {
2706         .owner   = THIS_MODULE,
2707         .read    = devx_async_event_read,
2708         .poll    = devx_async_event_poll,
2709         .release = devx_async_event_close,
2710         .llseek  = no_llseek,
2711 };
2712
2713 static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
2714                                                    enum rdma_remove_reason why)
2715 {
2716         struct devx_async_cmd_event_file *comp_ev_file =
2717                 container_of(uobj, struct devx_async_cmd_event_file,
2718                              uobj);
2719         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2720
2721         spin_lock_irq(&ev_queue->lock);
2722         ev_queue->is_destroyed = 1;
2723         spin_unlock_irq(&ev_queue->lock);
2724
2725         if (why == RDMA_REMOVE_DRIVER_REMOVE)
2726                 wake_up_interruptible(&ev_queue->poll_wait);
2727
2728         mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
2729         return 0;
2730 };
2731
2732 static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj,
2733                                             enum rdma_remove_reason why)
2734 {
2735         struct devx_async_event_file *ev_file =
2736                 container_of(uobj, struct devx_async_event_file,
2737                              uobj);
2738
2739         spin_lock_irq(&ev_file->lock);
2740         ev_file->is_destroyed = 1;
2741         spin_unlock_irq(&ev_file->lock);
2742
2743         wake_up_interruptible(&ev_file->poll_wait);
2744         return 0;
2745 };
2746
2747 DECLARE_UVERBS_NAMED_METHOD(
2748         MLX5_IB_METHOD_DEVX_UMEM_REG,
2749         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
2750                         MLX5_IB_OBJECT_DEVX_UMEM,
2751                         UVERBS_ACCESS_NEW,
2752                         UA_MANDATORY),
2753         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
2754                            UVERBS_ATTR_TYPE(u64),
2755                            UA_MANDATORY),
2756         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
2757                            UVERBS_ATTR_TYPE(u64),
2758                            UA_MANDATORY),
2759         UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2760                              enum ib_access_flags),
2761         UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
2762                             UVERBS_ATTR_TYPE(u32),
2763                             UA_MANDATORY));
2764
2765 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2766         MLX5_IB_METHOD_DEVX_UMEM_DEREG,
2767         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
2768                         MLX5_IB_OBJECT_DEVX_UMEM,
2769                         UVERBS_ACCESS_DESTROY,
2770                         UA_MANDATORY));
2771
2772 DECLARE_UVERBS_NAMED_METHOD(
2773         MLX5_IB_METHOD_DEVX_QUERY_EQN,
2774         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
2775                            UVERBS_ATTR_TYPE(u32),
2776                            UA_MANDATORY),
2777         UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
2778                             UVERBS_ATTR_TYPE(u32),
2779                             UA_MANDATORY));
2780
2781 DECLARE_UVERBS_NAMED_METHOD(
2782         MLX5_IB_METHOD_DEVX_QUERY_UAR,
2783         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
2784                            UVERBS_ATTR_TYPE(u32),
2785                            UA_MANDATORY),
2786         UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
2787                             UVERBS_ATTR_TYPE(u32),
2788                             UA_MANDATORY));
2789
2790 DECLARE_UVERBS_NAMED_METHOD(
2791         MLX5_IB_METHOD_DEVX_OTHER,
2792         UVERBS_ATTR_PTR_IN(
2793                 MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
2794                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2795                 UA_MANDATORY,
2796                 UA_ALLOC_AND_COPY),
2797         UVERBS_ATTR_PTR_OUT(
2798                 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
2799                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2800                 UA_MANDATORY));
2801
2802 DECLARE_UVERBS_NAMED_METHOD(
2803         MLX5_IB_METHOD_DEVX_OBJ_CREATE,
2804         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
2805                         MLX5_IB_OBJECT_DEVX_OBJ,
2806                         UVERBS_ACCESS_NEW,
2807                         UA_MANDATORY),
2808         UVERBS_ATTR_PTR_IN(
2809                 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
2810                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2811                 UA_MANDATORY,
2812                 UA_ALLOC_AND_COPY),
2813         UVERBS_ATTR_PTR_OUT(
2814                 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
2815                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2816                 UA_MANDATORY));
2817
2818 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2819         MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
2820         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
2821                         MLX5_IB_OBJECT_DEVX_OBJ,
2822                         UVERBS_ACCESS_DESTROY,
2823                         UA_MANDATORY));
2824
2825 DECLARE_UVERBS_NAMED_METHOD(
2826         MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
2827         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
2828                         UVERBS_IDR_ANY_OBJECT,
2829                         UVERBS_ACCESS_WRITE,
2830                         UA_MANDATORY),
2831         UVERBS_ATTR_PTR_IN(
2832                 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
2833                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2834                 UA_MANDATORY,
2835                 UA_ALLOC_AND_COPY),
2836         UVERBS_ATTR_PTR_OUT(
2837                 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
2838                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2839                 UA_MANDATORY));
2840
2841 DECLARE_UVERBS_NAMED_METHOD(
2842         MLX5_IB_METHOD_DEVX_OBJ_QUERY,
2843         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
2844                         UVERBS_IDR_ANY_OBJECT,
2845                         UVERBS_ACCESS_READ,
2846                         UA_MANDATORY),
2847         UVERBS_ATTR_PTR_IN(
2848                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
2849                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2850                 UA_MANDATORY,
2851                 UA_ALLOC_AND_COPY),
2852         UVERBS_ATTR_PTR_OUT(
2853                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
2854                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2855                 UA_MANDATORY));
2856
2857 DECLARE_UVERBS_NAMED_METHOD(
2858         MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
2859         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
2860                         UVERBS_IDR_ANY_OBJECT,
2861                         UVERBS_ACCESS_READ,
2862                         UA_MANDATORY),
2863         UVERBS_ATTR_PTR_IN(
2864                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
2865                 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2866                 UA_MANDATORY,
2867                 UA_ALLOC_AND_COPY),
2868         UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
2869                 u16, UA_MANDATORY),
2870         UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
2871                 MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2872                 UVERBS_ACCESS_READ,
2873                 UA_MANDATORY),
2874         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
2875                 UVERBS_ATTR_TYPE(u64),
2876                 UA_MANDATORY));
2877
2878 DECLARE_UVERBS_NAMED_METHOD(
2879         MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
2880         UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
2881                 MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2882                 UVERBS_ACCESS_READ,
2883                 UA_MANDATORY),
2884         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
2885                 MLX5_IB_OBJECT_DEVX_OBJ,
2886                 UVERBS_ACCESS_READ,
2887                 UA_OPTIONAL),
2888         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
2889                 UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
2890                 UA_MANDATORY,
2891                 UA_ALLOC_AND_COPY),
2892         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
2893                 UVERBS_ATTR_TYPE(u64),
2894                 UA_OPTIONAL),
2895         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
2896                 UVERBS_ATTR_TYPE(u32),
2897                 UA_OPTIONAL));
2898
2899 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
2900                               &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
2901                               &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
2902                               &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
2903                               &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
2904
2905 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
2906                             UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
2907                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
2908                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
2909                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
2910                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
2911                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
2912
2913 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
2914                             UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
2915                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
2916                             &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
2917
2918
2919 DECLARE_UVERBS_NAMED_METHOD(
2920         MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
2921         UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
2922                         MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2923                         UVERBS_ACCESS_NEW,
2924                         UA_MANDATORY));
2925
2926 DECLARE_UVERBS_NAMED_OBJECT(
2927         MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2928         UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
2929                              devx_hot_unplug_async_cmd_event_file,
2930                              &devx_async_cmd_event_fops, "[devx_async_cmd]",
2931                              O_RDONLY),
2932         &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
2933
2934 DECLARE_UVERBS_NAMED_METHOD(
2935         MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
2936         UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
2937                         MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2938                         UVERBS_ACCESS_NEW,
2939                         UA_MANDATORY),
2940         UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
2941                         enum mlx5_ib_uapi_devx_create_event_channel_flags,
2942                         UA_MANDATORY));
2943
2944 DECLARE_UVERBS_NAMED_OBJECT(
2945         MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2946         UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
2947                              devx_hot_unplug_async_event_file,
2948                              &devx_async_event_fops, "[devx_async_event]",
2949                              O_RDONLY),
2950         &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
2951
2952 static bool devx_is_supported(struct ib_device *device)
2953 {
2954         struct mlx5_ib_dev *dev = to_mdev(device);
2955
2956         return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
2957 }
2958
2959 const struct uapi_definition mlx5_ib_devx_defs[] = {
2960         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2961                 MLX5_IB_OBJECT_DEVX,
2962                 UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2963         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2964                 MLX5_IB_OBJECT_DEVX_OBJ,
2965                 UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2966         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2967                 MLX5_IB_OBJECT_DEVX_UMEM,
2968                 UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2969         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2970                 MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2971                 UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2972         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2973                 MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2974                 UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2975         {},
2976 };