2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 #include <linux/module.h>
38 #include <linux/init.h>
39 #include <linux/device.h>
40 #include <linux/err.h>
42 #include <linux/poll.h>
43 #include <linux/sched.h>
44 #include <linux/sched/mm.h>
45 #include <linux/sched/task.h>
46 #include <linux/file.h>
47 #include <linux/cdev.h>
48 #include <linux/anon_inodes.h>
49 #include <linux/slab.h>
51 #include <linux/uaccess.h>
54 #include <rdma/uverbs_std_types.h>
57 #include "core_priv.h"
58 #include "rdma_core.h"
60 MODULE_AUTHOR("Roland Dreier");
61 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
62 MODULE_LICENSE("Dual BSD/GPL");
65 IB_UVERBS_MAJOR = 231,
66 IB_UVERBS_BASE_MINOR = 192,
67 IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
68 IB_UVERBS_NUM_FIXED_MINOR = 32,
69 IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
72 #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
74 static dev_t dynamic_uverbs_dev;
75 static struct class *uverbs_class;
77 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
79 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
80 struct ib_device *ib_dev,
81 const char __user *buf, int in_len,
83 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
84 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
85 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
86 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
87 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
88 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
89 [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
90 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
91 [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
92 [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
93 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
94 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
95 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
96 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
97 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
98 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
99 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
100 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
101 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
102 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
103 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
104 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
105 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
106 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
107 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
108 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
109 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
110 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
111 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
112 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
113 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
114 [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
115 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
116 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
117 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
120 static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
121 struct ib_device *ib_dev,
122 struct ib_udata *ucore,
123 struct ib_udata *uhw) = {
124 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
125 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
126 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
127 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
128 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
129 [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
130 [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
131 [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
132 [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
133 [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
134 [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
135 [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq,
/* ib_client add/remove callbacks, defined near the bottom of this file. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
141 int uverbs_dealloc_mw(struct ib_mw *mw)
143 struct ib_pd *pd = mw->pd;
146 ret = mw->device->dealloc_mw(mw);
148 atomic_dec(&pd->usecnt);
152 static void ib_uverbs_release_dev(struct kobject *kobj)
154 struct ib_uverbs_device *dev =
155 container_of(kobj, struct ib_uverbs_device, kobj);
157 cleanup_srcu_struct(&dev->disassociate_srcu);
161 static struct kobj_type ib_uverbs_dev_ktype = {
162 .release = ib_uverbs_release_dev,
165 static void ib_uverbs_release_async_event_file(struct kref *ref)
167 struct ib_uverbs_async_event_file *file =
168 container_of(ref, struct ib_uverbs_async_event_file, ref);
173 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
174 struct ib_uverbs_completion_event_file *ev_file,
175 struct ib_ucq_object *uobj)
177 struct ib_uverbs_event *evt, *tmp;
180 spin_lock_irq(&ev_file->ev_queue.lock);
181 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
182 list_del(&evt->list);
185 spin_unlock_irq(&ev_file->ev_queue.lock);
187 uverbs_uobject_put(&ev_file->uobj_file.uobj);
190 spin_lock_irq(&file->async_file->ev_queue.lock);
191 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
192 list_del(&evt->list);
195 spin_unlock_irq(&file->async_file->ev_queue.lock);
198 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
199 struct ib_uevent_object *uobj)
201 struct ib_uverbs_event *evt, *tmp;
203 spin_lock_irq(&file->async_file->ev_queue.lock);
204 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
205 list_del(&evt->list);
208 spin_unlock_irq(&file->async_file->ev_queue.lock);
211 void ib_uverbs_detach_umcast(struct ib_qp *qp,
212 struct ib_uqp_object *uobj)
214 struct ib_uverbs_mcast_entry *mcast, *tmp;
216 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
217 ib_detach_mcast(qp, &mcast->gid, mcast->lid);
218 list_del(&mcast->list);
223 static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
224 struct ib_ucontext *context,
227 context->closing = 1;
228 uverbs_cleanup_ucontext(context, device_removed);
229 put_pid(context->tgid);
231 ib_rdmacg_uncharge(&context->cg_obj, context->device,
232 RDMACG_RESOURCE_HCA_HANDLE);
234 return context->device->dealloc_ucontext(context);
237 static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
239 complete(&dev->comp);
242 void ib_uverbs_release_file(struct kref *ref)
244 struct ib_uverbs_file *file =
245 container_of(ref, struct ib_uverbs_file, ref);
246 struct ib_device *ib_dev;
249 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
250 ib_dev = srcu_dereference(file->device->ib_dev,
251 &file->device->disassociate_srcu);
252 if (ib_dev && !ib_dev->disassociate_ucontext)
253 module_put(ib_dev->owner);
254 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
256 if (atomic_dec_and_test(&file->device->refcount))
257 ib_uverbs_comp_dev(file->device);
259 kobject_put(&file->device->kobj);
263 static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
264 struct ib_uverbs_file *uverbs_file,
265 struct file *filp, char __user *buf,
266 size_t count, loff_t *pos,
269 struct ib_uverbs_event *event;
272 spin_lock_irq(&ev_queue->lock);
274 while (list_empty(&ev_queue->event_list)) {
275 spin_unlock_irq(&ev_queue->lock);
277 if (filp->f_flags & O_NONBLOCK)
280 if (wait_event_interruptible(ev_queue->poll_wait,
281 (!list_empty(&ev_queue->event_list) ||
282 /* The barriers built into wait_event_interruptible()
283 * and wake_up() guarentee this will see the null set
286 !uverbs_file->device->ib_dev)))
289 /* If device was disassociated and no event exists set an error */
290 if (list_empty(&ev_queue->event_list) &&
291 !uverbs_file->device->ib_dev)
294 spin_lock_irq(&ev_queue->lock);
297 event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
299 if (eventsz > count) {
303 list_del(ev_queue->event_list.next);
304 if (event->counter) {
306 list_del(&event->obj_list);
310 spin_unlock_irq(&ev_queue->lock);
313 if (copy_to_user(buf, event, eventsz))
324 static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
325 size_t count, loff_t *pos)
327 struct ib_uverbs_async_event_file *file = filp->private_data;
329 return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
331 sizeof(struct ib_uverbs_async_event_desc));
334 static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
335 size_t count, loff_t *pos)
337 struct ib_uverbs_completion_event_file *comp_ev_file =
340 return ib_uverbs_event_read(&comp_ev_file->ev_queue,
341 comp_ev_file->uobj_file.ufile, filp,
343 sizeof(struct ib_uverbs_comp_event_desc));
346 static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
348 struct poll_table_struct *wait)
350 __poll_t pollflags = 0;
352 poll_wait(filp, &ev_queue->poll_wait, wait);
354 spin_lock_irq(&ev_queue->lock);
355 if (!list_empty(&ev_queue->event_list))
356 pollflags = EPOLLIN | EPOLLRDNORM;
357 spin_unlock_irq(&ev_queue->lock);
362 static __poll_t ib_uverbs_async_event_poll(struct file *filp,
363 struct poll_table_struct *wait)
365 return ib_uverbs_event_poll(filp->private_data, filp, wait);
368 static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
369 struct poll_table_struct *wait)
371 struct ib_uverbs_completion_event_file *comp_ev_file =
374 return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
377 static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
379 struct ib_uverbs_event_queue *ev_queue = filp->private_data;
381 return fasync_helper(fd, filp, on, &ev_queue->async_queue);
384 static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
386 struct ib_uverbs_completion_event_file *comp_ev_file =
389 return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
392 static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
394 struct ib_uverbs_async_event_file *file = filp->private_data;
395 struct ib_uverbs_file *uverbs_file = file->uverbs_file;
396 struct ib_uverbs_event *entry, *tmp;
397 int closed_already = 0;
399 mutex_lock(&uverbs_file->device->lists_mutex);
400 spin_lock_irq(&file->ev_queue.lock);
401 closed_already = file->ev_queue.is_closed;
402 file->ev_queue.is_closed = 1;
403 list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
405 list_del(&entry->obj_list);
408 spin_unlock_irq(&file->ev_queue.lock);
409 if (!closed_already) {
410 list_del(&file->list);
411 ib_unregister_event_handler(&uverbs_file->event_handler);
413 mutex_unlock(&uverbs_file->device->lists_mutex);
415 kref_put(&uverbs_file->ref, ib_uverbs_release_file);
416 kref_put(&file->ref, ib_uverbs_release_async_event_file);
421 static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
423 struct ib_uverbs_completion_event_file *file = filp->private_data;
424 struct ib_uverbs_event *entry, *tmp;
426 spin_lock_irq(&file->ev_queue.lock);
427 list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
429 list_del(&entry->obj_list);
432 spin_unlock_irq(&file->ev_queue.lock);
434 uverbs_close_fd(filp);
439 const struct file_operations uverbs_event_fops = {
440 .owner = THIS_MODULE,
441 .read = ib_uverbs_comp_event_read,
442 .poll = ib_uverbs_comp_event_poll,
443 .release = ib_uverbs_comp_event_close,
444 .fasync = ib_uverbs_comp_event_fasync,
448 static const struct file_operations uverbs_async_event_fops = {
449 .owner = THIS_MODULE,
450 .read = ib_uverbs_async_event_read,
451 .poll = ib_uverbs_async_event_poll,
452 .release = ib_uverbs_async_event_close,
453 .fasync = ib_uverbs_async_event_fasync,
457 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
459 struct ib_uverbs_event_queue *ev_queue = cq_context;
460 struct ib_ucq_object *uobj;
461 struct ib_uverbs_event *entry;
467 spin_lock_irqsave(&ev_queue->lock, flags);
468 if (ev_queue->is_closed) {
469 spin_unlock_irqrestore(&ev_queue->lock, flags);
473 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
475 spin_unlock_irqrestore(&ev_queue->lock, flags);
479 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
481 entry->desc.comp.cq_handle = cq->uobject->user_handle;
482 entry->counter = &uobj->comp_events_reported;
484 list_add_tail(&entry->list, &ev_queue->event_list);
485 list_add_tail(&entry->obj_list, &uobj->comp_list);
486 spin_unlock_irqrestore(&ev_queue->lock, flags);
488 wake_up_interruptible(&ev_queue->poll_wait);
489 kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
492 static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
493 __u64 element, __u64 event,
494 struct list_head *obj_list,
497 struct ib_uverbs_event *entry;
500 spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
501 if (file->async_file->ev_queue.is_closed) {
502 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
506 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
508 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
512 entry->desc.async.element = element;
513 entry->desc.async.event_type = event;
514 entry->desc.async.reserved = 0;
515 entry->counter = counter;
517 list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
519 list_add_tail(&entry->obj_list, obj_list);
520 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
522 wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
523 kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
526 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
528 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
529 struct ib_ucq_object, uobject);
531 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
532 event->event, &uobj->async_list,
533 &uobj->async_events_reported);
536 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
538 struct ib_uevent_object *uobj;
540 /* for XRC target qp's, check that qp is live */
541 if (!event->element.qp->uobject)
544 uobj = container_of(event->element.qp->uobject,
545 struct ib_uevent_object, uobject);
547 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
548 event->event, &uobj->event_list,
549 &uobj->events_reported);
552 void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
554 struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
555 struct ib_uevent_object, uobject);
557 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
558 event->event, &uobj->event_list,
559 &uobj->events_reported);
562 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
564 struct ib_uevent_object *uobj;
566 uobj = container_of(event->element.srq->uobject,
567 struct ib_uevent_object, uobject);
569 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
570 event->event, &uobj->event_list,
571 &uobj->events_reported);
574 void ib_uverbs_event_handler(struct ib_event_handler *handler,
575 struct ib_event *event)
577 struct ib_uverbs_file *file =
578 container_of(handler, struct ib_uverbs_file, event_handler);
580 ib_uverbs_async_handler(file, event->element.port_num, event->event,
584 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
586 kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
587 file->async_file = NULL;
590 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
592 spin_lock_init(&ev_queue->lock);
593 INIT_LIST_HEAD(&ev_queue->event_list);
594 init_waitqueue_head(&ev_queue->poll_wait);
595 ev_queue->is_closed = 0;
596 ev_queue->async_queue = NULL;
599 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
600 struct ib_device *ib_dev)
602 struct ib_uverbs_async_event_file *ev_file;
605 ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
607 return ERR_PTR(-ENOMEM);
609 ib_uverbs_init_event_queue(&ev_file->ev_queue);
610 ev_file->uverbs_file = uverbs_file;
611 kref_get(&ev_file->uverbs_file->ref);
612 kref_init(&ev_file->ref);
613 filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
618 mutex_lock(&uverbs_file->device->lists_mutex);
619 list_add_tail(&ev_file->list,
620 &uverbs_file->device->uverbs_events_file_list);
621 mutex_unlock(&uverbs_file->device->lists_mutex);
623 WARN_ON(uverbs_file->async_file);
624 uverbs_file->async_file = ev_file;
625 kref_get(&uverbs_file->async_file->ref);
626 INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
628 ib_uverbs_event_handler);
629 ib_register_event_handler(&uverbs_file->event_handler);
630 /* At that point async file stuff was fully set */
635 kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
636 kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
640 static bool verify_command_mask(struct ib_device *ib_dev,
641 u32 command, bool extended)
644 return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
646 return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
649 static bool verify_command_idx(u32 command, bool extended)
652 return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
653 uverbs_ex_cmd_table[command];
655 return command < ARRAY_SIZE(uverbs_cmd_table) &&
656 uverbs_cmd_table[command];
659 static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
660 u32 *command, bool *extended)
662 if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
663 IB_USER_VERBS_CMD_COMMAND_MASK))
666 *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
667 *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
669 if (!verify_command_idx(*command, *extended))
675 static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
676 struct ib_uverbs_ex_cmd_hdr *ex_hdr,
677 size_t count, bool extended)
680 count -= sizeof(*hdr) + sizeof(*ex_hdr);
682 if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
685 if (ex_hdr->cmd_hdr_reserved)
688 if (ex_hdr->response) {
689 if (!hdr->out_words && !ex_hdr->provider_out_words)
692 if (!access_ok(VERIFY_WRITE,
693 u64_to_user_ptr(ex_hdr->response),
694 (hdr->out_words + ex_hdr->provider_out_words) * 8))
697 if (hdr->out_words || ex_hdr->provider_out_words)
704 /* not extended command */
705 if (hdr->in_words * 4 != count)
711 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
712 size_t count, loff_t *pos)
714 struct ib_uverbs_file *file = filp->private_data;
715 struct ib_uverbs_ex_cmd_hdr ex_hdr;
716 struct ib_device *ib_dev;
717 struct ib_uverbs_cmd_hdr hdr;
723 if (!ib_safe_file_access(filp)) {
724 pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
725 task_tgid_vnr(current), current->comm);
729 if (count < sizeof(hdr))
732 if (copy_from_user(&hdr, buf, sizeof(hdr)))
735 ret = process_hdr(&hdr, &command, &extended);
739 if (!file->ucontext &&
740 (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
744 if (count < (sizeof(hdr) + sizeof(ex_hdr)))
746 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
750 ret = verify_hdr(&hdr, &ex_hdr, count, extended);
754 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
755 ib_dev = srcu_dereference(file->device->ib_dev,
756 &file->device->disassociate_srcu);
762 if (!verify_command_mask(ib_dev, command, extended)) {
770 ret = uverbs_cmd_table[command](file, ib_dev, buf,
774 struct ib_udata ucore;
777 buf += sizeof(ex_hdr);
779 ib_uverbs_init_udata_buf_or_null(&ucore, buf,
780 u64_to_user_ptr(ex_hdr.response),
781 hdr.in_words * 8, hdr.out_words * 8);
783 ib_uverbs_init_udata_buf_or_null(&uhw,
785 u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
786 ex_hdr.provider_in_words * 8,
787 ex_hdr.provider_out_words * 8);
789 ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
790 ret = (ret) ? : count;
794 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
798 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
800 struct ib_uverbs_file *file = filp->private_data;
801 struct ib_device *ib_dev;
805 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
806 ib_dev = srcu_dereference(file->device->ib_dev,
807 &file->device->disassociate_srcu);
816 ret = ib_dev->mmap(file->ucontext, vma);
818 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately fail (-ENXIO), or all the
 *    required initialization will be done.
 */
832 static int ib_uverbs_open(struct inode *inode, struct file *filp)
834 struct ib_uverbs_device *dev;
835 struct ib_uverbs_file *file;
836 struct ib_device *ib_dev;
838 int module_dependent;
841 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
842 if (!atomic_inc_not_zero(&dev->refcount))
845 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
846 mutex_lock(&dev->lists_mutex);
847 ib_dev = srcu_dereference(dev->ib_dev,
848 &dev->disassociate_srcu);
854 /* In case IB device supports disassociate ucontext, there is no hard
855 * dependency between uverbs device and its low level device.
857 module_dependent = !(ib_dev->disassociate_ucontext);
859 if (module_dependent) {
860 if (!try_module_get(ib_dev->owner)) {
866 file = kzalloc(sizeof(*file), GFP_KERNEL);
869 if (module_dependent)
876 spin_lock_init(&file->idr_lock);
877 idr_init(&file->idr);
878 file->ucontext = NULL;
879 file->async_file = NULL;
880 kref_init(&file->ref);
881 mutex_init(&file->mutex);
882 mutex_init(&file->cleanup_mutex);
884 filp->private_data = file;
885 kobject_get(&dev->kobj);
886 list_add_tail(&file->list, &dev->uverbs_file_list);
887 mutex_unlock(&dev->lists_mutex);
888 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
890 return nonseekable_open(inode, filp);
893 module_put(ib_dev->owner);
896 mutex_unlock(&dev->lists_mutex);
897 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
898 if (atomic_dec_and_test(&dev->refcount))
899 ib_uverbs_comp_dev(dev);
904 static int ib_uverbs_close(struct inode *inode, struct file *filp)
906 struct ib_uverbs_file *file = filp->private_data;
908 mutex_lock(&file->cleanup_mutex);
909 if (file->ucontext) {
910 ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
911 file->ucontext = NULL;
913 mutex_unlock(&file->cleanup_mutex);
914 idr_destroy(&file->idr);
916 mutex_lock(&file->device->lists_mutex);
917 if (!file->is_closed) {
918 list_del(&file->list);
921 mutex_unlock(&file->device->lists_mutex);
923 if (file->async_file)
924 kref_put(&file->async_file->ref,
925 ib_uverbs_release_async_event_file);
927 kref_put(&file->ref, ib_uverbs_release_file);
932 static const struct file_operations uverbs_fops = {
933 .owner = THIS_MODULE,
934 .write = ib_uverbs_write,
935 .open = ib_uverbs_open,
936 .release = ib_uverbs_close,
938 .unlocked_ioctl = ib_uverbs_ioctl,
939 .compat_ioctl = ib_uverbs_ioctl,
942 static const struct file_operations uverbs_mmap_fops = {
943 .owner = THIS_MODULE,
944 .write = ib_uverbs_write,
945 .mmap = ib_uverbs_mmap,
946 .open = ib_uverbs_open,
947 .release = ib_uverbs_close,
949 .unlocked_ioctl = ib_uverbs_ioctl,
950 .compat_ioctl = ib_uverbs_ioctl,
953 static struct ib_client uverbs_client = {
955 .add = ib_uverbs_add_one,
956 .remove = ib_uverbs_remove_one
959 static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
964 struct ib_uverbs_device *dev = dev_get_drvdata(device);
965 struct ib_device *ib_dev;
970 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
971 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
973 ret = sprintf(buf, "%s\n", ib_dev->name);
974 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
978 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
980 static ssize_t show_dev_abi_version(struct device *device,
981 struct device_attribute *attr, char *buf)
983 struct ib_uverbs_device *dev = dev_get_drvdata(device);
986 struct ib_device *ib_dev;
990 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
991 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
993 ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
994 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
998 static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1000 static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1001 __stringify(IB_USER_VERBS_ABI_VERSION));
1003 static void ib_uverbs_add_one(struct ib_device *device)
1007 struct ib_uverbs_device *uverbs_dev;
1010 if (!device->alloc_ucontext)
1013 uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
1017 ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
1023 atomic_set(&uverbs_dev->refcount, 1);
1024 init_completion(&uverbs_dev->comp);
1025 uverbs_dev->xrcd_tree = RB_ROOT;
1026 mutex_init(&uverbs_dev->xrcd_tree_mutex);
1027 kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
1028 mutex_init(&uverbs_dev->lists_mutex);
1029 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
1030 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
1032 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
1033 if (devnum >= IB_UVERBS_MAX_DEVICES)
1035 uverbs_dev->devnum = devnum;
1036 set_bit(devnum, dev_map);
1037 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
1038 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
1040 base = IB_UVERBS_BASE_DEV + devnum;
1042 rcu_assign_pointer(uverbs_dev->ib_dev, device);
1043 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1045 cdev_init(&uverbs_dev->cdev, NULL);
1046 uverbs_dev->cdev.owner = THIS_MODULE;
1047 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
1048 cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
1049 kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
1050 if (cdev_add(&uverbs_dev->cdev, base, 1))
1053 uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
1054 uverbs_dev->cdev.dev, uverbs_dev,
1055 "uverbs%d", uverbs_dev->devnum);
1056 if (IS_ERR(uverbs_dev->dev))
1059 if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
1061 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
1064 if (!device->specs_root) {
1065 const struct uverbs_object_tree_def *default_root[] = {
1066 uverbs_default_get_objects()};
1068 uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
1070 if (IS_ERR(uverbs_dev->specs_root))
1073 device->specs_root = uverbs_dev->specs_root;
1076 ib_set_client_data(device, &uverbs_client, uverbs_dev);
1081 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1084 cdev_del(&uverbs_dev->cdev);
1085 clear_bit(devnum, dev_map);
1088 if (atomic_dec_and_test(&uverbs_dev->refcount))
1089 ib_uverbs_comp_dev(uverbs_dev);
1090 wait_for_completion(&uverbs_dev->comp);
1091 kobject_put(&uverbs_dev->kobj);
1095 static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
1097 struct ib_device *ib_dev = ibcontext->device;
1098 struct task_struct *owning_process = NULL;
1099 struct mm_struct *owning_mm = NULL;
1101 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
1102 if (!owning_process)
1105 owning_mm = get_task_mm(owning_process);
1107 pr_info("no mm, disassociate ucontext is pending task termination\n");
1109 put_task_struct(owning_process);
1110 usleep_range(1000, 2000);
1111 owning_process = get_pid_task(ibcontext->tgid,
1113 if (!owning_process ||
1114 owning_process->state == TASK_DEAD) {
1115 pr_info("disassociate ucontext done, task was terminated\n");
1116 /* in case task was dead need to release the
1120 put_task_struct(owning_process);
1126 down_write(&owning_mm->mmap_sem);
1127 ib_dev->disassociate_ucontext(ibcontext);
1128 up_write(&owning_mm->mmap_sem);
1130 put_task_struct(owning_process);
1133 static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1134 struct ib_device *ib_dev)
1136 struct ib_uverbs_file *file;
1137 struct ib_uverbs_async_event_file *event_file;
1138 struct ib_event event;
1140 /* Pending running commands to terminate */
1141 synchronize_srcu(&uverbs_dev->disassociate_srcu);
1142 event.event = IB_EVENT_DEVICE_FATAL;
1143 event.element.port_num = 0;
1144 event.device = ib_dev;
1146 mutex_lock(&uverbs_dev->lists_mutex);
1147 while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1148 struct ib_ucontext *ucontext;
1149 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1150 struct ib_uverbs_file, list);
1151 file->is_closed = 1;
1152 list_del(&file->list);
1153 kref_get(&file->ref);
1154 mutex_unlock(&uverbs_dev->lists_mutex);
1157 mutex_lock(&file->cleanup_mutex);
1158 ucontext = file->ucontext;
1159 file->ucontext = NULL;
1160 mutex_unlock(&file->cleanup_mutex);
1162 /* At this point ib_uverbs_close cannot be running
1163 * ib_uverbs_cleanup_ucontext
1166 /* We must release the mutex before going ahead and
1167 * calling disassociate_ucontext. disassociate_ucontext
1168 * might end up indirectly calling uverbs_close,
1169 * for example due to freeing the resources
1172 ib_uverbs_event_handler(&file->event_handler, &event);
1173 ib_uverbs_disassociate_ucontext(ucontext);
1174 mutex_lock(&file->cleanup_mutex);
1175 ib_uverbs_cleanup_ucontext(file, ucontext, true);
1176 mutex_unlock(&file->cleanup_mutex);
1179 mutex_lock(&uverbs_dev->lists_mutex);
1180 kref_put(&file->ref, ib_uverbs_release_file);
1183 while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
1184 event_file = list_first_entry(&uverbs_dev->
1185 uverbs_events_file_list,
1186 struct ib_uverbs_async_event_file,
1188 spin_lock_irq(&event_file->ev_queue.lock);
1189 event_file->ev_queue.is_closed = 1;
1190 spin_unlock_irq(&event_file->ev_queue.lock);
1192 list_del(&event_file->list);
1193 ib_unregister_event_handler(
1194 &event_file->uverbs_file->event_handler);
1195 event_file->uverbs_file->event_handler.device =
1198 wake_up_interruptible(&event_file->ev_queue.poll_wait);
1199 kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
1201 mutex_unlock(&uverbs_dev->lists_mutex);
1204 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1206 struct ib_uverbs_device *uverbs_dev = client_data;
1207 int wait_clients = 1;
1212 dev_set_drvdata(uverbs_dev->dev, NULL);
1213 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1214 cdev_del(&uverbs_dev->cdev);
1215 clear_bit(uverbs_dev->devnum, dev_map);
1217 if (device->disassociate_ucontext) {
1218 /* We disassociate HW resources and immediately return.
1219 * Userspace will see a EIO errno for all future access.
1220 * Upon returning, ib_device may be freed internally and is not
1222 * uverbs_device is still available until all clients close
1223 * their files, then the uverbs device ref count will be zero
1224 * and its resources will be freed.
1225 * Note: At this point no more files can be opened since the
1226 * cdev was deleted, however active clients can still issue
1227 * commands and close their open files.
1229 rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
1230 ib_uverbs_free_hw_resources(uverbs_dev, device);
1234 if (atomic_dec_and_test(&uverbs_dev->refcount))
1235 ib_uverbs_comp_dev(uverbs_dev);
1237 wait_for_completion(&uverbs_dev->comp);
1238 if (uverbs_dev->specs_root) {
1239 uverbs_free_spec_tree(uverbs_dev->specs_root);
1240 device->specs_root = NULL;
1243 kobject_put(&uverbs_dev->kobj);
1246 static char *uverbs_devnode(struct device *dev, umode_t *mode)
1250 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
1253 static int __init ib_uverbs_init(void)
1257 ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
1258 IB_UVERBS_NUM_FIXED_MINOR,
1259 "infiniband_verbs");
1261 pr_err("user_verbs: couldn't register device number\n");
1265 ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
1266 IB_UVERBS_NUM_DYNAMIC_MINOR,
1267 "infiniband_verbs");
1269 pr_err("couldn't register dynamic device number\n");
1273 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1274 if (IS_ERR(uverbs_class)) {
1275 ret = PTR_ERR(uverbs_class);
1276 pr_err("user_verbs: couldn't create class infiniband_verbs\n");
1280 uverbs_class->devnode = uverbs_devnode;
1282 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
1284 pr_err("user_verbs: couldn't create abi_version attribute\n");
1288 ret = ib_register_client(&uverbs_client);
1290 pr_err("user_verbs: couldn't register client\n");
1297 class_destroy(uverbs_class);
1300 unregister_chrdev_region(dynamic_uverbs_dev,
1301 IB_UVERBS_NUM_DYNAMIC_MINOR);
1304 unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1305 IB_UVERBS_NUM_FIXED_MINOR);
1311 static void __exit ib_uverbs_cleanup(void)
1313 ib_unregister_client(&uverbs_client);
1314 class_destroy(uverbs_class);
1315 unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1316 IB_UVERBS_NUM_FIXED_MINOR);
1317 unregister_chrdev_region(dynamic_uverbs_dev,
1318 IB_UVERBS_NUM_DYNAMIC_MINOR);
1321 module_init(ib_uverbs_init);
1322 module_exit(ib_uverbs_cleanup);