/* drivers/infiniband/core/uverbs_main.c */
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>

#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>

#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
	IB_UVERBS_NUM_FIXED_MINOR = 32,
	IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};
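
/*
 * Worked example of the minor-number split above (an illustrative note,
 * not from the original source): the 32 fixed minors occupy the legacy
 * range MKDEV(231, 192) .. MKDEV(231, 223), so /dev/infiniband/uverbs0
 * through uverbs31 keep their historical device numbers, while devnums
 * 32 and up map into the dynamically allocated dynamic_uverbs_dev region
 * requested in ib_uverbs_init() via alloc_chrdev_region().
 */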

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;

static DEFINE_IDA(uverbs_ida);

static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
};

static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
				    struct ib_udata *ucore,
				    struct ib_udata *uhw) = {
	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device,
	[IB_USER_VERBS_EX_CMD_CREATE_CQ]	= ib_uverbs_ex_create_cq,
	[IB_USER_VERBS_EX_CMD_CREATE_QP]	= ib_uverbs_ex_create_qp,
	[IB_USER_VERBS_EX_CMD_CREATE_WQ]	= ib_uverbs_ex_create_wq,
	[IB_USER_VERBS_EX_CMD_MODIFY_WQ]	= ib_uverbs_ex_modify_wq,
	[IB_USER_VERBS_EX_CMD_DESTROY_WQ]	= ib_uverbs_ex_destroy_wq,
	[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
	[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
	[IB_USER_VERBS_EX_CMD_MODIFY_QP]	= ib_uverbs_ex_modify_qp,
	[IB_USER_VERBS_EX_CMD_MODIFY_CQ]	= ib_uverbs_ex_modify_cq,
};

static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

/*
 * Must be called with the ufile->device->disassociate_srcu read lock held,
 * and the lock must be held until use of the ucontext is finished.
 */
struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
{
	/*
	 * We do not hold the hw_destroy_rwsem lock for this flow; instead
	 * srcu is used. It does not matter if someone races this with
	 * get_context, we get either NULL or a valid ucontext.
	 */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!srcu_dereference(ufile->device->ib_dev,
			      &ufile->device->disassociate_srcu))
		return ERR_PTR(-EIO);

	if (!ucontext)
		return ERR_PTR(-EINVAL);

	return ucontext;
}
EXPORT_SYMBOL(ib_uverbs_get_ucontext);
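
/*
 * Illustrative sketch (not part of the original file) of how a caller is
 * expected to bracket ib_uverbs_get_ucontext() with the SRCU read lock,
 * mirroring what ib_uverbs_mmap() below does; "do_something" is a
 * hypothetical placeholder:
 *
 *	int srcu_key = srcu_read_lock(&ufile->device->disassociate_srcu);
 *	struct ib_ucontext *ucontext = ib_uverbs_get_ucontext(ufile);
 *
 *	if (IS_ERR(ucontext))
 *		ret = PTR_ERR(ucontext);
 *	else
 *		ret = do_something(ucontext);
 *	srcu_read_unlock(&ufile->device->disassociate_srcu, srcu_key);
 */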

int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *pd = mw->pd;
	int ret;

	ret = mw->device->dealloc_mw(mw);
	if (!ret)
		atomic_dec(&pd->usecnt);
	return ret;
}

static void ib_uverbs_release_dev(struct device *device)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);

	uverbs_destroy_api(dev->uapi);
	cleanup_srcu_struct(&dev->disassociate_srcu);
	kfree(dev);
}

static void ib_uverbs_release_async_event_file(struct kref *ref)
{
	struct ib_uverbs_async_event_file *file =
		container_of(ref, struct ib_uverbs_async_event_file, ref);

	kfree(file);
}

void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			  struct ib_uverbs_completion_event_file *ev_file,
			  struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->ev_queue.lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->ev_queue.lock);

		uverbs_uobject_put(&ev_file->uobj);
	}

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_detach_umcast(struct ib_qp *qp,
			     struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	release_ufile_idr_uobject(file);

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->disassociate_ucontext)
		module_put(ib_dev->owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	put_device(&file->device->dev);
	kfree(file);
}

static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
				    struct ib_uverbs_file *uverbs_file,
				    struct file *filp, char __user *buf,
				    size_t count, loff_t *pos,
				    size_t eventsz)
{
	struct ib_uverbs_event *event;
	int ret = 0;

	spin_lock_irq(&ev_queue->lock);

	while (list_empty(&ev_queue->event_list)) {
		spin_unlock_irq(&ev_queue->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_queue->poll_wait,
					     (!list_empty(&ev_queue->event_list) ||
			/* The barriers built into wait_event_interruptible()
			 * and wake_up() guarantee this will see ib_dev set
			 * to NULL without using RCU
			 */
					     !uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If the device was disassociated and no event remains, return an error */
		if (list_empty(&ev_queue->event_list) &&
		    !uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&ev_queue->lock);
	}

	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

	if (eventsz > count) {
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(ev_queue->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&ev_queue->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}
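
/*
 * Illustrative userspace sketch (an assumption for this note, not part of
 * the kernel source): a blocking reader of a completion-event FD obtained
 * via IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL consumes fixed-size
 * ib_uverbs_comp_event_desc records, which is exactly the eventsz path
 * handled above:
 *
 *	struct ib_uverbs_comp_event_desc desc;
 *	ssize_t n = read(comp_channel_fd, &desc, sizeof(desc));
 *
 *	if (n == sizeof(desc))
 *		handle_cq_event(desc.cq_handle);	// hypothetical helper
 */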

static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
					  size_t count, loff_t *pos)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_async_event_desc));
}

static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
					 size_t count, loff_t *pos)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
				    comp_ev_file->uobj.ufile, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_comp_event_desc));
}

static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
					 struct file *filp,
					 struct poll_table_struct *wait)
{
	__poll_t pollflags = 0;

	poll_wait(filp, &ev_queue->poll_wait, wait);

	spin_lock_irq(&ev_queue->lock);
	if (!list_empty(&ev_queue->event_list))
		pollflags = EPOLLIN | EPOLLRDNORM;
	spin_unlock_irq(&ev_queue->lock);

	return pollflags;
}

static __poll_t ib_uverbs_async_event_poll(struct file *filp,
					       struct poll_table_struct *wait)
{
	return ib_uverbs_event_poll(filp->private_data, filp, wait);
}

static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
					      struct poll_table_struct *wait)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}

static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_queue *ev_queue = filp->private_data;

	return fasync_helper(fd, filp, on, &ev_queue->async_queue);
}

static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}

static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;
	struct ib_uverbs_file *uverbs_file = file->uverbs_file;
	struct ib_uverbs_event *entry, *tmp;
	int closed_already = 0;

	mutex_lock(&uverbs_file->device->lists_mutex);
	spin_lock_irq(&file->ev_queue.lock);
	closed_already = file->ev_queue.is_closed;
	file->ev_queue.is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->ev_queue.lock);
	if (!closed_already) {
		list_del(&file->list);
		ib_unregister_event_handler(&uverbs_file->event_handler);
	}
	mutex_unlock(&uverbs_file->device->lists_mutex);

	kref_put(&uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&file->ref, ib_uverbs_release_async_event_file);

	return 0;
}

static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uobject *uobj = filp->private_data;
	struct ib_uverbs_completion_event_file *file = container_of(
		uobj, struct ib_uverbs_completion_event_file, uobj);
	struct ib_uverbs_event *entry, *tmp;

	spin_lock_irq(&file->ev_queue.lock);
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	file->ev_queue.is_closed = 1;
	spin_unlock_irq(&file->ev_queue.lock);

	uverbs_close_fd(filp);

	return 0;
}

const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_comp_event_read,
	.poll	 = ib_uverbs_comp_event_poll,
	.release = ib_uverbs_comp_event_close,
	.fasync	 = ib_uverbs_comp_event_fasync,
	.llseek	 = no_llseek,
};

static const struct file_operations uverbs_async_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_async_event_read,
	.poll	 = ib_uverbs_async_event_poll,
	.release = ib_uverbs_async_event_close,
	.fasync	 = ib_uverbs_async_event_fasync,
	.llseek	 = no_llseek,
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_queue   *ev_queue = cq_context;
	struct ib_ucq_object	       *uobj;
	struct ib_uverbs_event	       *entry;
	unsigned long			flags;

	if (!ev_queue)
		return;

	spin_lock_irqsave(&ev_queue->lock, flags);
	if (ev_queue->is_closed) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &ev_queue->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&ev_queue->lock, flags);

	wake_up_interruptible(&ev_queue->poll_wait);
	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
	if (file->async_file->ev_queue.is_closed) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter		     = counter;

	list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);

	wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
	kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	/* for XRC target QPs, check that the QP is live */
	if (!event->element.qp->uobject)
		return;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
						  struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
	file->async_file = NULL;
}

void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
	spin_lock_init(&ev_queue->lock);
	INIT_LIST_HEAD(&ev_queue->event_list);
	init_waitqueue_head(&ev_queue->poll_wait);
	ev_queue->is_closed   = 0;
	ev_queue->async_queue = NULL;
}

struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
					      struct ib_device  *ib_dev)
{
	struct ib_uverbs_async_event_file *ev_file;
	struct file *filp;

	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	ib_uverbs_init_event_queue(&ev_file->ev_queue);
	ev_file->uverbs_file = uverbs_file;
	kref_get(&ev_file->uverbs_file->ref);
	kref_init(&ev_file->ref);
	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		goto err_put_refs;

	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

	WARN_ON(uverbs_file->async_file);
	uverbs_file->async_file = ev_file;
	kref_get(&uverbs_file->async_file->ref);
	INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
			      ib_dev,
			      ib_uverbs_event_handler);
	ib_register_event_handler(&uverbs_file->event_handler);
	/* At this point the async event file is fully set up */

	return filp;

err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
	return filp;
}

static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
				bool extended)
{
	if (!extended)
		return ufile->uverbs_cmd_mask & BIT_ULL(command);

	return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
}

static bool verify_command_idx(u32 command, bool extended)
{
	if (extended)
		return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
		       uverbs_ex_cmd_table[command];

	return command < ARRAY_SIZE(uverbs_cmd_table) &&
	       uverbs_cmd_table[command];
}

static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
			   u32 *command, bool *extended)
{
	if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
				   IB_USER_VERBS_CMD_COMMAND_MASK))
		return -EINVAL;

	*command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
	*extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;

	if (!verify_command_idx(*command, *extended))
		return -EOPNOTSUPP;

	return 0;
}

static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
			  struct ib_uverbs_ex_cmd_hdr *ex_hdr,
			  size_t count, bool extended)
{
	if (extended) {
		count -= sizeof(*hdr) + sizeof(*ex_hdr);

		if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
			return -EINVAL;

		if (ex_hdr->cmd_hdr_reserved)
			return -EINVAL;

		if (ex_hdr->response) {
			if (!hdr->out_words && !ex_hdr->provider_out_words)
				return -EINVAL;

			if (!access_ok(VERIFY_WRITE,
				       u64_to_user_ptr(ex_hdr->response),
				       (hdr->out_words + ex_hdr->provider_out_words) * 8))
				return -EFAULT;
		} else {
			if (hdr->out_words || ex_hdr->provider_out_words)
				return -EINVAL;
		}

		return 0;
	}

	/* not an extended command */
	if (hdr->in_words * 4 != count)
		return -EINVAL;

	return 0;
}
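
/*
 * Worked sizing example for the checks above (an illustrative note, not
 * part of the original source). Legacy commands count everything,
 * including the 8-byte ib_uverbs_cmd_hdr, in 4-byte words: a write() of
 * 16 bytes total must carry hdr.in_words == 4. Extended commands exclude
 * both headers and count 8-byte words instead: for a 64-byte write(),
 * count - sizeof(hdr) - sizeof(ex_hdr) = 64 - 8 - 16 = 40, so
 * hdr.in_words + ex_hdr.provider_in_words must equal 5.
 */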

static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			     size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_uverbs_ex_cmd_hdr ex_hdr;
	struct ib_uverbs_cmd_hdr hdr;
	bool extended;
	int srcu_key;
	u32 command;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (count < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	ret = process_hdr(&hdr, &command, &extended);
	if (ret)
		return ret;

	if (extended) {
		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
			return -EINVAL;
		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
			return -EFAULT;
	}

	ret = verify_hdr(&hdr, &ex_hdr, count, extended);
	if (ret)
		return ret;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);

	if (!verify_command_mask(file, command, extended)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	buf += sizeof(hdr);

	if (!extended) {
		ret = uverbs_cmd_table[command](file, buf,
						hdr.in_words * 4,
						hdr.out_words * 4);
	} else {
		struct ib_udata ucore;
		struct ib_udata uhw;

		buf += sizeof(ex_hdr);

		ib_uverbs_init_udata_buf_or_null(&ucore, buf,
					u64_to_user_ptr(ex_hdr.response),
					hdr.in_words * 8, hdr.out_words * 8);

		ib_uverbs_init_udata_buf_or_null(&uhw,
					buf + ucore.inlen,
					u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
					ex_hdr.provider_in_words * 8,
					ex_hdr.provider_out_words * 8);

		ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
		ret = (ret) ? : count;
	}

out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}
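
/*
 * Illustrative userspace sketch of the legacy write() ABI consumed above
 * (an assumption for documentation purposes; real applications go through
 * libibverbs). A command is a struct ib_uverbs_cmd_hdr followed by the
 * command body, with in_words counting the whole buffer in 4-byte words:
 *
 *	struct ib_uverbs_get_context_resp resp;
 *	struct {
 *		struct ib_uverbs_cmd_hdr hdr;
 *		struct ib_uverbs_get_context cmd;
 *	} req = {
 *		.hdr = {
 *			.command   = IB_USER_VERBS_CMD_GET_CONTEXT,
 *			.in_words  = sizeof(req) / 4,
 *			.out_words = sizeof(resp) / 4,
 *		},
 *		.cmd = { .response = (uintptr_t)&resp },
 *	};
 *
 *	write(uverbs_fd, &req, sizeof(req));	// returns sizeof(req) on success
 */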

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_ucontext *ucontext;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ucontext = ib_uverbs_get_ucontext(file);
	if (IS_ERR(ucontext)) {
		ret = PTR_ERR(ucontext);
		goto out;
	}

	ret = ucontext->device->mmap(ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * Each time we map IO memory into user space this keeps track of the mapping.
 * When the device is hot-unplugged we 'zap' the mmaps in user space to point
 * to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
struct rdma_umap_priv {
	struct vm_area_struct *vma;
	struct list_head list;
};

static const struct vm_operations_struct rdma_umap_ops;

static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
				struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	vma->vm_private_data = priv;
	vma->vm_ops = &rdma_umap_ops;

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}

/*
 * The VMA has been dup'd, initialize the vm_private_data with a new tracking
 * struct
 */
static void rdma_umap_open(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *opriv = vma->vm_private_data;
	struct rdma_umap_priv *priv;

	if (!opriv)
		return;

	/* We are racing with disassociation */
	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
		goto out_zap;
	/*
	 * Disassociation already completed, the VMA should already be zapped.
	 */
	if (!ufile->ucontext)
		goto out_unlock;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		goto out_unlock;
	rdma_umap_priv_init(priv, vma);

	up_read(&ufile->hw_destroy_rwsem);
	return;

out_unlock:
	up_read(&ufile->hw_destroy_rwsem);
out_zap:
	/*
	 * We can't allow the VMA to be created with the actual IO pages, that
	 * would break our API contract, and it can't be stopped at this
	 * point, so zap it.
	 */
	vma->vm_private_data = NULL;
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

static void rdma_umap_close(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vma->vm_private_data;

	if (!priv)
		return;

	/*
	 * The vma holds a reference on the struct file that created it, which
	 * in turn means that the ib_uverbs_file is guaranteed to exist at
	 * this point.
	 */
	mutex_lock(&ufile->umap_lock);
	list_del(&priv->list);
	mutex_unlock(&ufile->umap_lock);
	kfree(priv);
}

static const struct vm_operations_struct rdma_umap_ops = {
	.open = rdma_umap_open,
	.close = rdma_umap_close,
};

static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
						 struct vm_area_struct *vma,
						 unsigned long size)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (vma->vm_end - vma->vm_start != size)
		return ERR_PTR(-EINVAL);

	/* The driver is using this wrong; this must be called from within ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return ERR_PTR(-EINVAL);
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return ERR_PTR(-ENOMEM);
	return priv;
}

/*
 * Map IO memory into a process. This is to be called by drivers as part of
 * their mmap() functions if they wish to send something like PCI-E BAR memory
 * to userspace.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot)
{
	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);

	if (IS_ERR(priv))
		return PTR_ERR(priv);

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
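
/*
 * Minimal sketch of a driver-side caller (illustrative; the driver name,
 * BAR offset and the my_dev type are assumptions, not from this file).
 * A driver's ucontext->device->mmap() method forwards a doorbell BAR page
 * to userspace through rdma_user_mmap_io():
 *
 *	static int my_drv_mmap(struct ib_ucontext *ucontext,
 *			       struct vm_area_struct *vma)
 *	{
 *		struct my_dev *dev = to_my_dev(ucontext->device);
 *		unsigned long pfn = dev->db_bar_phys >> PAGE_SHIFT;
 *
 *		return rdma_user_mmap_io(ucontext, vma, pfn, PAGE_SIZE,
 *					 pgprot_noncached(vma->vm_page_prot));
 *	}
 */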

/*
 * The page case is here for a slightly different reason: the driver expects
 * to be able to free the page it is sharing to user space when it destroys
 * its ucontext, which means we need to zap the user space references.
 *
 * We could handle this differently by providing an API to allocate a shared
 * page and then only freeing the shared page when the last ufile is
 * destroyed.
 */
int rdma_user_mmap_page(struct ib_ucontext *ucontext,
			struct vm_area_struct *vma, struct page *page,
			unsigned long size)
{
	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);

	if (IS_ERR(priv))
		return PTR_ERR(priv);

	if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
			    vma->vm_page_prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_page);
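
/*
 * Companion sketch for the shared-page case (an illustrative assumption,
 * as above): a driver that hands a kernel-allocated page, e.g. a doorbell
 * record, to userspace would do something like:
 *
 *	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	ret = rdma_user_mmap_page(ucontext, vma, page, PAGE_SIZE);
 *
 * The driver may then free the page when it destroys its ucontext, since
 * the user mapping is zapped via uverbs_user_mmap_disassociate() below.
 */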

void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
	struct rdma_umap_priv *priv, *next_priv;

	lockdep_assert_held(&ufile->hw_destroy_rwsem);

	while (1) {
		struct mm_struct *mm = NULL;

		/* Get an arbitrary mm pointer that hasn't been cleaned yet */
		mutex_lock(&ufile->umap_lock);
		if (!list_empty(&ufile->umaps)) {
			mm = list_first_entry(&ufile->umaps,
					      struct rdma_umap_priv, list)
				     ->vma->vm_mm;
			mmget(mm);
		}
		mutex_unlock(&ufile->umap_lock);
		if (!mm)
			return;

		/*
		 * The umap_lock is nested under mmap_sem since it is used
		 * within the vma_ops callbacks, so we have to clean the list
		 * one mm at a time to get the lock ordering right. Typically
		 * there will only be one mm, so no big deal.
		 */
		down_write(&mm->mmap_sem);
		mutex_lock(&ufile->umap_lock);
		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
					  list) {
			struct vm_area_struct *vma = priv->vma;

			if (vma->vm_mm != mm)
				continue;
			list_del_init(&priv->list);

			zap_vma_ptes(vma, vma->vm_start,
				     vma->vm_end - vma->vm_start);
			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
		}
		mutex_unlock(&ufile->umap_lock);
		up_write(&mm->mmap_sem);
		mmput(mm);
	}
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately fail with -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	get_device(&dev->dev);
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	/* If the IB device supports disassociating the ucontext, there is no
	 * hard dependency between the uverbs device and its low-level device.
	 */
	module_dependent = !(ib_dev->disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device	 = dev;
	kref_init(&file->ref);
	mutex_init(&file->ucontext_lock);

	spin_lock_init(&file->uobjects_lock);
	INIT_LIST_HEAD(&file->uobjects);
	init_rwsem(&file->hw_destroy_rwsem);
	mutex_init(&file->umap_lock);
	INIT_LIST_HEAD(&file->umaps);

	filp->private_data = file;
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
	file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;

	setup_ufile_idr_uobject(file);

	return nonseekable_open(inode, filp);

err_module:
	module_put(ib_dev->owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	put_device(&dev->dev);
	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);

	mutex_lock(&file->device->lists_mutex);
	list_del_init(&file->list);
	mutex_unlock(&file->device->lists_mutex);

	if (file->async_file)
		kref_put(&file->async_file->ref,
			 ib_uverbs_release_async_event_file);

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap	 = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static struct ib_client uverbs_client = {
	.name	= "uverbs",
	.add	= ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(ibdev);

static ssize_t abi_version_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(abi_version);

static struct attribute *ib_dev_attrs[] = {
	&dev_attr_abi_version.attr,
	&dev_attr_ibdev.attr,
	NULL,
};

static const struct attribute_group dev_attr_group = {
	.attrs = ib_dev_attrs,
};

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static int ib_uverbs_create_uapi(struct ib_device *device,
				 struct ib_uverbs_device *uverbs_dev)
{
	struct uverbs_api *uapi;

	uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
	if (IS_ERR(uapi))
		return PTR_ERR(uapi);

	uverbs_dev->uapi = uapi;
	return 0;
}

static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	if (!device->alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
	if (!uverbs_dev)
		return;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

	device_initialize(&uverbs_dev->dev);
	uverbs_dev->dev.class = uverbs_class;
	uverbs_dev->dev.parent = device->dev.parent;
	uverbs_dev->dev.release = ib_uverbs_release_dev;
	uverbs_dev->groups[0] = &dev_attr_group;
	uverbs_dev->dev.groups = uverbs_dev->groups;
	atomic_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
			       GFP_KERNEL);
	if (devnum < 0)
		goto err;
	uverbs_dev->devnum = devnum;
	if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
		base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
	else
		base = IB_UVERBS_BASE_DEV + devnum;

	if (ib_uverbs_create_uapi(device, uverbs_dev))
		goto err_uapi;

	uverbs_dev->dev.devt = base;
	dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);

	cdev_init(&uverbs_dev->cdev,
		  device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
	uverbs_dev->cdev.owner = THIS_MODULE;

	ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
	if (ret)
		goto err_uapi;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);
	return;

err_uapi:
	ida_free(&uverbs_ida, devnum);
err:
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	put_device(&uverbs_dev->dev);
	return;
}

static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_async_event_file *event_file;
	struct ib_event event;

	/* Cause pending and running commands to terminate */
	uverbs_disassociate_api_pre(uverbs_dev);
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		list_del_init(&file->list);
		kref_get(&file->ref);

		/* We must release the mutex before going ahead and calling
		 * uverbs_destroy_ufile_hw, as it might end up indirectly
		 * calling uverbs_close, for example due to freeing the
		 * resources (e.g mmput).
		 */
		mutex_unlock(&uverbs_dev->lists_mutex);

		ib_uverbs_event_handler(&file->event_handler, &event);
		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
		kref_put(&file->ref, ib_uverbs_release_file);

		mutex_lock(&uverbs_dev->lists_mutex);
	}

	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_async_event_file,
					      list);
		spin_lock_irq(&event_file->ev_queue.lock);
		event_file->ev_queue.is_closed = 1;
		spin_unlock_irq(&event_file->ev_queue.lock);

		list_del(&event_file->list);
		ib_unregister_event_handler(
			&event_file->uverbs_file->event_handler);
		event_file->uverbs_file->event_handler.device =
			NULL;

		wake_up_interruptible(&event_file->ev_queue.poll_wait);
		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);

	uverbs_disassociate_api(uverbs_dev->uapi);
}

static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	if (!uverbs_dev)
		return;

	cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
	ida_free(&uverbs_ida, uverbs_dev->devnum);

	if (device->disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see an EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);

	put_device(&uverbs_dev->dev);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
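
/*
 * Illustrative note (not from the original source): with this devnode
 * callback, a device named "uverbs0" shows up as /dev/infiniband/uverbs0
 * with mode 0666, i.e. world read/write, so unprivileged processes can
 * open the verbs interface directly.
 */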

static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
				     IB_UVERBS_NUM_FIXED_MINOR,
				     "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register device number\n");
		goto out;
	}

	ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
				  IB_UVERBS_NUM_DYNAMIC_MINOR,
				  "infiniband_verbs");
	if (ret) {
		pr_err("couldn't register dynamic device number\n");
		goto out_alloc;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		pr_err("user_verbs: couldn't register client\n");
		goto out_class;
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);

out_alloc:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);