drm/amdkfd: Fix handling of return code of dma_buf_get
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
38 #include "kfd_priv.h"
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
41 #include "amdgpu_amdkfd.h"
42
43 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
44 static int kfd_open(struct inode *, struct file *);
45 static int kfd_mmap(struct file *, struct vm_area_struct *);
46
47 static const char kfd_dev_name[] = "kfd";
48
49 static const struct file_operations kfd_fops = {
50         .owner = THIS_MODULE,
51         .unlocked_ioctl = kfd_ioctl,
52         .compat_ioctl = kfd_ioctl,
53         .open = kfd_open,
54         .mmap = kfd_mmap,
55 };
56
57 static int kfd_char_dev_major = -1;
58 static struct class *kfd_class;
59 struct device *kfd_device;
60
61 int kfd_chardev_init(void)
62 {
63         int err = 0;
64
65         kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
66         err = kfd_char_dev_major;
67         if (err < 0)
68                 goto err_register_chrdev;
69
70         kfd_class = class_create(THIS_MODULE, kfd_dev_name);
71         err = PTR_ERR(kfd_class);
72         if (IS_ERR(kfd_class))
73                 goto err_class_create;
74
75         kfd_device = device_create(kfd_class, NULL,
76                                         MKDEV(kfd_char_dev_major, 0),
77                                         NULL, kfd_dev_name);
78         err = PTR_ERR(kfd_device);
79         if (IS_ERR(kfd_device))
80                 goto err_device_create;
81
82         return 0;
83
84 err_device_create:
85         class_destroy(kfd_class);
86 err_class_create:
87         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
88 err_register_chrdev:
89         return err;
90 }
91
92 void kfd_chardev_exit(void)
93 {
94         device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
95         class_destroy(kfd_class);
96         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
97 }
98
99 struct device *kfd_chardev(void)
100 {
101         return kfd_device;
102 }
103
104
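/*
 * Opening /dev/kfd creates (or looks up) the per-process KFD context.
 * 32-bit clients are rejected, and the open fails with -EAGAIN while
 * KFD is locked (e.g. during a GPU reset).
 */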
105 static int kfd_open(struct inode *inode, struct file *filep)
106 {
107         struct kfd_process *process;
108         bool is_32bit_user_mode;
109
110         if (iminor(inode) != 0)
111                 return -ENODEV;
112
113         is_32bit_user_mode = in_compat_syscall();
114
115         if (is_32bit_user_mode) {
116                 dev_warn(kfd_device,
117                         "Process %d (32-bit) failed to open /dev/kfd\n"
118                         "32-bit processes are not supported by amdkfd\n",
119                         current->pid);
120                 return -EPERM;
121         }
122
123         process = kfd_create_process(filep);
124         if (IS_ERR(process))
125                 return PTR_ERR(process);
126
127         if (kfd_is_locked())
128                 return -EAGAIN;
129
130         dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
131                 process->pasid, process->is_32bit_user_mode);
132
133         return 0;
134 }
135
136 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
137                                         void *data)
138 {
139         struct kfd_ioctl_get_version_args *args = data;
140
141         args->major_version = KFD_IOCTL_MAJOR_VERSION;
142         args->minor_version = KFD_IOCTL_MINOR_VERSION;
143
144         return 0;
145 }
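/*
 * A minimal user-space sketch of this ioctl (illustrative only, assuming
 * <fcntl.h>, <sys/ioctl.h>, <stdio.h> and the uapi header
 * <linux/kfd_ioctl.h> are available and /dev/kfd can be opened):
 *
 *	struct kfd_ioctl_get_version_args args = {0};
 *	int fd = open("/dev/kfd", O_RDWR);
 *
 *	if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *		printf("KFD ioctl interface %u.%u\n",
 *		       args.major_version, args.minor_version);
 */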
146
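/*
 * Validate the user-supplied queue creation arguments and translate them
 * into a queue_properties structure. User pointers are only range-checked
 * with access_ok() here; their contents are consumed by user mode and the
 * hardware.
 */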
147 static int set_queue_properties_from_user(struct queue_properties *q_properties,
148                                 struct kfd_ioctl_create_queue_args *args)
149 {
150         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
151                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
152                 return -EINVAL;
153         }
154
155         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
156                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
157                 return -EINVAL;
158         }
159
160         if ((args->ring_base_address) &&
161                 (!access_ok(VERIFY_WRITE,
162                         (const void __user *) args->ring_base_address,
163                         sizeof(uint64_t)))) {
164                 pr_err("Can't access ring base address\n");
165                 return -EFAULT;
166         }
167
168         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
169                 pr_err("Ring size must be a power of 2 or 0\n");
170                 return -EINVAL;
171         }
172
173         if (!access_ok(VERIFY_WRITE,
174                         (const void __user *) args->read_pointer_address,
175                         sizeof(uint32_t))) {
176                 pr_err("Can't access read pointer\n");
177                 return -EFAULT;
178         }
179
180         if (!access_ok(VERIFY_WRITE,
181                         (const void __user *) args->write_pointer_address,
182                         sizeof(uint32_t))) {
183                 pr_err("Can't access write pointer\n");
184                 return -EFAULT;
185         }
186
187         if (args->eop_buffer_address &&
188                 !access_ok(VERIFY_WRITE,
189                         (const void __user *) args->eop_buffer_address,
190                         sizeof(uint32_t))) {
191                 pr_debug("Can't access eop buffer");
192                 return -EFAULT;
193         }
194
195         if (args->ctx_save_restore_address &&
196                 !access_ok(VERIFY_WRITE,
197                         (const void __user *) args->ctx_save_restore_address,
198                         sizeof(uint32_t))) {
199                 pr_debug("Can't access ctx save restore buffer");
200                 return -EFAULT;
201         }
202
203         q_properties->is_interop = false;
204         q_properties->queue_percent = args->queue_percentage;
205         q_properties->priority = args->queue_priority;
206         q_properties->queue_address = args->ring_base_address;
207         q_properties->queue_size = args->ring_size;
208         q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
209         q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
210         q_properties->eop_ring_buffer_address = args->eop_buffer_address;
211         q_properties->eop_ring_buffer_size = args->eop_buffer_size;
212         q_properties->ctx_save_restore_area_address =
213                         args->ctx_save_restore_address;
214         q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
215         q_properties->ctl_stack_size = args->ctl_stack_size;
216         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
217                 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
218                 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
219         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
220                 q_properties->type = KFD_QUEUE_TYPE_SDMA;
221         else
222                 return -ENOTSUPP;
223
224         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
225                 q_properties->format = KFD_QUEUE_FORMAT_AQL;
226         else
227                 q_properties->format = KFD_QUEUE_FORMAT_PM4;
228
229         pr_debug("Queue Percentage: %d, %d\n",
230                         q_properties->queue_percent, args->queue_percentage);
231
232         pr_debug("Queue Priority: %d, %d\n",
233                         q_properties->priority, args->queue_priority);
234
235         pr_debug("Queue Address: 0x%llX, 0x%llX\n",
236                         q_properties->queue_address, args->ring_base_address);
237
238         pr_debug("Queue Size: 0x%llX, %u\n",
239                         q_properties->queue_size, args->ring_size);
240
241         pr_debug("Queue r/w Pointers: %px, %px\n",
242                         q_properties->read_ptr,
243                         q_properties->write_ptr);
244
245         pr_debug("Queue Format: %d\n", q_properties->format);
246
247         pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
248
249         pr_debug("Queue CTX save area: 0x%llX\n",
250                         q_properties->ctx_save_restore_area_address);
251
252         return 0;
253 }
254
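/*
 * Handler for AMDKFD_IOC_CREATE_QUEUE: bind the process to the selected
 * GPU if necessary, create a compute or SDMA queue through the process
 * queue manager, and return the new queue id together with a doorbell
 * offset for mmap.
 */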
255 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
256                                         void *data)
257 {
258         struct kfd_ioctl_create_queue_args *args = data;
259         struct kfd_dev *dev;
260         int err = 0;
261         unsigned int queue_id;
262         struct kfd_process_device *pdd;
263         struct queue_properties q_properties;
264
265         memset(&q_properties, 0, sizeof(struct queue_properties));
266
267         pr_debug("Creating queue ioctl\n");
268
269         err = set_queue_properties_from_user(&q_properties, args);
270         if (err)
271                 return err;
272
273         pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
274         dev = kfd_device_by_id(args->gpu_id);
275         if (!dev) {
276                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
277                 return -EINVAL;
278         }
279
280         mutex_lock(&p->mutex);
281
282         pdd = kfd_bind_process_to_device(dev, p);
283         if (IS_ERR(pdd)) {
284                 err = -ESRCH;
285                 goto err_bind_process;
286         }
287
288         pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
289                         p->pasid,
290                         dev->id);
291
292         err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
293         if (err != 0)
294                 goto err_create_queue;
295
296         args->queue_id = queue_id;
297
298
299         /* Return an encoded (mmap type | gpu_id) doorbell offset for mmap usage */
300         args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
301         args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
302         args->doorbell_offset <<= PAGE_SHIFT;
303         if (KFD_IS_SOC15(dev->device_info->asic_family))
304                 /* On SOC15 ASICs, doorbell allocation must be
305                  * per-device, and independent from the per-process
306                  * queue_id. Return the doorbell offset within the
307                  * doorbell aperture to user mode.
308                  */
309                 args->doorbell_offset |= q_properties.doorbell_off;
310
311         mutex_unlock(&p->mutex);
312
313         pr_debug("Queue id %d was created successfully\n", args->queue_id);
314
315         pr_debug("Ring buffer address == 0x%016llX\n",
316                         args->ring_base_address);
317
318         pr_debug("Read ptr address    == 0x%016llX\n",
319                         args->read_pointer_address);
320
321         pr_debug("Write ptr address   == 0x%016llX\n",
322                         args->write_pointer_address);
323
324         return 0;
325
326 err_create_queue:
327 err_bind_process:
328         mutex_unlock(&p->mutex);
329         return err;
330 }
331
332 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
333                                         void *data)
334 {
335         int retval;
336         struct kfd_ioctl_destroy_queue_args *args = data;
337
338         pr_debug("Destroying queue id %d for pasid %d\n",
339                                 args->queue_id,
340                                 p->pasid);
341
342         mutex_lock(&p->mutex);
343
344         retval = pqm_destroy_queue(&p->pqm, args->queue_id);
345
346         mutex_unlock(&p->mutex);
347         return retval;
348 }
349
350 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
351                                         void *data)
352 {
353         int retval;
354         struct kfd_ioctl_update_queue_args *args = data;
355         struct queue_properties properties;
356
357         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
358                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
359                 return -EINVAL;
360         }
361
362         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
363                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
364                 return -EINVAL;
365         }
366
367         if ((args->ring_base_address) &&
368                 (!access_ok(VERIFY_WRITE,
369                         (const void __user *) args->ring_base_address,
370                         sizeof(uint64_t)))) {
371                 pr_err("Can't access ring base address\n");
372                 return -EFAULT;
373         }
374
375         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
376                 pr_err("Ring size must be a power of 2 or 0\n");
377                 return -EINVAL;
378         }
379
380         properties.queue_address = args->ring_base_address;
381         properties.queue_size = args->ring_size;
382         properties.queue_percent = args->queue_percentage;
383         properties.priority = args->queue_priority;
384
385         pr_debug("Updating queue id %d for pasid %d\n",
386                         args->queue_id, p->pasid);
387
388         mutex_lock(&p->mutex);
389
390         retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
391
392         mutex_unlock(&p->mutex);
393
394         return retval;
395 }
396
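/*
 * Handler for AMDKFD_IOC_SET_CU_MASK: copy a per-queue compute unit mask
 * from user space in 32-bit chunks (capped at max_num_cus bits) and apply
 * it to the given queue via the process queue manager.
 */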
397 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
398                                         void *data)
399 {
400         int retval;
401         const int max_num_cus = 1024;
402         struct kfd_ioctl_set_cu_mask_args *args = data;
403         struct queue_properties properties;
404         uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
405         size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
406
407         if ((args->num_cu_mask % 32) != 0) {
408                 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
409                                 args->num_cu_mask);
410                 return -EINVAL;
411         }
412
413         properties.cu_mask_count = args->num_cu_mask;
414         if (properties.cu_mask_count == 0) {
415                 pr_debug("CU mask cannot be 0");
416                 return -EINVAL;
417         }
418
419         /* To prevent an unreasonably large CU mask size, set an arbitrary
420          * limit of max_num_cus bits.  We can then just drop any CU mask bits
421          * limit of max_num_cus bits. Any mask bits beyond that limit are
422          * dropped and only the first max_num_cus bits are used.
423         if (properties.cu_mask_count > max_num_cus) {
424                 pr_debug("CU mask cannot be greater than 1024 bits");
425                 properties.cu_mask_count = max_num_cus;
426                 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
427         }
428
429         properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
430         if (!properties.cu_mask)
431                 return -ENOMEM;
432
433         retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
434         if (retval) {
435                 pr_debug("Could not copy CU mask from userspace");
436                 kfree(properties.cu_mask);
437                 return -EFAULT;
438         }
439
440         mutex_lock(&p->mutex);
441
442         retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
443
444         mutex_unlock(&p->mutex);
445
446         if (retval)
447                 kfree(properties.cu_mask);
448
449         return retval;
450 }
451
452 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
453                                           struct kfd_process *p, void *data)
454 {
455         struct kfd_ioctl_get_queue_wave_state_args *args = data;
456         int r;
457
458         mutex_lock(&p->mutex);
459
460         r = pqm_get_wave_state(&p->pqm, args->queue_id,
461                                (void __user *)args->ctl_stack_address,
462                                &args->ctl_stack_used_size,
463                                &args->save_area_used_size);
464
465         mutex_unlock(&p->mutex);
466
467         return r;
468 }
469
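/*
 * Handler for AMDKFD_IOC_SET_MEMORY_POLICY: select the default and
 * alternate cache coherency policy for this process on one device,
 * together with the alternate aperture that the alternate policy
 * applies to.
 */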
470 static int kfd_ioctl_set_memory_policy(struct file *filep,
471                                         struct kfd_process *p, void *data)
472 {
473         struct kfd_ioctl_set_memory_policy_args *args = data;
474         struct kfd_dev *dev;
475         int err = 0;
476         struct kfd_process_device *pdd;
477         enum cache_policy default_policy, alternate_policy;
478
479         if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
480             && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
481                 return -EINVAL;
482         }
483
484         if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
485             && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
486                 return -EINVAL;
487         }
488
489         dev = kfd_device_by_id(args->gpu_id);
490         if (!dev)
491                 return -EINVAL;
492
493         mutex_lock(&p->mutex);
494
495         pdd = kfd_bind_process_to_device(dev, p);
496         if (IS_ERR(pdd)) {
497                 err = -ESRCH;
498                 goto out;
499         }
500
501         default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
502                          ? cache_policy_coherent : cache_policy_noncoherent;
503
504         alternate_policy =
505                 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
506                    ? cache_policy_coherent : cache_policy_noncoherent;
507
508         if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
509                                 &pdd->qpd,
510                                 default_policy,
511                                 alternate_policy,
512                                 (void __user *)args->alternate_aperture_base,
513                                 args->alternate_aperture_size))
514                 err = -EINVAL;
515
516 out:
517         mutex_unlock(&p->mutex);
518
519         return err;
520 }
521
522 static int kfd_ioctl_set_trap_handler(struct file *filep,
523                                         struct kfd_process *p, void *data)
524 {
525         struct kfd_ioctl_set_trap_handler_args *args = data;
526         struct kfd_dev *dev;
527         int err = 0;
528         struct kfd_process_device *pdd;
529
530         dev = kfd_device_by_id(args->gpu_id);
531         if (dev == NULL)
532                 return -EINVAL;
533
534         mutex_lock(&p->mutex);
535
536         pdd = kfd_bind_process_to_device(dev, p);
537         if (IS_ERR(pdd)) {
538                 err = -ESRCH;
539                 goto out;
540         }
541
542         if (dev->dqm->ops.set_trap_handler(dev->dqm,
543                                         &pdd->qpd,
544                                         args->tba_addr,
545                                         args->tma_addr))
546                 err = -EINVAL;
547
548 out:
549         mutex_unlock(&p->mutex);
550
551         return err;
552 }
553
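/*
 * Handler for AMDKFD_IOC_DBG_REGISTER: attach the HSA debug manager to
 * this process on the selected device. Only one debugger may be
 * registered per device, and Carrizo is not supported.
 */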
554 static int kfd_ioctl_dbg_register(struct file *filep,
555                                 struct kfd_process *p, void *data)
556 {
557         struct kfd_ioctl_dbg_register_args *args = data;
558         struct kfd_dev *dev;
559         struct kfd_dbgmgr *dbgmgr_ptr;
560         struct kfd_process_device *pdd;
561         bool create_ok;
562         long status = 0;
563
564         dev = kfd_device_by_id(args->gpu_id);
565         if (!dev)
566                 return -EINVAL;
567
568         if (dev->device_info->asic_family == CHIP_CARRIZO) {
569                 pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
570                 return -EINVAL;
571         }
572
573         mutex_lock(&p->mutex);
574         mutex_lock(kfd_get_dbgmgr_mutex());
575
576         /*
577          * make sure that we have a pdd, in case no queue has been created
578          * for this process on this device yet
579          */
580         pdd = kfd_bind_process_to_device(dev, p);
581         if (IS_ERR(pdd)) {
582                 status = PTR_ERR(pdd);
583                 goto out;
584         }
585
586         if (!dev->dbgmgr) {
587                 /* In case of a legal call, we have no dbgmgr yet */
588                 create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
589                 if (create_ok) {
590                         status = kfd_dbgmgr_register(dbgmgr_ptr, p);
591                         if (status != 0)
592                                 kfd_dbgmgr_destroy(dbgmgr_ptr);
593                         else
594                                 dev->dbgmgr = dbgmgr_ptr;
595                 }
596         } else {
597                 pr_debug("debugger already registered\n");
598                 status = -EINVAL;
599         }
600
601 out:
602         mutex_unlock(kfd_get_dbgmgr_mutex());
603         mutex_unlock(&p->mutex);
604
605         return status;
606 }
607
608 static int kfd_ioctl_dbg_unregister(struct file *filep,
609                                 struct kfd_process *p, void *data)
610 {
611         struct kfd_ioctl_dbg_unregister_args *args = data;
612         struct kfd_dev *dev;
613         long status;
614
615         dev = kfd_device_by_id(args->gpu_id);
616         if (!dev || !dev->dbgmgr)
617                 return -EINVAL;
618
619         if (dev->device_info->asic_family == CHIP_CARRIZO) {
620                 pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
621                 return -EINVAL;
622         }
623
624         mutex_lock(kfd_get_dbgmgr_mutex());
625
626         status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
627         if (!status) {
628                 kfd_dbgmgr_destroy(dev->dbgmgr);
629                 dev->dbgmgr = NULL;
630         }
631
632         mutex_unlock(kfd_get_dbgmgr_mutex());
633
634         return status;
635 }
636
637 /*
638  * Parse and generate a variable size data structure for address watch.
639  * The total buffer size and the number of watch points are limited in
640  * order to prevent kernel abuse (this has no bearing on the much smaller
641  * HW limitation, which is enforced by the dbgdev module).
642  * Note also that the watch addresses themselves are not copied from
643  * user space, since they are written into the HW as user mode values.
644  *
645  */
646 static int kfd_ioctl_dbg_address_watch(struct file *filep,
647                                         struct kfd_process *p, void *data)
648 {
649         struct kfd_ioctl_dbg_address_watch_args *args = data;
650         struct kfd_dev *dev;
651         struct dbg_address_watch_info aw_info;
652         unsigned char *args_buff;
653         long status;
654         void __user *cmd_from_user;
655         uint64_t watch_mask_value = 0;
656         unsigned int args_idx = 0;
657
658         memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
659
660         dev = kfd_device_by_id(args->gpu_id);
661         if (!dev)
662                 return -EINVAL;
663
664         if (dev->device_info->asic_family == CHIP_CARRIZO) {
665                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
666                 return -EINVAL;
667         }
668
669         cmd_from_user = (void __user *) args->content_ptr;
670
671         /* Validate arguments */
672
673         if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
674                 (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
675                 (cmd_from_user == NULL))
676                 return -EINVAL;
677
678         /* this is the actual buffer to work with */
679         args_buff = memdup_user(cmd_from_user,
680                                 args->buf_size_in_bytes - sizeof(*args));
681         if (IS_ERR(args_buff))
682                 return PTR_ERR(args_buff);
683
684         aw_info.process = p;
685
686         aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
687         args_idx += sizeof(aw_info.num_watch_points);
688
689         aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
690         args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
691
692         /*
693          * set watch address base pointer to point on the array base
694          * within args_buff
695          */
696         aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
697
698         /* skip over the addresses buffer */
699         args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
700
701         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
702                 status = -EINVAL;
703                 goto out;
704         }
705
706         watch_mask_value = (uint64_t) args_buff[args_idx];
707
708         if (watch_mask_value > 0) {
709                 /*
710                  * There is an array of masks.
711                  * set watch mask base pointer to point on the array base
712                  * within args_buff
713                  */
714                 aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
715
716                 /* skip over the masks buffer */
717                 args_idx += sizeof(aw_info.watch_mask) *
718                                 aw_info.num_watch_points;
719         } else {
720                 /* just the NULL mask, set to NULL and skip over it */
721                 aw_info.watch_mask = NULL;
722                 args_idx += sizeof(aw_info.watch_mask);
723         }
724
725         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
726                 status = -EINVAL;
727                 goto out;
728         }
729
730         /* Currently HSA Event is not supported for DBG */
731         aw_info.watch_event = NULL;
732
733         mutex_lock(kfd_get_dbgmgr_mutex());
734
735         status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
736
737         mutex_unlock(kfd_get_dbgmgr_mutex());
738
739 out:
740         kfree(args_buff);
741
742         return status;
743 }
744
745 /* Parse and generate fixed size data structure for wave control */
746 static int kfd_ioctl_dbg_wave_control(struct file *filep,
747                                         struct kfd_process *p, void *data)
748 {
749         struct kfd_ioctl_dbg_wave_control_args *args = data;
750         struct kfd_dev *dev;
751         struct dbg_wave_control_info wac_info;
752         unsigned char *args_buff;
753         uint32_t computed_buff_size;
754         long status;
755         void __user *cmd_from_user;
756         unsigned int args_idx = 0;
757
758         memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
759
760         /* we use compact form, independent of the packing attribute value */
761         computed_buff_size = sizeof(*args) +
762                                 sizeof(wac_info.mode) +
763                                 sizeof(wac_info.operand) +
764                                 sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
765                                 sizeof(wac_info.dbgWave_msg.MemoryVA) +
766                                 sizeof(wac_info.trapId);
767
768         dev = kfd_device_by_id(args->gpu_id);
769         if (!dev)
770                 return -EINVAL;
771
772         if (dev->device_info->asic_family == CHIP_CARRIZO) {
773                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
774                 return -EINVAL;
775         }
776
777         /* input size must match the computed "compact" size */
778         if (args->buf_size_in_bytes != computed_buff_size) {
779                 pr_debug("size mismatch, computed : actual %u : %u\n",
780                                 args->buf_size_in_bytes, computed_buff_size);
781                 return -EINVAL;
782         }
783
784         cmd_from_user = (void __user *) args->content_ptr;
785
786         if (cmd_from_user == NULL)
787                 return -EINVAL;
788
789         /* copy the entire buffer from user */
790
791         args_buff = memdup_user(cmd_from_user,
792                                 args->buf_size_in_bytes - sizeof(*args));
793         if (IS_ERR(args_buff))
794                 return PTR_ERR(args_buff);
795
796         /* move ptr to the start of the "pay-load" area */
797         wac_info.process = p;
798
799         wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
800         args_idx += sizeof(wac_info.operand);
801
802         wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
803         args_idx += sizeof(wac_info.mode);
804
805         wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
806         args_idx += sizeof(wac_info.trapId);
807
808         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
809                                         *((uint32_t *)(&args_buff[args_idx]));
810         wac_info.dbgWave_msg.MemoryVA = NULL;
811
812         mutex_lock(kfd_get_dbgmgr_mutex());
813
814         pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
815                         wac_info.process, wac_info.operand,
816                         wac_info.mode, wac_info.trapId,
817                         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
818
819         status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
820
821         pr_debug("Returned status of dbg manager is %ld\n", status);
822
823         mutex_unlock(kfd_get_dbgmgr_mutex());
824
825         kfree(args_buff);
826
827         return status;
828 }
829
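/*
 * Handler for AMDKFD_IOC_GET_CLOCK_COUNTERS: return a snapshot of the GPU
 * clock counter together with the CPU raw-monotonic and boot-time clocks
 * in nanoseconds (hence the reported 1 GHz system clock frequency).
 */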
830 static int kfd_ioctl_get_clock_counters(struct file *filep,
831                                 struct kfd_process *p, void *data)
832 {
833         struct kfd_ioctl_get_clock_counters_args *args = data;
834         struct kfd_dev *dev;
835
836         dev = kfd_device_by_id(args->gpu_id);
837         if (dev)
838                 /* Reading GPU clock counter from KGD */
839                 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
840         else
841                 /* Node without GPU resource */
842                 args->gpu_clock_counter = 0;
843
844         /* No access to rdtsc. Using raw monotonic time */
845         args->cpu_clock_counter = ktime_get_raw_ns();
846         args->system_clock_counter = ktime_get_boot_ns();
847
848         /* Since the counter is in nano-seconds we use 1GHz frequency */
849         args->system_clock_freq = 1000000000;
850
851         return 0;
852 }
853
854
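/*
 * Handler for AMDKFD_IOC_GET_PROCESS_APERTURES: report the LDS, GPUVM and
 * scratch aperture ranges for every device the process is bound to, up to
 * NUM_OF_SUPPORTED_GPUS entries in the fixed-size args structure.
 */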
855 static int kfd_ioctl_get_process_apertures(struct file *filp,
856                                 struct kfd_process *p, void *data)
857 {
858         struct kfd_ioctl_get_process_apertures_args *args = data;
859         struct kfd_process_device_apertures *pAperture;
860         struct kfd_process_device *pdd;
861
862         dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
863
864         args->num_of_nodes = 0;
865
866         mutex_lock(&p->mutex);
867
868         /* if the process-device list isn't empty */
869         if (kfd_has_process_device_data(p)) {
870                 /* Run over all pdd of the process */
871                 pdd = kfd_get_first_process_device_data(p);
872                 do {
873                         pAperture =
874                                 &args->process_apertures[args->num_of_nodes];
875                         pAperture->gpu_id = pdd->dev->id;
876                         pAperture->lds_base = pdd->lds_base;
877                         pAperture->lds_limit = pdd->lds_limit;
878                         pAperture->gpuvm_base = pdd->gpuvm_base;
879                         pAperture->gpuvm_limit = pdd->gpuvm_limit;
880                         pAperture->scratch_base = pdd->scratch_base;
881                         pAperture->scratch_limit = pdd->scratch_limit;
882
883                         dev_dbg(kfd_device,
884                                 "node id %u\n", args->num_of_nodes);
885                         dev_dbg(kfd_device,
886                                 "gpu id %u\n", pdd->dev->id);
887                         dev_dbg(kfd_device,
888                                 "lds_base %llX\n", pdd->lds_base);
889                         dev_dbg(kfd_device,
890                                 "lds_limit %llX\n", pdd->lds_limit);
891                         dev_dbg(kfd_device,
892                                 "gpuvm_base %llX\n", pdd->gpuvm_base);
893                         dev_dbg(kfd_device,
894                                 "gpuvm_limit %llX\n", pdd->gpuvm_limit);
895                         dev_dbg(kfd_device,
896                                 "scratch_base %llX\n", pdd->scratch_base);
897                         dev_dbg(kfd_device,
898                                 "scratch_limit %llX\n", pdd->scratch_limit);
899
900                         args->num_of_nodes++;
901
902                         pdd = kfd_get_next_process_device_data(p, pdd);
903                 } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
904         }
905
906         mutex_unlock(&p->mutex);
907
908         return 0;
909 }
910
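/*
 * Handler for AMDKFD_IOC_GET_PROCESS_APERTURES_NEW: same information as
 * above, but copied into a user-allocated array. Calling with
 * num_of_nodes == 0 returns the number of nodes so that user space can
 * size its buffer.
 */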
911 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
912                                 struct kfd_process *p, void *data)
913 {
914         struct kfd_ioctl_get_process_apertures_new_args *args = data;
915         struct kfd_process_device_apertures *pa;
916         struct kfd_process_device *pdd;
917         uint32_t nodes = 0;
918         int ret;
919
920         dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
921
922         if (args->num_of_nodes == 0) {
923                 /* Return number of nodes, so that user space can allocate
924                  * sufficient memory
925                  */
926                 mutex_lock(&p->mutex);
927
928                 if (!kfd_has_process_device_data(p))
929                         goto out_unlock;
930
931                 /* Run over all pdd of the process */
932                 pdd = kfd_get_first_process_device_data(p);
933                 do {
934                         args->num_of_nodes++;
935                         pdd = kfd_get_next_process_device_data(p, pdd);
936                 } while (pdd);
937
938                 goto out_unlock;
939         }
940
941         /* Fill in process-aperture information for all available
942          * nodes, but not more than args->num_of_nodes as that is
943          * the amount of memory allocated by user
944          */
945         pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
946                                 args->num_of_nodes), GFP_KERNEL);
947         if (!pa)
948                 return -ENOMEM;
949
950         mutex_lock(&p->mutex);
951
952         if (!kfd_has_process_device_data(p)) {
953                 args->num_of_nodes = 0;
954                 kfree(pa);
955                 goto out_unlock;
956         }
957
958         /* Run over all pdd of the process */
959         pdd = kfd_get_first_process_device_data(p);
960         do {
961                 pa[nodes].gpu_id = pdd->dev->id;
962                 pa[nodes].lds_base = pdd->lds_base;
963                 pa[nodes].lds_limit = pdd->lds_limit;
964                 pa[nodes].gpuvm_base = pdd->gpuvm_base;
965                 pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
966                 pa[nodes].scratch_base = pdd->scratch_base;
967                 pa[nodes].scratch_limit = pdd->scratch_limit;
968
969                 dev_dbg(kfd_device,
970                         "gpu id %u\n", pdd->dev->id);
971                 dev_dbg(kfd_device,
972                         "lds_base %llX\n", pdd->lds_base);
973                 dev_dbg(kfd_device,
974                         "lds_limit %llX\n", pdd->lds_limit);
975                 dev_dbg(kfd_device,
976                         "gpuvm_base %llX\n", pdd->gpuvm_base);
977                 dev_dbg(kfd_device,
978                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
979                 dev_dbg(kfd_device,
980                         "scratch_base %llX\n", pdd->scratch_base);
981                 dev_dbg(kfd_device,
982                         "scratch_limit %llX\n", pdd->scratch_limit);
983                 nodes++;
984
985                 pdd = kfd_get_next_process_device_data(p, pdd);
986         } while (pdd && (nodes < args->num_of_nodes));
987         mutex_unlock(&p->mutex);
988
989         args->num_of_nodes = nodes;
990         ret = copy_to_user(
991                         (void __user *)args->kfd_process_device_apertures_ptr,
992                         pa,
993                         (nodes * sizeof(struct kfd_process_device_apertures)));
994         kfree(pa);
995         return ret ? -EFAULT : 0;
996
997 out_unlock:
998         mutex_unlock(&p->mutex);
999         return 0;
1000 }
1001
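/*
 * Handler for AMDKFD_IOC_CREATE_EVENT: allocate an event for the process
 * and return its id, trigger data, mmap offset and signal slot index.
 * For dGPUs the first call also registers the user-allocated event page
 * passed in through event_page_offset.
 */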
1002 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1003                                         void *data)
1004 {
1005         struct kfd_ioctl_create_event_args *args = data;
1006         int err;
1007
1008         /* For dGPUs the event page is allocated in user mode. The
1009          * handle is passed to KFD with the first call to this IOCTL
1010          * through the event_page_offset field.
1011          */
1012         if (args->event_page_offset) {
1013                 struct kfd_dev *kfd;
1014                 struct kfd_process_device *pdd;
1015                 void *mem, *kern_addr;
1016                 uint64_t size;
1017
1018                 if (p->signal_page) {
1019                         pr_err("Event page is already set\n");
1020                         return -EINVAL;
1021                 }
1022
1023                 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1024                 if (!kfd) {
1025                         pr_err("Getting device by id failed in %s\n", __func__);
1026                         return -EINVAL;
1027                 }
1028
1029                 mutex_lock(&p->mutex);
1030                 pdd = kfd_bind_process_to_device(kfd, p);
1031                 if (IS_ERR(pdd)) {
1032                         err = PTR_ERR(pdd);
1033                         goto out_unlock;
1034                 }
1035
1036                 mem = kfd_process_device_translate_handle(pdd,
1037                                 GET_IDR_HANDLE(args->event_page_offset));
1038                 if (!mem) {
1039                         pr_err("Can't find BO, offset is 0x%llx\n",
1040                                args->event_page_offset);
1041                         err = -EINVAL;
1042                         goto out_unlock;
1043                 }
1044                 mutex_unlock(&p->mutex);
1045
1046                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1047                                                 mem, &kern_addr, &size);
1048                 if (err) {
1049                         pr_err("Failed to map event page to kernel\n");
1050                         return err;
1051                 }
1052
1053                 err = kfd_event_page_set(p, kern_addr, size);
1054                 if (err) {
1055                         pr_err("Failed to set event page\n");
1056                         return err;
1057                 }
1058         }
1059
1060         err = kfd_event_create(filp, p, args->event_type,
1061                                 args->auto_reset != 0, args->node_id,
1062                                 &args->event_id, &args->event_trigger_data,
1063                                 &args->event_page_offset,
1064                                 &args->event_slot_index);
1065
1066         return err;
1067
1068 out_unlock:
1069         mutex_unlock(&p->mutex);
1070         return err;
1071 }
1072
1073 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1074                                         void *data)
1075 {
1076         struct kfd_ioctl_destroy_event_args *args = data;
1077
1078         return kfd_event_destroy(p, args->event_id);
1079 }
1080
1081 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1082                                 void *data)
1083 {
1084         struct kfd_ioctl_set_event_args *args = data;
1085
1086         return kfd_set_event(p, args->event_id);
1087 }
1088
1089 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1090                                 void *data)
1091 {
1092         struct kfd_ioctl_reset_event_args *args = data;
1093
1094         return kfd_reset_event(p, args->event_id);
1095 }
1096
1097 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1098                                 void *data)
1099 {
1100         struct kfd_ioctl_wait_events_args *args = data;
1101         int err;
1102
1103         err = kfd_wait_on_events(p, args->num_events,
1104                         (void __user *)args->events_ptr,
1105                         (args->wait_for_all != 0),
1106                         args->timeout, &args->wait_result);
1107
1108         return err;
1109 }
1110 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1111                                         struct kfd_process *p, void *data)
1112 {
1113         struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1114         struct kfd_process_device *pdd;
1115         struct kfd_dev *dev;
1116         long err;
1117
1118         dev = kfd_device_by_id(args->gpu_id);
1119         if (!dev)
1120                 return -EINVAL;
1121
1122         mutex_lock(&p->mutex);
1123
1124         pdd = kfd_bind_process_to_device(dev, p);
1125         if (IS_ERR(pdd)) {
1126                 err = PTR_ERR(pdd);
1127                 goto bind_process_to_device_fail;
1128         }
1129
1130         pdd->qpd.sh_hidden_private_base = args->va_addr;
1131
1132         mutex_unlock(&p->mutex);
1133
1134         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1135             pdd->qpd.vmid != 0)
1136                 dev->kfd2kgd->set_scratch_backing_va(
1137                         dev->kgd, args->va_addr, pdd->qpd.vmid);
1138
1139         return 0;
1140
1141 bind_process_to_device_fail:
1142         mutex_unlock(&p->mutex);
1143         return err;
1144 }
1145
1146 static int kfd_ioctl_get_tile_config(struct file *filep,
1147                 struct kfd_process *p, void *data)
1148 {
1149         struct kfd_ioctl_get_tile_config_args *args = data;
1150         struct kfd_dev *dev;
1151         struct tile_config config;
1152         int err = 0;
1153
1154         dev = kfd_device_by_id(args->gpu_id);
1155         if (!dev)
1156                 return -EINVAL;
1157
1158         dev->kfd2kgd->get_tile_config(dev->kgd, &config);
1159
1160         args->gb_addr_config = config.gb_addr_config;
1161         args->num_banks = config.num_banks;
1162         args->num_ranks = config.num_ranks;
1163
1164         if (args->num_tile_configs > config.num_tile_configs)
1165                 args->num_tile_configs = config.num_tile_configs;
1166         err = copy_to_user((void __user *)args->tile_config_ptr,
1167                         config.tile_config_ptr,
1168                         args->num_tile_configs * sizeof(uint32_t));
1169         if (err) {
1170                 args->num_tile_configs = 0;
1171                 return -EFAULT;
1172         }
1173
1174         if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1175                 args->num_macro_tile_configs =
1176                                 config.num_macro_tile_configs;
1177         err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1178                         config.macro_tile_config_ptr,
1179                         args->num_macro_tile_configs * sizeof(uint32_t));
1180         if (err) {
1181                 args->num_macro_tile_configs = 0;
1182                 return -EFAULT;
1183         }
1184
1185         return 0;
1186 }
1187
1188 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1189                                 void *data)
1190 {
1191         struct kfd_ioctl_acquire_vm_args *args = data;
1192         struct kfd_process_device *pdd;
1193         struct kfd_dev *dev;
1194         struct file *drm_file;
1195         int ret;
1196
1197         dev = kfd_device_by_id(args->gpu_id);
1198         if (!dev)
1199                 return -EINVAL;
1200
1201         drm_file = fget(args->drm_fd);
1202         if (!drm_file)
1203                 return -EINVAL;
1204
1205         mutex_lock(&p->mutex);
1206
1207         pdd = kfd_get_process_device_data(dev, p);
1208         if (!pdd) {
1209                 ret = -EINVAL;
1210                 goto err_unlock;
1211         }
1212
1213         if (pdd->drm_file) {
1214                 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1215                 goto err_unlock;
1216         }
1217
1218         ret = kfd_process_device_init_vm(pdd, drm_file);
1219         if (ret)
1220                 goto err_unlock;
1221         /* On success, the PDD keeps the drm_file reference */
1222         mutex_unlock(&p->mutex);
1223
1224         return 0;
1225
1226 err_unlock:
1227         mutex_unlock(&p->mutex);
1228         fput(drm_file);
1229         return ret;
1230 }
1231
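/*
 * A device is "large BAR" when all of its local memory is host-visible
 * (no private VRAM), which allows CPU-accessible VRAM allocations.
 */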
1232 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1233 {
1234         struct kfd_local_mem_info mem_info;
1235
1236         if (debug_largebar) {
1237                 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1238                 return true;
1239         }
1240
1241         if (dev->device_info->needs_iommu_device)
1242                 return false;
1243
1244         amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1245         if (mem_info.local_mem_size_private == 0 &&
1246                         mem_info.local_mem_size_public > 0)
1247                 return true;
1248         return false;
1249 }
1250
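/*
 * Handler for AMDKFD_IOC_ALLOC_MEMORY_OF_GPU: allocate memory on the
 * chosen GPU according to the allocation flags and return an opaque
 * handle (gpu_id + idr index) plus the offset to use for a later CPU
 * mmap.
 */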
1251 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1252                                         struct kfd_process *p, void *data)
1253 {
1254         struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1255         struct kfd_process_device *pdd;
1256         void *mem;
1257         struct kfd_dev *dev;
1258         int idr_handle;
1259         long err;
1260         uint64_t offset = args->mmap_offset;
1261         uint32_t flags = args->flags;
1262
1263         if (args->size == 0)
1264                 return -EINVAL;
1265
1266         dev = kfd_device_by_id(args->gpu_id);
1267         if (!dev)
1268                 return -EINVAL;
1269
1270         if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1271                 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1272                 !kfd_dev_is_large_bar(dev)) {
1273                 pr_err("Alloc host visible vram on small bar is not allowed\n");
1274                 return -EINVAL;
1275         }
1276
1277         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1278                 if (args->size != kfd_doorbell_process_slice(dev))
1279                         return -EINVAL;
1280                 offset = kfd_get_process_doorbells(dev, p);
1281         }
1282
1283         mutex_lock(&p->mutex);
1284
1285         pdd = kfd_bind_process_to_device(dev, p);
1286         if (IS_ERR(pdd)) {
1287                 err = PTR_ERR(pdd);
1288                 goto err_unlock;
1289         }
1290
1291         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1292                 dev->kgd, args->va_addr, args->size,
1293                 pdd->vm, (struct kgd_mem **) &mem, &offset,
1294                 flags);
1295
1296         if (err)
1297                 goto err_unlock;
1298
1299         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1300         if (idr_handle < 0) {
1301                 err = -EFAULT;
1302                 goto err_free;
1303         }
1304
1305         mutex_unlock(&p->mutex);
1306
1307         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1308         args->mmap_offset = offset;
1309
1310         return 0;
1311
1312 err_free:
1313         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1314 err_unlock:
1315         mutex_unlock(&p->mutex);
1316         return err;
1317 }
1318
1319 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1320                                         struct kfd_process *p, void *data)
1321 {
1322         struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1323         struct kfd_process_device *pdd;
1324         void *mem;
1325         struct kfd_dev *dev;
1326         int ret;
1327
1328         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1329         if (!dev)
1330                 return -EINVAL;
1331
1332         mutex_lock(&p->mutex);
1333
1334         pdd = kfd_get_process_device_data(dev, p);
1335         if (!pdd) {
1336                 pr_err("Process device data doesn't exist\n");
1337                 ret = -EINVAL;
1338                 goto err_unlock;
1339         }
1340
1341         mem = kfd_process_device_translate_handle(
1342                 pdd, GET_IDR_HANDLE(args->handle));
1343         if (!mem) {
1344                 ret = -EINVAL;
1345                 goto err_unlock;
1346         }
1347
1348         ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1349                                                 (struct kgd_mem *)mem);
1350
1351         /* If freeing the buffer failed, leave the handle in place for
1352          * clean-up during process tear-down.
1353          */
1354         if (!ret)
1355                 kfd_process_device_remove_obj_handle(
1356                         pdd, GET_IDR_HANDLE(args->handle));
1357
1358 err_unlock:
1359         mutex_unlock(&p->mutex);
1360         return ret;
1361 }
1362
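/*
 * Handler for AMDKFD_IOC_MAP_MEMORY_TO_GPU: map an allocation into the
 * GPUVM page tables of each device in the user-supplied list, then wait
 * for the page table updates and flush the TLBs. args->n_success tracks
 * how far the loop got so that user space can retry without remapping.
 */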
1363 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1364                                         struct kfd_process *p, void *data)
1365 {
1366         struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1367         struct kfd_process_device *pdd, *peer_pdd;
1368         void *mem;
1369         struct kfd_dev *dev, *peer;
1370         long err = 0;
1371         int i;
1372         uint32_t *devices_arr = NULL;
1373
1374         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1375         if (!dev)
1376                 return -EINVAL;
1377
1378         if (!args->n_devices) {
1379                 pr_debug("Device IDs array empty\n");
1380                 return -EINVAL;
1381         }
1382         if (args->n_success > args->n_devices) {
1383                 pr_debug("n_success exceeds n_devices\n");
1384                 return -EINVAL;
1385         }
1386
1387         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1388                                     GFP_KERNEL);
1389         if (!devices_arr)
1390                 return -ENOMEM;
1391
1392         err = copy_from_user(devices_arr,
1393                              (void __user *)args->device_ids_array_ptr,
1394                              args->n_devices * sizeof(*devices_arr));
1395         if (err != 0) {
1396                 err = -EFAULT;
1397                 goto copy_from_user_failed;
1398         }
1399
1400         mutex_lock(&p->mutex);
1401
1402         pdd = kfd_bind_process_to_device(dev, p);
1403         if (IS_ERR(pdd)) {
1404                 err = PTR_ERR(pdd);
1405                 goto bind_process_to_device_failed;
1406         }
1407
1408         mem = kfd_process_device_translate_handle(pdd,
1409                                                 GET_IDR_HANDLE(args->handle));
1410         if (!mem) {
1411                 err = -ENOMEM;
1412                 goto get_mem_obj_from_handle_failed;
1413         }
1414
1415         for (i = args->n_success; i < args->n_devices; i++) {
1416                 peer = kfd_device_by_id(devices_arr[i]);
1417                 if (!peer) {
1418                         pr_debug("Getting device by id failed for 0x%x\n",
1419                                  devices_arr[i]);
1420                         err = -EINVAL;
1421                         goto get_mem_obj_from_handle_failed;
1422                 }
1423
1424                 peer_pdd = kfd_bind_process_to_device(peer, p);
1425                 if (IS_ERR(peer_pdd)) {
1426                         err = PTR_ERR(peer_pdd);
1427                         goto get_mem_obj_from_handle_failed;
1428                 }
1429                 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1430                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1431                 if (err) {
1432                         pr_err("Failed to map to gpu %d/%d\n",
1433                                i, args->n_devices);
1434                         goto map_memory_to_gpu_failed;
1435                 }
1436                 args->n_success = i+1;
1437         }
1438
1439         mutex_unlock(&p->mutex);
1440
1441         err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1442         if (err) {
1443                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1444                 goto sync_memory_failed;
1445         }
1446
1447         /* Flush TLBs after waiting for the page table updates to complete */
1448         for (i = 0; i < args->n_devices; i++) {
1449                 peer = kfd_device_by_id(devices_arr[i]);
1450                 if (WARN_ON_ONCE(!peer))
1451                         continue;
1452                 peer_pdd = kfd_get_process_device_data(peer, p);
1453                 if (WARN_ON_ONCE(!peer_pdd))
1454                         continue;
1455                 kfd_flush_tlb(peer_pdd);
1456         }
1457
1458         kfree(devices_arr);
1459
1460         return err;
1461
1462 bind_process_to_device_failed:
1463 get_mem_obj_from_handle_failed:
1464 map_memory_to_gpu_failed:
1465         mutex_unlock(&p->mutex);
1466 copy_from_user_failed:
1467 sync_memory_failed:
1468         kfree(devices_arr);
1469
1470         return err;
1471 }
1472
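/*
 * Handler for AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU: the reverse of the map
 * ioctl, removing the GPUVM mapping of the buffer from each device in
 * the user-supplied list.
 */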
1473 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1474                                         struct kfd_process *p, void *data)
1475 {
1476         struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1477         struct kfd_process_device *pdd, *peer_pdd;
1478         void *mem;
1479         struct kfd_dev *dev, *peer;
1480         long err = 0;
1481         uint32_t *devices_arr = NULL, i;
1482
1483         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1484         if (!dev)
1485                 return -EINVAL;
1486
1487         if (!args->n_devices) {
1488                 pr_debug("Device IDs array empty\n");
1489                 return -EINVAL;
1490         }
1491         if (args->n_success > args->n_devices) {
1492                 pr_debug("n_success exceeds n_devices\n");
1493                 return -EINVAL;
1494         }
1495
1496         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1497                                     GFP_KERNEL);
1498         if (!devices_arr)
1499                 return -ENOMEM;
1500
1501         err = copy_from_user(devices_arr,
1502                              (void __user *)args->device_ids_array_ptr,
1503                              args->n_devices * sizeof(*devices_arr));
1504         if (err != 0) {
1505                 err = -EFAULT;
1506                 goto copy_from_user_failed;
1507         }
1508
1509         mutex_lock(&p->mutex);
1510
1511         pdd = kfd_get_process_device_data(dev, p);
1512         if (!pdd) {
1513                 err = -EINVAL;
1514                 goto bind_process_to_device_failed;
1515         }
1516
1517         mem = kfd_process_device_translate_handle(pdd,
1518                                                 GET_IDR_HANDLE(args->handle));
1519         if (!mem) {
1520                 err = -ENOMEM;
1521                 goto get_mem_obj_from_handle_failed;
1522         }
1523
1524         for (i = args->n_success; i < args->n_devices; i++) {
1525                 peer = kfd_device_by_id(devices_arr[i]);
1526                 if (!peer) {
1527                         err = -EINVAL;
1528                         goto get_mem_obj_from_handle_failed;
1529                 }
1530
1531                 peer_pdd = kfd_get_process_device_data(peer, p);
1532                 if (!peer_pdd) {
1533                         err = -ENODEV;
1534                         goto get_mem_obj_from_handle_failed;
1535                 }
1536                 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1537                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1538                 if (err) {
1539                         pr_err("Failed to unmap from gpu %d/%d\n",
1540                                i, args->n_devices);
1541                         goto unmap_memory_from_gpu_failed;
1542                 }
1543                 args->n_success = i + 1;
1544         }
1545         kfree(devices_arr);
1546
1547         mutex_unlock(&p->mutex);
1548
1549         return 0;
1550
1551 bind_process_to_device_failed:
1552 get_mem_obj_from_handle_failed:
1553 unmap_memory_from_gpu_failed:
1554         mutex_unlock(&p->mutex);
1555 copy_from_user_failed:
1556         kfree(devices_arr);
1557         return err;
1558 }
1559
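/*
 * Query size, flags and (optionally) metadata for a DMA-buf fd. Any KFD
 * device can issue the query; the exporting GPU is identified afterwards
 * by reverse-looking up the kgd pointer returned by
 * amdgpu_amdkfd_get_dmabuf_info() and reported back as args->gpu_id.
 */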
1560 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1561                 struct kfd_process *p, void *data)
1562 {
1563         struct kfd_ioctl_get_dmabuf_info_args *args = data;
1564         struct kfd_dev *dev = NULL;
1565         struct kgd_dev *dma_buf_kgd;
1566         void *metadata_buffer = NULL;
1567         uint32_t flags;
1568         unsigned int i;
1569         int r;
1570
1571         /* Find a KFD GPU device that supports the get_dmabuf_info query */
1572         for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1573                 if (dev)
1574                         break;
1575         if (!dev)
1576                 return -EINVAL;
1577
1578         if (args->metadata_ptr) {
1579                 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1580                 if (!metadata_buffer)
1581                         return -ENOMEM;
1582         }
1583
1584         /* Get dmabuf info from KGD */
1585         r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1586                                           &dma_buf_kgd, &args->size,
1587                                           metadata_buffer, args->metadata_size,
1588                                           &args->metadata_size, &flags);
1589         if (r)
1590                 goto exit;
1591
1592         /* Reverse-lookup gpu_id from kgd pointer */
1593         dev = kfd_device_by_kgd(dma_buf_kgd);
1594         if (!dev) {
1595                 r = -EINVAL;
1596                 goto exit;
1597         }
1598         args->gpu_id = dev->id;
1599         args->flags = flags;
1600
1601         /* Copy metadata buffer to user mode */
1602         if (metadata_buffer) {
1603                 r = copy_to_user((void __user *)args->metadata_ptr,
1604                                  metadata_buffer, args->metadata_size);
1605                 if (r != 0)
1606                         r = -EFAULT;
1607         }
1608
1609 exit:
1610         kfree(metadata_buffer);
1611
1612         return r;
1613 }
1614
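/*
 * Import a DMA-buf fd into the process GPUVM on the GPU selected by
 * args->gpu_id and return a buffer handle (gpu_id + IDR handle) that the
 * map/unmap and free ioctls accept. Note that dma_buf_get() returns an
 * ERR_PTR(), never NULL, so failures are detected with IS_ERR().
 */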
1615 static int kfd_ioctl_import_dmabuf(struct file *filep,
1616                                    struct kfd_process *p, void *data)
1617 {
1618         struct kfd_ioctl_import_dmabuf_args *args = data;
1619         struct kfd_process_device *pdd;
1620         struct dma_buf *dmabuf;
1621         struct kfd_dev *dev;
1622         int idr_handle;
1623         uint64_t size;
1624         void *mem;
1625         int r;
1626
1627         dev = kfd_device_by_id(args->gpu_id);
1628         if (!dev)
1629                 return -EINVAL;
1630
1631         dmabuf = dma_buf_get(args->dmabuf_fd);
1632         if (IS_ERR(dmabuf))
1633                 return PTR_ERR(dmabuf);
1634
1635         mutex_lock(&p->mutex);
1636
1637         pdd = kfd_bind_process_to_device(dev, p);
1638         if (IS_ERR(pdd)) {
1639                 r = PTR_ERR(pdd);
1640                 goto err_unlock;
1641         }
1642
1643         r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1644                                               args->va_addr, pdd->vm,
1645                                               (struct kgd_mem **)&mem, &size,
1646                                               NULL);
1647         if (r)
1648                 goto err_unlock;
1649
1650         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1651         if (idr_handle < 0) {
1652                 r = -EFAULT;
1653                 goto err_free;
1654         }
1655
1656         mutex_unlock(&p->mutex);
1657
1658         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1659
1660         return 0;
1661
1662 err_free:
1663         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1664 err_unlock:
1665         mutex_unlock(&p->mutex);
1666         return r;
1667 }
1668
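/*
 * Build one entry of the amdkfd_ioctls table, indexed by the ioctl number
 * so that kfd_ioctl() can look up the handler with _IOC_NR(cmd).
 */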
1669 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1670         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1671                             .cmd_drv = 0, .name = #ioctl}
1672
1673 /** Ioctl table */
1674 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1675         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1676                         kfd_ioctl_get_version, 0),
1677
1678         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1679                         kfd_ioctl_create_queue, 0),
1680
1681         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1682                         kfd_ioctl_destroy_queue, 0),
1683
1684         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1685                         kfd_ioctl_set_memory_policy, 0),
1686
1687         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1688                         kfd_ioctl_get_clock_counters, 0),
1689
1690         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1691                         kfd_ioctl_get_process_apertures, 0),
1692
1693         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1694                         kfd_ioctl_update_queue, 0),
1695
1696         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1697                         kfd_ioctl_create_event, 0),
1698
1699         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1700                         kfd_ioctl_destroy_event, 0),
1701
1702         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1703                         kfd_ioctl_set_event, 0),
1704
1705         AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1706                         kfd_ioctl_reset_event, 0),
1707
1708         AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1709                         kfd_ioctl_wait_events, 0),
1710
1711         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1712                         kfd_ioctl_dbg_register, 0),
1713
1714         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1715                         kfd_ioctl_dbg_unregister, 0),
1716
1717         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1718                         kfd_ioctl_dbg_address_watch, 0),
1719
1720         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1721                         kfd_ioctl_dbg_wave_control, 0),
1722
1723         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1724                         kfd_ioctl_set_scratch_backing_va, 0),
1725
1726         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1727                         kfd_ioctl_get_tile_config, 0),
1728
1729         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1730                         kfd_ioctl_set_trap_handler, 0),
1731
1732         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1733                         kfd_ioctl_get_process_apertures_new, 0),
1734
1735         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1736                         kfd_ioctl_acquire_vm, 0),
1737
1738         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1739                         kfd_ioctl_alloc_memory_of_gpu, 0),
1740
1741         AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1742                         kfd_ioctl_free_memory_of_gpu, 0),
1743
1744         AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1745                         kfd_ioctl_map_memory_to_gpu, 0),
1746
1747         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1748                         kfd_ioctl_unmap_memory_from_gpu, 0),
1749
1750         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1751                         kfd_ioctl_set_cu_mask, 0),
1752
1753         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1754                         kfd_ioctl_get_queue_wave_state, 0),
1755
1756         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1757                         kfd_ioctl_get_dmabuf_info, 0),
1758
1759         AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1760                         kfd_ioctl_import_dmabuf, 0),
1761
1762 };
1763
1764 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1765
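/*
 * Common ioctl dispatcher: validate the command number against the table,
 * copy the argument struct into a stack buffer (or a heap buffer when it
 * exceeds 128 bytes), zero any tail bytes the kernel expects beyond what
 * userspace passed, call the handler, and copy the results back to
 * userspace for commands with an output direction (IOC_OUT).
 */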
1766 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1767 {
1768         struct kfd_process *process;
1769         amdkfd_ioctl_t *func;
1770         const struct amdkfd_ioctl_desc *ioctl = NULL;
1771         unsigned int nr = _IOC_NR(cmd);
1772         char stack_kdata[128];
1773         char *kdata = NULL;
1774         unsigned int usize, asize;
1775         int retcode = -EINVAL;
1776
1777         if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1778                 goto err_i1;
1779
1780         if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1781                 u32 amdkfd_size;
1782
1783                 ioctl = &amdkfd_ioctls[nr];
1784
1785                 amdkfd_size = _IOC_SIZE(ioctl->cmd);
1786                 usize = asize = _IOC_SIZE(cmd);
1787                 if (amdkfd_size > asize)
1788                         asize = amdkfd_size;
1789
1790                 cmd = ioctl->cmd;
1791         } else
1792                 goto err_i1;
1793
1794         dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
1795
1796         process = kfd_get_process(current);
1797         if (IS_ERR(process)) {
1798                 dev_dbg(kfd_device, "no process\n");
1799                 goto err_i1;
1800         }
1801
1802         /* Do not trust userspace, use our own definition */
1803         func = ioctl->func;
1804
1805         if (unlikely(!func)) {
1806                 dev_dbg(kfd_device, "no function\n");
1807                 retcode = -EINVAL;
1808                 goto err_i1;
1809         }
1810
1811         if (cmd & (IOC_IN | IOC_OUT)) {
1812                 if (asize <= sizeof(stack_kdata)) {
1813                         kdata = stack_kdata;
1814                 } else {
1815                         kdata = kmalloc(asize, GFP_KERNEL);
1816                         if (!kdata) {
1817                                 retcode = -ENOMEM;
1818                                 goto err_i1;
1819                         }
1820                 }
1821                 if (asize > usize)
1822                         memset(kdata + usize, 0, asize - usize);
1823         }
1824
1825         if (cmd & IOC_IN) {
1826                 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1827                         retcode = -EFAULT;
1828                         goto err_i1;
1829                 }
1830         } else if (cmd & IOC_OUT) {
1831                 memset(kdata, 0, usize);
1832         }
1833
1834         retcode = func(filep, process, kdata);
1835
1836         if (cmd & IOC_OUT)
1837                 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1838                         retcode = -EFAULT;
1839
1840 err_i1:
1841         if (!ioctl)
1842                 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1843                           task_pid_nr(current), cmd, nr);
1844
1845         if (kdata != stack_kdata)
1846                 kfree(kdata);
1847
1848         if (retcode)
1849                 dev_dbg(kfd_device, "ret = %d\n", retcode);
1850
1851         return retcode;
1852 }
1853
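/*
 * Route mmap requests by the type encoded in the offset: doorbell pages
 * and reserved memory need the GPU identified by the encoded gpu_id,
 * while the events page is per-process and device independent.
 */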
1854 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1855 {
1856         struct kfd_process *process;
1857         struct kfd_dev *dev = NULL;
1858         unsigned long vm_pgoff;
1859         unsigned int gpu_id;
1860
1861         process = kfd_get_process(current);
1862         if (IS_ERR(process))
1863                 return PTR_ERR(process);
1864
1865         vm_pgoff = vma->vm_pgoff;
1866         vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
1867         gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
1868         if (gpu_id)
1869                 dev = kfd_device_by_id(gpu_id);
1870
1871         switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
1872         case KFD_MMAP_TYPE_DOORBELL:
1873                 if (!dev)
1874                         return -ENODEV;
1875                 return kfd_doorbell_mmap(dev, process, vma);
1876
1877         case KFD_MMAP_TYPE_EVENTS:
1878                 return kfd_event_mmap(process, vma);
1879
1880         case KFD_MMAP_TYPE_RESERVED_MEM:
1881                 if (!dev)
1882                         return -ENODEV;
1883                 return kfd_reserved_mem_mmap(dev, process, vma);
1884         }
1885
1886         return -EFAULT;
1887 }