drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
38 #include "kfd_priv.h"
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
41 #include "kfd_svm.h"
42 #include "amdgpu_amdkfd.h"
43 #include "kfd_smi_events.h"
44
45 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
46 static int kfd_open(struct inode *, struct file *);
47 static int kfd_release(struct inode *, struct file *);
48 static int kfd_mmap(struct file *, struct vm_area_struct *);
49
50 static const char kfd_dev_name[] = "kfd";
51
52 static const struct file_operations kfd_fops = {
53         .owner = THIS_MODULE,
54         .unlocked_ioctl = kfd_ioctl,
55         .compat_ioctl = compat_ptr_ioctl,
56         .open = kfd_open,
57         .release = kfd_release,
58         .mmap = kfd_mmap,
59 };
60
61 static int kfd_char_dev_major = -1;
62 static struct class *kfd_class;
63 struct device *kfd_device;
64
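/*
 * Register the amdkfd character device: allocate a chrdev major for "kfd",
 * create the device class and the /dev/kfd device node.  Undone in reverse
 * order by kfd_chardev_exit().
 */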
65 int kfd_chardev_init(void)
66 {
67         int err = 0;
68
69         kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
70         err = kfd_char_dev_major;
71         if (err < 0)
72                 goto err_register_chrdev;
73
74         kfd_class = class_create(THIS_MODULE, kfd_dev_name);
75         err = PTR_ERR(kfd_class);
76         if (IS_ERR(kfd_class))
77                 goto err_class_create;
78
79         kfd_device = device_create(kfd_class, NULL,
80                                         MKDEV(kfd_char_dev_major, 0),
81                                         NULL, kfd_dev_name);
82         err = PTR_ERR(kfd_device);
83         if (IS_ERR(kfd_device))
84                 goto err_device_create;
85
86         return 0;
87
88 err_device_create:
89         class_destroy(kfd_class);
90 err_class_create:
91         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
92 err_register_chrdev:
93         return err;
94 }
95
96 void kfd_chardev_exit(void)
97 {
98         device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
99         class_destroy(kfd_class);
100         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
101         kfd_device = NULL;
102 }
103
104 struct device *kfd_chardev(void)
105 {
106         return kfd_device;
107 }
108
109
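/*
 * open() handler for /dev/kfd: rejects 32-bit callers, creates (or takes a
 * reference on) the kfd_process for the calling task and stashes it in
 * filep->private_data.  Fails with -EAGAIN while KFD is locked.
 */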
110 static int kfd_open(struct inode *inode, struct file *filep)
111 {
112         struct kfd_process *process;
113         bool is_32bit_user_mode;
114
115         if (iminor(inode) != 0)
116                 return -ENODEV;
117
118         is_32bit_user_mode = in_compat_syscall();
119
120         if (is_32bit_user_mode) {
121                 dev_warn(kfd_device,
122                         "Process %d (32-bit) failed to open /dev/kfd\n"
123                         "32-bit processes are not supported by amdkfd\n",
124                         current->pid);
125                 return -EPERM;
126         }
127
128         process = kfd_create_process(filep);
129         if (IS_ERR(process))
130                 return PTR_ERR(process);
131
132         if (kfd_is_locked()) {
133                 dev_dbg(kfd_device, "kfd is locked!\n"
134                                 "process %d unreferenced", process->pasid);
135                 kfd_unref_process(process);
136                 return -EAGAIN;
137         }
138
139         /* filep now owns the reference returned by kfd_create_process */
140         filep->private_data = process;
141
142         dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
143                 process->pasid, process->is_32bit_user_mode);
144
145         return 0;
146 }
147
148 static int kfd_release(struct inode *inode, struct file *filep)
149 {
150         struct kfd_process *process = filep->private_data;
151
152         if (process)
153                 kfd_unref_process(process);
154
155         return 0;
156 }
157
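/*
 * All ioctls below follow the same user-space pattern: fill the matching
 * args struct from include/uapi/linux/kfd_ioctl.h and issue the command on
 * an open /dev/kfd file descriptor.  A minimal sketch for the version query
 * (assuming the uapi header is available):
 *
 *   int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *   struct kfd_ioctl_get_version_args ver = {0};
 *   if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &ver) == 0)
 *       printf("KFD ioctl interface %u.%u\n",
 *              ver.major_version, ver.minor_version);
 */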
158 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
159                                         void *data)
160 {
161         struct kfd_ioctl_get_version_args *args = data;
162
163         args->major_version = KFD_IOCTL_MAJOR_VERSION;
164         args->minor_version = KFD_IOCTL_MINOR_VERSION;
165
166         return 0;
167 }
168
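/*
 * Validate the user-supplied queue arguments (percentage, priority, ring
 * size and the various user-space pointers) and translate them into a
 * struct queue_properties.  The ring, read/write-pointer, EOP and ctx-save
 * addresses are only checked with access_ok() here; their contents are not
 * copied.
 */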
169 static int set_queue_properties_from_user(struct queue_properties *q_properties,
170                                 struct kfd_ioctl_create_queue_args *args)
171 {
172         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
173                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
174                 return -EINVAL;
175         }
176
177         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
178                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
179                 return -EINVAL;
180         }
181
182         if ((args->ring_base_address) &&
183                 (!access_ok((const void __user *) args->ring_base_address,
184                         sizeof(uint64_t)))) {
185                 pr_err("Can't access ring base address\n");
186                 return -EFAULT;
187         }
188
189         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
190                 pr_err("Ring size must be a power of 2 or 0\n");
191                 return -EINVAL;
192         }
193
194         if (!access_ok((const void __user *) args->read_pointer_address,
195                         sizeof(uint32_t))) {
196                 pr_err("Can't access read pointer\n");
197                 return -EFAULT;
198         }
199
200         if (!access_ok((const void __user *) args->write_pointer_address,
201                         sizeof(uint32_t))) {
202                 pr_err("Can't access write pointer\n");
203                 return -EFAULT;
204         }
205
206         if (args->eop_buffer_address &&
207                 !access_ok((const void __user *) args->eop_buffer_address,
208                         sizeof(uint32_t))) {
209                 pr_debug("Can't access eop buffer");
210                 return -EFAULT;
211         }
212
213         if (args->ctx_save_restore_address &&
214                 !access_ok((const void __user *) args->ctx_save_restore_address,
215                         sizeof(uint32_t))) {
216                 pr_debug("Can't access ctx save restore buffer");
217                 return -EFAULT;
218         }
219
220         q_properties->is_interop = false;
221         q_properties->is_gws = false;
222         q_properties->queue_percent = args->queue_percentage;
223         q_properties->priority = args->queue_priority;
224         q_properties->queue_address = args->ring_base_address;
225         q_properties->queue_size = args->ring_size;
226         q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
227         q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
228         q_properties->eop_ring_buffer_address = args->eop_buffer_address;
229         q_properties->eop_ring_buffer_size = args->eop_buffer_size;
230         q_properties->ctx_save_restore_area_address =
231                         args->ctx_save_restore_address;
232         q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
233         q_properties->ctl_stack_size = args->ctl_stack_size;
234         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
235                 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
236                 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
237         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
238                 q_properties->type = KFD_QUEUE_TYPE_SDMA;
239         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
240                 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
241         else
242                 return -ENOTSUPP;
243
244         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
245                 q_properties->format = KFD_QUEUE_FORMAT_AQL;
246         else
247                 q_properties->format = KFD_QUEUE_FORMAT_PM4;
248
249         pr_debug("Queue Percentage: %d, %d\n",
250                         q_properties->queue_percent, args->queue_percentage);
251
252         pr_debug("Queue Priority: %d, %d\n",
253                         q_properties->priority, args->queue_priority);
254
255         pr_debug("Queue Address: 0x%llX, 0x%llX\n",
256                         q_properties->queue_address, args->ring_base_address);
257
258         pr_debug("Queue Size: 0x%llX, %u\n",
259                         q_properties->queue_size, args->ring_size);
260
261         pr_debug("Queue r/w Pointers: %px, %px\n",
262                         q_properties->read_ptr,
263                         q_properties->write_ptr);
264
265         pr_debug("Queue Format: %d\n", q_properties->format);
266
267         pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
268
269         pr_debug("Queue CTX save area: 0x%llX\n",
270                         q_properties->ctx_save_restore_area_address);
271
272         return 0;
273 }
274
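/*
 * AMDKFD_IOC_CREATE_QUEUE handler: validates the user arguments, binds the
 * process to the requested GPU and creates the queue through the process
 * queue manager.  The doorbell offset returned to user space encodes the
 * mmap type and GPU id (plus the per-process doorbell offset on SOC15) so
 * that the doorbell page can be mmapped later.
 */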
275 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
276                                         void *data)
277 {
278         struct kfd_ioctl_create_queue_args *args = data;
279         struct kfd_dev *dev;
280         int err = 0;
281         unsigned int queue_id;
282         struct kfd_process_device *pdd;
283         struct queue_properties q_properties;
284         uint32_t doorbell_offset_in_process = 0;
285
286         memset(&q_properties, 0, sizeof(struct queue_properties));
287
288         pr_debug("Creating queue ioctl\n");
289
290         err = set_queue_properties_from_user(&q_properties, args);
291         if (err)
292                 return err;
293
294         pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
295         dev = kfd_device_by_id(args->gpu_id);
296         if (!dev) {
297                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
298                 return -EINVAL;
299         }
300
301         mutex_lock(&p->mutex);
302
303         pdd = kfd_bind_process_to_device(dev, p);
304         if (IS_ERR(pdd)) {
305                 err = -ESRCH;
306                 goto err_bind_process;
307         }
308
309         pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
310                         p->pasid,
311                         dev->id);
312
313         err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
314                         &doorbell_offset_in_process);
315         if (err != 0)
316                 goto err_create_queue;
317
318         args->queue_id = queue_id;
319
320
321         /* Return the doorbell offset (mmap type and GPU id) for mmap usage */
322         args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
323         args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
324         if (KFD_IS_SOC15(dev->device_info->asic_family))
325                 /* On SOC15 ASICs, include the doorbell offset within the
326                  * process doorbell frame, which is 2 pages.
327                  */
328                 args->doorbell_offset |= doorbell_offset_in_process;
329
330         mutex_unlock(&p->mutex);
331
332         pr_debug("Queue id %d was created successfully\n", args->queue_id);
333
334         pr_debug("Ring buffer address == 0x%016llX\n",
335                         args->ring_base_address);
336
337         pr_debug("Read ptr address    == 0x%016llX\n",
338                         args->read_pointer_address);
339
340         pr_debug("Write ptr address   == 0x%016llX\n",
341                         args->write_pointer_address);
342
343         return 0;
344
345 err_create_queue:
346 err_bind_process:
347         mutex_unlock(&p->mutex);
348         return err;
349 }
350
351 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
352                                         void *data)
353 {
354         int retval;
355         struct kfd_ioctl_destroy_queue_args *args = data;
356
357         pr_debug("Destroying queue id %d for pasid 0x%x\n",
358                                 args->queue_id,
359                                 p->pasid);
360
361         mutex_lock(&p->mutex);
362
363         retval = pqm_destroy_queue(&p->pqm, args->queue_id);
364
365         mutex_unlock(&p->mutex);
366         return retval;
367 }
368
369 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
370                                         void *data)
371 {
372         int retval;
373         struct kfd_ioctl_update_queue_args *args = data;
374         struct queue_properties properties;
375
376         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
377                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
378                 return -EINVAL;
379         }
380
381         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
382                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
383                 return -EINVAL;
384         }
385
386         if ((args->ring_base_address) &&
387                 (!access_ok((const void __user *) args->ring_base_address,
388                         sizeof(uint64_t)))) {
389                 pr_err("Can't access ring base address\n");
390                 return -EFAULT;
391         }
392
393         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
394                 pr_err("Ring size must be a power of 2 or 0\n");
395                 return -EINVAL;
396         }
397
398         properties.queue_address = args->ring_base_address;
399         properties.queue_size = args->ring_size;
400         properties.queue_percent = args->queue_percentage;
401         properties.priority = args->queue_priority;
402
403         pr_debug("Updating queue id %d for pasid 0x%x\n",
404                         args->queue_id, p->pasid);
405
406         mutex_lock(&p->mutex);
407
408         retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
409
410         mutex_unlock(&p->mutex);
411
412         return retval;
413 }
414
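/*
 * Set a compute-unit mask on an existing queue.  num_cu_mask must be a
 * non-zero multiple of 32 and is silently capped at 1024 bits; the mask is
 * copied from user space and handed to pqm_set_cu_mask(), which takes
 * ownership of the allocation on success (it is freed here only on failure).
 */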
415 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
416                                         void *data)
417 {
418         int retval;
419         const int max_num_cus = 1024;
420         struct kfd_ioctl_set_cu_mask_args *args = data;
421         struct queue_properties properties;
422         uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
423         size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
424
425         if ((args->num_cu_mask % 32) != 0) {
426                 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
427                                 args->num_cu_mask);
428                 return -EINVAL;
429         }
430
431         properties.cu_mask_count = args->num_cu_mask;
432         if (properties.cu_mask_count == 0) {
433                 pr_debug("CU mask cannot be 0");
434                 return -EINVAL;
435         }
436
437         /* To prevent an unreasonably large CU mask size, set an arbitrary
438          * limit of max_num_cus bits.  Any CU mask bits past that limit are
439          * dropped and only the first max_num_cus bits are used.
440          */
441         if (properties.cu_mask_count > max_num_cus) {
442                 pr_debug("CU mask cannot be greater than 1024 bits");
443                 properties.cu_mask_count = max_num_cus;
444                 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
445         }
446
447         properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
448         if (!properties.cu_mask)
449                 return -ENOMEM;
450
451         retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
452         if (retval) {
453                 pr_debug("Could not copy CU mask from userspace");
454                 kfree(properties.cu_mask);
455                 return -EFAULT;
456         }
457
458         mutex_lock(&p->mutex);
459
460         retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
461
462         mutex_unlock(&p->mutex);
463
464         if (retval)
465                 kfree(properties.cu_mask);
466
467         return retval;
468 }
469
470 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
471                                           struct kfd_process *p, void *data)
472 {
473         struct kfd_ioctl_get_queue_wave_state_args *args = data;
474         int r;
475
476         mutex_lock(&p->mutex);
477
478         r = pqm_get_wave_state(&p->pqm, args->queue_id,
479                                (void __user *)args->ctl_stack_address,
480                                &args->ctl_stack_used_size,
481                                &args->save_area_used_size);
482
483         mutex_unlock(&p->mutex);
484
485         return r;
486 }
487
488 static int kfd_ioctl_set_memory_policy(struct file *filep,
489                                         struct kfd_process *p, void *data)
490 {
491         struct kfd_ioctl_set_memory_policy_args *args = data;
492         struct kfd_dev *dev;
493         int err = 0;
494         struct kfd_process_device *pdd;
495         enum cache_policy default_policy, alternate_policy;
496
497         if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
498             && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
499                 return -EINVAL;
500         }
501
502         if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
503             && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
504                 return -EINVAL;
505         }
506
507         dev = kfd_device_by_id(args->gpu_id);
508         if (!dev)
509                 return -EINVAL;
510
511         mutex_lock(&p->mutex);
512
513         pdd = kfd_bind_process_to_device(dev, p);
514         if (IS_ERR(pdd)) {
515                 err = -ESRCH;
516                 goto out;
517         }
518
519         default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
520                          ? cache_policy_coherent : cache_policy_noncoherent;
521
522         alternate_policy =
523                 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
524                    ? cache_policy_coherent : cache_policy_noncoherent;
525
526         if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
527                                 &pdd->qpd,
528                                 default_policy,
529                                 alternate_policy,
530                                 (void __user *)args->alternate_aperture_base,
531                                 args->alternate_aperture_size))
532                 err = -EINVAL;
533
534 out:
535         mutex_unlock(&p->mutex);
536
537         return err;
538 }
539
540 static int kfd_ioctl_set_trap_handler(struct file *filep,
541                                         struct kfd_process *p, void *data)
542 {
543         struct kfd_ioctl_set_trap_handler_args *args = data;
544         struct kfd_dev *dev;
545         int err = 0;
546         struct kfd_process_device *pdd;
547
548         dev = kfd_device_by_id(args->gpu_id);
549         if (!dev)
550                 return -EINVAL;
551
552         mutex_lock(&p->mutex);
553
554         pdd = kfd_bind_process_to_device(dev, p);
555         if (IS_ERR(pdd)) {
556                 err = -ESRCH;
557                 goto out;
558         }
559
560         kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
561
562 out:
563         mutex_unlock(&p->mutex);
564
565         return err;
566 }
567
568 static int kfd_ioctl_dbg_register(struct file *filep,
569                                 struct kfd_process *p, void *data)
570 {
571         struct kfd_ioctl_dbg_register_args *args = data;
572         struct kfd_dev *dev;
573         struct kfd_dbgmgr *dbgmgr_ptr;
574         struct kfd_process_device *pdd;
575         bool create_ok;
576         long status = 0;
577
578         dev = kfd_device_by_id(args->gpu_id);
579         if (!dev)
580                 return -EINVAL;
581
582         if (dev->device_info->asic_family == CHIP_CARRIZO) {
583                 pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
584                 return -EINVAL;
585         }
586
587         mutex_lock(&p->mutex);
588         mutex_lock(kfd_get_dbgmgr_mutex());
589
590         /*
591          * Make sure that we have a pdd if this is the first queue created
592          * for this process.
593          */
594         pdd = kfd_bind_process_to_device(dev, p);
595         if (IS_ERR(pdd)) {
596                 status = PTR_ERR(pdd);
597                 goto out;
598         }
599
600         if (!dev->dbgmgr) {
601                 /* In case of a legal call, we have no dbgmgr yet */
602                 create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
603                 if (create_ok) {
604                         status = kfd_dbgmgr_register(dbgmgr_ptr, p);
605                         if (status != 0)
606                                 kfd_dbgmgr_destroy(dbgmgr_ptr);
607                         else
608                                 dev->dbgmgr = dbgmgr_ptr;
609                 }
610         } else {
611                 pr_debug("debugger already registered\n");
612                 status = -EINVAL;
613         }
614
615 out:
616         mutex_unlock(kfd_get_dbgmgr_mutex());
617         mutex_unlock(&p->mutex);
618
619         return status;
620 }
621
622 static int kfd_ioctl_dbg_unregister(struct file *filep,
623                                 struct kfd_process *p, void *data)
624 {
625         struct kfd_ioctl_dbg_unregister_args *args = data;
626         struct kfd_dev *dev;
627         long status;
628
629         dev = kfd_device_by_id(args->gpu_id);
630         if (!dev || !dev->dbgmgr)
631                 return -EINVAL;
632
633         if (dev->device_info->asic_family == CHIP_CARRIZO) {
634                 pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
635                 return -EINVAL;
636         }
637
638         mutex_lock(kfd_get_dbgmgr_mutex());
639
640         status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
641         if (!status) {
642                 kfd_dbgmgr_destroy(dev->dbgmgr);
643                 dev->dbgmgr = NULL;
644         }
645
646         mutex_unlock(kfd_get_dbgmgr_mutex());
647
648         return status;
649 }
650
651 /*
652  * Parse and generate the variable-size data structure for address watch.
653  * The total buffer size and number of watch points are limited in order
654  * to prevent kernel abuse (this has no bearing on the much smaller HW
655  * limitation, which is enforced by the dbgdev module).
656  * Note that the watch addresses themselves are not copied from user space,
657  * since they are programmed into the HW as user-mode values.
658  *
659  */
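/*
 * Layout of the user buffer parsed below (compact form, following the
 * ioctl args themselves):
 *   num_watch_points                 (uint32_t)
 *   watch_mode[num_watch_points]     (enum HSA_DBG_WATCH_MODE)
 *   watch_address[num_watch_points]  (uint64_t)
 *   watch_mask[num_watch_points]     (uint64_t), or a single zero entry if
 *                                    no masks are used
 */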
660 static int kfd_ioctl_dbg_address_watch(struct file *filep,
661                                         struct kfd_process *p, void *data)
662 {
663         struct kfd_ioctl_dbg_address_watch_args *args = data;
664         struct kfd_dev *dev;
665         struct dbg_address_watch_info aw_info;
666         unsigned char *args_buff;
667         long status;
668         void __user *cmd_from_user;
669         uint64_t watch_mask_value = 0;
670         unsigned int args_idx = 0;
671
672         memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
673
674         dev = kfd_device_by_id(args->gpu_id);
675         if (!dev)
676                 return -EINVAL;
677
678         if (dev->device_info->asic_family == CHIP_CARRIZO) {
679                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
680                 return -EINVAL;
681         }
682
683         cmd_from_user = (void __user *) args->content_ptr;
684
685         /* Validate arguments */
686
687         if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
688                 (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
689                 (cmd_from_user == NULL))
690                 return -EINVAL;
691
692         /* this is the actual buffer to work with */
693         args_buff = memdup_user(cmd_from_user,
694                                 args->buf_size_in_bytes - sizeof(*args));
695         if (IS_ERR(args_buff))
696                 return PTR_ERR(args_buff);
697
698         aw_info.process = p;
699
700         aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
701         args_idx += sizeof(aw_info.num_watch_points);
702
703         aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
704         args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
705
706         /*
707          * set the watch address base pointer to point at the array base
708          * within args_buff
709          */
710         aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
711
712         /* skip over the addresses buffer */
713         args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
714
715         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
716                 status = -EINVAL;
717                 goto out;
718         }
719
720         watch_mask_value = (uint64_t) args_buff[args_idx];
721
722         if (watch_mask_value > 0) {
723                 /*
724                  * There is an array of masks.
725                  * Set the watch mask base pointer to point at the array
726                  * base within args_buff.
727                  */
728                 aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
729
730                 /* skip over the masks buffer */
731                 args_idx += sizeof(aw_info.watch_mask) *
732                                 aw_info.num_watch_points;
733         } else {
734                 /* just the NULL mask, set to NULL and skip over it */
735                 aw_info.watch_mask = NULL;
736                 args_idx += sizeof(aw_info.watch_mask);
737         }
738
739         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
740                 status = -EINVAL;
741                 goto out;
742         }
743
744         /* Currently HSA Event is not supported for DBG */
745         aw_info.watch_event = NULL;
746
747         mutex_lock(kfd_get_dbgmgr_mutex());
748
749         status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
750
751         mutex_unlock(kfd_get_dbgmgr_mutex());
752
753 out:
754         kfree(args_buff);
755
756         return status;
757 }
758
759 /* Parse and generate fixed size data structure for wave control */
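/*
 * Payload layout expected after the ioctl args (matching computed_buff_size
 * below):
 *   operand   (enum HSA_DBG_WAVEOP)
 *   mode      (enum HSA_DBG_WAVEMODE)
 *   trapId    (uint32_t)
 *   DbgWaveMsg value (uint32_t)
 *   MemoryVA  (ignored; the kernel always uses NULL)
 */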
760 static int kfd_ioctl_dbg_wave_control(struct file *filep,
761                                         struct kfd_process *p, void *data)
762 {
763         struct kfd_ioctl_dbg_wave_control_args *args = data;
764         struct kfd_dev *dev;
765         struct dbg_wave_control_info wac_info;
766         unsigned char *args_buff;
767         uint32_t computed_buff_size;
768         long status;
769         void __user *cmd_from_user;
770         unsigned int args_idx = 0;
771
772         memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
773
774         /* we use compact form, independent of the packing attribute value */
775         computed_buff_size = sizeof(*args) +
776                                 sizeof(wac_info.mode) +
777                                 sizeof(wac_info.operand) +
778                                 sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
779                                 sizeof(wac_info.dbgWave_msg.MemoryVA) +
780                                 sizeof(wac_info.trapId);
781
782         dev = kfd_device_by_id(args->gpu_id);
783         if (!dev)
784                 return -EINVAL;
785
786         if (dev->device_info->asic_family == CHIP_CARRIZO) {
787                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
788                 return -EINVAL;
789         }
790
791         /* input size must match the computed "compact" size */
792         if (args->buf_size_in_bytes != computed_buff_size) {
793                 pr_debug("size mismatch, computed : actual %u : %u\n",
794                                 args->buf_size_in_bytes, computed_buff_size);
795                 return -EINVAL;
796         }
797
798         cmd_from_user = (void __user *) args->content_ptr;
799
800         if (cmd_from_user == NULL)
801                 return -EINVAL;
802
803         /* copy the entire buffer from user */
804
805         args_buff = memdup_user(cmd_from_user,
806                                 args->buf_size_in_bytes - sizeof(*args));
807         if (IS_ERR(args_buff))
808                 return PTR_ERR(args_buff);
809
810         /* move ptr to the start of the payload area */
811         wac_info.process = p;
812
813         wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
814         args_idx += sizeof(wac_info.operand);
815
816         wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
817         args_idx += sizeof(wac_info.mode);
818
819         wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
820         args_idx += sizeof(wac_info.trapId);
821
822         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
823                                         *((uint32_t *)(&args_buff[args_idx]));
824         wac_info.dbgWave_msg.MemoryVA = NULL;
825
826         mutex_lock(kfd_get_dbgmgr_mutex());
827
828         pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
829                         wac_info.process, wac_info.operand,
830                         wac_info.mode, wac_info.trapId,
831                         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
832
833         status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
834
835         pr_debug("Returned status of dbg manager is %ld\n", status);
836
837         mutex_unlock(kfd_get_dbgmgr_mutex());
838
839         kfree(args_buff);
840
841         return status;
842 }
843
844 static int kfd_ioctl_get_clock_counters(struct file *filep,
845                                 struct kfd_process *p, void *data)
846 {
847         struct kfd_ioctl_get_clock_counters_args *args = data;
848         struct kfd_dev *dev;
849
850         dev = kfd_device_by_id(args->gpu_id);
851         if (dev)
852                 /* Reading GPU clock counter from KGD */
853                 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
854         else
855                 /* Node without GPU resource */
856                 args->gpu_clock_counter = 0;
857
858         /* No access to rdtsc. Using raw monotonic time */
859         args->cpu_clock_counter = ktime_get_raw_ns();
860         args->system_clock_counter = ktime_get_boottime_ns();
861
862         /* Since the counter is in nanoseconds, we use a 1 GHz frequency */
863         args->system_clock_freq = 1000000000;
864
865         return 0;
866 }
867
868
869 static int kfd_ioctl_get_process_apertures(struct file *filp,
870                                 struct kfd_process *p, void *data)
871 {
872         struct kfd_ioctl_get_process_apertures_args *args = data;
873         struct kfd_process_device_apertures *pAperture;
874         int i;
875
876         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
877
878         args->num_of_nodes = 0;
879
880         mutex_lock(&p->mutex);
881         /* Run over all pdds of the process */
882         for (i = 0; i < p->n_pdds; i++) {
883                 struct kfd_process_device *pdd = p->pdds[i];
884
885                 pAperture =
886                         &args->process_apertures[args->num_of_nodes];
887                 pAperture->gpu_id = pdd->dev->id;
888                 pAperture->lds_base = pdd->lds_base;
889                 pAperture->lds_limit = pdd->lds_limit;
890                 pAperture->gpuvm_base = pdd->gpuvm_base;
891                 pAperture->gpuvm_limit = pdd->gpuvm_limit;
892                 pAperture->scratch_base = pdd->scratch_base;
893                 pAperture->scratch_limit = pdd->scratch_limit;
894
895                 dev_dbg(kfd_device,
896                         "node id %u\n", args->num_of_nodes);
897                 dev_dbg(kfd_device,
898                         "gpu id %u\n", pdd->dev->id);
899                 dev_dbg(kfd_device,
900                         "lds_base %llX\n", pdd->lds_base);
901                 dev_dbg(kfd_device,
902                         "lds_limit %llX\n", pdd->lds_limit);
903                 dev_dbg(kfd_device,
904                         "gpuvm_base %llX\n", pdd->gpuvm_base);
905                 dev_dbg(kfd_device,
906                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
907                 dev_dbg(kfd_device,
908                         "scratch_base %llX\n", pdd->scratch_base);
909                 dev_dbg(kfd_device,
910                         "scratch_limit %llX\n", pdd->scratch_limit);
911
912                 if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
913                         break;
914         }
915         mutex_unlock(&p->mutex);
916
917         return 0;
918 }
919
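/*
 * Two-step aperture query: when called with num_of_nodes == 0 the ioctl
 * only reports how many GPU nodes the process has, so user space can size
 * its buffer; a second call then fills at most num_of_nodes
 * kfd_process_device_apertures entries at kfd_process_device_apertures_ptr.
 */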
920 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
921                                 struct kfd_process *p, void *data)
922 {
923         struct kfd_ioctl_get_process_apertures_new_args *args = data;
924         struct kfd_process_device_apertures *pa;
925         int ret;
926         int i;
927
928         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
929
930         if (args->num_of_nodes == 0) {
931                 /* Return number of nodes, so that user space can allocate
932                  * sufficient memory
933                  */
934                 mutex_lock(&p->mutex);
935                 args->num_of_nodes = p->n_pdds;
936                 goto out_unlock;
937         }
938
939         /* Fill in process-aperture information for all available
940          * nodes, but not more than args->num_of_nodes as that is
941          * the amount of memory allocated by user
942          */
943         pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
944                                 args->num_of_nodes), GFP_KERNEL);
945         if (!pa)
946                 return -ENOMEM;
947
948         mutex_lock(&p->mutex);
949
950         if (!p->n_pdds) {
951                 args->num_of_nodes = 0;
952                 kfree(pa);
953                 goto out_unlock;
954         }
955
956         /* Run over all pdds of the process */
957         for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
958                 struct kfd_process_device *pdd = p->pdds[i];
959
960                 pa[i].gpu_id = pdd->dev->id;
961                 pa[i].lds_base = pdd->lds_base;
962                 pa[i].lds_limit = pdd->lds_limit;
963                 pa[i].gpuvm_base = pdd->gpuvm_base;
964                 pa[i].gpuvm_limit = pdd->gpuvm_limit;
965                 pa[i].scratch_base = pdd->scratch_base;
966                 pa[i].scratch_limit = pdd->scratch_limit;
967
968                 dev_dbg(kfd_device,
969                         "gpu id %u\n", pdd->dev->id);
970                 dev_dbg(kfd_device,
971                         "lds_base %llX\n", pdd->lds_base);
972                 dev_dbg(kfd_device,
973                         "lds_limit %llX\n", pdd->lds_limit);
974                 dev_dbg(kfd_device,
975                         "gpuvm_base %llX\n", pdd->gpuvm_base);
976                 dev_dbg(kfd_device,
977                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
978                 dev_dbg(kfd_device,
979                         "scratch_base %llX\n", pdd->scratch_base);
980                 dev_dbg(kfd_device,
981                         "scratch_limit %llX\n", pdd->scratch_limit);
982         }
983         mutex_unlock(&p->mutex);
984
985         args->num_of_nodes = i;
986         ret = copy_to_user(
987                         (void __user *)args->kfd_process_device_apertures_ptr,
988                         pa,
989                         (i * sizeof(struct kfd_process_device_apertures)));
990         kfree(pa);
991         return ret ? -EFAULT : 0;
992
993 out_unlock:
994         mutex_unlock(&p->mutex);
995         return 0;
996 }
997
998 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
999                                         void *data)
1000 {
1001         struct kfd_ioctl_create_event_args *args = data;
1002         int err;
1003
1004         /* For dGPUs the event page is allocated in user mode. The
1005          * handle is passed to KFD with the first call to this IOCTL
1006          * through the event_page_offset field.
1007          */
1008         if (args->event_page_offset) {
1009                 struct kfd_dev *kfd;
1010                 struct kfd_process_device *pdd;
1011                 void *mem, *kern_addr;
1012                 uint64_t size;
1013
1014                 if (p->signal_page) {
1015                         pr_err("Event page is already set\n");
1016                         return -EINVAL;
1017                 }
1018
1019                 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1020                 if (!kfd) {
1021                         pr_err("Getting device by id failed in %s\n", __func__);
1022                         return -EINVAL;
1023                 }
1024
1025                 mutex_lock(&p->mutex);
1026                 pdd = kfd_bind_process_to_device(kfd, p);
1027                 if (IS_ERR(pdd)) {
1028                         err = PTR_ERR(pdd);
1029                         goto out_unlock;
1030                 }
1031
1032                 mem = kfd_process_device_translate_handle(pdd,
1033                                 GET_IDR_HANDLE(args->event_page_offset));
1034                 if (!mem) {
1035                         pr_err("Can't find BO, offset is 0x%llx\n",
1036                                args->event_page_offset);
1037                         err = -EINVAL;
1038                         goto out_unlock;
1039                 }
1040                 mutex_unlock(&p->mutex);
1041
1042                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1043                                                 mem, &kern_addr, &size);
1044                 if (err) {
1045                         pr_err("Failed to map event page to kernel\n");
1046                         return err;
1047                 }
1048
1049                 err = kfd_event_page_set(p, kern_addr, size);
1050                 if (err) {
1051                         pr_err("Failed to set event page\n");
1052                         return err;
1053                 }
1054         }
1055
1056         err = kfd_event_create(filp, p, args->event_type,
1057                                 args->auto_reset != 0, args->node_id,
1058                                 &args->event_id, &args->event_trigger_data,
1059                                 &args->event_page_offset,
1060                                 &args->event_slot_index);
1061
1062         return err;
1063
1064 out_unlock:
1065         mutex_unlock(&p->mutex);
1066         return err;
1067 }
1068
1069 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1070                                         void *data)
1071 {
1072         struct kfd_ioctl_destroy_event_args *args = data;
1073
1074         return kfd_event_destroy(p, args->event_id);
1075 }
1076
1077 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1078                                 void *data)
1079 {
1080         struct kfd_ioctl_set_event_args *args = data;
1081
1082         return kfd_set_event(p, args->event_id);
1083 }
1084
1085 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1086                                 void *data)
1087 {
1088         struct kfd_ioctl_reset_event_args *args = data;
1089
1090         return kfd_reset_event(p, args->event_id);
1091 }
1092
1093 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1094                                 void *data)
1095 {
1096         struct kfd_ioctl_wait_events_args *args = data;
1097         int err;
1098
1099         err = kfd_wait_on_events(p, args->num_events,
1100                         (void __user *)args->events_ptr,
1101                         (args->wait_for_all != 0),
1102                         args->timeout, &args->wait_result);
1103
1104         return err;
1105 }
1106 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1107                                         struct kfd_process *p, void *data)
1108 {
1109         struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1110         struct kfd_process_device *pdd;
1111         struct kfd_dev *dev;
1112         long err;
1113
1114         dev = kfd_device_by_id(args->gpu_id);
1115         if (!dev)
1116                 return -EINVAL;
1117
1118         mutex_lock(&p->mutex);
1119
1120         pdd = kfd_bind_process_to_device(dev, p);
1121         if (IS_ERR(pdd)) {
1122                 err = PTR_ERR(pdd);
1123                 goto bind_process_to_device_fail;
1124         }
1125
1126         pdd->qpd.sh_hidden_private_base = args->va_addr;
1127
1128         mutex_unlock(&p->mutex);
1129
1130         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1131             pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1132                 dev->kfd2kgd->set_scratch_backing_va(
1133                         dev->kgd, args->va_addr, pdd->qpd.vmid);
1134
1135         return 0;
1136
1137 bind_process_to_device_fail:
1138         mutex_unlock(&p->mutex);
1139         return err;
1140 }
1141
1142 static int kfd_ioctl_get_tile_config(struct file *filep,
1143                 struct kfd_process *p, void *data)
1144 {
1145         struct kfd_ioctl_get_tile_config_args *args = data;
1146         struct kfd_dev *dev;
1147         struct tile_config config;
1148         int err = 0;
1149
1150         dev = kfd_device_by_id(args->gpu_id);
1151         if (!dev)
1152                 return -EINVAL;
1153
1154         amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1155
1156         args->gb_addr_config = config.gb_addr_config;
1157         args->num_banks = config.num_banks;
1158         args->num_ranks = config.num_ranks;
1159
1160         if (args->num_tile_configs > config.num_tile_configs)
1161                 args->num_tile_configs = config.num_tile_configs;
1162         err = copy_to_user((void __user *)args->tile_config_ptr,
1163                         config.tile_config_ptr,
1164                         args->num_tile_configs * sizeof(uint32_t));
1165         if (err) {
1166                 args->num_tile_configs = 0;
1167                 return -EFAULT;
1168         }
1169
1170         if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1171                 args->num_macro_tile_configs =
1172                                 config.num_macro_tile_configs;
1173         err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1174                         config.macro_tile_config_ptr,
1175                         args->num_macro_tile_configs * sizeof(uint32_t));
1176         if (err) {
1177                 args->num_macro_tile_configs = 0;
1178                 return -EFAULT;
1179         }
1180
1181         return 0;
1182 }
1183
1184 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1185                                 void *data)
1186 {
1187         struct kfd_ioctl_acquire_vm_args *args = data;
1188         struct kfd_process_device *pdd;
1189         struct kfd_dev *dev;
1190         struct file *drm_file;
1191         int ret;
1192
1193         dev = kfd_device_by_id(args->gpu_id);
1194         if (!dev)
1195                 return -EINVAL;
1196
1197         drm_file = fget(args->drm_fd);
1198         if (!drm_file)
1199                 return -EINVAL;
1200
1201         mutex_lock(&p->mutex);
1202
1203         pdd = kfd_get_process_device_data(dev, p);
1204         if (!pdd) {
1205                 ret = -EINVAL;
1206                 goto err_unlock;
1207         }
1208
1209         if (pdd->drm_file) {
1210                 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1211                 goto err_unlock;
1212         }
1213
1214         ret = kfd_process_device_init_vm(pdd, drm_file);
1215         if (ret)
1216                 goto err_unlock;
1217         /* On success, the PDD keeps the drm_file reference */
1218         mutex_unlock(&p->mutex);
1219
1220         return 0;
1221
1222 err_unlock:
1223         mutex_unlock(&p->mutex);
1224         fput(drm_file);
1225         return ret;
1226 }
1227
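/*
 * A device counts as "large BAR" when all of its local memory is host
 * visible (no private VRAM).  The debug_largebar option can simulate this
 * on small-BAR systems, and devices using IOMMUv2 never qualify.
 */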
1228 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1229 {
1230         struct kfd_local_mem_info mem_info;
1231
1232         if (debug_largebar) {
1233                 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1234                 return true;
1235         }
1236
1237         if (dev->use_iommu_v2)
1238                 return false;
1239
1240         amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1241         if (mem_info.local_mem_size_private == 0 &&
1242                         mem_info.local_mem_size_public > 0)
1243                 return true;
1244         return false;
1245 }
1246
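/*
 * Allocate memory on a GPU for this process.  Doorbell and MMIO-remap
 * allocations are special-cased (fixed size, fixed physical offset); all
 * other requests go to amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu().  The
 * returned handle packs the GPU id with an IDR handle, and VRAM allocations
 * are accounted in pdd->vram_usage.
 */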
1247 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1248                                         struct kfd_process *p, void *data)
1249 {
1250         struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1251         struct kfd_process_device *pdd;
1252         void *mem;
1253         struct kfd_dev *dev;
1254         int idr_handle;
1255         long err;
1256         uint64_t offset = args->mmap_offset;
1257         uint32_t flags = args->flags;
1258
1259         if (args->size == 0)
1260                 return -EINVAL;
1261
1262         dev = kfd_device_by_id(args->gpu_id);
1263         if (!dev)
1264                 return -EINVAL;
1265
1266         if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1267                 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1268                 !kfd_dev_is_large_bar(dev)) {
1269                 pr_err("Alloc host visible vram on small bar is not allowed\n");
1270                 return -EINVAL;
1271         }
1272
1273         mutex_lock(&p->mutex);
1274
1275         pdd = kfd_bind_process_to_device(dev, p);
1276         if (IS_ERR(pdd)) {
1277                 err = PTR_ERR(pdd);
1278                 goto err_unlock;
1279         }
1280
1281         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1282                 if (args->size != kfd_doorbell_process_slice(dev)) {
1283                         err = -EINVAL;
1284                         goto err_unlock;
1285                 }
1286                 offset = kfd_get_process_doorbells(pdd);
1287         } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1288                 if (args->size != PAGE_SIZE) {
1289                         err = -EINVAL;
1290                         goto err_unlock;
1291                 }
1292                 offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1293                 if (!offset) {
1294                         err = -ENOMEM;
1295                         goto err_unlock;
1296                 }
1297         }
1298
1299         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1300                 dev->kgd, args->va_addr, args->size,
1301                 pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
1302                 flags);
1303
1304         if (err)
1305                 goto err_unlock;
1306
1307         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1308         if (idr_handle < 0) {
1309                 err = -EFAULT;
1310                 goto err_free;
1311         }
1312
1313         /* Update the VRAM usage count */
1314         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1315                 WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1316
1317         mutex_unlock(&p->mutex);
1318
1319         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1320         args->mmap_offset = offset;
1321
1322         /* MMIO is mapped through the kfd device.
1323          * Generate a kfd mmap offset.
1324          */
1325         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1326                 args->mmap_offset = KFD_MMAP_TYPE_MMIO
1327                                         | KFD_MMAP_GPU_ID(args->gpu_id);
1328
1329         return 0;
1330
1331 err_free:
1332         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
1333                                                pdd->drm_priv, NULL);
1334 err_unlock:
1335         mutex_unlock(&p->mutex);
1336         return err;
1337 }
1338
1339 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1340                                         struct kfd_process *p, void *data)
1341 {
1342         struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1343         struct kfd_process_device *pdd;
1344         void *mem;
1345         struct kfd_dev *dev;
1346         int ret;
1347         uint64_t size = 0;
1348
1349         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1350         if (!dev)
1351                 return -EINVAL;
1352
1353         mutex_lock(&p->mutex);
1354
1355         pdd = kfd_get_process_device_data(dev, p);
1356         if (!pdd) {
1357                 pr_err("Process device data doesn't exist\n");
1358                 ret = -EINVAL;
1359                 goto err_unlock;
1360         }
1361
1362         mem = kfd_process_device_translate_handle(
1363                 pdd, GET_IDR_HANDLE(args->handle));
1364         if (!mem) {
1365                 ret = -EINVAL;
1366                 goto err_unlock;
1367         }
1368
1369         ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1370                                 (struct kgd_mem *)mem, pdd->drm_priv, &size);
1371
1372         /* If freeing the buffer failed, leave the handle in place for
1373          * clean-up during process tear-down.
1374          */
1375         if (!ret)
1376                 kfd_process_device_remove_obj_handle(
1377                         pdd, GET_IDR_HANDLE(args->handle));
1378
1379         WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1380
1381 err_unlock:
1382         mutex_unlock(&p->mutex);
1383         return ret;
1384 }
1385
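/*
 * Map an already allocated buffer into the GPUVM of every device listed in
 * the user-provided device id array, resuming after the first n_success
 * entries.  Once all mappings are queued the call waits for the page-table
 * updates to complete and flushes TLBs if any page tables were freed.
 */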
1386 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1387                                         struct kfd_process *p, void *data)
1388 {
1389         struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1390         struct kfd_process_device *pdd, *peer_pdd;
1391         void *mem;
1392         struct kfd_dev *dev, *peer;
1393         long err = 0;
1394         int i;
1395         uint32_t *devices_arr = NULL;
1396         bool table_freed = false;
1397
1398         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1399         if (!dev)
1400                 return -EINVAL;
1401
1402         if (!args->n_devices) {
1403                 pr_debug("Device IDs array empty\n");
1404                 return -EINVAL;
1405         }
1406         if (args->n_success > args->n_devices) {
1407                 pr_debug("n_success exceeds n_devices\n");
1408                 return -EINVAL;
1409         }
1410
1411         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1412                                     GFP_KERNEL);
1413         if (!devices_arr)
1414                 return -ENOMEM;
1415
1416         err = copy_from_user(devices_arr,
1417                              (void __user *)args->device_ids_array_ptr,
1418                              args->n_devices * sizeof(*devices_arr));
1419         if (err != 0) {
1420                 err = -EFAULT;
1421                 goto copy_from_user_failed;
1422         }
1423
1424         mutex_lock(&p->mutex);
1425
1426         pdd = kfd_bind_process_to_device(dev, p);
1427         if (IS_ERR(pdd)) {
1428                 err = PTR_ERR(pdd);
1429                 goto bind_process_to_device_failed;
1430         }
1431
1432         mem = kfd_process_device_translate_handle(pdd,
1433                                                 GET_IDR_HANDLE(args->handle));
1434         if (!mem) {
1435                 err = -ENOMEM;
1436                 goto get_mem_obj_from_handle_failed;
1437         }
1438
1439         for (i = args->n_success; i < args->n_devices; i++) {
1440                 peer = kfd_device_by_id(devices_arr[i]);
1441                 if (!peer) {
1442                         pr_debug("Getting device by id failed for 0x%x\n",
1443                                  devices_arr[i]);
1444                         err = -EINVAL;
1445                         goto get_mem_obj_from_handle_failed;
1446                 }
1447
1448                 peer_pdd = kfd_bind_process_to_device(peer, p);
1449                 if (IS_ERR(peer_pdd)) {
1450                         err = PTR_ERR(peer_pdd);
1451                         goto get_mem_obj_from_handle_failed;
1452                 }
1453                 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1454                         peer->kgd, (struct kgd_mem *)mem,
1455                         peer_pdd->drm_priv, &table_freed);
1456                 if (err) {
1457                         pr_err("Failed to map to gpu %d/%d\n",
1458                                i, args->n_devices);
1459                         goto map_memory_to_gpu_failed;
1460                 }
1461                 args->n_success = i+1;
1462         }
1463
1464         mutex_unlock(&p->mutex);
1465
1466         err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1467         if (err) {
1468                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1469                 goto sync_memory_failed;
1470         }
1471
1472         /* Flush TLBs after waiting for the page table updates to complete */
1473         if (table_freed) {
1474                 for (i = 0; i < args->n_devices; i++) {
1475                         peer = kfd_device_by_id(devices_arr[i]);
1476                         if (WARN_ON_ONCE(!peer))
1477                                 continue;
1478                         peer_pdd = kfd_get_process_device_data(peer, p);
1479                         if (WARN_ON_ONCE(!peer_pdd))
1480                                 continue;
1481                         kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
1482                 }
1483         }
1484         kfree(devices_arr);
1485
1486         return err;
1487
1488 bind_process_to_device_failed:
1489 get_mem_obj_from_handle_failed:
1490 map_memory_to_gpu_failed:
1491         mutex_unlock(&p->mutex);
1492 copy_from_user_failed:
1493 sync_memory_failed:
1494         kfree(devices_arr);
1495
1496         return err;
1497 }
1498
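/*
 * Reverse of kfd_ioctl_map_memory_to_gpu(): unmap the buffer from each
 * device in the user-provided array; on Aldebaran the call additionally
 * waits for the page-table updates and flushes TLBs.
 */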
1499 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1500                                         struct kfd_process *p, void *data)
1501 {
1502         struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1503         struct kfd_process_device *pdd, *peer_pdd;
1504         void *mem;
1505         struct kfd_dev *dev, *peer;
1506         long err = 0;
1507         uint32_t *devices_arr = NULL, i;
1508
1509         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1510         if (!dev)
1511                 return -EINVAL;
1512
1513         if (!args->n_devices) {
1514                 pr_debug("Device IDs array empty\n");
1515                 return -EINVAL;
1516         }
1517         if (args->n_success > args->n_devices) {
1518                 pr_debug("n_success exceeds n_devices\n");
1519                 return -EINVAL;
1520         }
1521
1522         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1523                                     GFP_KERNEL);
1524         if (!devices_arr)
1525                 return -ENOMEM;
1526
1527         err = copy_from_user(devices_arr,
1528                              (void __user *)args->device_ids_array_ptr,
1529                              args->n_devices * sizeof(*devices_arr));
1530         if (err != 0) {
1531                 err = -EFAULT;
1532                 goto copy_from_user_failed;
1533         }
1534
1535         mutex_lock(&p->mutex);
1536
1537         pdd = kfd_get_process_device_data(dev, p);
1538         if (!pdd) {
1539                 err = -EINVAL;
1540                 goto bind_process_to_device_failed;
1541         }
1542
1543         mem = kfd_process_device_translate_handle(pdd,
1544                                                 GET_IDR_HANDLE(args->handle));
1545         if (!mem) {
1546                 err = -ENOMEM;
1547                 goto get_mem_obj_from_handle_failed;
1548         }
1549
1550         for (i = args->n_success; i < args->n_devices; i++) {
1551                 peer = kfd_device_by_id(devices_arr[i]);
1552                 if (!peer) {
1553                         err = -EINVAL;
1554                         goto get_mem_obj_from_handle_failed;
1555                 }
1556
1557                 peer_pdd = kfd_get_process_device_data(peer, p);
1558                 if (!peer_pdd) {
1559                         err = -ENODEV;
1560                         goto get_mem_obj_from_handle_failed;
1561                 }
1562                 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1563                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
1564                 if (err) {
1565                         pr_err("Failed to unmap from gpu %d/%d\n",
1566                                i, args->n_devices);
1567                         goto unmap_memory_from_gpu_failed;
1568                 }
1569                 args->n_success = i + 1;
1570         }
1571         mutex_unlock(&p->mutex);
1572
1573         if (dev->device_info->asic_family == CHIP_ALDEBARAN) {
1574                 err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd,
1575                                 (struct kgd_mem *) mem, true);
1576                 if (err) {
1577                         pr_debug("Sync memory failed, wait interrupted by user signal\n");
1578                         goto sync_memory_failed;
1579                 }
1580
1581                 /* Flush TLBs after waiting for the page table updates to complete */
1582                 for (i = 0; i < args->n_devices; i++) {
1583                         peer = kfd_device_by_id(devices_arr[i]);
1584                         if (WARN_ON_ONCE(!peer))
1585                                 continue;
1586                         peer_pdd = kfd_get_process_device_data(peer, p);
1587                         if (WARN_ON_ONCE(!peer_pdd))
1588                                 continue;
1589                         kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
1590                 }
1591         }
1592         kfree(devices_arr);
1593
1594         return 0;
1595
1596 bind_process_to_device_failed:
1597 get_mem_obj_from_handle_failed:
1598 unmap_memory_from_gpu_failed:
1599         mutex_unlock(&p->mutex);
1600 copy_from_user_failed:
1601 sync_memory_failed:
1602         kfree(devices_arr);
1603         return err;
1604 }
1605
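/*
 * Allocate or release GWS (global wave sync) resources for a user queue.
 * This requires hardware scheduling (HWS) and a device that exposes a GWS
 * block; args->num_gws == 0 releases any previously assigned GWS.
 * args->first_gws is reported as 0 unconditionally.
 */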
1606 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1607                 struct kfd_process *p, void *data)
1608 {
1609         int retval;
1610         struct kfd_ioctl_alloc_queue_gws_args *args = data;
1611         struct queue *q;
1612         struct kfd_dev *dev;
1613
1614         mutex_lock(&p->mutex);
1615         q = pqm_get_user_queue(&p->pqm, args->queue_id);
1616         if (!q) {
1617                 retval = -EINVAL;
1618                 goto out_unlock;
1619         }
1620
1621         dev = q->device;
1623
1624         if (!dev->gws) {
1625                 retval = -ENODEV;
1626                 goto out_unlock;
1627         }
1628
1629         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1630                 retval = -ENODEV;
1631                 goto out_unlock;
1632         }
1633
1634         retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1635         mutex_unlock(&p->mutex);
1636
1637         args->first_gws = 0;
1638         return retval;
1639
1640 out_unlock:
1641         mutex_unlock(&p->mutex);
1642         return retval;
1643 }
1644
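/*
 * Query the size, allocation flags, owning gpu_id and (optionally) the
 * driver metadata of a buffer referenced by a DMA-BUF file descriptor.
 * The first enumerated KFD device is used to issue the KGD query; the
 * result is then mapped back to the exporting GPU via kfd_device_by_kgd().
 */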
1645 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1646                 struct kfd_process *p, void *data)
1647 {
1648         struct kfd_ioctl_get_dmabuf_info_args *args = data;
1649         struct kfd_dev *dev = NULL;
1650         struct kgd_dev *dma_buf_kgd;
1651         void *metadata_buffer = NULL;
1652         uint32_t flags;
1653         unsigned int i;
1654         int r;
1655
1656         /* Find a KFD GPU device that supports the get_dmabuf_info query */
1657         for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1658                 if (dev)
1659                         break;
1660         if (!dev)
1661                 return -EINVAL;
1662
1663         if (args->metadata_ptr) {
1664                 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1665                 if (!metadata_buffer)
1666                         return -ENOMEM;
1667         }
1668
1669         /* Get dmabuf info from KGD */
1670         r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1671                                           &dma_buf_kgd, &args->size,
1672                                           metadata_buffer, args->metadata_size,
1673                                           &args->metadata_size, &flags);
1674         if (r)
1675                 goto exit;
1676
1677         /* Reverse-lookup gpu_id from kgd pointer */
1678         dev = kfd_device_by_kgd(dma_buf_kgd);
1679         if (!dev) {
1680                 r = -EINVAL;
1681                 goto exit;
1682         }
1683         args->gpu_id = dev->id;
1684         args->flags = flags;
1685
1686         /* Copy metadata buffer to user mode */
1687         if (metadata_buffer) {
1688                 r = copy_to_user((void __user *)args->metadata_ptr,
1689                                  metadata_buffer, args->metadata_size);
1690                 if (r != 0)
1691                         r = -EFAULT;
1692         }
1693
1694 exit:
1695         kfree(metadata_buffer);
1696
1697         return r;
1698 }
1699
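/*
 * Import a DMA-BUF file descriptor as a KFD buffer object at the GPU
 * virtual address requested by userspace. Illustrative (hypothetical)
 * caller-side usage, assuming kfd_fd is an open /dev/kfd descriptor:
 *
 *	struct kfd_ioctl_import_dmabuf_args args = {
 *		.va_addr   = va,	// GPU VA chosen by the runtime
 *		.gpu_id    = gpu_id,	// target device
 *		.dmabuf_fd = fd,	// fd exported by another driver
 *	};
 *	if (!ioctl(kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, &args))
 *		handle = args.handle;	// handle for later map/unmap/free
 */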
1700 static int kfd_ioctl_import_dmabuf(struct file *filep,
1701                                    struct kfd_process *p, void *data)
1702 {
1703         struct kfd_ioctl_import_dmabuf_args *args = data;
1704         struct kfd_process_device *pdd;
1705         struct dma_buf *dmabuf;
1706         struct kfd_dev *dev;
1707         int idr_handle;
1708         uint64_t size;
1709         void *mem;
1710         int r;
1711
1712         dev = kfd_device_by_id(args->gpu_id);
1713         if (!dev)
1714                 return -EINVAL;
1715
1716         dmabuf = dma_buf_get(args->dmabuf_fd);
1717         if (IS_ERR(dmabuf))
1718                 return PTR_ERR(dmabuf);
1719
1720         mutex_lock(&p->mutex);
1721
1722         pdd = kfd_bind_process_to_device(dev, p);
1723         if (IS_ERR(pdd)) {
1724                 r = PTR_ERR(pdd);
1725                 goto err_unlock;
1726         }
1727
1728         r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1729                                               args->va_addr, pdd->drm_priv,
1730                                               (struct kgd_mem **)&mem, &size,
1731                                               NULL);
1732         if (r)
1733                 goto err_unlock;
1734
1735         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1736         if (idr_handle < 0) {
1737                 r = -EFAULT;
1738                 goto err_free;
1739         }
1740
1741         mutex_unlock(&p->mutex);
1742         dma_buf_put(dmabuf);
1743
1744         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1745
1746         return 0;
1747
1748 err_free:
1749         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
1750                                                pdd->drm_priv, NULL);
1751 err_unlock:
1752         mutex_unlock(&p->mutex);
1753         dma_buf_put(dmabuf);
1754         return r;
1755 }
1756
1757 /* Handle requests for watching SMI events */
1758 static int kfd_ioctl_smi_events(struct file *filep,
1759                                 struct kfd_process *p, void *data)
1760 {
1761         struct kfd_ioctl_smi_events_args *args = data;
1762         struct kfd_dev *dev;
1763
1764         dev = kfd_device_by_id(args->gpuid);
1765         if (!dev)
1766                 return -EINVAL;
1767
1768         return kfd_smi_event_open(dev, &args->anon_fd);
1769 }
1770
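/*
 * Get or set the process XNACK (GPU retry-fault) mode. A negative
 * args->xnack_enabled queries the current setting; a non-negative value
 * sets it, which is refused with -EBUSY once user queues exist and with
 * -EPERM when enabling is requested but not supported for this process's
 * devices.
 */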
1771 static int kfd_ioctl_set_xnack_mode(struct file *filep,
1772                                     struct kfd_process *p, void *data)
1773 {
1774         struct kfd_ioctl_set_xnack_mode_args *args = data;
1775         int r = 0;
1776
1777         mutex_lock(&p->mutex);
1778         if (args->xnack_enabled >= 0) {
1779                 if (!list_empty(&p->pqm.queues)) {
1780                         pr_debug("Process has user queues running\n");
1781                         mutex_unlock(&p->mutex);
1782                         return -EBUSY;
1783                 }
1784                 if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
1785                         r = -EPERM;
1786                 else
1787                         p->xnack_enabled = args->xnack_enabled;
1788         } else {
1789                 args->xnack_enabled = p->xnack_enabled;
1790         }
1791         mutex_unlock(&p->mutex);
1792
1793         return r;
1794 }
1795
1796 #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1797 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1798 {
1799         struct kfd_ioctl_svm_args *args = data;
1800         int r = 0;
1801
1802         pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
1803                  args->start_addr, args->size, args->op, args->nattr);
1804
1805         if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
1806                 return -EINVAL;
1807         if (!args->start_addr || !args->size)
1808                 return -EINVAL;
1809
1810         mutex_lock(&p->mutex);
1811
1812         r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
1813                       args->attrs);
1814
1815         mutex_unlock(&p->mutex);
1816
1817         return r;
1818 }
1819 #else
1820 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1821 {
1822         return -EPERM;
1823 }
1824 #endif
1825
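/*
 * Each entry is placed at the index given by its ioctl number (_IOC_NR),
 * so kfd_ioctl() can look descriptors up directly by command number.
 */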
1826 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1827         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1828                             .cmd_drv = 0, .name = #ioctl}
1829
1830 /** Ioctl table */
1831 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1832         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1833                         kfd_ioctl_get_version, 0),
1834
1835         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1836                         kfd_ioctl_create_queue, 0),
1837
1838         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1839                         kfd_ioctl_destroy_queue, 0),
1840
1841         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1842                         kfd_ioctl_set_memory_policy, 0),
1843
1844         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1845                         kfd_ioctl_get_clock_counters, 0),
1846
1847         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1848                         kfd_ioctl_get_process_apertures, 0),
1849
1850         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1851                         kfd_ioctl_update_queue, 0),
1852
1853         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1854                         kfd_ioctl_create_event, 0),
1855
1856         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1857                         kfd_ioctl_destroy_event, 0),
1858
1859         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1860                         kfd_ioctl_set_event, 0),
1861
1862         AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1863                         kfd_ioctl_reset_event, 0),
1864
1865         AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1866                         kfd_ioctl_wait_events, 0),
1867
1868         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1869                         kfd_ioctl_dbg_register, 0),
1870
1871         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1872                         kfd_ioctl_dbg_unregister, 0),
1873
1874         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1875                         kfd_ioctl_dbg_address_watch, 0),
1876
1877         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1878                         kfd_ioctl_dbg_wave_control, 0),
1879
1880         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1881                         kfd_ioctl_set_scratch_backing_va, 0),
1882
1883         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1884                         kfd_ioctl_get_tile_config, 0),
1885
1886         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1887                         kfd_ioctl_set_trap_handler, 0),
1888
1889         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1890                         kfd_ioctl_get_process_apertures_new, 0),
1891
1892         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1893                         kfd_ioctl_acquire_vm, 0),
1894
1895         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1896                         kfd_ioctl_alloc_memory_of_gpu, 0),
1897
1898         AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1899                         kfd_ioctl_free_memory_of_gpu, 0),
1900
1901         AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1902                         kfd_ioctl_map_memory_to_gpu, 0),
1903
1904         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1905                         kfd_ioctl_unmap_memory_from_gpu, 0),
1906
1907         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1908                         kfd_ioctl_set_cu_mask, 0),
1909
1910         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1911                         kfd_ioctl_get_queue_wave_state, 0),
1912
1913         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1914                                 kfd_ioctl_get_dmabuf_info, 0),
1915
1916         AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1917                                 kfd_ioctl_import_dmabuf, 0),
1918
1919         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1920                         kfd_ioctl_alloc_queue_gws, 0),
1921
1922         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1923                         kfd_ioctl_smi_events, 0),
1924
1925         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
1926
1927         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
1928                         kfd_ioctl_set_xnack_mode, 0),
1929 };
1930
1931 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1932
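/*
 * Common ioctl dispatcher. The argument struct is copied into a kernel
 * buffer sized to the larger of the user-passed size and the size the
 * kernel expects for this command, with the tail zero-filled; this lets
 * a newer kernel accept a smaller struct from older userspace. For
 * example, if userspace passes a 24-byte struct for a command the kernel
 * now defines as 32 bytes, bytes 24..31 of kdata are zeroed before the
 * handler runs, and only the first 24 bytes are copied back out.
 */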
1933 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1934 {
1935         struct kfd_process *process;
1936         amdkfd_ioctl_t *func;
1937         const struct amdkfd_ioctl_desc *ioctl = NULL;
1938         unsigned int nr = _IOC_NR(cmd);
1939         char stack_kdata[128];
1940         char *kdata = NULL;
1941         unsigned int usize, asize;
1942         int retcode = -EINVAL;
1943
1944         if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1945                 goto err_i1;
1946
1947         if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1948                 u32 amdkfd_size;
1949
1950                 ioctl = &amdkfd_ioctls[nr];
1951
1952                 amdkfd_size = _IOC_SIZE(ioctl->cmd);
1953                 usize = asize = _IOC_SIZE(cmd);
1954                 if (amdkfd_size > asize)
1955                         asize = amdkfd_size;
1956
1957                 cmd = ioctl->cmd;
1958         } else
1959                 goto err_i1;
1960
1961         dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1962
1963         /* Get the process struct from the filep. Only the process
1964          * that opened /dev/kfd can use the file descriptor. Child
1965          * processes need to create their own KFD device context.
1966          */
1967         process = filep->private_data;
1968         if (process->lead_thread != current->group_leader) {
1969                 dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1970                 retcode = -EBADF;
1971                 goto err_i1;
1972         }
1973
1974         /* Do not trust userspace, use our own definition */
1975         func = ioctl->func;
1976
1977         if (unlikely(!func)) {
1978                 dev_dbg(kfd_device, "no function\n");
1979                 retcode = -EINVAL;
1980                 goto err_i1;
1981         }
1982
1983         if (cmd & (IOC_IN | IOC_OUT)) {
1984                 if (asize <= sizeof(stack_kdata)) {
1985                         kdata = stack_kdata;
1986                 } else {
1987                         kdata = kmalloc(asize, GFP_KERNEL);
1988                         if (!kdata) {
1989                                 retcode = -ENOMEM;
1990                                 goto err_i1;
1991                         }
1992                 }
1993                 if (asize > usize)
1994                         memset(kdata + usize, 0, asize - usize);
1995         }
1996
1997         if (cmd & IOC_IN) {
1998                 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1999                         retcode = -EFAULT;
2000                         goto err_i1;
2001                 }
2002         } else if (cmd & IOC_OUT) {
2003                 memset(kdata, 0, usize);
2004         }
2005
2006         retcode = func(filep, process, kdata);
2007
2008         if (cmd & IOC_OUT)
2009                 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
2010                         retcode = -EFAULT;
2011
2012 err_i1:
2013         if (!ioctl)
2014                 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
2015                           task_pid_nr(current), cmd, nr);
2016
2017         if (kdata != stack_kdata)
2018                 kfree(kdata);
2019
2020         if (retcode)
2021                 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
2022                                 nr, arg, retcode);
2023
2024         return retcode;
2025 }
2026
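/*
 * Map the single-page MMIO remap region of a device into the calling
 * process, non-cached and restricted to exactly PAGE_SIZE.
 */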
2027 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
2028                       struct vm_area_struct *vma)
2029 {
2030         phys_addr_t address;
2031         int ret;
2032
2033         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2034                 return -EINVAL;
2035
2036         address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
2037
2038         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
2039                                 VM_DONTDUMP | VM_PFNMAP;
2040
2041         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
2042
2043         pr_debug("pasid 0x%x mapping mmio page\n"
2044                  "     target user address == 0x%08llX\n"
2045                  "     physical address    == 0x%08llX\n"
2046                  "     vm_flags            == 0x%04lX\n"
2047                  "     size                == 0x%04lX\n",
2048                  process->pasid, (unsigned long long) vma->vm_start,
2049                  address, vma->vm_flags, PAGE_SIZE);
2050
2051         ret = io_remap_pfn_range(vma,
2052                                 vma->vm_start,
2053                                 address >> PAGE_SHIFT,
2054                                 PAGE_SIZE,
2055                                 vma->vm_page_prot);
2056         return ret;
2057 }
2058
2059
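/*
 * The mmap offset encodes both the mapping type and, where relevant, the
 * target gpu_id; decode them here and dispatch to the doorbell, event,
 * reserved-memory or MMIO mapping helper.
 */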
2060 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
2061 {
2062         struct kfd_process *process;
2063         struct kfd_dev *dev = NULL;
2064         unsigned long mmap_offset;
2065         unsigned int gpu_id;
2066
2067         process = kfd_get_process(current);
2068         if (IS_ERR(process))
2069                 return PTR_ERR(process);
2070
2071         mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
2072         gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2073         if (gpu_id)
2074                 dev = kfd_device_by_id(gpu_id);
2075
2076         switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2077         case KFD_MMAP_TYPE_DOORBELL:
2078                 if (!dev)
2079                         return -ENODEV;
2080                 return kfd_doorbell_mmap(dev, process, vma);
2081
2082         case KFD_MMAP_TYPE_EVENTS:
2083                 return kfd_event_mmap(process, vma);
2084
2085         case KFD_MMAP_TYPE_RESERVED_MEM:
2086                 if (!dev)
2087                         return -ENODEV;
2088                 return kfd_reserved_mem_mmap(dev, process, vma);
2089         case KFD_MMAP_TYPE_MMIO:
2090                 if (!dev)
2091                         return -ENODEV;
2092                 return kfd_mmio_mmap(dev, process, vma);
2093         }
2094
2095         return -EFAULT;
2096 }