drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
38 #include "kfd_priv.h"
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
41 #include "amdgpu_amdkfd.h"
42 #include "kfd_smi_events.h"
43
44 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
45 static int kfd_open(struct inode *, struct file *);
46 static int kfd_release(struct inode *, struct file *);
47 static int kfd_mmap(struct file *, struct vm_area_struct *);
48
49 static const char kfd_dev_name[] = "kfd";
50
51 static const struct file_operations kfd_fops = {
52         .owner = THIS_MODULE,
53         .unlocked_ioctl = kfd_ioctl,
54         .compat_ioctl = compat_ptr_ioctl,
55         .open = kfd_open,
56         .release = kfd_release,
57         .mmap = kfd_mmap,
58 };
59
60 static int kfd_char_dev_major = -1;
61 static struct class *kfd_class;
62 struct device *kfd_device;
63
64 int kfd_chardev_init(void)
65 {
66         int err = 0;
67
68         kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
69         err = kfd_char_dev_major;
70         if (err < 0)
71                 goto err_register_chrdev;
72
73         kfd_class = class_create(THIS_MODULE, kfd_dev_name);
74         err = PTR_ERR(kfd_class);
75         if (IS_ERR(kfd_class))
76                 goto err_class_create;
77
78         kfd_device = device_create(kfd_class, NULL,
79                                         MKDEV(kfd_char_dev_major, 0),
80                                         NULL, kfd_dev_name);
81         err = PTR_ERR(kfd_device);
82         if (IS_ERR(kfd_device))
83                 goto err_device_create;
84
85         return 0;
86
87 err_device_create:
88         class_destroy(kfd_class);
89 err_class_create:
90         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
91 err_register_chrdev:
92         return err;
93 }
94
95 void kfd_chardev_exit(void)
96 {
97         device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
98         class_destroy(kfd_class);
99         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
100         kfd_device = NULL;
101 }
102
103 struct device *kfd_chardev(void)
104 {
105         return kfd_device;
106 }
107
108
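/*
 * Opening /dev/kfd: reject 32-bit callers, create (or look up) the
 * kfd_process for the caller and stash it in filep->private_data.
 * If KFD is currently locked the process reference is dropped and the
 * open fails with -EAGAIN.
 */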
109 static int kfd_open(struct inode *inode, struct file *filep)
110 {
111         struct kfd_process *process;
112         bool is_32bit_user_mode;
113
114         if (iminor(inode) != 0)
115                 return -ENODEV;
116
117         is_32bit_user_mode = in_compat_syscall();
118
119         if (is_32bit_user_mode) {
120                 dev_warn(kfd_device,
121                         "Process %d (32-bit) failed to open /dev/kfd\n"
122                         "32-bit processes are not supported by amdkfd\n",
123                         current->pid);
124                 return -EPERM;
125         }
126
127         process = kfd_create_process(filep);
128         if (IS_ERR(process))
129                 return PTR_ERR(process);
130
131         if (kfd_is_locked()) {
132                 dev_dbg(kfd_device, "kfd is locked!\n"
133                                 "process %d unreferenced", process->pasid);
134                 kfd_unref_process(process);
135                 return -EAGAIN;
136         }
137
138         /* filep now owns the reference returned by kfd_create_process */
139         filep->private_data = process;
140
141         dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
142                 process->pasid, process->is_32bit_user_mode);
143
144         return 0;
145 }
146
147 static int kfd_release(struct inode *inode, struct file *filep)
148 {
149         struct kfd_process *process = filep->private_data;
150
151         if (process)
152                 kfd_unref_process(process);
153
154         return 0;
155 }
156
157 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
158                                         void *data)
159 {
160         struct kfd_ioctl_get_version_args *args = data;
161
162         args->major_version = KFD_IOCTL_MAJOR_VERSION;
163         args->minor_version = KFD_IOCTL_MINOR_VERSION;
164
165         return 0;
166 }
167
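/*
 * Validate the user-supplied queue arguments (percentage, priority,
 * ring size and the ring/read/write pointer addresses) and translate
 * them into a queue_properties structure for the queue manager.
 */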
168 static int set_queue_properties_from_user(struct queue_properties *q_properties,
169                                 struct kfd_ioctl_create_queue_args *args)
170 {
171         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
172                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
173                 return -EINVAL;
174         }
175
176         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
177                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
178                 return -EINVAL;
179         }
180
181         if ((args->ring_base_address) &&
182                 (!access_ok((const void __user *) args->ring_base_address,
183                         sizeof(uint64_t)))) {
184                 pr_err("Can't access ring base address\n");
185                 return -EFAULT;
186         }
187
188         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
189                 pr_err("Ring size must be a power of 2 or 0\n");
190                 return -EINVAL;
191         }
192
193         if (!access_ok((const void __user *) args->read_pointer_address,
194                         sizeof(uint32_t))) {
195                 pr_err("Can't access read pointer\n");
196                 return -EFAULT;
197         }
198
199         if (!access_ok((const void __user *) args->write_pointer_address,
200                         sizeof(uint32_t))) {
201                 pr_err("Can't access write pointer\n");
202                 return -EFAULT;
203         }
204
205         if (args->eop_buffer_address &&
206                 !access_ok((const void __user *) args->eop_buffer_address,
207                         sizeof(uint32_t))) {
208                 pr_debug("Can't access eop buffer");
209                 return -EFAULT;
210         }
211
212         if (args->ctx_save_restore_address &&
213                 !access_ok((const void __user *) args->ctx_save_restore_address,
214                         sizeof(uint32_t))) {
215                 pr_debug("Can't access ctx save restore buffer");
216                 return -EFAULT;
217         }
218
219         q_properties->is_interop = false;
220         q_properties->is_gws = false;
221         q_properties->queue_percent = args->queue_percentage;
222         q_properties->priority = args->queue_priority;
223         q_properties->queue_address = args->ring_base_address;
224         q_properties->queue_size = args->ring_size;
225         q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
226         q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
227         q_properties->eop_ring_buffer_address = args->eop_buffer_address;
228         q_properties->eop_ring_buffer_size = args->eop_buffer_size;
229         q_properties->ctx_save_restore_area_address =
230                         args->ctx_save_restore_address;
231         q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
232         q_properties->ctl_stack_size = args->ctl_stack_size;
233         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
234                 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
235                 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
236         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
237                 q_properties->type = KFD_QUEUE_TYPE_SDMA;
238         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
239                 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
240         else
241                 return -ENOTSUPP;
242
243         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
244                 q_properties->format = KFD_QUEUE_FORMAT_AQL;
245         else
246                 q_properties->format = KFD_QUEUE_FORMAT_PM4;
247
248         pr_debug("Queue Percentage: %d, %d\n",
249                         q_properties->queue_percent, args->queue_percentage);
250
251         pr_debug("Queue Priority: %d, %d\n",
252                         q_properties->priority, args->queue_priority);
253
254         pr_debug("Queue Address: 0x%llX, 0x%llX\n",
255                         q_properties->queue_address, args->ring_base_address);
256
257         pr_debug("Queue Size: 0x%llX, %u\n",
258                         q_properties->queue_size, args->ring_size);
259
260         pr_debug("Queue r/w Pointers: %px, %px\n",
261                         q_properties->read_ptr,
262                         q_properties->write_ptr);
263
264         pr_debug("Queue Format: %d\n", q_properties->format);
265
266         pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
267
268         pr_debug("Queue CTX save area: 0x%llX\n",
269                         q_properties->ctx_save_restore_area_address);
270
271         return 0;
272 }
273
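/*
 * Create a user-mode queue on the GPU identified by args->gpu_id:
 * validate the queue properties, bind the process to the device,
 * create the queue through the process queue manager and return the
 * queue id plus a doorbell offset that user space can mmap.
 */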
274 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
275                                         void *data)
276 {
277         struct kfd_ioctl_create_queue_args *args = data;
278         struct kfd_dev *dev;
279         int err = 0;
280         unsigned int queue_id;
281         struct kfd_process_device *pdd;
282         struct queue_properties q_properties;
283         uint32_t doorbell_offset_in_process = 0;
284
285         memset(&q_properties, 0, sizeof(struct queue_properties));
286
287         pr_debug("Creating queue ioctl\n");
288
289         err = set_queue_properties_from_user(&q_properties, args);
290         if (err)
291                 return err;
292
293         pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
294         dev = kfd_device_by_id(args->gpu_id);
295         if (!dev) {
296                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
297                 return -EINVAL;
298         }
299
300         mutex_lock(&p->mutex);
301
302         pdd = kfd_bind_process_to_device(dev, p);
303         if (IS_ERR(pdd)) {
304                 err = -ESRCH;
305                 goto err_bind_process;
306         }
307
308         pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
309                         p->pasid,
310                         dev->id);
311
312         err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
313                         &doorbell_offset_in_process);
314         if (err != 0)
315                 goto err_create_queue;
316
317         args->queue_id = queue_id;
318
319
320         /* Return gpu_id as doorbell offset for mmap usage */
321         args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
322         args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
323         if (KFD_IS_SOC15(dev->device_info->asic_family))
324                 /* On SOC15 ASICs, include the doorbell offset within the
325                  * process doorbell frame, which is 2 pages.
326                  */
327                 args->doorbell_offset |= doorbell_offset_in_process;
328
329         mutex_unlock(&p->mutex);
330
331         pr_debug("Queue id %d was created successfully\n", args->queue_id);
332
333         pr_debug("Ring buffer address == 0x%016llX\n",
334                         args->ring_base_address);
335
336         pr_debug("Read ptr address    == 0x%016llX\n",
337                         args->read_pointer_address);
338
339         pr_debug("Write ptr address   == 0x%016llX\n",
340                         args->write_pointer_address);
341
342         return 0;
343
344 err_create_queue:
345 err_bind_process:
346         mutex_unlock(&p->mutex);
347         return err;
348 }
349
350 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
351                                         void *data)
352 {
353         int retval;
354         struct kfd_ioctl_destroy_queue_args *args = data;
355
356         pr_debug("Destroying queue id %d for pasid 0x%x\n",
357                                 args->queue_id,
358                                 p->pasid);
359
360         mutex_lock(&p->mutex);
361
362         retval = pqm_destroy_queue(&p->pqm, args->queue_id);
363
364         mutex_unlock(&p->mutex);
365         return retval;
366 }
367
368 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
369                                         void *data)
370 {
371         int retval;
372         struct kfd_ioctl_update_queue_args *args = data;
373         struct queue_properties properties;
374
375         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
376                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
377                 return -EINVAL;
378         }
379
380         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
381                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
382                 return -EINVAL;
383         }
384
385         if ((args->ring_base_address) &&
386                 (!access_ok((const void __user *) args->ring_base_address,
387                         sizeof(uint64_t)))) {
388                 pr_err("Can't access ring base address\n");
389                 return -EFAULT;
390         }
391
392         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
393                 pr_err("Ring size must be a power of 2 or 0\n");
394                 return -EINVAL;
395         }
396
397         properties.queue_address = args->ring_base_address;
398         properties.queue_size = args->ring_size;
399         properties.queue_percent = args->queue_percentage;
400         properties.priority = args->queue_priority;
401
402         pr_debug("Updating queue id %d for pasid 0x%x\n",
403                         args->queue_id, p->pasid);
404
405         mutex_lock(&p->mutex);
406
407         retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
408
409         mutex_unlock(&p->mutex);
410
411         return retval;
412 }
413
414 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
415                                         void *data)
416 {
417         int retval;
418         const int max_num_cus = 1024;
419         struct kfd_ioctl_set_cu_mask_args *args = data;
420         struct queue_properties properties;
421         uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
422         size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
423
424         if ((args->num_cu_mask % 32) != 0) {
425                 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
426                                 args->num_cu_mask);
427                 return -EINVAL;
428         }
429
430         properties.cu_mask_count = args->num_cu_mask;
431         if (properties.cu_mask_count == 0) {
432                 pr_debug("CU mask cannot be 0");
433                 return -EINVAL;
434         }
435
436         /* To prevent an unreasonably large CU mask size, set an arbitrary
437          * limit of max_num_cus bits.  We can then drop any CU mask bits past
438          * max_num_cus bits and use only the first max_num_cus bits.
439          */
440         if (properties.cu_mask_count > max_num_cus) {
441                 pr_debug("CU mask cannot be greater than 1024 bits");
442                 properties.cu_mask_count = max_num_cus;
443                 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
444         }
445
446         properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
447         if (!properties.cu_mask)
448                 return -ENOMEM;
449
450         retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
451         if (retval) {
452                 pr_debug("Could not copy CU mask from userspace");
453                 kfree(properties.cu_mask);
454                 return -EFAULT;
455         }
456
457         mutex_lock(&p->mutex);
458
459         retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
460
461         mutex_unlock(&p->mutex);
462
463         if (retval)
464                 kfree(properties.cu_mask);
465
466         return retval;
467 }
468
469 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
470                                           struct kfd_process *p, void *data)
471 {
472         struct kfd_ioctl_get_queue_wave_state_args *args = data;
473         int r;
474
475         mutex_lock(&p->mutex);
476
477         r = pqm_get_wave_state(&p->pqm, args->queue_id,
478                                (void __user *)args->ctl_stack_address,
479                                &args->ctl_stack_used_size,
480                                &args->save_area_used_size);
481
482         mutex_unlock(&p->mutex);
483
484         return r;
485 }
486
487 static int kfd_ioctl_set_memory_policy(struct file *filep,
488                                         struct kfd_process *p, void *data)
489 {
490         struct kfd_ioctl_set_memory_policy_args *args = data;
491         struct kfd_dev *dev;
492         int err = 0;
493         struct kfd_process_device *pdd;
494         enum cache_policy default_policy, alternate_policy;
495
496         if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
497             && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
498                 return -EINVAL;
499         }
500
501         if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
502             && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
503                 return -EINVAL;
504         }
505
506         dev = kfd_device_by_id(args->gpu_id);
507         if (!dev)
508                 return -EINVAL;
509
510         mutex_lock(&p->mutex);
511
512         pdd = kfd_bind_process_to_device(dev, p);
513         if (IS_ERR(pdd)) {
514                 err = -ESRCH;
515                 goto out;
516         }
517
518         default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
519                          ? cache_policy_coherent : cache_policy_noncoherent;
520
521         alternate_policy =
522                 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
523                    ? cache_policy_coherent : cache_policy_noncoherent;
524
525         if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
526                                 &pdd->qpd,
527                                 default_policy,
528                                 alternate_policy,
529                                 (void __user *)args->alternate_aperture_base,
530                                 args->alternate_aperture_size))
531                 err = -EINVAL;
532
533 out:
534         mutex_unlock(&p->mutex);
535
536         return err;
537 }
538
539 static int kfd_ioctl_set_trap_handler(struct file *filep,
540                                         struct kfd_process *p, void *data)
541 {
542         struct kfd_ioctl_set_trap_handler_args *args = data;
543         struct kfd_dev *dev;
544         int err = 0;
545         struct kfd_process_device *pdd;
546
547         dev = kfd_device_by_id(args->gpu_id);
548         if (!dev)
549                 return -EINVAL;
550
551         mutex_lock(&p->mutex);
552
553         pdd = kfd_bind_process_to_device(dev, p);
554         if (IS_ERR(pdd)) {
555                 err = -ESRCH;
556                 goto out;
557         }
558
559         kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
560
561 out:
562         mutex_unlock(&p->mutex);
563
564         return err;
565 }
566
567 static int kfd_ioctl_dbg_register(struct file *filep,
568                                 struct kfd_process *p, void *data)
569 {
570         struct kfd_ioctl_dbg_register_args *args = data;
571         struct kfd_dev *dev;
572         struct kfd_dbgmgr *dbgmgr_ptr;
573         struct kfd_process_device *pdd;
574         bool create_ok;
575         long status = 0;
576
577         dev = kfd_device_by_id(args->gpu_id);
578         if (!dev)
579                 return -EINVAL;
580
581         if (dev->device_info->asic_family == CHIP_CARRIZO) {
582                 pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
583                 return -EINVAL;
584         }
585
586         mutex_lock(&p->mutex);
587         mutex_lock(kfd_get_dbgmgr_mutex());
588
589         /*
590          * make sure that we have a pdd, in case this is the first queue
591          * created for this process
592          */
593         pdd = kfd_bind_process_to_device(dev, p);
594         if (IS_ERR(pdd)) {
595                 status = PTR_ERR(pdd);
596                 goto out;
597         }
598
599         if (!dev->dbgmgr) {
600                 /* In case of a legal call, we have no dbgmgr yet */
601                 create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
602                 if (create_ok) {
603                         status = kfd_dbgmgr_register(dbgmgr_ptr, p);
604                         if (status != 0)
605                                 kfd_dbgmgr_destroy(dbgmgr_ptr);
606                         else
607                                 dev->dbgmgr = dbgmgr_ptr;
608                 }
609         } else {
610                 pr_debug("debugger already registered\n");
611                 status = -EINVAL;
612         }
613
614 out:
615         mutex_unlock(kfd_get_dbgmgr_mutex());
616         mutex_unlock(&p->mutex);
617
618         return status;
619 }
620
621 static int kfd_ioctl_dbg_unregister(struct file *filep,
622                                 struct kfd_process *p, void *data)
623 {
624         struct kfd_ioctl_dbg_unregister_args *args = data;
625         struct kfd_dev *dev;
626         long status;
627
628         dev = kfd_device_by_id(args->gpu_id);
629         if (!dev || !dev->dbgmgr)
630                 return -EINVAL;
631
632         if (dev->device_info->asic_family == CHIP_CARRIZO) {
633                 pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
634                 return -EINVAL;
635         }
636
637         mutex_lock(kfd_get_dbgmgr_mutex());
638
639         status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
640         if (!status) {
641                 kfd_dbgmgr_destroy(dev->dbgmgr);
642                 dev->dbgmgr = NULL;
643         }
644
645         mutex_unlock(kfd_get_dbgmgr_mutex());
646
647         return status;
648 }
649
650 /*
651  * Parse and generate variable size data structure for address watch.
652  * Total size of the buffer and the number of watch points are limited
653  * in order to prevent kernel abuse. (This has no bearing on the much
654  * smaller HW limitation, which is enforced by the dbgdev module.)
655  * Please also note that the watch addresses themselves are not "copied
656  * from user", since they are set into the HW in user mode values.
657  *
658  */
659 static int kfd_ioctl_dbg_address_watch(struct file *filep,
660                                         struct kfd_process *p, void *data)
661 {
662         struct kfd_ioctl_dbg_address_watch_args *args = data;
663         struct kfd_dev *dev;
664         struct dbg_address_watch_info aw_info;
665         unsigned char *args_buff;
666         long status;
667         void __user *cmd_from_user;
668         uint64_t watch_mask_value = 0;
669         unsigned int args_idx = 0;
670
671         memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
672
673         dev = kfd_device_by_id(args->gpu_id);
674         if (!dev)
675                 return -EINVAL;
676
677         if (dev->device_info->asic_family == CHIP_CARRIZO) {
678                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
679                 return -EINVAL;
680         }
681
682         cmd_from_user = (void __user *) args->content_ptr;
683
684         /* Validate arguments */
685
686         if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
687                 (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
688                 (cmd_from_user == NULL))
689                 return -EINVAL;
690
691         /* this is the actual buffer to work with */
692         args_buff = memdup_user(cmd_from_user,
693                                 args->buf_size_in_bytes - sizeof(*args));
694         if (IS_ERR(args_buff))
695                 return PTR_ERR(args_buff);
696
697         aw_info.process = p;
698
699         aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
700         args_idx += sizeof(aw_info.num_watch_points);
701
702         aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
703         args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
704
705         /*
706          * set watch address base pointer to point at the array base
707          * within args_buff
708          */
709         aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
710
711         /* skip over the addresses buffer */
712         args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
713
714         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
715                 status = -EINVAL;
716                 goto out;
717         }
718
719         watch_mask_value = (uint64_t) args_buff[args_idx];
720
721         if (watch_mask_value > 0) {
722                 /*
723                  * There is an array of masks.
724                  * set watch mask base pointer to point at the array base
725                  * within args_buff
726                  */
727                 aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
728
729                 /* skip over the masks buffer */
730                 args_idx += sizeof(aw_info.watch_mask) *
731                                 aw_info.num_watch_points;
732         } else {
733                 /* just the NULL mask, set to NULL and skip over it */
734                 aw_info.watch_mask = NULL;
735                 args_idx += sizeof(aw_info.watch_mask);
736         }
737
738         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
739                 status = -EINVAL;
740                 goto out;
741         }
742
743         /* Currently HSA Event is not supported for DBG */
744         aw_info.watch_event = NULL;
745
746         mutex_lock(kfd_get_dbgmgr_mutex());
747
748         status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
749
750         mutex_unlock(kfd_get_dbgmgr_mutex());
751
752 out:
753         kfree(args_buff);
754
755         return status;
756 }
757
758 /* Parse and generate fixed size data structure for wave control */
759 static int kfd_ioctl_dbg_wave_control(struct file *filep,
760                                         struct kfd_process *p, void *data)
761 {
762         struct kfd_ioctl_dbg_wave_control_args *args = data;
763         struct kfd_dev *dev;
764         struct dbg_wave_control_info wac_info;
765         unsigned char *args_buff;
766         uint32_t computed_buff_size;
767         long status;
768         void __user *cmd_from_user;
769         unsigned int args_idx = 0;
770
771         memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
772
773         /* we use compact form, independent of the packing attribute value */
774         computed_buff_size = sizeof(*args) +
775                                 sizeof(wac_info.mode) +
776                                 sizeof(wac_info.operand) +
777                                 sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
778                                 sizeof(wac_info.dbgWave_msg.MemoryVA) +
779                                 sizeof(wac_info.trapId);
780
781         dev = kfd_device_by_id(args->gpu_id);
782         if (!dev)
783                 return -EINVAL;
784
785         if (dev->device_info->asic_family == CHIP_CARRIZO) {
786                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
787                 return -EINVAL;
788         }
789
790         /* input size must match the computed "compact" size */
791         if (args->buf_size_in_bytes != computed_buff_size) {
792                 pr_debug("size mismatch, computed : actual %u : %u\n",
793                                 args->buf_size_in_bytes, computed_buff_size);
794                 return -EINVAL;
795         }
796
797         cmd_from_user = (void __user *) args->content_ptr;
798
799         if (cmd_from_user == NULL)
800                 return -EINVAL;
801
802         /* copy the entire buffer from user */
803
804         args_buff = memdup_user(cmd_from_user,
805                                 args->buf_size_in_bytes - sizeof(*args));
806         if (IS_ERR(args_buff))
807                 return PTR_ERR(args_buff);
808
809         /* move ptr to the start of the "payload" area */
810         wac_info.process = p;
811
812         wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
813         args_idx += sizeof(wac_info.operand);
814
815         wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
816         args_idx += sizeof(wac_info.mode);
817
818         wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
819         args_idx += sizeof(wac_info.trapId);
820
821         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
822                                         *((uint32_t *)(&args_buff[args_idx]));
823         wac_info.dbgWave_msg.MemoryVA = NULL;
824
825         mutex_lock(kfd_get_dbgmgr_mutex());
826
827         pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
828                         wac_info.process, wac_info.operand,
829                         wac_info.mode, wac_info.trapId,
830                         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
831
832         status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
833
834         pr_debug("Returned status of dbg manager is %ld\n", status);
835
836         mutex_unlock(kfd_get_dbgmgr_mutex());
837
838         kfree(args_buff);
839
840         return status;
841 }
842
843 static int kfd_ioctl_get_clock_counters(struct file *filep,
844                                 struct kfd_process *p, void *data)
845 {
846         struct kfd_ioctl_get_clock_counters_args *args = data;
847         struct kfd_dev *dev;
848
849         dev = kfd_device_by_id(args->gpu_id);
850         if (dev)
851                 /* Reading GPU clock counter from KGD */
852                 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
853         else
854                 /* Node without GPU resource */
855                 args->gpu_clock_counter = 0;
856
857         /* No access to rdtsc. Using raw monotonic time */
858         args->cpu_clock_counter = ktime_get_raw_ns();
859         args->system_clock_counter = ktime_get_boottime_ns();
860
861         /* Since the counter is in nanoseconds we use 1GHz frequency */
862         args->system_clock_freq = 1000000000;
863
864         return 0;
865 }
866
867
868 static int kfd_ioctl_get_process_apertures(struct file *filp,
869                                 struct kfd_process *p, void *data)
870 {
871         struct kfd_ioctl_get_process_apertures_args *args = data;
872         struct kfd_process_device_apertures *pAperture;
873         int i;
874
875         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
876
877         args->num_of_nodes = 0;
878
879         mutex_lock(&p->mutex);
880         /* Run over all pdds of the process */
881         for (i = 0; i < p->n_pdds; i++) {
882                 struct kfd_process_device *pdd = p->pdds[i];
883
884                 pAperture =
885                         &args->process_apertures[args->num_of_nodes];
886                 pAperture->gpu_id = pdd->dev->id;
887                 pAperture->lds_base = pdd->lds_base;
888                 pAperture->lds_limit = pdd->lds_limit;
889                 pAperture->gpuvm_base = pdd->gpuvm_base;
890                 pAperture->gpuvm_limit = pdd->gpuvm_limit;
891                 pAperture->scratch_base = pdd->scratch_base;
892                 pAperture->scratch_limit = pdd->scratch_limit;
893
894                 dev_dbg(kfd_device,
895                         "node id %u\n", args->num_of_nodes);
896                 dev_dbg(kfd_device,
897                         "gpu id %u\n", pdd->dev->id);
898                 dev_dbg(kfd_device,
899                         "lds_base %llX\n", pdd->lds_base);
900                 dev_dbg(kfd_device,
901                         "lds_limit %llX\n", pdd->lds_limit);
902                 dev_dbg(kfd_device,
903                         "gpuvm_base %llX\n", pdd->gpuvm_base);
904                 dev_dbg(kfd_device,
905                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
906                 dev_dbg(kfd_device,
907                         "scratch_base %llX\n", pdd->scratch_base);
908                 dev_dbg(kfd_device,
909                         "scratch_limit %llX\n", pdd->scratch_limit);
910
911                 if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
912                         break;
913         }
914         mutex_unlock(&p->mutex);
915
916         return 0;
917 }
918
919 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
920                                 struct kfd_process *p, void *data)
921 {
922         struct kfd_ioctl_get_process_apertures_new_args *args = data;
923         struct kfd_process_device_apertures *pa;
924         int ret;
925         int i;
926
927         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
928
929         if (args->num_of_nodes == 0) {
930                 /* Return number of nodes, so that user space can allocate
931                  * sufficient memory
932                  */
933                 mutex_lock(&p->mutex);
934                 args->num_of_nodes = p->n_pdds;
935                 goto out_unlock;
936         }
937
938         /* Fill in process-aperture information for all available
939          * nodes, but not more than args->num_of_nodes as that is
940          * the amount of memory allocated by user
941          */
942         pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
943                                 args->num_of_nodes), GFP_KERNEL);
944         if (!pa)
945                 return -ENOMEM;
946
947         mutex_lock(&p->mutex);
948
949         if (!p->n_pdds) {
950                 args->num_of_nodes = 0;
951                 kfree(pa);
952                 goto out_unlock;
953         }
954
955         /* Run over all pdds of the process */
956         for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
957                 struct kfd_process_device *pdd = p->pdds[i];
958
959                 pa[i].gpu_id = pdd->dev->id;
960                 pa[i].lds_base = pdd->lds_base;
961                 pa[i].lds_limit = pdd->lds_limit;
962                 pa[i].gpuvm_base = pdd->gpuvm_base;
963                 pa[i].gpuvm_limit = pdd->gpuvm_limit;
964                 pa[i].scratch_base = pdd->scratch_base;
965                 pa[i].scratch_limit = pdd->scratch_limit;
966
967                 dev_dbg(kfd_device,
968                         "gpu id %u\n", pdd->dev->id);
969                 dev_dbg(kfd_device,
970                         "lds_base %llX\n", pdd->lds_base);
971                 dev_dbg(kfd_device,
972                         "lds_limit %llX\n", pdd->lds_limit);
973                 dev_dbg(kfd_device,
974                         "gpuvm_base %llX\n", pdd->gpuvm_base);
975                 dev_dbg(kfd_device,
976                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
977                 dev_dbg(kfd_device,
978                         "scratch_base %llX\n", pdd->scratch_base);
979                 dev_dbg(kfd_device,
980                         "scratch_limit %llX\n", pdd->scratch_limit);
981         }
982         mutex_unlock(&p->mutex);
983
984         args->num_of_nodes = i;
985         ret = copy_to_user(
986                         (void __user *)args->kfd_process_device_apertures_ptr,
987                         pa,
988                         (i * sizeof(struct kfd_process_device_apertures)));
989         kfree(pa);
990         return ret ? -EFAULT : 0;
991
992 out_unlock:
993         mutex_unlock(&p->mutex);
994         return 0;
995 }
996
997 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
998                                         void *data)
999 {
1000         struct kfd_ioctl_create_event_args *args = data;
1001         int err;
1002
1003         /* For dGPUs the event page is allocated in user mode. The
1004          * handle is passed to KFD with the first call to this IOCTL
1005          * through the event_page_offset field.
1006          */
1007         if (args->event_page_offset) {
1008                 struct kfd_dev *kfd;
1009                 struct kfd_process_device *pdd;
1010                 void *mem, *kern_addr;
1011                 uint64_t size;
1012
1013                 if (p->signal_page) {
1014                         pr_err("Event page is already set\n");
1015                         return -EINVAL;
1016                 }
1017
1018                 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1019                 if (!kfd) {
1020                         pr_err("Getting device by id failed in %s\n", __func__);
1021                         return -EINVAL;
1022                 }
1023
1024                 mutex_lock(&p->mutex);
1025                 pdd = kfd_bind_process_to_device(kfd, p);
1026                 if (IS_ERR(pdd)) {
1027                         err = PTR_ERR(pdd);
1028                         goto out_unlock;
1029                 }
1030
1031                 mem = kfd_process_device_translate_handle(pdd,
1032                                 GET_IDR_HANDLE(args->event_page_offset));
1033                 if (!mem) {
1034                         pr_err("Can't find BO, offset is 0x%llx\n",
1035                                args->event_page_offset);
1036                         err = -EINVAL;
1037                         goto out_unlock;
1038                 }
1039                 mutex_unlock(&p->mutex);
1040
1041                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1042                                                 mem, &kern_addr, &size);
1043                 if (err) {
1044                         pr_err("Failed to map event page to kernel\n");
1045                         return err;
1046                 }
1047
1048                 err = kfd_event_page_set(p, kern_addr, size);
1049                 if (err) {
1050                         pr_err("Failed to set event page\n");
1051                         return err;
1052                 }
1053         }
1054
1055         err = kfd_event_create(filp, p, args->event_type,
1056                                 args->auto_reset != 0, args->node_id,
1057                                 &args->event_id, &args->event_trigger_data,
1058                                 &args->event_page_offset,
1059                                 &args->event_slot_index);
1060
1061         return err;
1062
1063 out_unlock:
1064         mutex_unlock(&p->mutex);
1065         return err;
1066 }
1067
1068 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1069                                         void *data)
1070 {
1071         struct kfd_ioctl_destroy_event_args *args = data;
1072
1073         return kfd_event_destroy(p, args->event_id);
1074 }
1075
1076 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1077                                 void *data)
1078 {
1079         struct kfd_ioctl_set_event_args *args = data;
1080
1081         return kfd_set_event(p, args->event_id);
1082 }
1083
1084 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1085                                 void *data)
1086 {
1087         struct kfd_ioctl_reset_event_args *args = data;
1088
1089         return kfd_reset_event(p, args->event_id);
1090 }
1091
1092 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1093                                 void *data)
1094 {
1095         struct kfd_ioctl_wait_events_args *args = data;
1096         int err;
1097
1098         err = kfd_wait_on_events(p, args->num_events,
1099                         (void __user *)args->events_ptr,
1100                         (args->wait_for_all != 0),
1101                         args->timeout, &args->wait_result);
1102
1103         return err;
1104 }
1105 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1106                                         struct kfd_process *p, void *data)
1107 {
1108         struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1109         struct kfd_process_device *pdd;
1110         struct kfd_dev *dev;
1111         long err;
1112
1113         dev = kfd_device_by_id(args->gpu_id);
1114         if (!dev)
1115                 return -EINVAL;
1116
1117         mutex_lock(&p->mutex);
1118
1119         pdd = kfd_bind_process_to_device(dev, p);
1120         if (IS_ERR(pdd)) {
1121                 err = PTR_ERR(pdd);
1122                 goto bind_process_to_device_fail;
1123         }
1124
1125         pdd->qpd.sh_hidden_private_base = args->va_addr;
1126
1127         mutex_unlock(&p->mutex);
1128
1129         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1130             pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1131                 dev->kfd2kgd->set_scratch_backing_va(
1132                         dev->kgd, args->va_addr, pdd->qpd.vmid);
1133
1134         return 0;
1135
1136 bind_process_to_device_fail:
1137         mutex_unlock(&p->mutex);
1138         return err;
1139 }
1140
1141 static int kfd_ioctl_get_tile_config(struct file *filep,
1142                 struct kfd_process *p, void *data)
1143 {
1144         struct kfd_ioctl_get_tile_config_args *args = data;
1145         struct kfd_dev *dev;
1146         struct tile_config config;
1147         int err = 0;
1148
1149         dev = kfd_device_by_id(args->gpu_id);
1150         if (!dev)
1151                 return -EINVAL;
1152
1153         amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1154
1155         args->gb_addr_config = config.gb_addr_config;
1156         args->num_banks = config.num_banks;
1157         args->num_ranks = config.num_ranks;
1158
1159         if (args->num_tile_configs > config.num_tile_configs)
1160                 args->num_tile_configs = config.num_tile_configs;
1161         err = copy_to_user((void __user *)args->tile_config_ptr,
1162                         config.tile_config_ptr,
1163                         args->num_tile_configs * sizeof(uint32_t));
1164         if (err) {
1165                 args->num_tile_configs = 0;
1166                 return -EFAULT;
1167         }
1168
1169         if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1170                 args->num_macro_tile_configs =
1171                                 config.num_macro_tile_configs;
1172         err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1173                         config.macro_tile_config_ptr,
1174                         args->num_macro_tile_configs * sizeof(uint32_t));
1175         if (err) {
1176                 args->num_macro_tile_configs = 0;
1177                 return -EFAULT;
1178         }
1179
1180         return 0;
1181 }
1182
1183 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1184                                 void *data)
1185 {
1186         struct kfd_ioctl_acquire_vm_args *args = data;
1187         struct kfd_process_device *pdd;
1188         struct kfd_dev *dev;
1189         struct file *drm_file;
1190         int ret;
1191
1192         dev = kfd_device_by_id(args->gpu_id);
1193         if (!dev)
1194                 return -EINVAL;
1195
1196         drm_file = fget(args->drm_fd);
1197         if (!drm_file)
1198                 return -EINVAL;
1199
1200         mutex_lock(&p->mutex);
1201
1202         pdd = kfd_get_process_device_data(dev, p);
1203         if (!pdd) {
1204                 ret = -EINVAL;
1205                 goto err_unlock;
1206         }
1207
1208         if (pdd->drm_file) {
1209                 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1210                 goto err_unlock;
1211         }
1212
1213         ret = kfd_process_device_init_vm(pdd, drm_file);
1214         if (ret)
1215                 goto err_unlock;
1216         /* On success, the PDD keeps the drm_file reference */
1217         mutex_unlock(&p->mutex);
1218
1219         return 0;
1220
1221 err_unlock:
1222         mutex_unlock(&p->mutex);
1223         fput(drm_file);
1224         return ret;
1225 }
1226
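/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-accessible (no private VRAM), or when the debug_largebar option
 * forces it. Devices using IOMMUv2 are never treated as large BAR.
 */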
1227 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1228 {
1229         struct kfd_local_mem_info mem_info;
1230
1231         if (debug_largebar) {
1232                 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1233                 return true;
1234         }
1235
1236         if (dev->use_iommu_v2)
1237                 return false;
1238
1239         amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1240         if (mem_info.local_mem_size_private == 0 &&
1241                         mem_info.local_mem_size_public > 0)
1242                 return true;
1243         return false;
1244 }
1245
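/*
 * Allocate GPU-accessible memory for the calling process. The doorbell
 * and MMIO-remap flags only select the mmap offset; the allocation
 * itself always goes through amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu().
 * The returned handle encodes the gpu_id and an IDR handle so that
 * later map/unmap/free ioctls can look the buffer up again.
 */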
1246 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1247                                         struct kfd_process *p, void *data)
1248 {
1249         struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1250         struct kfd_process_device *pdd;
1251         void *mem;
1252         struct kfd_dev *dev;
1253         int idr_handle;
1254         long err;
1255         uint64_t offset = args->mmap_offset;
1256         uint32_t flags = args->flags;
1257
1258         if (args->size == 0)
1259                 return -EINVAL;
1260
1261         dev = kfd_device_by_id(args->gpu_id);
1262         if (!dev)
1263                 return -EINVAL;
1264
1265         if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1266                 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1267                 !kfd_dev_is_large_bar(dev)) {
1268                 pr_err("Alloc host visible vram on small bar is not allowed\n");
1269                 return -EINVAL;
1270         }
1271
1272         mutex_lock(&p->mutex);
1273
1274         pdd = kfd_bind_process_to_device(dev, p);
1275         if (IS_ERR(pdd)) {
1276                 err = PTR_ERR(pdd);
1277                 goto err_unlock;
1278         }
1279
1280         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1281                 if (args->size != kfd_doorbell_process_slice(dev)) {
1282                         err = -EINVAL;
1283                         goto err_unlock;
1284                 }
1285                 offset = kfd_get_process_doorbells(pdd);
1286         } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1287                 if (args->size != PAGE_SIZE) {
1288                         err = -EINVAL;
1289                         goto err_unlock;
1290                 }
1291                 offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1292                 if (!offset) {
1293                         err = -ENOMEM;
1294                         goto err_unlock;
1295                 }
1296         }
1297
1298         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1299                 dev->kgd, args->va_addr, args->size,
1300                 pdd->vm, (struct kgd_mem **) &mem, &offset,
1301                 flags);
1302
1303         if (err)
1304                 goto err_unlock;
1305
1306         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1307         if (idr_handle < 0) {
1308                 err = -EFAULT;
1309                 goto err_free;
1310         }
1311
1312         /* Update the VRAM usage count */
1313         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1314                 WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1315
1316         mutex_unlock(&p->mutex);
1317
1318         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1319         args->mmap_offset = offset;
1320
1321         /* MMIO is mapped through the kfd device.
1322          * Generate a kfd mmap offset.
1323          */
1324         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1325                 args->mmap_offset = KFD_MMAP_TYPE_MMIO
1326                                         | KFD_MMAP_GPU_ID(args->gpu_id);
1327
1328         return 0;
1329
1330 err_free:
1331         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1332 err_unlock:
1333         mutex_unlock(&p->mutex);
1334         return err;
1335 }
1336
1337 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1338                                         struct kfd_process *p, void *data)
1339 {
1340         struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1341         struct kfd_process_device *pdd;
1342         void *mem;
1343         struct kfd_dev *dev;
1344         int ret;
1345         uint64_t size = 0;
1346
1347         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1348         if (!dev)
1349                 return -EINVAL;
1350
1351         mutex_lock(&p->mutex);
1352
1353         pdd = kfd_get_process_device_data(dev, p);
1354         if (!pdd) {
1355                 pr_err("Process device data doesn't exist\n");
1356                 ret = -EINVAL;
1357                 goto err_unlock;
1358         }
1359
1360         mem = kfd_process_device_translate_handle(
1361                 pdd, GET_IDR_HANDLE(args->handle));
1362         if (!mem) {
1363                 ret = -EINVAL;
1364                 goto err_unlock;
1365         }
1366
1367         ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1368                                                 (struct kgd_mem *)mem, &size);
1369
1370         /* If freeing the buffer failed, leave the handle in place for
1371          * clean-up during process tear-down.
1372          */
1373         if (!ret)
1374                 kfd_process_device_remove_obj_handle(
1375                         pdd, GET_IDR_HANDLE(args->handle));
1376
1377         WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1378
1379 err_unlock:
1380         mutex_unlock(&p->mutex);
1381         return ret;
1382 }
1383
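/*
 * Map a previously allocated buffer into the GPU VM of every device in
 * the user-supplied device list. args->n_success is advanced as the
 * mapping progresses, so a failed call can be retried starting from the
 * first device that did not succeed.
 */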
1384 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1385                                         struct kfd_process *p, void *data)
1386 {
1387         struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1388         struct kfd_process_device *pdd, *peer_pdd;
1389         void *mem;
1390         struct kfd_dev *dev, *peer;
1391         long err = 0;
1392         int i;
1393         uint32_t *devices_arr = NULL;
1394
1395         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1396         if (!dev)
1397                 return -EINVAL;
1398
1399         if (!args->n_devices) {
1400                 pr_debug("Device IDs array empty\n");
1401                 return -EINVAL;
1402         }
1403         if (args->n_success > args->n_devices) {
1404                 pr_debug("n_success exceeds n_devices\n");
1405                 return -EINVAL;
1406         }
1407
1408         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1409                                     GFP_KERNEL);
1410         if (!devices_arr)
1411                 return -ENOMEM;
1412
1413         err = copy_from_user(devices_arr,
1414                              (void __user *)args->device_ids_array_ptr,
1415                              args->n_devices * sizeof(*devices_arr));
1416         if (err != 0) {
1417                 err = -EFAULT;
1418                 goto copy_from_user_failed;
1419         }
1420
1421         mutex_lock(&p->mutex);
1422
1423         pdd = kfd_bind_process_to_device(dev, p);
1424         if (IS_ERR(pdd)) {
1425                 err = PTR_ERR(pdd);
1426                 goto bind_process_to_device_failed;
1427         }
1428
1429         mem = kfd_process_device_translate_handle(pdd,
1430                                                 GET_IDR_HANDLE(args->handle));
1431         if (!mem) {
1432                 err = -ENOMEM;
1433                 goto get_mem_obj_from_handle_failed;
1434         }
1435
1436         for (i = args->n_success; i < args->n_devices; i++) {
1437                 peer = kfd_device_by_id(devices_arr[i]);
1438                 if (!peer) {
1439                         pr_debug("Getting device by id failed for 0x%x\n",
1440                                  devices_arr[i]);
1441                         err = -EINVAL;
1442                         goto get_mem_obj_from_handle_failed;
1443                 }
1444
1445                 peer_pdd = kfd_bind_process_to_device(peer, p);
1446                 if (IS_ERR(peer_pdd)) {
1447                         err = PTR_ERR(peer_pdd);
1448                         goto get_mem_obj_from_handle_failed;
1449                 }
1450                 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1451                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1452                 if (err) {
1453                         pr_err("Failed to map to gpu %d/%d\n",
1454                                i, args->n_devices);
1455                         goto map_memory_to_gpu_failed;
1456                 }
1457                 args->n_success = i+1;
1458         }
1459
1460         mutex_unlock(&p->mutex);
1461
1462         err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1463         if (err) {
1464                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1465                 goto sync_memory_failed;
1466         }
1467
1468         /* Flush TLBs after waiting for the page table updates to complete */
1469         for (i = 0; i < args->n_devices; i++) {
1470                 peer = kfd_device_by_id(devices_arr[i]);
1471                 if (WARN_ON_ONCE(!peer))
1472                         continue;
1473                 peer_pdd = kfd_get_process_device_data(peer, p);
1474                 if (WARN_ON_ONCE(!peer_pdd))
1475                         continue;
1476                 kfd_flush_tlb(peer_pdd);
1477         }
1478
1479         kfree(devices_arr);
1480
1481         return err;
1482
1483 bind_process_to_device_failed:
1484 get_mem_obj_from_handle_failed:
1485 map_memory_to_gpu_failed:
1486         mutex_unlock(&p->mutex);
1487 copy_from_user_failed:
1488 sync_memory_failed:
1489         kfree(devices_arr);
1490
1491         return err;
1492 }
1493
1494 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1495                                         struct kfd_process *p, void *data)
1496 {
1497         struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1498         struct kfd_process_device *pdd, *peer_pdd;
1499         void *mem;
1500         struct kfd_dev *dev, *peer;
1501         long err = 0;
1502         uint32_t *devices_arr = NULL, i;
1503
1504         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1505         if (!dev)
1506                 return -EINVAL;
1507
1508         if (!args->n_devices) {
1509                 pr_debug("Device IDs array empty\n");
1510                 return -EINVAL;
1511         }
1512         if (args->n_success > args->n_devices) {
1513                 pr_debug("n_success exceeds n_devices\n");
1514                 return -EINVAL;
1515         }
1516
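        /* Duplicate the caller's device id array in kernel memory. */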
1517         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1518                                     GFP_KERNEL);
1519         if (!devices_arr)
1520                 return -ENOMEM;
1521
1522         err = copy_from_user(devices_arr,
1523                              (void __user *)args->device_ids_array_ptr,
1524                              args->n_devices * sizeof(*devices_arr));
1525         if (err != 0) {
1526                 err = -EFAULT;
1527                 goto copy_from_user_failed;
1528         }
1529
1530         mutex_lock(&p->mutex);
1531
1532         pdd = kfd_get_process_device_data(dev, p);
1533         if (!pdd) {
1534                 err = -EINVAL;
1535                 goto get_process_device_data_failed;
1536         }
1537
1538         mem = kfd_process_device_translate_handle(pdd,
1539                                                 GET_IDR_HANDLE(args->handle));
1540         if (!mem) {
1541                 err = -ENOMEM;
1542                 goto get_mem_obj_from_handle_failed;
1543         }
1544
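        /*
         * Unmap from each remaining device; n_success is advanced after
         * every device so a failed request can be resumed where it stopped.
         */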
1545         for (i = args->n_success; i < args->n_devices; i++) {
1546                 peer = kfd_device_by_id(devices_arr[i]);
1547                 if (!peer) {
1548                         err = -EINVAL;
1549                         goto get_mem_obj_from_handle_failed;
1550                 }
1551
1552                 peer_pdd = kfd_get_process_device_data(peer, p);
1553                 if (!peer_pdd) {
1554                         err = -ENODEV;
1555                         goto get_mem_obj_from_handle_failed;
1556                 }
1557                 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1558                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1559                 if (err) {
1560                         pr_err("Failed to unmap from gpu %d/%d\n",
1561                                i, args->n_devices);
1562                         goto unmap_memory_from_gpu_failed;
1563                 }
1564                 args->n_success = i + 1;
1565         }
1566         kfree(devices_arr);
1567
1568         mutex_unlock(&p->mutex);
1569
1570         return 0;
1571
1572 get_process_device_data_failed:
1573 get_mem_obj_from_handle_failed:
1574 unmap_memory_from_gpu_failed:
1575         mutex_unlock(&p->mutex);
1576 copy_from_user_failed:
1577         kfree(devices_arr);
1578         return err;
1579 }
1580
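/*
 * Allocate or release global wave sync (GWS) resources for a user queue.
 * GWS is only available on devices that expose it and only under the HW
 * scheduler; num_gws == 0 releases the queue's current allocation.
 */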
1581 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1582                 struct kfd_process *p, void *data)
1583 {
1584         int retval;
1585         struct kfd_ioctl_alloc_queue_gws_args *args = data;
1586         struct queue *q;
1587         struct kfd_dev *dev;
1588
1589         mutex_lock(&p->mutex);
1590         q = pqm_get_user_queue(&p->pqm, args->queue_id);
1591
1592         if (q) {
1593                 dev = q->device;
1594         } else {
1595                 retval = -EINVAL;
1596                 goto out_unlock;
1597         }
1598
1599         if (!dev->gws) {
1600                 retval = -ENODEV;
1601                 goto out_unlock;
1602         }
1603
1604         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1605                 retval = -ENODEV;
1606                 goto out_unlock;
1607         }
1608
1609         retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1610         mutex_unlock(&p->mutex);
1611
1612         args->first_gws = 0;
1613         return retval;
1614
1615 out_unlock:
1616         mutex_unlock(&p->mutex);
1617         return retval;
1618 }
1619
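/*
 * Query the size, owning GPU id, flags and optional driver metadata of a
 * DMA-buf identified by its file descriptor.
 */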
1620 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1621                 struct kfd_process *p, void *data)
1622 {
1623         struct kfd_ioctl_get_dmabuf_info_args *args = data;
1624         struct kfd_dev *dev = NULL;
1625         struct kgd_dev *dma_buf_kgd;
1626         void *metadata_buffer = NULL;
1627         uint32_t flags;
1628         unsigned int i;
1629         int r;
1630
1631         /* Find a KFD GPU device that supports the get_dmabuf_info query */
1632         for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1633                 if (dev)
1634                         break;
1635         if (!dev)
1636                 return -EINVAL;
1637
1638         if (args->metadata_ptr) {
1639                 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1640                 if (!metadata_buffer)
1641                         return -ENOMEM;
1642         }
1643
1644         /* Get dmabuf info from KGD */
1645         r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1646                                           &dma_buf_kgd, &args->size,
1647                                           metadata_buffer, args->metadata_size,
1648                                           &args->metadata_size, &flags);
1649         if (r)
1650                 goto exit;
1651
1652         /* Reverse-lookup gpu_id from kgd pointer */
1653         dev = kfd_device_by_kgd(dma_buf_kgd);
1654         if (!dev) {
1655                 r = -EINVAL;
1656                 goto exit;
1657         }
1658         args->gpu_id = dev->id;
1659         args->flags = flags;
1660
1661         /* Copy metadata buffer to user mode */
1662         if (metadata_buffer) {
1663                 r = copy_to_user((void __user *)args->metadata_ptr,
1664                                  metadata_buffer, args->metadata_size);
1665                 if (r != 0)
1666                         r = -EFAULT;
1667         }
1668
1669 exit:
1670         kfree(metadata_buffer);
1671
1672         return r;
1673 }
1674
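/*
 * Import a DMA-buf into the process' GPUVM on the requested GPU at the
 * given virtual address. The returned handle packs the gpu_id with a
 * per-process IDR handle, the same format used by the other memory ioctls.
 */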
1675 static int kfd_ioctl_import_dmabuf(struct file *filep,
1676                                    struct kfd_process *p, void *data)
1677 {
1678         struct kfd_ioctl_import_dmabuf_args *args = data;
1679         struct kfd_process_device *pdd;
1680         struct dma_buf *dmabuf;
1681         struct kfd_dev *dev;
1682         int idr_handle;
1683         uint64_t size;
1684         void *mem;
1685         int r;
1686
1687         dev = kfd_device_by_id(args->gpu_id);
1688         if (!dev)
1689                 return -EINVAL;
1690
1691         dmabuf = dma_buf_get(args->dmabuf_fd);
1692         if (IS_ERR(dmabuf))
1693                 return PTR_ERR(dmabuf);
1694
1695         mutex_lock(&p->mutex);
1696
1697         pdd = kfd_bind_process_to_device(dev, p);
1698         if (IS_ERR(pdd)) {
1699                 r = PTR_ERR(pdd);
1700                 goto err_unlock;
1701         }
1702
1703         r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1704                                               args->va_addr, pdd->vm,
1705                                               (struct kgd_mem **)&mem, &size,
1706                                               NULL);
1707         if (r)
1708                 goto err_unlock;
1709
1710         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1711         if (idr_handle < 0) {
1712                 r = -EFAULT;
1713                 goto err_free;
1714         }
1715
1716         mutex_unlock(&p->mutex);
1717         dma_buf_put(dmabuf);
1718
1719         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1720
1721         return 0;
1722
1723 err_free:
1724         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1725 err_unlock:
1726         mutex_unlock(&p->mutex);
1727         dma_buf_put(dmabuf);
1728         return r;
1729 }
1730
1731 /* Handle requests for watching SMI events */
1732 static int kfd_ioctl_smi_events(struct file *filep,
1733                                 struct kfd_process *p, void *data)
1734 {
1735         struct kfd_ioctl_smi_events_args *args = data;
1736         struct kfd_dev *dev;
1737
1738         dev = kfd_device_by_id(args->gpuid);
1739         if (!dev)
1740                 return -EINVAL;
1741
1742         return kfd_smi_event_open(dev, &args->anon_fd);
1743 }
1744
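/*
 * Build an ioctl descriptor indexed by the command's _IOC_NR() so that
 * kfd_ioctl() can dispatch on the ioctl number alone.
 */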
1745 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1746         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1747                             .cmd_drv = 0, .name = #ioctl}
1748
1749 /** Ioctl table */
1750 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1751         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1752                         kfd_ioctl_get_version, 0),
1753
1754         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1755                         kfd_ioctl_create_queue, 0),
1756
1757         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1758                         kfd_ioctl_destroy_queue, 0),
1759
1760         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1761                         kfd_ioctl_set_memory_policy, 0),
1762
1763         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1764                         kfd_ioctl_get_clock_counters, 0),
1765
1766         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1767                         kfd_ioctl_get_process_apertures, 0),
1768
1769         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1770                         kfd_ioctl_update_queue, 0),
1771
1772         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1773                         kfd_ioctl_create_event, 0),
1774
1775         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1776                         kfd_ioctl_destroy_event, 0),
1777
1778         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1779                         kfd_ioctl_set_event, 0),
1780
1781         AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1782                         kfd_ioctl_reset_event, 0),
1783
1784         AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1785                         kfd_ioctl_wait_events, 0),
1786
1787         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1788                         kfd_ioctl_dbg_register, 0),
1789
1790         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1791                         kfd_ioctl_dbg_unregister, 0),
1792
1793         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1794                         kfd_ioctl_dbg_address_watch, 0),
1795
1796         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1797                         kfd_ioctl_dbg_wave_control, 0),
1798
1799         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1800                         kfd_ioctl_set_scratch_backing_va, 0),
1801
1802         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1803                         kfd_ioctl_get_tile_config, 0),
1804
1805         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1806                         kfd_ioctl_set_trap_handler, 0),
1807
1808         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1809                         kfd_ioctl_get_process_apertures_new, 0),
1810
1811         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1812                         kfd_ioctl_acquire_vm, 0),
1813
1814         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1815                         kfd_ioctl_alloc_memory_of_gpu, 0),
1816
1817         AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1818                         kfd_ioctl_free_memory_of_gpu, 0),
1819
1820         AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1821                         kfd_ioctl_map_memory_to_gpu, 0),
1822
1823         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1824                         kfd_ioctl_unmap_memory_from_gpu, 0),
1825
1826         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1827                         kfd_ioctl_set_cu_mask, 0),
1828
1829         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1830                         kfd_ioctl_get_queue_wave_state, 0),
1831
1832         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1833                                 kfd_ioctl_get_dmabuf_info, 0),
1834
1835         AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1836                                 kfd_ioctl_import_dmabuf, 0),
1837
1838         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1839                         kfd_ioctl_alloc_queue_gws, 0),
1840
1841         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1842                         kfd_ioctl_smi_events, 0),
1843 };
1844
1845 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1846
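/*
 * Common ioctl entry point: validate the command number, look up its
 * descriptor, marshal the argument struct between user and kernel space
 * and call the per-ioctl handler.
 */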
1847 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1848 {
1849         struct kfd_process *process;
1850         amdkfd_ioctl_t *func;
1851         const struct amdkfd_ioctl_desc *ioctl = NULL;
1852         unsigned int nr = _IOC_NR(cmd);
1853         char stack_kdata[128];
1854         char *kdata = NULL;
1855         unsigned int usize, asize;
1856         int retcode = -EINVAL;
1857
1858         if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1859                 goto err_i1;
1860
1861         if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1862                 u32 amdkfd_size;
1863
1864                 ioctl = &amdkfd_ioctls[nr];
1865
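                /*
                 * The argument struct may have grown since userspace was
                 * built: copy only what the caller passed in (usize) and
                 * zero-fill up to the size the kernel expects (asize).
                 */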
1866                 amdkfd_size = _IOC_SIZE(ioctl->cmd);
1867                 usize = asize = _IOC_SIZE(cmd);
1868                 if (amdkfd_size > asize)
1869                         asize = amdkfd_size;
1870
1871                 cmd = ioctl->cmd;
1872         } else
1873                 goto err_i1;
1874
1875         dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1876
1877         /* Get the process struct from the filep. Only the process
1878          * that opened /dev/kfd can use the file descriptor. Child
1879          * processes need to create their own KFD device context.
1880          */
1881         process = filep->private_data;
1882         if (process->lead_thread != current->group_leader) {
1883                 dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1884                 retcode = -EBADF;
1885                 goto err_i1;
1886         }
1887
1888         /* Do not trust userspace, use our own definition */
1889         func = ioctl->func;
1890
1891         if (unlikely(!func)) {
1892                 dev_dbg(kfd_device, "no function\n");
1893                 retcode = -EINVAL;
1894                 goto err_i1;
1895         }
1896
1897         if (cmd & (IOC_IN | IOC_OUT)) {
1898                 if (asize <= sizeof(stack_kdata)) {
1899                         kdata = stack_kdata;
1900                 } else {
1901                         kdata = kmalloc(asize, GFP_KERNEL);
1902                         if (!kdata) {
1903                                 retcode = -ENOMEM;
1904                                 goto err_i1;
1905                         }
1906                 }
1907                 if (asize > usize)
1908                         memset(kdata + usize, 0, asize - usize);
1909         }
1910
1911         if (cmd & IOC_IN) {
1912                 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1913                         retcode = -EFAULT;
1914                         goto err_i1;
1915                 }
1916         } else if (cmd & IOC_OUT) {
1917                 memset(kdata, 0, usize);
1918         }
1919
1920         retcode = func(filep, process, kdata);
1921
1922         if (cmd & IOC_OUT)
1923                 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1924                         retcode = -EFAULT;
1925
1926 err_i1:
1927         if (!ioctl)
1928                 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1929                           task_pid_nr(current), cmd, nr);
1930
1931         if (kdata != stack_kdata)
1932                 kfree(kdata);
1933
1934         if (retcode)
1935                 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
1936                                 nr, arg, retcode);
1937
1938         return retcode;
1939 }
1940
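/*
 * Map the device's MMIO remap page into the process as uncached I/O memory.
 * Only a mapping of exactly one page is accepted.
 */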
1941 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1942                       struct vm_area_struct *vma)
1943 {
1944         phys_addr_t address;
1945         int ret;
1946
1947         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1948                 return -EINVAL;
1949
1950         address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1951
1952         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1953                                 VM_DONTDUMP | VM_PFNMAP;
1954
1955         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1956
1957         pr_debug("pasid 0x%x mapping mmio page\n"
1958                  "     target user address == 0x%08llX\n"
1959                  "     physical address    == 0x%08llX\n"
1960                  "     vm_flags            == 0x%04lX\n"
1961                  "     size                == 0x%04lX\n",
1962                  process->pasid, (unsigned long long) vma->vm_start,
1963                  address, vma->vm_flags, PAGE_SIZE);
1964
1965         ret = io_remap_pfn_range(vma,
1966                                 vma->vm_start,
1967                                 address >> PAGE_SHIFT,
1968                                 PAGE_SIZE,
1969                                 vma->vm_page_prot);
1970         return ret;
1971 }
1972
1973
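/*
 * mmap handler for /dev/kfd: the mmap offset encodes the mapping type and,
 * where needed, the target GPU id; the request is forwarded to the doorbell,
 * event, reserved-memory or MMIO mapping helper.
 */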
1974 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1975 {
1976         struct kfd_process *process;
1977         struct kfd_dev *dev = NULL;
1978         unsigned long mmap_offset;
1979         unsigned int gpu_id;
1980
1981         process = kfd_get_process(current);
1982         if (IS_ERR(process))
1983                 return PTR_ERR(process);
1984
1985         mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
1986         gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
1987         if (gpu_id)
1988                 dev = kfd_device_by_id(gpu_id);
1989
1990         switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
1991         case KFD_MMAP_TYPE_DOORBELL:
1992                 if (!dev)
1993                         return -ENODEV;
1994                 return kfd_doorbell_mmap(dev, process, vma);
1995
1996         case KFD_MMAP_TYPE_EVENTS:
1997                 return kfd_event_mmap(process, vma);
1998
1999         case KFD_MMAP_TYPE_RESERVED_MEM:
2000                 if (!dev)
2001                         return -ENODEV;
2002                 return kfd_reserved_mem_mmap(dev, process, vma);
2003         case KFD_MMAP_TYPE_MMIO:
2004                 if (!dev)
2005                         return -ENODEV;
2006                 return kfd_mmio_mmap(dev, process, vma);
2007         }
2008
2009         return -EFAULT;
2010 }