drm/amdkfd: Enable over-subscription with >1 GWS queue
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
38 #include "kfd_priv.h"
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
41 #include "amdgpu_amdkfd.h"
42
43 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
44 static int kfd_open(struct inode *, struct file *);
45 static int kfd_release(struct inode *, struct file *);
46 static int kfd_mmap(struct file *, struct vm_area_struct *);
47
48 static const char kfd_dev_name[] = "kfd";
49
50 static const struct file_operations kfd_fops = {
51         .owner = THIS_MODULE,
52         .unlocked_ioctl = kfd_ioctl,
53         .compat_ioctl = compat_ptr_ioctl,
54         .open = kfd_open,
55         .release = kfd_release,
56         .mmap = kfd_mmap,
57 };
58
59 static int kfd_char_dev_major = -1;
60 static struct class *kfd_class;
61 struct device *kfd_device;
62
63 int kfd_chardev_init(void)
64 {
65         int err = 0;
66
67         kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
68         err = kfd_char_dev_major;
69         if (err < 0)
70                 goto err_register_chrdev;
71
72         kfd_class = class_create(THIS_MODULE, kfd_dev_name);
73         err = PTR_ERR(kfd_class);
74         if (IS_ERR(kfd_class))
75                 goto err_class_create;
76
77         kfd_device = device_create(kfd_class, NULL,
78                                         MKDEV(kfd_char_dev_major, 0),
79                                         NULL, kfd_dev_name);
80         err = PTR_ERR(kfd_device);
81         if (IS_ERR(kfd_device))
82                 goto err_device_create;
83
84         return 0;
85
86 err_device_create:
87         class_destroy(kfd_class);
88 err_class_create:
89         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
90 err_register_chrdev:
91         return err;
92 }
93
94 void kfd_chardev_exit(void)
95 {
96         device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
97         class_destroy(kfd_class);
98         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
99 }
100
101 struct device *kfd_chardev(void)
102 {
103         return kfd_device;
104 }
105
106
107 static int kfd_open(struct inode *inode, struct file *filep)
108 {
109         struct kfd_process *process;
110         bool is_32bit_user_mode;
111
112         if (iminor(inode) != 0)
113                 return -ENODEV;
114
115         is_32bit_user_mode = in_compat_syscall();
116
117         if (is_32bit_user_mode) {
118                 dev_warn(kfd_device,
119                         "Process %d (32-bit) failed to open /dev/kfd\n"
120                         "32-bit processes are not supported by amdkfd\n",
121                         current->pid);
122                 return -EPERM;
123         }
124
125         process = kfd_create_process(filep);
126         if (IS_ERR(process))
127                 return PTR_ERR(process);
128
129         if (kfd_is_locked()) {
130                 dev_dbg(kfd_device, "kfd is locked!\n"
131                                 "process %d unreferenced", process->pasid);
132                 kfd_unref_process(process);
133                 return -EAGAIN;
134         }
135
136         /* filep now owns the reference returned by kfd_create_process */
137         filep->private_data = process;
138
139         dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
140                 process->pasid, process->is_32bit_user_mode);
141
142         return 0;
143 }
144
145 static int kfd_release(struct inode *inode, struct file *filep)
146 {
147         struct kfd_process *process = filep->private_data;
148
149         if (process)
150                 kfd_unref_process(process);
151
152         return 0;
153 }
154
155 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
156                                         void *data)
157 {
158         struct kfd_ioctl_get_version_args *args = data;
159
160         args->major_version = KFD_IOCTL_MAJOR_VERSION;
161         args->minor_version = KFD_IOCTL_MINOR_VERSION;
162
163         return 0;
164 }
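/*
 * Illustrative userspace sketch (not part of this file): querying the ioctl
 * interface version after opening /dev/kfd. The ioctl number comes from
 * uapi/linux/kfd_ioctl.h.
 *
 *   struct kfd_ioctl_get_version_args args = {0};
 *
 *   if (ioctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *           printf("KFD ioctl interface %u.%u\n",
 *                  args.major_version, args.minor_version);
 */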
165
166 static int set_queue_properties_from_user(struct queue_properties *q_properties,
167                                 struct kfd_ioctl_create_queue_args *args)
168 {
169         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
170                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
171                 return -EINVAL;
172         }
173
174         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
175                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
176                 return -EINVAL;
177         }
178
179         if ((args->ring_base_address) &&
180                 (!access_ok((const void __user *) args->ring_base_address,
181                         sizeof(uint64_t)))) {
182                 pr_err("Can't access ring base address\n");
183                 return -EFAULT;
184         }
185
186         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
187                 pr_err("Ring size must be a power of 2 or 0\n");
188                 return -EINVAL;
189         }
190
191         if (!access_ok((const void __user *) args->read_pointer_address,
192                         sizeof(uint32_t))) {
193                 pr_err("Can't access read pointer\n");
194                 return -EFAULT;
195         }
196
197         if (!access_ok((const void __user *) args->write_pointer_address,
198                         sizeof(uint32_t))) {
199                 pr_err("Can't access write pointer\n");
200                 return -EFAULT;
201         }
202
203         if (args->eop_buffer_address &&
204                 !access_ok((const void __user *) args->eop_buffer_address,
205                         sizeof(uint32_t))) {
206                 pr_debug("Can't access eop buffer");
207                 return -EFAULT;
208         }
209
210         if (args->ctx_save_restore_address &&
211                 !access_ok((const void __user *) args->ctx_save_restore_address,
212                         sizeof(uint32_t))) {
213                 pr_debug("Can't access ctx save restore buffer");
214                 return -EFAULT;
215         }
216
217         q_properties->is_interop = false;
218         q_properties->is_gws = false;
219         q_properties->queue_percent = args->queue_percentage;
220         q_properties->priority = args->queue_priority;
221         q_properties->queue_address = args->ring_base_address;
222         q_properties->queue_size = args->ring_size;
223         q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
224         q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
225         q_properties->eop_ring_buffer_address = args->eop_buffer_address;
226         q_properties->eop_ring_buffer_size = args->eop_buffer_size;
227         q_properties->ctx_save_restore_area_address =
228                         args->ctx_save_restore_address;
229         q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
230         q_properties->ctl_stack_size = args->ctl_stack_size;
231         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
232                 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
233                 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
234         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
235                 q_properties->type = KFD_QUEUE_TYPE_SDMA;
236         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
237                 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
238         else
239                 return -ENOTSUPP;
240
241         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
242                 q_properties->format = KFD_QUEUE_FORMAT_AQL;
243         else
244                 q_properties->format = KFD_QUEUE_FORMAT_PM4;
245
246         pr_debug("Queue Percentage: %d, %d\n",
247                         q_properties->queue_percent, args->queue_percentage);
248
249         pr_debug("Queue Priority: %d, %d\n",
250                         q_properties->priority, args->queue_priority);
251
252         pr_debug("Queue Address: 0x%llX, 0x%llX\n",
253                         q_properties->queue_address, args->ring_base_address);
254
255         pr_debug("Queue Size: 0x%llX, %u\n",
256                         q_properties->queue_size, args->ring_size);
257
258         pr_debug("Queue r/w Pointers: %px, %px\n",
259                         q_properties->read_ptr,
260                         q_properties->write_ptr);
261
262         pr_debug("Queue Format: %d\n", q_properties->format);
263
264         pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
265
266         pr_debug("Queue CTX save area: 0x%llX\n",
267                         q_properties->ctx_save_restore_area_address);
268
269         return 0;
270 }
271
272 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
273                                         void *data)
274 {
275         struct kfd_ioctl_create_queue_args *args = data;
276         struct kfd_dev *dev;
277         int err = 0;
278         unsigned int queue_id;
279         struct kfd_process_device *pdd;
280         struct queue_properties q_properties;
281         uint32_t doorbell_offset_in_process = 0;
282
283         memset(&q_properties, 0, sizeof(struct queue_properties));
284
285         pr_debug("Creating queue ioctl\n");
286
287         err = set_queue_properties_from_user(&q_properties, args);
288         if (err)
289                 return err;
290
291         pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
292         dev = kfd_device_by_id(args->gpu_id);
293         if (!dev) {
294                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
295                 return -EINVAL;
296         }
297
298         mutex_lock(&p->mutex);
299
300         pdd = kfd_bind_process_to_device(dev, p);
301         if (IS_ERR(pdd)) {
302                 err = -ESRCH;
303                 goto err_bind_process;
304         }
305
306         pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
307                         p->pasid,
308                         dev->id);
309
310         err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
311                         &doorbell_offset_in_process);
312         if (err != 0)
313                 goto err_create_queue;
314
315         args->queue_id = queue_id;
316
317
318         /* Return gpu_id as doorbell offset for mmap usage */
319         args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
320         args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
321         if (KFD_IS_SOC15(dev->device_info->asic_family))
322                 /* On SOC15 ASICs, include the doorbell offset within the
323                  * process doorbell frame, which is 2 pages.
324                  */
325                 args->doorbell_offset |= doorbell_offset_in_process;
326
327         mutex_unlock(&p->mutex);
328
329         pr_debug("Queue id %d was created successfully\n", args->queue_id);
330
331         pr_debug("Ring buffer address == 0x%016llX\n",
332                         args->ring_base_address);
333
334         pr_debug("Read ptr address    == 0x%016llX\n",
335                         args->read_pointer_address);
336
337         pr_debug("Write ptr address   == 0x%016llX\n",
338                         args->write_pointer_address);
339
340         return 0;
341
342 err_create_queue:
343 err_bind_process:
344         mutex_unlock(&p->mutex);
345         return err;
346 }
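/*
 * Illustrative userspace sketch (not part of this file): creating an AQL
 * compute queue. Buffer pointers and sizes below are placeholders that the
 * runtime would normally allocate before issuing the ioctl.
 *
 *   struct kfd_ioctl_create_queue_args args = {0};
 *
 *   args.gpu_id = gpu_id;
 *   args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
 *   args.queue_percentage = KFD_MAX_QUEUE_PERCENTAGE;
 *   args.queue_priority = 7;
 *   args.ring_base_address = (uint64_t)(uintptr_t)ring_buf;
 *   args.ring_size = ring_bytes;            (zero or a power of two)
 *   args.read_pointer_address = (uint64_t)(uintptr_t)rptr;
 *   args.write_pointer_address = (uint64_t)(uintptr_t)wptr;
 *
 *   ret = ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
 *
 * On success, args.queue_id identifies the queue and args.doorbell_offset is
 * the offset to mmap() on the /dev/kfd file descriptor to reach the doorbell.
 */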
347
348 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
349                                         void *data)
350 {
351         int retval;
352         struct kfd_ioctl_destroy_queue_args *args = data;
353
354         pr_debug("Destroying queue id %d for pasid 0x%x\n",
355                                 args->queue_id,
356                                 p->pasid);
357
358         mutex_lock(&p->mutex);
359
360         retval = pqm_destroy_queue(&p->pqm, args->queue_id);
361
362         mutex_unlock(&p->mutex);
363         return retval;
364 }
365
366 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
367                                         void *data)
368 {
369         int retval;
370         struct kfd_ioctl_update_queue_args *args = data;
371         struct queue_properties properties;
372
373         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
374                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
375                 return -EINVAL;
376         }
377
378         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
379                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
380                 return -EINVAL;
381         }
382
383         if ((args->ring_base_address) &&
384                 (!access_ok((const void __user *) args->ring_base_address,
385                         sizeof(uint64_t)))) {
386                 pr_err("Can't access ring base address\n");
387                 return -EFAULT;
388         }
389
390         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
391                 pr_err("Ring size must be a power of 2 or 0\n");
392                 return -EINVAL;
393         }
394
395         properties.queue_address = args->ring_base_address;
396         properties.queue_size = args->ring_size;
397         properties.queue_percent = args->queue_percentage;
398         properties.priority = args->queue_priority;
399
400         pr_debug("Updating queue id %d for pasid 0x%x\n",
401                         args->queue_id, p->pasid);
402
403         mutex_lock(&p->mutex);
404
405         retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
406
407         mutex_unlock(&p->mutex);
408
409         return retval;
410 }
411
412 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
413                                         void *data)
414 {
415         int retval;
416         const int max_num_cus = 1024;
417         struct kfd_ioctl_set_cu_mask_args *args = data;
418         struct queue_properties properties;
419         uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
420         size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
421
422         if ((args->num_cu_mask % 32) != 0) {
423                 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
424                                 args->num_cu_mask);
425                 return -EINVAL;
426         }
427
428         properties.cu_mask_count = args->num_cu_mask;
429         if (properties.cu_mask_count == 0) {
430                 pr_debug("CU mask cannot be 0");
431                 return -EINVAL;
432         }
433
434         /* To prevent an unreasonably large CU mask size, set an arbitrary
435          * limit of max_num_cus bits.  We can then drop any CU mask bits
436          * past max_num_cus bits and use only the first max_num_cus bits.
437          */
438         if (properties.cu_mask_count > max_num_cus) {
439                 pr_debug("CU mask cannot be greater than 1024 bits");
440                 properties.cu_mask_count = max_num_cus;
441                 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
442         }
443
444         properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
445         if (!properties.cu_mask)
446                 return -ENOMEM;
447
448         retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
449         if (retval) {
450                 pr_debug("Could not copy CU mask from userspace");
451                 kfree(properties.cu_mask);
452                 return -EFAULT;
453         }
454
455         mutex_lock(&p->mutex);
456
457         retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
458
459         mutex_unlock(&p->mutex);
460
461         if (retval)
462                 kfree(properties.cu_mask);
463
464         return retval;
465 }
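/*
 * Illustrative userspace sketch (not part of this file): restricting a queue
 * to the CUs enabled in a caller-provided bitmask. cu_mask[] is a placeholder
 * array with one bit per CU, supplied in groups of 32 bits.
 *
 *   uint32_t cu_mask[2] = { 0xffffffff, 0x0000ffff };
 *   struct kfd_ioctl_set_cu_mask_args args = {0};
 *
 *   args.queue_id = queue_id;
 *   args.num_cu_mask = 64;                  (must be a multiple of 32)
 *   args.cu_mask_ptr = (uint64_t)(uintptr_t)cu_mask;
 *
 *   ret = ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
 */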
466
467 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
468                                           struct kfd_process *p, void *data)
469 {
470         struct kfd_ioctl_get_queue_wave_state_args *args = data;
471         int r;
472
473         mutex_lock(&p->mutex);
474
475         r = pqm_get_wave_state(&p->pqm, args->queue_id,
476                                (void __user *)args->ctl_stack_address,
477                                &args->ctl_stack_used_size,
478                                &args->save_area_used_size);
479
480         mutex_unlock(&p->mutex);
481
482         return r;
483 }
484
485 static int kfd_ioctl_set_memory_policy(struct file *filep,
486                                         struct kfd_process *p, void *data)
487 {
488         struct kfd_ioctl_set_memory_policy_args *args = data;
489         struct kfd_dev *dev;
490         int err = 0;
491         struct kfd_process_device *pdd;
492         enum cache_policy default_policy, alternate_policy;
493
494         if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
495             && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
496                 return -EINVAL;
497         }
498
499         if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
500             && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
501                 return -EINVAL;
502         }
503
504         dev = kfd_device_by_id(args->gpu_id);
505         if (!dev)
506                 return -EINVAL;
507
508         mutex_lock(&p->mutex);
509
510         pdd = kfd_bind_process_to_device(dev, p);
511         if (IS_ERR(pdd)) {
512                 err = -ESRCH;
513                 goto out;
514         }
515
516         default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
517                          ? cache_policy_coherent : cache_policy_noncoherent;
518
519         alternate_policy =
520                 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
521                    ? cache_policy_coherent : cache_policy_noncoherent;
522
523         if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
524                                 &pdd->qpd,
525                                 default_policy,
526                                 alternate_policy,
527                                 (void __user *)args->alternate_aperture_base,
528                                 args->alternate_aperture_size))
529                 err = -EINVAL;
530
531 out:
532         mutex_unlock(&p->mutex);
533
534         return err;
535 }
536
537 static int kfd_ioctl_set_trap_handler(struct file *filep,
538                                         struct kfd_process *p, void *data)
539 {
540         struct kfd_ioctl_set_trap_handler_args *args = data;
541         struct kfd_dev *dev;
542         int err = 0;
543         struct kfd_process_device *pdd;
544
545         dev = kfd_device_by_id(args->gpu_id);
546         if (!dev)
547                 return -EINVAL;
548
549         mutex_lock(&p->mutex);
550
551         pdd = kfd_bind_process_to_device(dev, p);
552         if (IS_ERR(pdd)) {
553                 err = -ESRCH;
554                 goto out;
555         }
556
557         if (dev->dqm->ops.set_trap_handler(dev->dqm,
558                                         &pdd->qpd,
559                                         args->tba_addr,
560                                         args->tma_addr))
561                 err = -EINVAL;
562
563 out:
564         mutex_unlock(&p->mutex);
565
566         return err;
567 }
568
569 static int kfd_ioctl_dbg_register(struct file *filep,
570                                 struct kfd_process *p, void *data)
571 {
572         struct kfd_ioctl_dbg_register_args *args = data;
573         struct kfd_dev *dev;
574         struct kfd_dbgmgr *dbgmgr_ptr;
575         struct kfd_process_device *pdd;
576         bool create_ok;
577         long status = 0;
578
579         dev = kfd_device_by_id(args->gpu_id);
580         if (!dev)
581                 return -EINVAL;
582
583         if (dev->device_info->asic_family == CHIP_CARRIZO) {
584                 pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
585                 return -EINVAL;
586         }
587
588         mutex_lock(&p->mutex);
589         mutex_lock(kfd_get_dbgmgr_mutex());
590
591         /*
592          * make sure that we have a pdd, if this is the first queue created for
593          * this process
594          */
595         pdd = kfd_bind_process_to_device(dev, p);
596         if (IS_ERR(pdd)) {
597                 status = PTR_ERR(pdd);
598                 goto out;
599         }
600
601         if (!dev->dbgmgr) {
602                 /* In case of a legal call, we have no dbgmgr yet */
603                 create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
604                 if (create_ok) {
605                         status = kfd_dbgmgr_register(dbgmgr_ptr, p);
606                         if (status != 0)
607                                 kfd_dbgmgr_destroy(dbgmgr_ptr);
608                         else
609                                 dev->dbgmgr = dbgmgr_ptr;
610                 }
611         } else {
612                 pr_debug("debugger already registered\n");
613                 status = -EINVAL;
614         }
615
616 out:
617         mutex_unlock(kfd_get_dbgmgr_mutex());
618         mutex_unlock(&p->mutex);
619
620         return status;
621 }
622
623 static int kfd_ioctl_dbg_unregister(struct file *filep,
624                                 struct kfd_process *p, void *data)
625 {
626         struct kfd_ioctl_dbg_unregister_args *args = data;
627         struct kfd_dev *dev;
628         long status;
629
630         dev = kfd_device_by_id(args->gpu_id);
631         if (!dev || !dev->dbgmgr)
632                 return -EINVAL;
633
634         if (dev->device_info->asic_family == CHIP_CARRIZO) {
635                 pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
636                 return -EINVAL;
637         }
638
639         mutex_lock(kfd_get_dbgmgr_mutex());
640
641         status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
642         if (!status) {
643                 kfd_dbgmgr_destroy(dev->dbgmgr);
644                 dev->dbgmgr = NULL;
645         }
646
647         mutex_unlock(kfd_get_dbgmgr_mutex());
648
649         return status;
650 }
651
652 /*
653  * Parse and generate a variable size data structure for address watch.
654  * The total buffer size and number of watch points are limited in order
655  * to prevent kernel abuse. (This has no bearing on the much smaller HW
656  * limitation, which is enforced by the dbgdev module.)
657  * Please also note that the watch addresses themselves are not copied from
658  * user, since they are set into the HW in user mode values.
659  *
660  */
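/*
 * For reference, the payload that follows the fixed args structure in the
 * user buffer is parsed by the function below as:
 *
 *   uint32_t                 num_watch_points;
 *   enum HSA_DBG_WATCH_MODE  watch_mode[num_watch_points];
 *   uint64_t                 watch_address[num_watch_points];
 *   uint64_t                 watch_mask[num_watch_points];   (optional; a
 *                            leading zero entry means no masks are supplied)
 */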
661 static int kfd_ioctl_dbg_address_watch(struct file *filep,
662                                         struct kfd_process *p, void *data)
663 {
664         struct kfd_ioctl_dbg_address_watch_args *args = data;
665         struct kfd_dev *dev;
666         struct dbg_address_watch_info aw_info;
667         unsigned char *args_buff;
668         long status;
669         void __user *cmd_from_user;
670         uint64_t watch_mask_value = 0;
671         unsigned int args_idx = 0;
672
673         memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
674
675         dev = kfd_device_by_id(args->gpu_id);
676         if (!dev)
677                 return -EINVAL;
678
679         if (dev->device_info->asic_family == CHIP_CARRIZO) {
680                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
681                 return -EINVAL;
682         }
683
684         cmd_from_user = (void __user *) args->content_ptr;
685
686         /* Validate arguments */
687
688         if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
689                 (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
690                 (cmd_from_user == NULL))
691                 return -EINVAL;
692
693         /* this is the actual buffer to work with */
694         args_buff = memdup_user(cmd_from_user,
695                                 args->buf_size_in_bytes - sizeof(*args));
696         if (IS_ERR(args_buff))
697                 return PTR_ERR(args_buff);
698
699         aw_info.process = p;
700
701         aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
702         args_idx += sizeof(aw_info.num_watch_points);
703
704         aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
705         args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
706
707         /*
708          * set watch address base pointer to point on the array base
709          * within args_buff
710          */
711         aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
712
713         /* skip over the addresses buffer */
714         args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
715
716         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
717                 status = -EINVAL;
718                 goto out;
719         }
720
721         watch_mask_value = (uint64_t) args_buff[args_idx];
722
723         if (watch_mask_value > 0) {
724                 /*
725                  * There is an array of masks.
726                  * set watch mask base pointer to point on the array base
727                  * within args_buff
728                  */
729                 aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
730
731                 /* skip over the masks buffer */
732                 args_idx += sizeof(aw_info.watch_mask) *
733                                 aw_info.num_watch_points;
734         } else {
735                 /* just the NULL mask, set to NULL and skip over it */
736                 aw_info.watch_mask = NULL;
737                 args_idx += sizeof(aw_info.watch_mask);
738         }
739
740         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
741                 status = -EINVAL;
742                 goto out;
743         }
744
745         /* Currently HSA Event is not supported for DBG */
746         aw_info.watch_event = NULL;
747
748         mutex_lock(kfd_get_dbgmgr_mutex());
749
750         status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
751
752         mutex_unlock(kfd_get_dbgmgr_mutex());
753
754 out:
755         kfree(args_buff);
756
757         return status;
758 }
759
760 /* Parse and generate fixed size data structure for wave control */
761 static int kfd_ioctl_dbg_wave_control(struct file *filep,
762                                         struct kfd_process *p, void *data)
763 {
764         struct kfd_ioctl_dbg_wave_control_args *args = data;
765         struct kfd_dev *dev;
766         struct dbg_wave_control_info wac_info;
767         unsigned char *args_buff;
768         uint32_t computed_buff_size;
769         long status;
770         void __user *cmd_from_user;
771         unsigned int args_idx = 0;
772
773         memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
774
775         /* we use compact form, independent of the packing attribute value */
776         computed_buff_size = sizeof(*args) +
777                                 sizeof(wac_info.mode) +
778                                 sizeof(wac_info.operand) +
779                                 sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
780                                 sizeof(wac_info.dbgWave_msg.MemoryVA) +
781                                 sizeof(wac_info.trapId);
782
783         dev = kfd_device_by_id(args->gpu_id);
784         if (!dev)
785                 return -EINVAL;
786
787         if (dev->device_info->asic_family == CHIP_CARRIZO) {
788                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
789                 return -EINVAL;
790         }
791
792         /* input size must match the computed "compact" size */
793         if (args->buf_size_in_bytes != computed_buff_size) {
794                 pr_debug("size mismatch, computed : actual %u : %u\n",
795                                 args->buf_size_in_bytes, computed_buff_size);
796                 return -EINVAL;
797         }
798
799         cmd_from_user = (void __user *) args->content_ptr;
800
801         if (cmd_from_user == NULL)
802                 return -EINVAL;
803
804         /* copy the entire buffer from user */
805
806         args_buff = memdup_user(cmd_from_user,
807                                 args->buf_size_in_bytes - sizeof(*args));
808         if (IS_ERR(args_buff))
809                 return PTR_ERR(args_buff);
810
811         /* move ptr to the start of the payload area */
812         wac_info.process = p;
813
814         wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
815         args_idx += sizeof(wac_info.operand);
816
817         wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
818         args_idx += sizeof(wac_info.mode);
819
820         wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
821         args_idx += sizeof(wac_info.trapId);
822
823         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
824                                         *((uint32_t *)(&args_buff[args_idx]));
825         wac_info.dbgWave_msg.MemoryVA = NULL;
826
827         mutex_lock(kfd_get_dbgmgr_mutex());
828
829         pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
830                         wac_info.process, wac_info.operand,
831                         wac_info.mode, wac_info.trapId,
832                         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
833
834         status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
835
836         pr_debug("Returned status of dbg manager is %ld\n", status);
837
838         mutex_unlock(kfd_get_dbgmgr_mutex());
839
840         kfree(args_buff);
841
842         return status;
843 }
844
845 static int kfd_ioctl_get_clock_counters(struct file *filep,
846                                 struct kfd_process *p, void *data)
847 {
848         struct kfd_ioctl_get_clock_counters_args *args = data;
849         struct kfd_dev *dev;
850
851         dev = kfd_device_by_id(args->gpu_id);
852         if (dev)
853                 /* Reading GPU clock counter from KGD */
854                 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
855         else
856                 /* Node without GPU resource */
857                 args->gpu_clock_counter = 0;
858
859         /* No access to rdtsc. Using raw monotonic time */
860         args->cpu_clock_counter = ktime_get_raw_ns();
861         args->system_clock_counter = ktime_get_boottime_ns();
862
863         /* Since the counter is in nano-seconds we use 1GHz frequency */
864         args->system_clock_freq = 1000000000;
865
866         return 0;
867 }
868
869
870 static int kfd_ioctl_get_process_apertures(struct file *filp,
871                                 struct kfd_process *p, void *data)
872 {
873         struct kfd_ioctl_get_process_apertures_args *args = data;
874         struct kfd_process_device_apertures *pAperture;
875         struct kfd_process_device *pdd;
876
877         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
878
879         args->num_of_nodes = 0;
880
881         mutex_lock(&p->mutex);
882
883         /* if the process-device list isn't empty */
884         if (kfd_has_process_device_data(p)) {
885                 /* Run over all pdd of the process */
886                 pdd = kfd_get_first_process_device_data(p);
887                 do {
888                         pAperture =
889                                 &args->process_apertures[args->num_of_nodes];
890                         pAperture->gpu_id = pdd->dev->id;
891                         pAperture->lds_base = pdd->lds_base;
892                         pAperture->lds_limit = pdd->lds_limit;
893                         pAperture->gpuvm_base = pdd->gpuvm_base;
894                         pAperture->gpuvm_limit = pdd->gpuvm_limit;
895                         pAperture->scratch_base = pdd->scratch_base;
896                         pAperture->scratch_limit = pdd->scratch_limit;
897
898                         dev_dbg(kfd_device,
899                                 "node id %u\n", args->num_of_nodes);
900                         dev_dbg(kfd_device,
901                                 "gpu id %u\n", pdd->dev->id);
902                         dev_dbg(kfd_device,
903                                 "lds_base %llX\n", pdd->lds_base);
904                         dev_dbg(kfd_device,
905                                 "lds_limit %llX\n", pdd->lds_limit);
906                         dev_dbg(kfd_device,
907                                 "gpuvm_base %llX\n", pdd->gpuvm_base);
908                         dev_dbg(kfd_device,
909                                 "gpuvm_limit %llX\n", pdd->gpuvm_limit);
910                         dev_dbg(kfd_device,
911                                 "scratch_base %llX\n", pdd->scratch_base);
912                         dev_dbg(kfd_device,
913                                 "scratch_limit %llX\n", pdd->scratch_limit);
914
915                         args->num_of_nodes++;
916
917                         pdd = kfd_get_next_process_device_data(p, pdd);
918                 } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
919         }
920
921         mutex_unlock(&p->mutex);
922
923         return 0;
924 }
925
926 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
927                                 struct kfd_process *p, void *data)
928 {
929         struct kfd_ioctl_get_process_apertures_new_args *args = data;
930         struct kfd_process_device_apertures *pa;
931         struct kfd_process_device *pdd;
932         uint32_t nodes = 0;
933         int ret;
934
935         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
936
937         if (args->num_of_nodes == 0) {
938                 /* Return number of nodes, so that user space can allocate
939                  * sufficient memory
940                  */
941                 mutex_lock(&p->mutex);
942
943                 if (!kfd_has_process_device_data(p))
944                         goto out_unlock;
945
946                 /* Run over all pdd of the process */
947                 pdd = kfd_get_first_process_device_data(p);
948                 do {
949                         args->num_of_nodes++;
950                         pdd = kfd_get_next_process_device_data(p, pdd);
951                 } while (pdd);
952
953                 goto out_unlock;
954         }
955
956         /* Fill in process-aperture information for all available
957          * nodes, but not more than args->num_of_nodes as that is
958          * the amount of memory allocated by user
959          */
960         pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
961                                 args->num_of_nodes), GFP_KERNEL);
962         if (!pa)
963                 return -ENOMEM;
964
965         mutex_lock(&p->mutex);
966
967         if (!kfd_has_process_device_data(p)) {
968                 args->num_of_nodes = 0;
969                 kfree(pa);
970                 goto out_unlock;
971         }
972
973         /* Run over all pdd of the process */
974         pdd = kfd_get_first_process_device_data(p);
975         do {
976                 pa[nodes].gpu_id = pdd->dev->id;
977                 pa[nodes].lds_base = pdd->lds_base;
978                 pa[nodes].lds_limit = pdd->lds_limit;
979                 pa[nodes].gpuvm_base = pdd->gpuvm_base;
980                 pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
981                 pa[nodes].scratch_base = pdd->scratch_base;
982                 pa[nodes].scratch_limit = pdd->scratch_limit;
983
984                 dev_dbg(kfd_device,
985                         "gpu id %u\n", pdd->dev->id);
986                 dev_dbg(kfd_device,
987                         "lds_base %llX\n", pdd->lds_base);
988                 dev_dbg(kfd_device,
989                         "lds_limit %llX\n", pdd->lds_limit);
990                 dev_dbg(kfd_device,
991                         "gpuvm_base %llX\n", pdd->gpuvm_base);
992                 dev_dbg(kfd_device,
993                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
994                 dev_dbg(kfd_device,
995                         "scratch_base %llX\n", pdd->scratch_base);
996                 dev_dbg(kfd_device,
997                         "scratch_limit %llX\n", pdd->scratch_limit);
998                 nodes++;
999
1000                 pdd = kfd_get_next_process_device_data(p, pdd);
1001         } while (pdd && (nodes < args->num_of_nodes));
1002         mutex_unlock(&p->mutex);
1003
1004         args->num_of_nodes = nodes;
1005         ret = copy_to_user(
1006                         (void __user *)args->kfd_process_device_apertures_ptr,
1007                         pa,
1008                         (nodes * sizeof(struct kfd_process_device_apertures)));
1009         kfree(pa);
1010         return ret ? -EFAULT : 0;
1011
1012 out_unlock:
1013         mutex_unlock(&p->mutex);
1014         return 0;
1015 }
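/*
 * Illustrative userspace sketch (not part of this file) of the two-call
 * pattern this ioctl expects: first query the node count, then allocate the
 * aperture array and fetch it.
 *
 *   struct kfd_ioctl_get_process_apertures_new_args args = {0};
 *
 *   ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args);
 *
 *   struct kfd_process_device_apertures *pa =
 *           calloc(args.num_of_nodes, sizeof(*pa));
 *   args.kfd_process_device_apertures_ptr = (uint64_t)(uintptr_t)pa;
 *
 *   ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args);
 */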
1016
1017 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1018                                         void *data)
1019 {
1020         struct kfd_ioctl_create_event_args *args = data;
1021         int err;
1022
1023         /* For dGPUs the event page is allocated in user mode. The
1024          * handle is passed to KFD with the first call to this IOCTL
1025          * through the event_page_offset field.
1026          */
1027         if (args->event_page_offset) {
1028                 struct kfd_dev *kfd;
1029                 struct kfd_process_device *pdd;
1030                 void *mem, *kern_addr;
1031                 uint64_t size;
1032
1033                 if (p->signal_page) {
1034                         pr_err("Event page is already set\n");
1035                         return -EINVAL;
1036                 }
1037
1038                 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1039                 if (!kfd) {
1040                         pr_err("Getting device by id failed in %s\n", __func__);
1041                         return -EINVAL;
1042                 }
1043
1044                 mutex_lock(&p->mutex);
1045                 pdd = kfd_bind_process_to_device(kfd, p);
1046                 if (IS_ERR(pdd)) {
1047                         err = PTR_ERR(pdd);
1048                         goto out_unlock;
1049                 }
1050
1051                 mem = kfd_process_device_translate_handle(pdd,
1052                                 GET_IDR_HANDLE(args->event_page_offset));
1053                 if (!mem) {
1054                         pr_err("Can't find BO, offset is 0x%llx\n",
1055                                args->event_page_offset);
1056                         err = -EINVAL;
1057                         goto out_unlock;
1058                 }
1059                 mutex_unlock(&p->mutex);
1060
1061                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1062                                                 mem, &kern_addr, &size);
1063                 if (err) {
1064                         pr_err("Failed to map event page to kernel\n");
1065                         return err;
1066                 }
1067
1068                 err = kfd_event_page_set(p, kern_addr, size);
1069                 if (err) {
1070                         pr_err("Failed to set event page\n");
1071                         return err;
1072                 }
1073         }
1074
1075         err = kfd_event_create(filp, p, args->event_type,
1076                                 args->auto_reset != 0, args->node_id,
1077                                 &args->event_id, &args->event_trigger_data,
1078                                 &args->event_page_offset,
1079                                 &args->event_slot_index);
1080
1081         return err;
1082
1083 out_unlock:
1084         mutex_unlock(&p->mutex);
1085         return err;
1086 }
1087
1088 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1089                                         void *data)
1090 {
1091         struct kfd_ioctl_destroy_event_args *args = data;
1092
1093         return kfd_event_destroy(p, args->event_id);
1094 }
1095
1096 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1097                                 void *data)
1098 {
1099         struct kfd_ioctl_set_event_args *args = data;
1100
1101         return kfd_set_event(p, args->event_id);
1102 }
1103
1104 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1105                                 void *data)
1106 {
1107         struct kfd_ioctl_reset_event_args *args = data;
1108
1109         return kfd_reset_event(p, args->event_id);
1110 }
1111
1112 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1113                                 void *data)
1114 {
1115         struct kfd_ioctl_wait_events_args *args = data;
1116         int err;
1117
1118         err = kfd_wait_on_events(p, args->num_events,
1119                         (void __user *)args->events_ptr,
1120                         (args->wait_for_all != 0),
1121                         args->timeout, &args->wait_result);
1122
1123         return err;
1124 }
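
/*
 * Illustrative userspace sketch (not part of this file): creating a signal
 * event and waiting on it. struct kfd_event_data and the event type come
 * from uapi/linux/kfd_ioctl.h.
 *
 *   struct kfd_ioctl_create_event_args create = {0};
 *   create.event_type = KFD_IOC_EVENT_SIGNAL;
 *   create.auto_reset = 1;
 *   ret = ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &create);
 *
 *   struct kfd_event_data ev = { .event_id = create.event_id };
 *   struct kfd_ioctl_wait_events_args wait = {0};
 *   wait.events_ptr = (uint64_t)(uintptr_t)&ev;
 *   wait.num_events = 1;
 *   wait.wait_for_all = 1;
 *   wait.timeout = 1000;                    (milliseconds)
 *   ret = ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &wait);
 */
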
1125 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1126                                         struct kfd_process *p, void *data)
1127 {
1128         struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1129         struct kfd_process_device *pdd;
1130         struct kfd_dev *dev;
1131         long err;
1132
1133         dev = kfd_device_by_id(args->gpu_id);
1134         if (!dev)
1135                 return -EINVAL;
1136
1137         mutex_lock(&p->mutex);
1138
1139         pdd = kfd_bind_process_to_device(dev, p);
1140         if (IS_ERR(pdd)) {
1141                 err = PTR_ERR(pdd);
1142                 goto bind_process_to_device_fail;
1143         }
1144
1145         pdd->qpd.sh_hidden_private_base = args->va_addr;
1146
1147         mutex_unlock(&p->mutex);
1148
1149         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1150             pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1151                 dev->kfd2kgd->set_scratch_backing_va(
1152                         dev->kgd, args->va_addr, pdd->qpd.vmid);
1153
1154         return 0;
1155
1156 bind_process_to_device_fail:
1157         mutex_unlock(&p->mutex);
1158         return err;
1159 }
1160
1161 static int kfd_ioctl_get_tile_config(struct file *filep,
1162                 struct kfd_process *p, void *data)
1163 {
1164         struct kfd_ioctl_get_tile_config_args *args = data;
1165         struct kfd_dev *dev;
1166         struct tile_config config;
1167         int err = 0;
1168
1169         dev = kfd_device_by_id(args->gpu_id);
1170         if (!dev)
1171                 return -EINVAL;
1172
1173         amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1174
1175         args->gb_addr_config = config.gb_addr_config;
1176         args->num_banks = config.num_banks;
1177         args->num_ranks = config.num_ranks;
1178
1179         if (args->num_tile_configs > config.num_tile_configs)
1180                 args->num_tile_configs = config.num_tile_configs;
1181         err = copy_to_user((void __user *)args->tile_config_ptr,
1182                         config.tile_config_ptr,
1183                         args->num_tile_configs * sizeof(uint32_t));
1184         if (err) {
1185                 args->num_tile_configs = 0;
1186                 return -EFAULT;
1187         }
1188
1189         if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1190                 args->num_macro_tile_configs =
1191                                 config.num_macro_tile_configs;
1192         err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1193                         config.macro_tile_config_ptr,
1194                         args->num_macro_tile_configs * sizeof(uint32_t));
1195         if (err) {
1196                 args->num_macro_tile_configs = 0;
1197                 return -EFAULT;
1198         }
1199
1200         return 0;
1201 }
1202
1203 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1204                                 void *data)
1205 {
1206         struct kfd_ioctl_acquire_vm_args *args = data;
1207         struct kfd_process_device *pdd;
1208         struct kfd_dev *dev;
1209         struct file *drm_file;
1210         int ret;
1211
1212         dev = kfd_device_by_id(args->gpu_id);
1213         if (!dev)
1214                 return -EINVAL;
1215
1216         drm_file = fget(args->drm_fd);
1217         if (!drm_file)
1218                 return -EINVAL;
1219
1220         mutex_lock(&p->mutex);
1221
1222         pdd = kfd_get_process_device_data(dev, p);
1223         if (!pdd) {
1224                 ret = -EINVAL;
1225                 goto err_unlock;
1226         }
1227
1228         if (pdd->drm_file) {
1229                 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1230                 goto err_unlock;
1231         }
1232
1233         ret = kfd_process_device_init_vm(pdd, drm_file);
1234         if (ret)
1235                 goto err_unlock;
1236         /* On success, the PDD keeps the drm_file reference */
1237         mutex_unlock(&p->mutex);
1238
1239         return 0;
1240
1241 err_unlock:
1242         mutex_unlock(&p->mutex);
1243         fput(drm_file);
1244         return ret;
1245 }
1246
1247 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1248 {
1249         struct kfd_local_mem_info mem_info;
1250
1251         if (debug_largebar) {
1252                 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1253                 return true;
1254         }
1255
1256         if (dev->device_info->needs_iommu_device)
1257                 return false;
1258
1259         amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1260         if (mem_info.local_mem_size_private == 0 &&
1261                         mem_info.local_mem_size_public > 0)
1262                 return true;
1263         return false;
1264 }
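/*
 * "Large BAR" above means all of the device's local memory is exposed
 * through a CPU-visible (public) aperture, i.e. local_mem_size_private is
 * zero. Host-visible VRAM allocations are only permitted in that case, or
 * when the debug_largebar module parameter simulates it.
 */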
1265
1266 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1267                                         struct kfd_process *p, void *data)
1268 {
1269         struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1270         struct kfd_process_device *pdd;
1271         void *mem;
1272         struct kfd_dev *dev;
1273         int idr_handle;
1274         long err;
1275         uint64_t offset = args->mmap_offset;
1276         uint32_t flags = args->flags;
1277
1278         if (args->size == 0)
1279                 return -EINVAL;
1280
1281         dev = kfd_device_by_id(args->gpu_id);
1282         if (!dev)
1283                 return -EINVAL;
1284
1285         if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1286                 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1287                 !kfd_dev_is_large_bar(dev)) {
1288                 pr_err("Alloc host visible vram on small bar is not allowed\n");
1289                 return -EINVAL;
1290         }
1291
1292         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1293                 if (args->size != kfd_doorbell_process_slice(dev))
1294                         return -EINVAL;
1295                 offset = kfd_get_process_doorbells(dev, p);
1296         } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1297                 if (args->size != PAGE_SIZE)
1298                         return -EINVAL;
1299                 offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1300                 if (!offset)
1301                         return -ENOMEM;
1302         }
1303
1304         mutex_lock(&p->mutex);
1305
1306         pdd = kfd_bind_process_to_device(dev, p);
1307         if (IS_ERR(pdd)) {
1308                 err = PTR_ERR(pdd);
1309                 goto err_unlock;
1310         }
1311
1312         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1313                 dev->kgd, args->va_addr, args->size,
1314                 pdd->vm, (struct kgd_mem **) &mem, &offset,
1315                 flags);
1316
1317         if (err)
1318                 goto err_unlock;
1319
1320         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1321         if (idr_handle < 0) {
1322                 err = -EFAULT;
1323                 goto err_free;
1324         }
1325
1326         mutex_unlock(&p->mutex);
1327
1328         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1329         args->mmap_offset = offset;
1330
1331         /* MMIO is mapped through kfd device
1332          * Generate a kfd mmap offset
1333          */
1334         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1335                 args->mmap_offset = KFD_MMAP_TYPE_MMIO
1336                                         | KFD_MMAP_GPU_ID(args->gpu_id);
1337
1338         return 0;
1339
1340 err_free:
1341         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1342 err_unlock:
1343         mutex_unlock(&p->mutex);
1344         return err;
1345 }
1346
1347 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1348                                         struct kfd_process *p, void *data)
1349 {
1350         struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1351         struct kfd_process_device *pdd;
1352         void *mem;
1353         struct kfd_dev *dev;
1354         int ret;
1355
1356         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1357         if (!dev)
1358                 return -EINVAL;
1359
1360         mutex_lock(&p->mutex);
1361
1362         pdd = kfd_get_process_device_data(dev, p);
1363         if (!pdd) {
1364                 pr_err("Process device data doesn't exist\n");
1365                 ret = -EINVAL;
1366                 goto err_unlock;
1367         }
1368
1369         mem = kfd_process_device_translate_handle(
1370                 pdd, GET_IDR_HANDLE(args->handle));
1371         if (!mem) {
1372                 ret = -EINVAL;
1373                 goto err_unlock;
1374         }
1375
1376         ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1377                                                 (struct kgd_mem *)mem);
1378
1379         /* If freeing the buffer failed, leave the handle in place for
1380          * clean-up during process tear-down.
1381          */
1382         if (!ret)
1383                 kfd_process_device_remove_obj_handle(
1384                         pdd, GET_IDR_HANDLE(args->handle));
1385
1386 err_unlock:
1387         mutex_unlock(&p->mutex);
1388         return ret;
1389 }
1390
1391 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1392                                         struct kfd_process *p, void *data)
1393 {
1394         struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1395         struct kfd_process_device *pdd, *peer_pdd;
1396         void *mem;
1397         struct kfd_dev *dev, *peer;
1398         long err = 0;
1399         int i;
1400         uint32_t *devices_arr = NULL;
1401
1402         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1403         if (!dev)
1404                 return -EINVAL;
1405
1406         if (!args->n_devices) {
1407                 pr_debug("Device IDs array empty\n");
1408                 return -EINVAL;
1409         }
1410         if (args->n_success > args->n_devices) {
1411                 pr_debug("n_success exceeds n_devices\n");
1412                 return -EINVAL;
1413         }
1414
1415         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1416                                     GFP_KERNEL);
1417         if (!devices_arr)
1418                 return -ENOMEM;
1419
1420         err = copy_from_user(devices_arr,
1421                              (void __user *)args->device_ids_array_ptr,
1422                              args->n_devices * sizeof(*devices_arr));
1423         if (err != 0) {
1424                 err = -EFAULT;
1425                 goto copy_from_user_failed;
1426         }
1427
1428         mutex_lock(&p->mutex);
1429
1430         pdd = kfd_bind_process_to_device(dev, p);
1431         if (IS_ERR(pdd)) {
1432                 err = PTR_ERR(pdd);
1433                 goto bind_process_to_device_failed;
1434         }
1435
1436         mem = kfd_process_device_translate_handle(pdd,
1437                                                 GET_IDR_HANDLE(args->handle));
1438         if (!mem) {
1439                 err = -ENOMEM;
1440                 goto get_mem_obj_from_handle_failed;
1441         }
1442
1443         for (i = args->n_success; i < args->n_devices; i++) {
1444                 peer = kfd_device_by_id(devices_arr[i]);
1445                 if (!peer) {
1446                         pr_debug("Getting device by id failed for 0x%x\n",
1447                                  devices_arr[i]);
1448                         err = -EINVAL;
1449                         goto get_mem_obj_from_handle_failed;
1450                 }
1451
1452                 peer_pdd = kfd_bind_process_to_device(peer, p);
1453                 if (IS_ERR(peer_pdd)) {
1454                         err = PTR_ERR(peer_pdd);
1455                         goto get_mem_obj_from_handle_failed;
1456                 }
1457                 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1458                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1459                 if (err) {
1460                         pr_err("Failed to map to gpu %d/%d\n",
1461                                i, args->n_devices);
1462                         goto map_memory_to_gpu_failed;
1463                 }
1464                 args->n_success = i+1;
1465         }
1466
1467         mutex_unlock(&p->mutex);
1468
1469         err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1470         if (err) {
1471                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1472                 goto sync_memory_failed;
1473         }
1474
1475         /* Flush TLBs after waiting for the page table updates to complete */
1476         for (i = 0; i < args->n_devices; i++) {
1477                 peer = kfd_device_by_id(devices_arr[i]);
1478                 if (WARN_ON_ONCE(!peer))
1479                         continue;
1480                 peer_pdd = kfd_get_process_device_data(peer, p);
1481                 if (WARN_ON_ONCE(!peer_pdd))
1482                         continue;
1483                 kfd_flush_tlb(peer_pdd);
1484         }
1485
1486         kfree(devices_arr);
1487
1488         return err;
1489
1490 bind_process_to_device_failed:
1491 get_mem_obj_from_handle_failed:
1492 map_memory_to_gpu_failed:
1493         mutex_unlock(&p->mutex);
1494 copy_from_user_failed:
1495 sync_memory_failed:
1496         kfree(devices_arr);
1497
1498         return err;
1499 }
1500
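/*
 * Unmap a buffer from the GPUVM page tables of one or more devices. As in
 * the map ioctl above, the loop resumes at args->n_success and advances it
 * after every device that succeeds, so a partial failure reports back how
 * many devices were already unmapped.
 */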
1501 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1502                                         struct kfd_process *p, void *data)
1503 {
1504         struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1505         struct kfd_process_device *pdd, *peer_pdd;
1506         void *mem;
1507         struct kfd_dev *dev, *peer;
1508         long err = 0;
1509         uint32_t *devices_arr = NULL, i;
1510
1511         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1512         if (!dev)
1513                 return -EINVAL;
1514
1515         if (!args->n_devices) {
1516                 pr_debug("Device IDs array empty\n");
1517                 return -EINVAL;
1518         }
1519         if (args->n_success > args->n_devices) {
1520                 pr_debug("n_success exceeds n_devices\n");
1521                 return -EINVAL;
1522         }
1523
1524         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1525                                     GFP_KERNEL);
1526         if (!devices_arr)
1527                 return -ENOMEM;
1528
1529         err = copy_from_user(devices_arr,
1530                              (void __user *)args->device_ids_array_ptr,
1531                              args->n_devices * sizeof(*devices_arr));
1532         if (err != 0) {
1533                 err = -EFAULT;
1534                 goto copy_from_user_failed;
1535         }
1536
1537         mutex_lock(&p->mutex);
1538
1539         pdd = kfd_get_process_device_data(dev, p);
1540         if (!pdd) {
1541                 err = -EINVAL;
1542                 goto bind_process_to_device_failed;
1543         }
1544
1545         mem = kfd_process_device_translate_handle(pdd,
1546                                                 GET_IDR_HANDLE(args->handle));
1547         if (!mem) {
1548                 err = -ENOMEM;
1549                 goto get_mem_obj_from_handle_failed;
1550         }
1551
1552         for (i = args->n_success; i < args->n_devices; i++) {
1553                 peer = kfd_device_by_id(devices_arr[i]);
1554                 if (!peer) {
1555                         err = -EINVAL;
1556                         goto get_mem_obj_from_handle_failed;
1557                 }
1558
1559                 peer_pdd = kfd_get_process_device_data(peer, p);
1560                 if (!peer_pdd) {
1561                         err = -ENODEV;
1562                         goto get_mem_obj_from_handle_failed;
1563                 }
1564                 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1565                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1566                 if (err) {
1567                         pr_err("Failed to unmap from gpu %d/%d\n",
1568                                i, args->n_devices);
1569                         goto unmap_memory_from_gpu_failed;
1570                 }
1571                 args->n_success = i + 1;
1572         }
1573         kfree(devices_arr);
1574
1575         mutex_unlock(&p->mutex);
1576
1577         return 0;
1578
1579 bind_process_to_device_failed:
1580 get_mem_obj_from_handle_failed:
1581 unmap_memory_from_gpu_failed:
1582         mutex_unlock(&p->mutex);
1583 copy_from_user_failed:
1584         kfree(devices_arr);
1585         return err;
1586 }
1587
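/*
 * Attach the device's global wave sync (GWS) resource to a user queue, or
 * detach it when args->num_gws is 0. GWS allocation is only supported when
 * the device exposes a GWS object and the hardware scheduler (HWS) is in
 * use. args->first_gws is currently always reported as 0.
 */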
1588 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1589                 struct kfd_process *p, void *data)
1590 {
1591         int retval;
1592         struct kfd_ioctl_alloc_queue_gws_args *args = data;
1593         struct queue *q;
1594         struct kfd_dev *dev;
1595
1596         mutex_lock(&p->mutex);
1597         q = pqm_get_user_queue(&p->pqm, args->queue_id);
1598
1599         if (!q) {
1600                 retval = -EINVAL;
1601                 goto out_unlock;
1602         }
1603
1604         dev = q->device;
1605
1606         if (!dev->gws) {
1607                 retval = -ENODEV;
1608                 goto out_unlock;
1609         }
1610
1611         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1612                 retval = -ENODEV;
1613                 goto out_unlock;
1614         }
1615
1616         retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1617         mutex_unlock(&p->mutex);
1618
1619         args->first_gws = 0;
1620         return retval;
1621
1622 out_unlock:
1623         mutex_unlock(&p->mutex);
1624         return retval;
1625 }
1626
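/*
 * Query size, owning GPU, flags and (optionally) metadata of a DMA-buf,
 * typically one exported by amdgpu. Any KFD device can service the query;
 * the owning GPU is then found by reverse lookup of the kgd pointer that
 * amdgpu reports back.
 */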
1627 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1628                 struct kfd_process *p, void *data)
1629 {
1630         struct kfd_ioctl_get_dmabuf_info_args *args = data;
1631         struct kfd_dev *dev = NULL;
1632         struct kgd_dev *dma_buf_kgd;
1633         void *metadata_buffer = NULL;
1634         uint32_t flags;
1635         unsigned int i;
1636         int r;
1637
1638         /* Find a KFD GPU device that supports the get_dmabuf_info query */
1639         for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1640                 if (dev)
1641                         break;
1642         if (!dev)
1643                 return -EINVAL;
1644
1645         if (args->metadata_ptr) {
1646                 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1647                 if (!metadata_buffer)
1648                         return -ENOMEM;
1649         }
1650
1651         /* Get dmabuf info from KGD */
1652         r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1653                                           &dma_buf_kgd, &args->size,
1654                                           metadata_buffer, args->metadata_size,
1655                                           &args->metadata_size, &flags);
1656         if (r)
1657                 goto exit;
1658
1659         /* Reverse-lookup gpu_id from kgd pointer */
1660         dev = kfd_device_by_kgd(dma_buf_kgd);
1661         if (!dev) {
1662                 r = -EINVAL;
1663                 goto exit;
1664         }
1665         args->gpu_id = dev->id;
1666         args->flags = flags;
1667
1668         /* Copy metadata buffer to user mode */
1669         if (metadata_buffer) {
1670                 r = copy_to_user((void __user *)args->metadata_ptr,
1671                                  metadata_buffer, args->metadata_size);
1672                 if (r != 0)
1673                         r = -EFAULT;
1674         }
1675
1676 exit:
1677         kfree(metadata_buffer);
1678
1679         return r;
1680 }
1681
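/*
 * Import a DMA-buf into the process' GPUVM on the given GPU and publish it
 * through the per-device IDR, so the returned handle can be used like any
 * other allocation handle with the map/unmap and free memory ioctls.
 */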
1682 static int kfd_ioctl_import_dmabuf(struct file *filep,
1683                                    struct kfd_process *p, void *data)
1684 {
1685         struct kfd_ioctl_import_dmabuf_args *args = data;
1686         struct kfd_process_device *pdd;
1687         struct dma_buf *dmabuf;
1688         struct kfd_dev *dev;
1689         int idr_handle;
1690         uint64_t size;
1691         void *mem;
1692         int r;
1693
1694         dev = kfd_device_by_id(args->gpu_id);
1695         if (!dev)
1696                 return -EINVAL;
1697
1698         dmabuf = dma_buf_get(args->dmabuf_fd);
1699         if (IS_ERR(dmabuf))
1700                 return PTR_ERR(dmabuf);
1701
1702         mutex_lock(&p->mutex);
1703
1704         pdd = kfd_bind_process_to_device(dev, p);
1705         if (IS_ERR(pdd)) {
1706                 r = PTR_ERR(pdd);
1707                 goto err_unlock;
1708         }
1709
1710         r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1711                                               args->va_addr, pdd->vm,
1712                                               (struct kgd_mem **)&mem, &size,
1713                                               NULL);
1714         if (r)
1715                 goto err_unlock;
1716
1717         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1718         if (idr_handle < 0) {
1719                 r = -EFAULT;
1720                 goto err_free;
1721         }
1722
1723         mutex_unlock(&p->mutex);
1724
1725         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1726
1727         return 0;
1728
1729 err_free:
1730         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1731 err_unlock:
1732         mutex_unlock(&p->mutex);
1733         return r;
1734 }
1735
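/*
 * The ioctl table below is indexed by the ioctl number (_IOC_NR), so each
 * AMDKFD_IOC_* command maps directly to its handler and canonical cmd
 * encoding.
 */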
1736 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1737         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1738                             .cmd_drv = 0, .name = #ioctl}
1739
1740 /** Ioctl table */
1741 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1742         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1743                         kfd_ioctl_get_version, 0),
1744
1745         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1746                         kfd_ioctl_create_queue, 0),
1747
1748         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1749                         kfd_ioctl_destroy_queue, 0),
1750
1751         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1752                         kfd_ioctl_set_memory_policy, 0),
1753
1754         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1755                         kfd_ioctl_get_clock_counters, 0),
1756
1757         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1758                         kfd_ioctl_get_process_apertures, 0),
1759
1760         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1761                         kfd_ioctl_update_queue, 0),
1762
1763         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1764                         kfd_ioctl_create_event, 0),
1765
1766         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1767                         kfd_ioctl_destroy_event, 0),
1768
1769         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1770                         kfd_ioctl_set_event, 0),
1771
1772         AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1773                         kfd_ioctl_reset_event, 0),
1774
1775         AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1776                         kfd_ioctl_wait_events, 0),
1777
1778         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1779                         kfd_ioctl_dbg_register, 0),
1780
1781         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1782                         kfd_ioctl_dbg_unregister, 0),
1783
1784         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1785                         kfd_ioctl_dbg_address_watch, 0),
1786
1787         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1788                         kfd_ioctl_dbg_wave_control, 0),
1789
1790         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1791                         kfd_ioctl_set_scratch_backing_va, 0),
1792
1793         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1794                         kfd_ioctl_get_tile_config, 0),
1795
1796         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1797                         kfd_ioctl_set_trap_handler, 0),
1798
1799         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1800                         kfd_ioctl_get_process_apertures_new, 0),
1801
1802         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1803                         kfd_ioctl_acquire_vm, 0),
1804
1805         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1806                         kfd_ioctl_alloc_memory_of_gpu, 0),
1807
1808         AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1809                         kfd_ioctl_free_memory_of_gpu, 0),
1810
1811         AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1812                         kfd_ioctl_map_memory_to_gpu, 0),
1813
1814         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1815                         kfd_ioctl_unmap_memory_from_gpu, 0),
1816
1817         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1818                         kfd_ioctl_set_cu_mask, 0),
1819
1820         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1821                         kfd_ioctl_get_queue_wave_state, 0),
1822
1823         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1824                                 kfd_ioctl_get_dmabuf_info, 0),
1825
1826         AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1827                                 kfd_ioctl_import_dmabuf, 0),
1828
1829         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1830                         kfd_ioctl_alloc_queue_gws, 0),
1831 };
1832
1833 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1834
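/*
 * Common ioctl dispatcher: validate the ioctl number against the table,
 * stage the argument struct in kernel memory, call the handler and copy
 * the (possibly updated) struct back to user space.
 */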
1835 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1836 {
1837         struct kfd_process *process;
1838         amdkfd_ioctl_t *func;
1839         const struct amdkfd_ioctl_desc *ioctl = NULL;
1840         unsigned int nr = _IOC_NR(cmd);
1841         char stack_kdata[128];
1842         char *kdata = NULL;
1843         unsigned int usize, asize;
1844         int retcode = -EINVAL;
1845
1846         if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1847                 goto err_i1;
1848
1849         if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1850                 u32 amdkfd_size;
1851
1852                 ioctl = &amdkfd_ioctls[nr];
1853
1854                 amdkfd_size = _IOC_SIZE(ioctl->cmd);
1855                 usize = asize = _IOC_SIZE(cmd);
1856                 if (amdkfd_size > asize)
1857                         asize = amdkfd_size;
1858
1859                 cmd = ioctl->cmd;
1860         } else
1861                 goto err_i1;
1862
1863         dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1864
1865         /* Get the process struct from the filep. Only the process
1866          * that opened /dev/kfd can use the file descriptor. Child
1867          * processes need to create their own KFD device context.
1868          */
1869         process = filep->private_data;
1870         if (process->lead_thread != current->group_leader) {
1871                 dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1872                 retcode = -EBADF;
1873                 goto err_i1;
1874         }
1875
1876         /* Do not trust userspace, use our own definition */
1877         func = ioctl->func;
1878
1879         if (unlikely(!func)) {
1880                 dev_dbg(kfd_device, "no function\n");
1881                 retcode = -EINVAL;
1882                 goto err_i1;
1883         }
1884
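        /*
         * Stage the argument in a kernel buffer: the on-stack buffer for
         * small structs, kmalloc() otherwise. If the kernel's definition
         * of the struct is larger than what user space passed in, the
         * extra tail is zero-filled so newer fields read as zero.
         */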
1885         if (cmd & (IOC_IN | IOC_OUT)) {
1886                 if (asize <= sizeof(stack_kdata)) {
1887                         kdata = stack_kdata;
1888                 } else {
1889                         kdata = kmalloc(asize, GFP_KERNEL);
1890                         if (!kdata) {
1891                                 retcode = -ENOMEM;
1892                                 goto err_i1;
1893                         }
1894                 }
1895                 if (asize > usize)
1896                         memset(kdata + usize, 0, asize - usize);
1897         }
1898
1899         if (cmd & IOC_IN) {
1900                 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1901                         retcode = -EFAULT;
1902                         goto err_i1;
1903                 }
1904         } else if (cmd & IOC_OUT) {
1905                 memset(kdata, 0, usize);
1906         }
1907
1908         retcode = func(filep, process, kdata);
1909
1910         if (cmd & IOC_OUT)
1911                 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1912                         retcode = -EFAULT;
1913
1914 err_i1:
1915         if (!ioctl)
1916                 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1917                           task_pid_nr(current), cmd, nr);
1918
1919         if (kdata != stack_kdata)
1920                 kfree(kdata);
1921
1922         if (retcode)
1923                 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
1924                                 nr, arg, retcode);
1925
1926         return retcode;
1927 }
1928
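/*
 * Map the device's single MMIO remap page into user space as non-cached
 * IO memory. Only a PAGE_SIZE mapping is accepted.
 */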
1929 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1930                       struct vm_area_struct *vma)
1931 {
1932         phys_addr_t address;
1933         int ret;
1934
1935         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1936                 return -EINVAL;
1937
1938         address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1939
1940         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1941                                 VM_DONTDUMP | VM_PFNMAP;
1942
1943         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1944
1945         pr_debug("pasid 0x%x mapping mmio page\n"
1946                  "     target user address == 0x%08llX\n"
1947                  "     physical address    == 0x%08llX\n"
1948                  "     vm_flags            == 0x%04lX\n"
1949                  "     size                == 0x%04lX\n",
1950                  process->pasid, (unsigned long long) vma->vm_start,
1951                  address, vma->vm_flags, PAGE_SIZE);
1952
1953         ret = io_remap_pfn_range(vma,
1954                                 vma->vm_start,
1955                                 address >> PAGE_SHIFT,
1956                                 PAGE_SIZE,
1957                                 vma->vm_page_prot);
1958         return ret;
1959 }
1960
1961
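/*
 * Dispatch mmap requests based on the type and GPU id encoded in the mmap
 * offset: doorbells, the events page, reserved memory or the MMIO remap
 * page.
 */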
1962 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1963 {
1964         struct kfd_process *process;
1965         struct kfd_dev *dev = NULL;
1966         unsigned long mmap_offset;
1967         unsigned int gpu_id;
1968
1969         process = kfd_get_process(current);
1970         if (IS_ERR(process))
1971                 return PTR_ERR(process);
1972
1973         mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
1974         gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
1975         if (gpu_id)
1976                 dev = kfd_device_by_id(gpu_id);
1977
1978         switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
1979         case KFD_MMAP_TYPE_DOORBELL:
1980                 if (!dev)
1981                         return -ENODEV;
1982                 return kfd_doorbell_mmap(dev, process, vma);
1983
1984         case KFD_MMAP_TYPE_EVENTS:
1985                 return kfd_event_mmap(process, vma);
1986
1987         case KFD_MMAP_TYPE_RESERVED_MEM:
1988                 if (!dev)
1989                         return -ENODEV;
1990                 return kfd_reserved_mem_mmap(dev, process, vma);
1991         case KFD_MMAP_TYPE_MMIO:
1992                 if (!dev)
1993                         return -ENODEV;
1994                 return kfd_mmio_mmap(dev, process, vma);
1995         }
1996
1997         return -EFAULT;
1998 }