/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = kfd_ioctl,
        .compat_ioctl = kfd_ioctl,
        .open = kfd_open,
        .mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
        int err = 0;

        kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
        err = kfd_char_dev_major;
        if (err < 0)
                goto err_register_chrdev;

        kfd_class = class_create(THIS_MODULE, kfd_dev_name);
        err = PTR_ERR(kfd_class);
        if (IS_ERR(kfd_class))
                goto err_class_create;

        kfd_device = device_create(kfd_class, NULL,
                                        MKDEV(kfd_char_dev_major, 0),
                                        NULL, kfd_dev_name);
        err = PTR_ERR(kfd_device);
        if (IS_ERR(kfd_device))
                goto err_device_create;

        return 0;

err_device_create:
        class_destroy(kfd_class);
err_class_create:
        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
        return err;
}

void kfd_chardev_exit(void)
{
        device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
        class_destroy(kfd_class);
        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
        return kfd_device;
}

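/*
 * Opening /dev/kfd looks up or creates the per-process KFD context.
 * Only minor 0 is valid, and 32-bit clients are rejected since amdkfd
 * only supports 64-bit user mode.
 */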
static int kfd_open(struct inode *inode, struct file *filep)
{
        struct kfd_process *process;
        bool is_32bit_user_mode;

        if (iminor(inode) != 0)
                return -ENODEV;

        is_32bit_user_mode = in_compat_syscall();

        if (is_32bit_user_mode) {
                dev_warn(kfd_device,
                        "Process %d (32-bit) failed to open /dev/kfd\n"
                        "32-bit processes are not supported by amdkfd\n",
                        current->pid);
                return -EPERM;
        }

        process = kfd_create_process(filep);
        if (IS_ERR(process))
                return PTR_ERR(process);

        dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
                process->pasid, process->is_32bit_user_mode);

        return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_get_version_args *args = data;

        args->major_version = KFD_IOCTL_MAJOR_VERSION;
        args->minor_version = KFD_IOCTL_MINOR_VERSION;

        return 0;
}

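/*
 * Validate the user-supplied queue parameters and translate them into a
 * queue_properties structure. The ring, read/write pointer and optional
 * EOP / context save-restore addresses are only access-checked here;
 * their contents stay in user memory.
 */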
static int set_queue_properties_from_user(struct queue_properties *q_properties,
                                struct kfd_ioctl_create_queue_args *args)
{
        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
                return -EINVAL;
        }

        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
                return -EINVAL;
        }

        if ((args->ring_base_address) &&
                (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->ring_base_address,
                        sizeof(uint64_t)))) {
                pr_err("Can't access ring base address\n");
                return -EFAULT;
        }

        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
                pr_err("Ring size must be a power of 2 or 0\n");
                return -EINVAL;
        }

        if (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->read_pointer_address,
                        sizeof(uint32_t))) {
                pr_err("Can't access read pointer\n");
                return -EFAULT;
        }

        if (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->write_pointer_address,
                        sizeof(uint32_t))) {
                pr_err("Can't access write pointer\n");
                return -EFAULT;
        }

        if (args->eop_buffer_address &&
                !access_ok(VERIFY_WRITE,
                        (const void __user *) args->eop_buffer_address,
                        sizeof(uint32_t))) {
                pr_debug("Can't access eop buffer");
                return -EFAULT;
        }

        if (args->ctx_save_restore_address &&
                !access_ok(VERIFY_WRITE,
                        (const void __user *) args->ctx_save_restore_address,
                        sizeof(uint32_t))) {
                pr_debug("Can't access ctx save restore buffer");
                return -EFAULT;
        }

        q_properties->is_interop = false;
        q_properties->queue_percent = args->queue_percentage;
        q_properties->priority = args->queue_priority;
        q_properties->queue_address = args->ring_base_address;
        q_properties->queue_size = args->ring_size;
        q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
        q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
        q_properties->eop_ring_buffer_address = args->eop_buffer_address;
        q_properties->eop_ring_buffer_size = args->eop_buffer_size;
        q_properties->ctx_save_restore_area_address =
                        args->ctx_save_restore_address;
        q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
        q_properties->ctl_stack_size = args->ctl_stack_size;
        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
                args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
                q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
                q_properties->type = KFD_QUEUE_TYPE_SDMA;
        else
                return -ENOTSUPP;

        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
                q_properties->format = KFD_QUEUE_FORMAT_AQL;
        else
                q_properties->format = KFD_QUEUE_FORMAT_PM4;

        pr_debug("Queue Percentage: %d, %d\n",
                        q_properties->queue_percent, args->queue_percentage);

        pr_debug("Queue Priority: %d, %d\n",
                        q_properties->priority, args->queue_priority);

        pr_debug("Queue Address: 0x%llX, 0x%llX\n",
                        q_properties->queue_address, args->ring_base_address);

        pr_debug("Queue Size: 0x%llX, %u\n",
                        q_properties->queue_size, args->ring_size);

        pr_debug("Queue r/w Pointers: %px, %px\n",
                        q_properties->read_ptr,
                        q_properties->write_ptr);

        pr_debug("Queue Format: %d\n", q_properties->format);

        pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

        pr_debug("Queue CTX save area: 0x%llX\n",
                        q_properties->ctx_save_restore_area_address);

        return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_create_queue_args *args = data;
        struct kfd_dev *dev;
        int err = 0;
        unsigned int queue_id;
        struct kfd_process_device *pdd;
        struct queue_properties q_properties;

        memset(&q_properties, 0, sizeof(struct queue_properties));

        pr_debug("Creating queue ioctl\n");

        err = set_queue_properties_from_user(&q_properties, args);
        if (err)
                return err;

        pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
        dev = kfd_device_by_id(args->gpu_id);
        if (!dev) {
                pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
                return -EINVAL;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = -ESRCH;
                goto err_bind_process;
        }

        pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
                        p->pasid,
                        dev->id);

        err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
        if (err != 0)
                goto err_create_queue;

        args->queue_id = queue_id;


        /* Return gpu_id as doorbell offset for mmap usage */
        args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
        args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
        args->doorbell_offset <<= PAGE_SHIFT;
        if (KFD_IS_SOC15(dev->device_info->asic_family))
                /* On SOC15 ASICs, doorbell allocation must be
                 * per-device, and independent from the per-process
                 * queue_id. Return the doorbell offset within the
                 * doorbell aperture to user mode.
                 */
                args->doorbell_offset |= q_properties.doorbell_off;

        mutex_unlock(&p->mutex);

        pr_debug("Queue id %d was created successfully\n", args->queue_id);

        pr_debug("Ring buffer address == 0x%016llX\n",
                        args->ring_base_address);

        pr_debug("Read ptr address    == 0x%016llX\n",
                        args->read_pointer_address);

        pr_debug("Write ptr address   == 0x%016llX\n",
                        args->write_pointer_address);

        return 0;

err_create_queue:
err_bind_process:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        int retval;
        struct kfd_ioctl_destroy_queue_args *args = data;

        pr_debug("Destroying queue id %d for pasid %d\n",
                                args->queue_id,
                                p->pasid);

        mutex_lock(&p->mutex);

        retval = pqm_destroy_queue(&p->pqm, args->queue_id);

        mutex_unlock(&p->mutex);
        return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        int retval;
        struct kfd_ioctl_update_queue_args *args = data;
        struct queue_properties properties;

        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
                return -EINVAL;
        }

        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
                return -EINVAL;
        }

        if ((args->ring_base_address) &&
                (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->ring_base_address,
                        sizeof(uint64_t)))) {
                pr_err("Can't access ring base address\n");
                return -EFAULT;
        }

        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
                pr_err("Ring size must be a power of 2 or 0\n");
                return -EINVAL;
        }

        properties.queue_address = args->ring_base_address;
        properties.queue_size = args->ring_size;
        properties.queue_percent = args->queue_percentage;
        properties.priority = args->queue_priority;

        pr_debug("Updating queue id %d for pasid %d\n",
                        args->queue_id, p->pasid);

        mutex_lock(&p->mutex);

        retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

        mutex_unlock(&p->mutex);

        return retval;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_set_memory_policy_args *args = data;
        struct kfd_dev *dev;
        int err = 0;
        struct kfd_process_device *pdd;
        enum cache_policy default_policy, alternate_policy;

        if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
            && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
                return -EINVAL;
        }

        if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
            && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
                return -EINVAL;
        }

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = -ESRCH;
                goto out;
        }

        default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
                         ? cache_policy_coherent : cache_policy_noncoherent;

        alternate_policy =
                (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
                   ? cache_policy_coherent : cache_policy_noncoherent;

        if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
                                &pdd->qpd,
                                default_policy,
                                alternate_policy,
                                (void __user *)args->alternate_aperture_base,
                                args->alternate_aperture_size))
                err = -EINVAL;

out:
        mutex_unlock(&p->mutex);

        return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_set_trap_handler_args *args = data;
        struct kfd_dev *dev;
        int err = 0;
        struct kfd_process_device *pdd;

        dev = kfd_device_by_id(args->gpu_id);
        if (dev == NULL)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = -ESRCH;
                goto out;
        }

        if (dev->dqm->ops.set_trap_handler(dev->dqm,
                                        &pdd->qpd,
                                        args->tba_addr,
                                        args->tma_addr))
                err = -EINVAL;

out:
        mutex_unlock(&p->mutex);

        return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_register_args *args = data;
        struct kfd_dev *dev;
        struct kfd_dbgmgr *dbgmgr_ptr;
        struct kfd_process_device *pdd;
        bool create_ok;
        long status = 0;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
                return -EINVAL;
        }

        mutex_lock(&p->mutex);
        mutex_lock(kfd_get_dbgmgr_mutex());

        /*
         * Make sure that we have a pdd if this is the first queue created
         * for this process.
         */
        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                status = PTR_ERR(pdd);
                goto out;
        }

        if (!dev->dbgmgr) {
                /* In case of a legal call, we have no dbgmgr yet */
                create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
                if (create_ok) {
                        status = kfd_dbgmgr_register(dbgmgr_ptr, p);
                        if (status != 0)
                                kfd_dbgmgr_destroy(dbgmgr_ptr);
                        else
                                dev->dbgmgr = dbgmgr_ptr;
                }
        } else {
                pr_debug("debugger already registered\n");
                status = -EINVAL;
        }

out:
        mutex_unlock(kfd_get_dbgmgr_mutex());
        mutex_unlock(&p->mutex);

        return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_unregister_args *args = data;
        struct kfd_dev *dev;
        long status;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev || !dev->dbgmgr)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
                return -EINVAL;
        }

        mutex_lock(kfd_get_dbgmgr_mutex());

        status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
        if (!status) {
                kfd_dbgmgr_destroy(dev->dbgmgr);
                dev->dbgmgr = NULL;
        }

        mutex_unlock(kfd_get_dbgmgr_mutex());

        return status;
}

/*
 * Parse and generate a variable size data structure for address watch.
 * The total buffer size and the number of watch points are limited in
 * order to prevent kernel abuse (this has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module).
 * Also note that the watch addresses themselves are not copied from
 * user space, since they are programmed into the HW with user mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_address_watch_args *args = data;
        struct kfd_dev *dev;
        struct dbg_address_watch_info aw_info;
        unsigned char *args_buff;
        long status;
        void __user *cmd_from_user;
        uint64_t watch_mask_value = 0;
        unsigned int args_idx = 0;

        memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
                return -EINVAL;
        }

        cmd_from_user = (void __user *) args->content_ptr;

        /* Validate arguments */

        if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
                (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
                (cmd_from_user == NULL))
                return -EINVAL;

        /* this is the actual buffer to work with */
        args_buff = memdup_user(cmd_from_user,
                                args->buf_size_in_bytes - sizeof(*args));
        if (IS_ERR(args_buff))
                return PTR_ERR(args_buff);

        aw_info.process = p;

        aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
        args_idx += sizeof(aw_info.num_watch_points);

        aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
        args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

        /*
         * set watch address base pointer to point on the array base
         * within args_buff
         */
        aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

        /* skip over the addresses buffer */
        args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
                status = -EINVAL;
                goto out;
        }

        watch_mask_value = (uint64_t) args_buff[args_idx];

        if (watch_mask_value > 0) {
                /*
                 * There is an array of masks.
                 * set watch mask base pointer to point on the array base
                 * within args_buff
                 */
                aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

                /* skip over the masks buffer */
                args_idx += sizeof(aw_info.watch_mask) *
                                aw_info.num_watch_points;
        } else {
                /* just the NULL mask, set to NULL and skip over it */
                aw_info.watch_mask = NULL;
                args_idx += sizeof(aw_info.watch_mask);
        }

        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
                status = -EINVAL;
                goto out;
        }

        /* Currently HSA Event is not supported for DBG */
        aw_info.watch_event = NULL;

        mutex_lock(kfd_get_dbgmgr_mutex());

        status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

        mutex_unlock(kfd_get_dbgmgr_mutex());

out:
        kfree(args_buff);

        return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_wave_control_args *args = data;
        struct kfd_dev *dev;
        struct dbg_wave_control_info wac_info;
        unsigned char *args_buff;
        uint32_t computed_buff_size;
        long status;
        void __user *cmd_from_user;
        unsigned int args_idx = 0;

        memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

        /* we use compact form, independent of the packing attribute value */
        computed_buff_size = sizeof(*args) +
                                sizeof(wac_info.mode) +
                                sizeof(wac_info.operand) +
                                sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
                                sizeof(wac_info.dbgWave_msg.MemoryVA) +
                                sizeof(wac_info.trapId);

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
                return -EINVAL;
        }

        /* input size must match the computed "compact" size */
        if (args->buf_size_in_bytes != computed_buff_size) {
                pr_debug("size mismatch, computed : actual %u : %u\n",
                                args->buf_size_in_bytes, computed_buff_size);
                return -EINVAL;
        }

        cmd_from_user = (void __user *) args->content_ptr;

        if (cmd_from_user == NULL)
                return -EINVAL;

        /* copy the entire buffer from user */

        args_buff = memdup_user(cmd_from_user,
                                args->buf_size_in_bytes - sizeof(*args));
        if (IS_ERR(args_buff))
                return PTR_ERR(args_buff);

        /* move ptr to the start of the "payload" area */
        wac_info.process = p;

        wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
        args_idx += sizeof(wac_info.operand);

        wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
        args_idx += sizeof(wac_info.mode);

        wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
        args_idx += sizeof(wac_info.trapId);

        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
                                        *((uint32_t *)(&args_buff[args_idx]));
        wac_info.dbgWave_msg.MemoryVA = NULL;

        mutex_lock(kfd_get_dbgmgr_mutex());

        pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
                        wac_info.process, wac_info.operand,
                        wac_info.mode, wac_info.trapId,
                        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

        status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

        pr_debug("Returned status of dbg manager is %ld\n", status);

        mutex_unlock(kfd_get_dbgmgr_mutex());

        kfree(args_buff);

        return status;
}

static int kfd_ioctl_get_clock_counters(struct file *filep,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_clock_counters_args *args = data;
        struct kfd_dev *dev;
        struct timespec64 time;

        dev = kfd_device_by_id(args->gpu_id);
        if (dev)
                /* Reading GPU clock counter from KGD */
                args->gpu_clock_counter =
                        dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
        else
                /* Node without GPU resource */
                args->gpu_clock_counter = 0;

        /* No access to rdtsc. Using raw monotonic time */
        getrawmonotonic64(&time);
        args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);

        get_monotonic_boottime64(&time);
        args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);

        /* Since the counter is in nano-seconds we use 1GHz frequency */
        args->system_clock_freq = 1000000000;

        return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_process_apertures_args *args = data;
        struct kfd_process_device_apertures *pAperture;
        struct kfd_process_device *pdd;

        dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

        args->num_of_nodes = 0;

        mutex_lock(&p->mutex);

        /* if the process-device list isn't empty */
        if (kfd_has_process_device_data(p)) {
                /* Run over all pdd of the process */
                pdd = kfd_get_first_process_device_data(p);
                do {
                        pAperture =
                                &args->process_apertures[args->num_of_nodes];
                        pAperture->gpu_id = pdd->dev->id;
                        pAperture->lds_base = pdd->lds_base;
                        pAperture->lds_limit = pdd->lds_limit;
                        pAperture->gpuvm_base = pdd->gpuvm_base;
                        pAperture->gpuvm_limit = pdd->gpuvm_limit;
                        pAperture->scratch_base = pdd->scratch_base;
                        pAperture->scratch_limit = pdd->scratch_limit;

                        dev_dbg(kfd_device,
                                "node id %u\n", args->num_of_nodes);
                        dev_dbg(kfd_device,
                                "gpu id %u\n", pdd->dev->id);
                        dev_dbg(kfd_device,
                                "lds_base %llX\n", pdd->lds_base);
                        dev_dbg(kfd_device,
                                "lds_limit %llX\n", pdd->lds_limit);
                        dev_dbg(kfd_device,
                                "gpuvm_base %llX\n", pdd->gpuvm_base);
                        dev_dbg(kfd_device,
                                "gpuvm_limit %llX\n", pdd->gpuvm_limit);
                        dev_dbg(kfd_device,
                                "scratch_base %llX\n", pdd->scratch_base);
                        dev_dbg(kfd_device,
                                "scratch_limit %llX\n", pdd->scratch_limit);

                        args->num_of_nodes++;

                        pdd = kfd_get_next_process_device_data(p, pdd);
                } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
        }

        mutex_unlock(&p->mutex);

        return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_process_apertures_new_args *args = data;
        struct kfd_process_device_apertures *pa;
        struct kfd_process_device *pdd;
        uint32_t nodes = 0;
        int ret;

        dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

        if (args->num_of_nodes == 0) {
                /* Return number of nodes, so that user space can allocate
                 * sufficient memory
                 */
                mutex_lock(&p->mutex);

                if (!kfd_has_process_device_data(p))
                        goto out_unlock;

                /* Run over all pdd of the process */
                pdd = kfd_get_first_process_device_data(p);
                do {
                        args->num_of_nodes++;
                        pdd = kfd_get_next_process_device_data(p, pdd);
                } while (pdd);

                goto out_unlock;
        }

        /* Fill in process-aperture information for all available
         * nodes, but not more than args->num_of_nodes as that is
         * the amount of memory allocated by user
         */
        pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
                                args->num_of_nodes), GFP_KERNEL);
        if (!pa)
                return -ENOMEM;

        mutex_lock(&p->mutex);

        if (!kfd_has_process_device_data(p)) {
                args->num_of_nodes = 0;
                kfree(pa);
                goto out_unlock;
        }

        /* Run over all pdd of the process */
        pdd = kfd_get_first_process_device_data(p);
        do {
                pa[nodes].gpu_id = pdd->dev->id;
                pa[nodes].lds_base = pdd->lds_base;
                pa[nodes].lds_limit = pdd->lds_limit;
                pa[nodes].gpuvm_base = pdd->gpuvm_base;
                pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
                pa[nodes].scratch_base = pdd->scratch_base;
                pa[nodes].scratch_limit = pdd->scratch_limit;

                dev_dbg(kfd_device,
                        "gpu id %u\n", pdd->dev->id);
                dev_dbg(kfd_device,
                        "lds_base %llX\n", pdd->lds_base);
                dev_dbg(kfd_device,
                        "lds_limit %llX\n", pdd->lds_limit);
                dev_dbg(kfd_device,
                        "gpuvm_base %llX\n", pdd->gpuvm_base);
                dev_dbg(kfd_device,
                        "gpuvm_limit %llX\n", pdd->gpuvm_limit);
                dev_dbg(kfd_device,
                        "scratch_base %llX\n", pdd->scratch_base);
                dev_dbg(kfd_device,
                        "scratch_limit %llX\n", pdd->scratch_limit);
                nodes++;

                pdd = kfd_get_next_process_device_data(p, pdd);
        } while (pdd && (nodes < args->num_of_nodes));
        mutex_unlock(&p->mutex);

        args->num_of_nodes = nodes;
        ret = copy_to_user(
                        (void __user *)args->kfd_process_device_apertures_ptr,
                        pa,
                        (nodes * sizeof(struct kfd_process_device_apertures)));
        kfree(pa);
        return ret ? -EFAULT : 0;

out_unlock:
        mutex_unlock(&p->mutex);
        return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_create_event_args *args = data;
        int err;

        /* For dGPUs the event page is allocated in user mode. The
         * handle is passed to KFD with the first call to this IOCTL
         * through the event_page_offset field.
         */
        if (args->event_page_offset) {
                struct kfd_dev *kfd;
                struct kfd_process_device *pdd;
                void *mem, *kern_addr;
                uint64_t size;

                if (p->signal_page) {
                        pr_err("Event page is already set\n");
                        return -EINVAL;
                }

                kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
                if (!kfd) {
                        pr_err("Getting device by id failed in %s\n", __func__);
                        return -EINVAL;
                }

                mutex_lock(&p->mutex);
                pdd = kfd_bind_process_to_device(kfd, p);
                if (IS_ERR(pdd)) {
                        err = PTR_ERR(pdd);
                        goto out_unlock;
                }

                mem = kfd_process_device_translate_handle(pdd,
                                GET_IDR_HANDLE(args->event_page_offset));
                if (!mem) {
                        pr_err("Can't find BO, offset is 0x%llx\n",
                               args->event_page_offset);
                        err = -EINVAL;
                        goto out_unlock;
                }
                mutex_unlock(&p->mutex);

                err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
                                                mem, &kern_addr, &size);
                if (err) {
                        pr_err("Failed to map event page to kernel\n");
                        return err;
                }

                err = kfd_event_page_set(p, kern_addr, size);
                if (err) {
                        pr_err("Failed to set event page\n");
                        return err;
                }
        }

        err = kfd_event_create(filp, p, args->event_type,
                                args->auto_reset != 0, args->node_id,
                                &args->event_id, &args->event_trigger_data,
                                &args->event_page_offset,
                                &args->event_slot_index);

        return err;

out_unlock:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_destroy_event_args *args = data;

        return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_set_event_args *args = data;

        return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_reset_event_args *args = data;

        return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_wait_events_args *args = data;
        int err;

        err = kfd_wait_on_events(p, args->num_events,
                        (void __user *)args->events_ptr,
                        (args->wait_for_all != 0),
                        args->timeout, &args->wait_result);

        return err;
}
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_set_scratch_backing_va_args *args = data;
        struct kfd_process_device *pdd;
        struct kfd_dev *dev;
        long err;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = PTR_ERR(pdd);
                goto bind_process_to_device_fail;
        }

        pdd->qpd.sh_hidden_private_base = args->va_addr;

        mutex_unlock(&p->mutex);

        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
            pdd->qpd.vmid != 0)
                dev->kfd2kgd->set_scratch_backing_va(
                        dev->kgd, args->va_addr, pdd->qpd.vmid);

        return 0;

bind_process_to_device_fail:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_tile_config_args *args = data;
        struct kfd_dev *dev;
        struct tile_config config;
        int err = 0;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        dev->kfd2kgd->get_tile_config(dev->kgd, &config);

        args->gb_addr_config = config.gb_addr_config;
        args->num_banks = config.num_banks;
        args->num_ranks = config.num_ranks;

        if (args->num_tile_configs > config.num_tile_configs)
                args->num_tile_configs = config.num_tile_configs;
        err = copy_to_user((void __user *)args->tile_config_ptr,
                        config.tile_config_ptr,
                        args->num_tile_configs * sizeof(uint32_t));
        if (err) {
                args->num_tile_configs = 0;
                return -EFAULT;
        }

        if (args->num_macro_tile_configs > config.num_macro_tile_configs)
                args->num_macro_tile_configs =
                                config.num_macro_tile_configs;
        err = copy_to_user((void __user *)args->macro_tile_config_ptr,
                        config.macro_tile_config_ptr,
                        args->num_macro_tile_configs * sizeof(uint32_t));
        if (err) {
                args->num_macro_tile_configs = 0;
                return -EFAULT;
        }

        return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_acquire_vm_args *args = data;
        struct kfd_process_device *pdd;
        struct kfd_dev *dev;
        struct file *drm_file;
        int ret;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        drm_file = fget(args->drm_fd);
        if (!drm_file)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                ret = -EINVAL;
                goto err_unlock;
        }

        if (pdd->drm_file) {
                ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
                goto err_unlock;
        }

        ret = kfd_process_device_init_vm(pdd, drm_file);
        if (ret)
                goto err_unlock;
        /* On success, the PDD keeps the drm_file reference */
        mutex_unlock(&p->mutex);

        return 0;

err_unlock:
        mutex_unlock(&p->mutex);
        fput(drm_file);
        return ret;
}

static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
        struct kfd_local_mem_info mem_info;

        if (debug_largebar) {
                pr_debug("Simulate large-bar allocation on non large-bar machine\n");
                return true;
        }

        if (dev->device_info->needs_iommu_device)
                return false;

        dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
        if (mem_info.local_mem_size_private == 0 &&
                        mem_info.local_mem_size_public > 0)
                return true;
        return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
        struct kfd_process_device *pdd;
        void *mem;
        struct kfd_dev *dev;
        int idr_handle;
        long err;
        uint64_t offset = args->mmap_offset;
        uint32_t flags = args->flags;

        if (args->size == 0)
                return -EINVAL;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
                (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
                !kfd_dev_is_large_bar(dev)) {
                pr_err("Alloc host visible vram on small bar is not allowed\n");
                return -EINVAL;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = PTR_ERR(pdd);
                goto err_unlock;
        }

        err = dev->kfd2kgd->alloc_memory_of_gpu(
                dev->kgd, args->va_addr, args->size,
                pdd->vm, (struct kgd_mem **) &mem, &offset,
                flags);

        if (err)
                goto err_unlock;

        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
        if (idr_handle < 0) {
                err = -EFAULT;
                goto err_free;
        }

        mutex_unlock(&p->mutex);

        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
        args->mmap_offset = offset;

        return 0;

err_free:
        dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_free_memory_of_gpu_args *args = data;
        struct kfd_process_device *pdd;
        void *mem;
        struct kfd_dev *dev;
        int ret;

        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                pr_err("Process device data doesn't exist\n");
                ret = -EINVAL;
                goto err_unlock;
        }

        mem = kfd_process_device_translate_handle(
                pdd, GET_IDR_HANDLE(args->handle));
        if (!mem) {
                ret = -EINVAL;
                goto err_unlock;
        }

        ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

        /* If freeing the buffer failed, leave the handle in place for
         * clean-up during process tear-down.
         */
        if (!ret)
                kfd_process_device_remove_obj_handle(
                        pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
        mutex_unlock(&p->mutex);
        return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_map_memory_to_gpu_args *args = data;
        struct kfd_process_device *pdd, *peer_pdd;
        void *mem;
        struct kfd_dev *dev, *peer;
        long err = 0;
        int i;
        uint32_t *devices_arr = NULL;

        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
                return -EINVAL;

        if (!args->n_devices) {
                pr_debug("Device IDs array empty\n");
                return -EINVAL;
        }
        if (args->n_success > args->n_devices) {
                pr_debug("n_success exceeds n_devices\n");
                return -EINVAL;
        }

        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
                                    GFP_KERNEL);
        if (!devices_arr)
                return -ENOMEM;

        err = copy_from_user(devices_arr,
                             (void __user *)args->device_ids_array_ptr,
                             args->n_devices * sizeof(*devices_arr));
        if (err != 0) {
                err = -EFAULT;
                goto copy_from_user_failed;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = PTR_ERR(pdd);
                goto bind_process_to_device_failed;
        }

        mem = kfd_process_device_translate_handle(pdd,
                                                GET_IDR_HANDLE(args->handle));
        if (!mem) {
                err = -ENOMEM;
                goto get_mem_obj_from_handle_failed;
        }

        for (i = args->n_success; i < args->n_devices; i++) {
                peer = kfd_device_by_id(devices_arr[i]);
                if (!peer) {
                        pr_debug("Getting device by id failed for 0x%x\n",
                                 devices_arr[i]);
                        err = -EINVAL;
                        goto get_mem_obj_from_handle_failed;
                }

                peer_pdd = kfd_bind_process_to_device(peer, p);
                if (IS_ERR(peer_pdd)) {
                        err = PTR_ERR(peer_pdd);
                        goto get_mem_obj_from_handle_failed;
                }
                err = peer->kfd2kgd->map_memory_to_gpu(
                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
                if (err) {
                        pr_err("Failed to map to gpu %d/%d\n",
                               i, args->n_devices);
                        goto map_memory_to_gpu_failed;
                }
                args->n_success = i+1;
        }

        mutex_unlock(&p->mutex);

        err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
        if (err) {
                pr_debug("Sync memory failed, wait interrupted by user signal\n");
                goto sync_memory_failed;
        }

        /* Flush TLBs after waiting for the page table updates to complete */
        for (i = 0; i < args->n_devices; i++) {
                peer = kfd_device_by_id(devices_arr[i]);
                if (WARN_ON_ONCE(!peer))
                        continue;
                peer_pdd = kfd_get_process_device_data(peer, p);
                if (WARN_ON_ONCE(!peer_pdd))
                        continue;
                kfd_flush_tlb(peer_pdd);
        }

        kfree(devices_arr);

        return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
        mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
        kfree(devices_arr);

        return err;
}

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
        struct kfd_process_device *pdd, *peer_pdd;
        void *mem;
        struct kfd_dev *dev, *peer;
        long err = 0;
        uint32_t *devices_arr = NULL, i;

        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
                return -EINVAL;

        if (!args->n_devices) {
                pr_debug("Device IDs array empty\n");
                return -EINVAL;
        }
        if (args->n_success > args->n_devices) {
                pr_debug("n_success exceeds n_devices\n");
                return -EINVAL;
        }

        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
                                    GFP_KERNEL);
        if (!devices_arr)
                return -ENOMEM;

        err = copy_from_user(devices_arr,
                             (void __user *)args->device_ids_array_ptr,
                             args->n_devices * sizeof(*devices_arr));
        if (err != 0) {
                err = -EFAULT;
                goto copy_from_user_failed;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                err = -EINVAL;
                goto bind_process_to_device_failed;
        }

        mem = kfd_process_device_translate_handle(pdd,
                                                GET_IDR_HANDLE(args->handle));
        if (!mem) {
                err = -ENOMEM;
                goto get_mem_obj_from_handle_failed;
        }

        for (i = args->n_success; i < args->n_devices; i++) {
                peer = kfd_device_by_id(devices_arr[i]);
                if (!peer) {
                        err = -EINVAL;
                        goto get_mem_obj_from_handle_failed;
                }

                peer_pdd = kfd_get_process_device_data(peer, p);
                if (!peer_pdd) {
                        err = -ENODEV;
                        goto get_mem_obj_from_handle_failed;
                }
                err = dev->kfd2kgd->unmap_memory_to_gpu(
                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
                if (err) {
                        pr_err("Failed to unmap from gpu %d/%d\n",
                               i, args->n_devices);
                        goto unmap_memory_from_gpu_failed;
                }
                args->n_success = i+1;
        }
        kfree(devices_arr);

        mutex_unlock(&p->mutex);

        return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
        mutex_unlock(&p->mutex);
copy_from_user_failed:
        kfree(devices_arr);
        return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
                            .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
                        kfd_ioctl_get_version, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
                        kfd_ioctl_create_queue, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
                        kfd_ioctl_destroy_queue, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
                        kfd_ioctl_set_memory_policy, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
                        kfd_ioctl_get_clock_counters, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
                        kfd_ioctl_get_process_apertures, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
                        kfd_ioctl_update_queue, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
                        kfd_ioctl_create_event, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
                        kfd_ioctl_destroy_event, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
                        kfd_ioctl_set_event, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
                        kfd_ioctl_reset_event, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
                        kfd_ioctl_wait_events, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
                        kfd_ioctl_dbg_register, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
                        kfd_ioctl_dbg_unregister, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
                        kfd_ioctl_dbg_address_watch, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
                        kfd_ioctl_dbg_wave_control, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
                        kfd_ioctl_set_scratch_backing_va, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
                        kfd_ioctl_get_tile_config, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
                        kfd_ioctl_set_trap_handler, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
                        kfd_ioctl_get_process_apertures_new, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
                        kfd_ioctl_acquire_vm, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
                        kfd_ioctl_alloc_memory_of_gpu, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
                        kfd_ioctl_free_memory_of_gpu, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
                        kfd_ioctl_map_memory_to_gpu, 0),

        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
                        kfd_ioctl_unmap_memory_from_gpu, 0),

};

#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)

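/*
 * Common ioctl dispatcher. The argument block is staged in a kernel buffer
 * (on the stack for small commands, kmalloc'd otherwise) sized to the larger
 * of the user-supplied and kernel-defined argument structs, so any extra
 * kernel-side fields are seen zero-filled by the handler. After the handler
 * runs, commands with an output direction are copied back to user space.
 */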
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
        struct kfd_process *process;
        amdkfd_ioctl_t *func;
        const struct amdkfd_ioctl_desc *ioctl = NULL;
        unsigned int nr = _IOC_NR(cmd);
        char stack_kdata[128];
        char *kdata = NULL;
        unsigned int usize, asize;
        int retcode = -EINVAL;

        if (nr >= AMDKFD_CORE_IOCTL_COUNT)
                goto err_i1;

        if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
                u32 amdkfd_size;

                ioctl = &amdkfd_ioctls[nr];

                amdkfd_size = _IOC_SIZE(ioctl->cmd);
                usize = asize = _IOC_SIZE(cmd);
                if (amdkfd_size > asize)
                        asize = amdkfd_size;

                cmd = ioctl->cmd;
        } else
                goto err_i1;

        dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

        process = kfd_get_process(current);
        if (IS_ERR(process)) {
                dev_dbg(kfd_device, "no process\n");
                goto err_i1;
        }

        /* Do not trust userspace, use our own definition */
        func = ioctl->func;

        if (unlikely(!func)) {
                dev_dbg(kfd_device, "no function\n");
                retcode = -EINVAL;
                goto err_i1;
        }

        if (cmd & (IOC_IN | IOC_OUT)) {
                if (asize <= sizeof(stack_kdata)) {
                        kdata = stack_kdata;
                } else {
                        kdata = kmalloc(asize, GFP_KERNEL);
                        if (!kdata) {
                                retcode = -ENOMEM;
                                goto err_i1;
                        }
                }
                if (asize > usize)
                        memset(kdata + usize, 0, asize - usize);
        }

        if (cmd & IOC_IN) {
                if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
                        retcode = -EFAULT;
                        goto err_i1;
                }
        } else if (cmd & IOC_OUT) {
                memset(kdata, 0, usize);
        }

        retcode = func(filep, process, kdata);

        if (cmd & IOC_OUT)
                if (copy_to_user((void __user *)arg, kdata, usize) != 0)
                        retcode = -EFAULT;

err_i1:
        if (!ioctl)
                dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
                          task_pid_nr(current), cmd, nr);

        if (kdata != stack_kdata)
                kfree(kdata);

        if (retcode)
                dev_dbg(kfd_device, "ret = %d\n", retcode);

        return retcode;
}

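/*
 * The mmap offset encodes both the mapping type and the GPU ID (see the
 * KFD_MMAP_TYPE_* masks and KFD_MMAP_GPU_ID_GET). Decode them here and
 * hand off to the doorbell, event-page or reserved-memory handlers.
 */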
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct kfd_process *process;
        struct kfd_dev *dev = NULL;
        unsigned long vm_pgoff;
        unsigned int gpu_id;

        process = kfd_get_process(current);
        if (IS_ERR(process))
                return PTR_ERR(process);

        vm_pgoff = vma->vm_pgoff;
        vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
        gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
        if (gpu_id)
                dev = kfd_device_by_id(gpu_id);

        switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
        case KFD_MMAP_TYPE_DOORBELL:
                if (!dev)
                        return -ENODEV;
                return kfd_doorbell_mmap(dev, process, vma);

        case KFD_MMAP_TYPE_EVENTS:
                return kfd_event_mmap(process, vma);

        case KFD_MMAP_TYPE_RESERVED_MEM:
                if (!dev)
                        return -ENODEV;
                return kfd_reserved_mem_mmap(dev, process, vma);
        }

        return -EFAULT;
}