drivers/gpu/drm/amd/amdkfd/kfd_process.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/mutex.h>
24 #include <linux/log2.h>
25 #include <linux/sched.h>
26 #include <linux/sched/mm.h>
27 #include <linux/sched/task.h>
28 #include <linux/mmu_context.h>
29 #include <linux/slab.h>
30 #include <linux/amd-iommu.h>
31 #include <linux/notifier.h>
32 #include <linux/compat.h>
33 #include <linux/mman.h>
34 #include <linux/file.h>
35 #include <linux/pm_runtime.h>
36 #include "amdgpu_amdkfd.h"
37 #include "amdgpu.h"
38 #include "kfd_svm.h"
39
40 struct mm_struct;
41
42 #include "kfd_priv.h"
43 #include "kfd_device_queue_manager.h"
44 #include "kfd_dbgmgr.h"
45 #include "kfd_iommu.h"
46 #include "kfd_svm.h"
47
48 /*
49  * Hash table of struct kfd_process entries (hash node: field kfd_processes).
50  * Unique/indexed by mm_struct *.
51  */
52 DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
53 static DEFINE_MUTEX(kfd_processes_mutex);
54
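/* SRCU protects lookups in kfd_processes_table against concurrent removal */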
55 DEFINE_SRCU(kfd_processes_srcu);
56
57 /* For process termination handling */
58 static struct workqueue_struct *kfd_process_wq;
59
60 /* Ordered, single-threaded workqueue for restoring evicted
61  * processes. Restoring multiple processes concurrently under memory
62  * pressure can lead to processes blocking each other from validating
63  * their BOs and result in a live-lock situation where processes
64  * remain evicted indefinitely.
65  */
66 static struct workqueue_struct *kfd_restore_wq;
67
68 static struct kfd_process *find_process(const struct task_struct *thread);
69 static void kfd_process_ref_release(struct kref *ref);
70 static struct kfd_process *create_process(const struct task_struct *thread);
71 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
72
73 static void evict_process_worker(struct work_struct *work);
74 static void restore_process_worker(struct work_struct *work);
75
76 struct kfd_procfs_tree {
77         struct kobject *kobj;
78 };
79
80 static struct kfd_procfs_tree procfs;
81
82 /*
83  * Structure for SDMA activity tracking
84  */
85 struct kfd_sdma_activity_handler_workarea {
86         struct work_struct sdma_activity_work;
87         struct kfd_process_device *pdd;
88         uint64_t sdma_activity_counter;
89 };
90
91 struct temp_sdma_queue_list {
92         uint64_t __user *rptr;
93         uint64_t sdma_val;
94         unsigned int queue_id;
95         struct list_head list;
96 };
97
98 static void kfd_sdma_activity_worker(struct work_struct *work)
99 {
100         struct kfd_sdma_activity_handler_workarea *workarea;
101         struct kfd_process_device *pdd;
102         uint64_t val;
103         struct mm_struct *mm;
104         struct queue *q;
105         struct qcm_process_device *qpd;
106         struct device_queue_manager *dqm;
107         int ret = 0;
108         struct temp_sdma_queue_list sdma_q_list;
109         struct temp_sdma_queue_list *sdma_q, *next;
110
111         workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
112                                 sdma_activity_work);
113         if (!workarea)
114                 return;
115
116         pdd = workarea->pdd;
117         if (!pdd)
118                 return;
119         dqm = pdd->dev->dqm;
120         qpd = &pdd->qpd;
121         if (!dqm || !qpd)
122                 return;
123         /*
124          * Total SDMA activity is current SDMA activity + past SDMA activity
125          * Past SDMA count is stored in pdd.
126          * To get the current activity counters for all active SDMA queues,
127          * we loop over all SDMA queues and get their counts from user-space.
128          *
129          * We cannot call get_user() with dqm_lock held as it can cause
130          * a circular lock dependency situation. To read the SDMA stats,
131          * we need to do the following:
132          *
133          * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
134          *    with dqm_lock/dqm_unlock().
135          * 2. Call get_user() for each node in temporary list without dqm_lock.
136          *    Save the SDMA count for each node and also add the count to the total
137          *    SDMA count.
138          *    It's possible that, during this step, a few SDMA queue nodes were deleted
139          *    from the qpd->queues_list.
140          * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
141          *    If any node got deleted, its SDMA count would be captured in the sdma
142          *    past activity counter. So subtract the SDMA counter stored in step 2
143          *    for this node from the total SDMA count.
144          */
145         INIT_LIST_HEAD(&sdma_q_list.list);
146
147         /*
148          * Create the temp list of all SDMA queues
149          */
150         dqm_lock(dqm);
151
152         list_for_each_entry(q, &qpd->queues_list, list) {
153                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
154                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
155                         continue;
156
157                 sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
158                 if (!sdma_q) {
159                         dqm_unlock(dqm);
160                         goto cleanup;
161                 }
162
163                 INIT_LIST_HEAD(&sdma_q->list);
164                 sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
165                 sdma_q->queue_id = q->properties.queue_id;
166                 list_add_tail(&sdma_q->list, &sdma_q_list.list);
167         }
168
169         /*
170          * If the temp list is empty, then no SDMA queue nodes were found in
171          * qpd->queues_list. Return the past activity count as the total SDMA
172          * count.
173          */
174         if (list_empty(&sdma_q_list.list)) {
175                 workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
176                 dqm_unlock(dqm);
177                 return;
178         }
179
180         dqm_unlock(dqm);
181
182         /*
183          * Get the usage count for each SDMA queue in temp_list.
184          */
185         mm = get_task_mm(pdd->process->lead_thread);
186         if (!mm)
187                 goto cleanup;
188
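        /* Adopt the lead thread's mm so get_user() on the queue read pointers works in this worker thread */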
189         kthread_use_mm(mm);
190
191         list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
192                 val = 0;
193                 ret = read_sdma_queue_counter(sdma_q->rptr, &val);
194                 if (ret) {
195                         pr_debug("Failed to read SDMA queue active counter for queue id: %d",
196                                  sdma_q->queue_id);
197                 } else {
198                         sdma_q->sdma_val = val;
199                         workarea->sdma_activity_counter += val;
200                 }
201         }
202
203         kthread_unuse_mm(mm);
204         mmput(mm);
205
206         /*
207          * Do a second iteration over qpd->queues_list to check if any SDMA
208          * queue nodes were deleted while fetching the SDMA counters.
209          */
210         dqm_lock(dqm);
211
212         workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
213
214         list_for_each_entry(q, &qpd->queues_list, list) {
215                 if (list_empty(&sdma_q_list.list))
216                         break;
217
218                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
219                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
220                         continue;
221
222                 list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
223                         if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
224                              (sdma_q->queue_id == q->properties.queue_id)) {
225                                 list_del(&sdma_q->list);
226                                 kfree(sdma_q);
227                                 break;
228                         }
229                 }
230         }
231
232         dqm_unlock(dqm);
233
234         /*
235          * If temp list is not empty, it implies some queues got deleted
236          * from qpd->queues_list during SDMA usage read. Subtract the SDMA
237          * count for each node from the total SDMA count.
238          */
239         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
240                 workarea->sdma_activity_counter -= sdma_q->sdma_val;
241                 list_del(&sdma_q->list);
242                 kfree(sdma_q);
243         }
244
245         return;
246
247 cleanup:
248         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
249                 list_del(&sdma_q->list);
250                 kfree(sdma_q);
251         }
252 }
253
254 /**
255  * kfd_get_cu_occupancy - Collect number of waves in flight on this device
256  * by the current process. Translates the acquired wave count into the number
257  * of compute units that are occupied.
258  *
259  * @attr: Handle of the attribute that allows reporting of wave count. The
260  * attribute handle encapsulates the GPU device it is associated with, thereby
261  * allowing collection of waves in flight, etc.
262  *
263  * @buffer: Handle of user provided buffer updated with wave count
264  *
265  * Return: Number of bytes written to user buffer or an error value
266  */
267 static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
268 {
269         int cu_cnt;
270         int wave_cnt;
271         int max_waves_per_cu;
272         struct kfd_dev *dev = NULL;
273         struct kfd_process *proc = NULL;
274         struct kfd_process_device *pdd = NULL;
275
276         pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
277         dev = pdd->dev;
278         if (dev->kfd2kgd->get_cu_occupancy == NULL)
279                 return -EINVAL;
280
281         cu_cnt = 0;
282         proc = pdd->process;
283         if (pdd->qpd.queue_count == 0) {
284                 pr_debug("Gpu-Id: %d has no active queues for process %d\n",
285                          dev->id, proc->pasid);
286                 return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
287         }
288
289         /* Collect wave count from the device if it supports it */
290         wave_cnt = 0;
291         max_waves_per_cu = 0;
292         dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
293                         &max_waves_per_cu);
294
295         /* Translate wave count to number of compute units */
296         cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
297         return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
298 }
299
300 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
301                                char *buffer)
302 {
303         if (strcmp(attr->name, "pasid") == 0) {
304                 struct kfd_process *p = container_of(attr, struct kfd_process,
305                                                      attr_pasid);
306
307                 return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
308         } else if (strncmp(attr->name, "vram_", 5) == 0) {
309                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
310                                                               attr_vram);
311                 return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
312         } else if (strncmp(attr->name, "sdma_", 5) == 0) {
313                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
314                                                               attr_sdma);
315                 struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
316
317                 INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
318                                         kfd_sdma_activity_worker);
319
320                 sdma_activity_work_handler.pdd = pdd;
321                 sdma_activity_work_handler.sdma_activity_counter = 0;
322
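                /* Collect synchronously: queue the worker, then wait for it below before reporting the counter */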
323                 schedule_work(&sdma_activity_work_handler.sdma_activity_work);
324
325                 flush_work(&sdma_activity_work_handler.sdma_activity_work);
326
327                 return snprintf(buffer, PAGE_SIZE, "%llu\n",
328                                 (sdma_activity_work_handler.sdma_activity_counter)/
329                                  SDMA_ACTIVITY_DIVISOR);
330         } else {
331                 pr_err("Invalid attribute");
332                 return -EINVAL;
333         }
334
335         return 0;
336 }
337
338 static void kfd_procfs_kobj_release(struct kobject *kobj)
339 {
340         kfree(kobj);
341 }
342
343 static const struct sysfs_ops kfd_procfs_ops = {
344         .show = kfd_procfs_show,
345 };
346
347 static struct kobj_type procfs_type = {
348         .release = kfd_procfs_kobj_release,
349         .sysfs_ops = &kfd_procfs_ops,
350 };
351
352 void kfd_procfs_init(void)
353 {
354         int ret = 0;
355
356         procfs.kobj = kfd_alloc_struct(procfs.kobj);
357         if (!procfs.kobj)
358                 return;
359
360         ret = kobject_init_and_add(procfs.kobj, &procfs_type,
361                                    &kfd_device->kobj, "proc");
362         if (ret) {
363                 pr_warn("Could not create procfs proc folder");
364                 /* If we fail to create the procfs, clean up */
365                 kfd_procfs_shutdown();
366         }
367 }
368
369 void kfd_procfs_shutdown(void)
370 {
371         if (procfs.kobj) {
372                 kobject_del(procfs.kobj);
373                 kobject_put(procfs.kobj);
374                 procfs.kobj = NULL;
375         }
376 }
377
378 static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
379                                      struct attribute *attr, char *buffer)
380 {
381         struct queue *q = container_of(kobj, struct queue, kobj);
382
383         if (!strcmp(attr->name, "size"))
384                 return snprintf(buffer, PAGE_SIZE, "%llu",
385                                 q->properties.queue_size);
386         else if (!strcmp(attr->name, "type"))
387                 return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
388         else if (!strcmp(attr->name, "gpuid"))
389                 return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
390         else
391                 pr_err("Invalid attribute");
392
393         return 0;
394 }
395
396 static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
397                                      struct attribute *attr, char *buffer)
398 {
399         if (strcmp(attr->name, "evicted_ms") == 0) {
400                 struct kfd_process_device *pdd = container_of(attr,
401                                 struct kfd_process_device,
402                                 attr_evict);
403                 uint64_t evict_jiffies;
404
405                 evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
406
407                 return snprintf(buffer,
408                                 PAGE_SIZE,
409                                 "%llu\n",
410                                 jiffies64_to_msecs(evict_jiffies));
411
412         /* Sysfs handle that gets CU occupancy is per device */
413         } else if (strcmp(attr->name, "cu_occupancy") == 0) {
414                 return kfd_get_cu_occupancy(attr, buffer);
415         } else {
416                 pr_err("Invalid attribute");
417         }
418
419         return 0;
420 }
421
422 static struct attribute attr_queue_size = {
423         .name = "size",
424         .mode = KFD_SYSFS_FILE_MODE
425 };
426
427 static struct attribute attr_queue_type = {
428         .name = "type",
429         .mode = KFD_SYSFS_FILE_MODE
430 };
431
432 static struct attribute attr_queue_gpuid = {
433         .name = "gpuid",
434         .mode = KFD_SYSFS_FILE_MODE
435 };
436
437 static struct attribute *procfs_queue_attrs[] = {
438         &attr_queue_size,
439         &attr_queue_type,
440         &attr_queue_gpuid,
441         NULL
442 };
443
444 static const struct sysfs_ops procfs_queue_ops = {
445         .show = kfd_procfs_queue_show,
446 };
447
448 static struct kobj_type procfs_queue_type = {
449         .sysfs_ops = &procfs_queue_ops,
450         .default_attrs = procfs_queue_attrs,
451 };
452
453 static const struct sysfs_ops procfs_stats_ops = {
454         .show = kfd_procfs_stats_show,
455 };
456
457 static struct attribute *procfs_stats_attrs[] = {
458         NULL
459 };
460
461 static struct kobj_type procfs_stats_type = {
462         .sysfs_ops = &procfs_stats_ops,
463         .default_attrs = procfs_stats_attrs,
464 };
465
466 int kfd_procfs_add_queue(struct queue *q)
467 {
468         struct kfd_process *proc;
469         int ret;
470
471         if (!q || !q->process)
472                 return -EINVAL;
473         proc = q->process;
474
475         /* Create proc/<pid>/queues/<queue id> folder */
476         if (!proc->kobj_queues)
477                 return -EFAULT;
478         ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
479                         proc->kobj_queues, "%u", q->properties.queue_id);
480         if (ret < 0) {
481                 pr_warn("Creating proc/<pid>/queues/%u failed",
482                         q->properties.queue_id);
483                 kobject_put(&q->kobj);
484                 return ret;
485         }
486
487         return 0;
488 }
489
490 static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
491                                  char *name)
492 {
493         int ret = 0;
494
495         if (!p || !attr || !name)
496                 return -EINVAL;
497
498         attr->name = name;
499         attr->mode = KFD_SYSFS_FILE_MODE;
500         sysfs_attr_init(attr);
501
502         ret = sysfs_create_file(p->kobj, attr);
503
504         return ret;
505 }
506
507 static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
508 {
509         int ret = 0;
510         int i;
511         char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
512
513         if (!p)
514                 return -EINVAL;
515
516         if (!p->kobj)
517                 return -EFAULT;
518
519         /*
520          * Create sysfs files for each GPU:
521          * - proc/<pid>/stats_<gpuid>/
522          * - proc/<pid>/stats_<gpuid>/evicted_ms
523          * - proc/<pid>/stats_<gpuid>/cu_occupancy
524          */
525         for (i = 0; i < p->n_pdds; i++) {
526                 struct kfd_process_device *pdd = p->pdds[i];
527                 struct kobject *kobj_stats;
528
529                 snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
530                                 "stats_%u", pdd->dev->id);
531                 kobj_stats = kfd_alloc_struct(kobj_stats);
532                 if (!kobj_stats)
533                         return -ENOMEM;
534
535                 ret = kobject_init_and_add(kobj_stats,
536                                                 &procfs_stats_type,
537                                                 p->kobj,
538                                                 stats_dir_filename);
539
540                 if (ret) {
541                         pr_warn("Creating KFD proc/stats_%s folder failed",
542                                         stats_dir_filename);
543                         kobject_put(kobj_stats);
544                         goto err;
545                 }
546
547                 pdd->kobj_stats = kobj_stats;
548                 pdd->attr_evict.name = "evicted_ms";
549                 pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
550                 sysfs_attr_init(&pdd->attr_evict);
551                 ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
552                 if (ret)
553                         pr_warn("Creating eviction stats for gpuid %d failed",
554                                         (int)pdd->dev->id);
555
556                 /* Add sysfs file to report compute unit occupancy */
557                 if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
558                         pdd->attr_cu_occupancy.name = "cu_occupancy";
559                         pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
560                         sysfs_attr_init(&pdd->attr_cu_occupancy);
561                         ret = sysfs_create_file(kobj_stats,
562                                                 &pdd->attr_cu_occupancy);
563                         if (ret)
564                                 pr_warn("Creating %s failed for gpuid: %d",
565                                         pdd->attr_cu_occupancy.name,
566                                         (int)pdd->dev->id);
567                 }
568         }
569 err:
570         return ret;
571 }
572
573
574 static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
575 {
576         int ret = 0;
577         int i;
578
579         if (!p)
580                 return -EINVAL;
581
582         if (!p->kobj)
583                 return -EFAULT;
584
585         /*
586          * Create sysfs files for each GPU:
587          * - proc/<pid>/vram_<gpuid>
588          * - proc/<pid>/sdma_<gpuid>
589          */
590         for (i = 0; i < p->n_pdds; i++) {
591                 struct kfd_process_device *pdd = p->pdds[i];
592
593                 snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
594                          pdd->dev->id);
595                 ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
596                 if (ret)
597                         pr_warn("Creating vram usage for gpu id %d failed",
598                                 (int)pdd->dev->id);
599
600                 snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
601                          pdd->dev->id);
602                 ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
603                 if (ret)
604                         pr_warn("Creating sdma usage for gpu id %d failed",
605                                 (int)pdd->dev->id);
606         }
607
608         return ret;
609 }
610
611 void kfd_procfs_del_queue(struct queue *q)
612 {
613         if (!q)
614                 return;
615
616         kobject_del(&q->kobj);
617         kobject_put(&q->kobj);
618 }
619
620 int kfd_process_create_wq(void)
621 {
622         if (!kfd_process_wq)
623                 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
624         if (!kfd_restore_wq)
625                 kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
626
627         if (!kfd_process_wq || !kfd_restore_wq) {
628                 kfd_process_destroy_wq();
629                 return -ENOMEM;
630         }
631
632         return 0;
633 }
634
635 void kfd_process_destroy_wq(void)
636 {
637         if (kfd_process_wq) {
638                 destroy_workqueue(kfd_process_wq);
639                 kfd_process_wq = NULL;
640         }
641         if (kfd_restore_wq) {
642                 destroy_workqueue(kfd_restore_wq);
643                 kfd_restore_wq = NULL;
644         }
645 }
646
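/* Unmap @mem from the GPU VM of @pdd and free it */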
647 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
648                         struct kfd_process_device *pdd)
649 {
650         struct kfd_dev *dev = pdd->dev;
651
652         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
653         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
654                                                NULL);
655 }
656
657 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
658  *      This function should only be called right after the process
659  *      is created and when kfd_processes_mutex is still being held
660  *      to avoid concurrency. Because of that exclusiveness, we do
661  *      not need to take p->mutex.
662  */
663 static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
664                                    uint64_t gpu_va, uint32_t size,
665                                    uint32_t flags, void **kptr)
666 {
667         struct kfd_dev *kdev = pdd->dev;
668         struct kgd_mem *mem = NULL;
669         int handle;
670         int err;
671
672         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
673                                                  pdd->drm_priv, &mem, NULL, flags);
674         if (err)
675                 goto err_alloc_mem;
676
677         err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
678         if (err)
679                 goto err_map_mem;
680
681         err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
682         if (err) {
683                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
684                 goto sync_memory_failed;
685         }
686
687         /* Create an obj handle so kfd_process_device_remove_obj_handle
688          * will take care of the bo removal when the process finishes.
689          * We do not need to take p->mutex, because the process is just
690          * created and the ioctls have not had the chance to run.
691          */
692         handle = kfd_process_device_create_obj_handle(pdd, mem);
693
694         if (handle < 0) {
695                 err = handle;
696                 goto free_gpuvm;
697         }
698
699         if (kptr) {
700                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
701                                 (struct kgd_mem *)mem, kptr, NULL);
702                 if (err) {
703                         pr_debug("Map GTT BO to kernel failed\n");
704                         goto free_obj_handle;
705                 }
706         }
707
708         return err;
709
710 free_obj_handle:
711         kfd_process_device_remove_obj_handle(pdd, handle);
712 free_gpuvm:
713 sync_memory_failed:
714         kfd_process_free_gpuvm(mem, pdd);
715         return err;
716
717 err_map_mem:
718         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
719                                                NULL);
720 err_alloc_mem:
721         *kptr = NULL;
722         return err;
723 }
724
725 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
726  *      process for IB usage. The memory reserved is for KFD to submit
727  *      IBs to AMDGPU from the kernel. If the memory is reserved
728  *      successfully, ib_kaddr will have the CPU/kernel
729  *      address. Check ib_kaddr before accessing the memory.
730  */
731 static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
732 {
733         struct qcm_process_device *qpd = &pdd->qpd;
734         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
735                         KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
736                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
737                         KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
738         void *kaddr;
739         int ret;
740
741         if (qpd->ib_kaddr || !qpd->ib_base)
742                 return 0;
743
744         /* ib_base is only set for dGPU */
745         ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
746                                       &kaddr);
747         if (ret)
748                 return ret;
749
750         qpd->ib_kaddr = kaddr;
751
752         return 0;
753 }
754
755 struct kfd_process *kfd_create_process(struct file *filep)
756 {
757         struct kfd_process *process;
758         struct task_struct *thread = current;
759         int ret;
760
761         if (!thread->mm)
762                 return ERR_PTR(-EINVAL);
763
764         /* Only the pthreads threading model is supported. */
765         if (thread->group_leader->mm != thread->mm)
766                 return ERR_PTR(-EINVAL);
767
768         /*
769          * take the kfd_processes_mutex before starting process creation,
770          * so there won't be a case where two threads of the same process
771          * create two kfd_process structures
772          */
773         mutex_lock(&kfd_processes_mutex);
774
775         /* A prior open of /dev/kfd could have already created the process. */
776         process = find_process(thread);
777         if (process) {
778                 pr_debug("Process already found\n");
779         } else {
780                 process = create_process(thread);
781                 if (IS_ERR(process))
782                         goto out;
783
784                 ret = kfd_process_init_cwsr_apu(process, filep);
785                 if (ret)
786                         goto out_destroy;
787
788                 if (!procfs.kobj)
789                         goto out;
790
791                 process->kobj = kfd_alloc_struct(process->kobj);
792                 if (!process->kobj) {
793                         pr_warn("Creating procfs kobject failed");
794                         goto out;
795                 }
796                 ret = kobject_init_and_add(process->kobj, &procfs_type,
797                                            procfs.kobj, "%d",
798                                            (int)process->lead_thread->pid);
799                 if (ret) {
800                         pr_warn("Creating procfs pid directory failed");
801                         kobject_put(process->kobj);
802                         goto out;
803                 }
804
805                 process->attr_pasid.name = "pasid";
806                 process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
807                 sysfs_attr_init(&process->attr_pasid);
808                 ret = sysfs_create_file(process->kobj, &process->attr_pasid);
809                 if (ret)
810                         pr_warn("Creating pasid for pid %d failed",
811                                         (int)process->lead_thread->pid);
812
813                 process->kobj_queues = kobject_create_and_add("queues",
814                                                         process->kobj);
815                 if (!process->kobj_queues)
816                         pr_warn("Creating KFD proc/queues folder failed");
817
818                 ret = kfd_procfs_add_sysfs_stats(process);
819                 if (ret)
820                         pr_warn("Creating sysfs stats dir for pid %d failed",
821                                 (int)process->lead_thread->pid);
822
823                 ret = kfd_procfs_add_sysfs_files(process);
824                 if (ret)
825                         pr_warn("Creating sysfs usage file for pid %d failed",
826                                 (int)process->lead_thread->pid);
827         }
828 out:
829         if (!IS_ERR(process))
830                 kref_get(&process->ref);
831         mutex_unlock(&kfd_processes_mutex);
832
833         return process;
834
835 out_destroy:
836         hash_del_rcu(&process->kfd_processes);
837         mutex_unlock(&kfd_processes_mutex);
838         synchronize_srcu(&kfd_processes_srcu);
839         /* kfd_process_free_notifier will trigger the cleanup */
840         mmu_notifier_put(&process->mmu_notifier);
841         return ERR_PTR(ret);
842 }
843
844 struct kfd_process *kfd_get_process(const struct task_struct *thread)
845 {
846         struct kfd_process *process;
847
848         if (!thread->mm)
849                 return ERR_PTR(-EINVAL);
850
851         /* Only the pthreads threading model is supported. */
852         if (thread->group_leader->mm != thread->mm)
853                 return ERR_PTR(-EINVAL);
854
855         process = find_process(thread);
856         if (!process)
857                 return ERR_PTR(-EINVAL);
858
859         return process;
860 }
861
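/* Look up a process by its mm_struct; called with kfd_processes_srcu read-locked */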
862 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
863 {
864         struct kfd_process *process;
865
866         hash_for_each_possible_rcu(kfd_processes_table, process,
867                                         kfd_processes, (uintptr_t)mm)
868                 if (process->mm == mm)
869                         return process;
870
871         return NULL;
872 }
873
874 static struct kfd_process *find_process(const struct task_struct *thread)
875 {
876         struct kfd_process *p;
877         int idx;
878
879         idx = srcu_read_lock(&kfd_processes_srcu);
880         p = find_process_by_mm(thread->mm);
881         srcu_read_unlock(&kfd_processes_srcu, idx);
882
883         return p;
884 }
885
886 void kfd_unref_process(struct kfd_process *p)
887 {
888         kref_put(&p->ref, kfd_process_ref_release);
889 }
890
891
892 static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
893 {
894         struct kfd_process *p = pdd->process;
895         void *mem;
896         int id;
897         int i;
898
899         /*
900          * Remove all handles from idr and release appropriate
901          * local memory object
902          */
903         idr_for_each_entry(&pdd->alloc_idr, mem, id) {
904
905                 for (i = 0; i < p->n_pdds; i++) {
906                         struct kfd_process_device *peer_pdd = p->pdds[i];
907
908                         if (!peer_pdd->drm_priv)
909                                 continue;
910                         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
911                                 peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
912                 }
913
914                 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
915                                                        pdd->drm_priv, NULL);
916                 kfd_process_device_remove_obj_handle(pdd, id);
917         }
918 }
919
920 static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
921 {
922         int i;
923
924         for (i = 0; i < p->n_pdds; i++)
925                 kfd_process_device_free_bos(p->pdds[i]);
926 }
927
928 static void kfd_process_destroy_pdds(struct kfd_process *p)
929 {
930         int i;
931
932         for (i = 0; i < p->n_pdds; i++) {
933                 struct kfd_process_device *pdd = p->pdds[i];
934
935                 pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
936                                 pdd->dev->id, p->pasid);
937
938                 if (pdd->drm_file) {
939                         amdgpu_amdkfd_gpuvm_release_process_vm(
940                                         pdd->dev->kgd, pdd->drm_priv);
941                         fput(pdd->drm_file);
942                 }
943
944                 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
945                         free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
946                                 get_order(KFD_CWSR_TBA_TMA_SIZE));
947
948                 kfree(pdd->qpd.doorbell_bitmap);
949                 idr_destroy(&pdd->alloc_idr);
950
951                 kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
952
953                 /*
954                  * before destroying pdd, make sure to report availability
955                  * for auto suspend
956                  */
957                 if (pdd->runtime_inuse) {
958                         pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
959                         pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
960                         pdd->runtime_inuse = false;
961                 }
962
963                 kfree(pdd);
964                 p->pdds[i] = NULL;
965         }
966         p->n_pdds = 0;
967 }
968
969 /* No process locking is needed in this function, because the process
970  * is not findable any more. We must assume that no other thread is
971  * using it any more, otherwise we couldn't safely free the process
972  * structure in the end.
973  */
974 static void kfd_process_wq_release(struct work_struct *work)
975 {
976         struct kfd_process *p = container_of(work, struct kfd_process,
977                                              release_work);
978         int i;
979
980         /* Remove the procfs files */
981         if (p->kobj) {
982                 sysfs_remove_file(p->kobj, &p->attr_pasid);
983                 kobject_del(p->kobj_queues);
984                 kobject_put(p->kobj_queues);
985                 p->kobj_queues = NULL;
986
987                 for (i = 0; i < p->n_pdds; i++) {
988                         struct kfd_process_device *pdd = p->pdds[i];
989
990                         sysfs_remove_file(p->kobj, &pdd->attr_vram);
991                         sysfs_remove_file(p->kobj, &pdd->attr_sdma);
992                         sysfs_remove_file(p->kobj, &pdd->attr_evict);
993                         if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
994                                 sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
995                         kobject_del(pdd->kobj_stats);
996                         kobject_put(pdd->kobj_stats);
997                         pdd->kobj_stats = NULL;
998                 }
999
1000                 kobject_del(p->kobj);
1001                 kobject_put(p->kobj);
1002                 p->kobj = NULL;
1003         }
1004
1005         kfd_iommu_unbind_process(p);
1006
1007         kfd_process_free_outstanding_kfd_bos(p);
1008         svm_range_list_fini(p);
1009
1010         kfd_process_destroy_pdds(p);
1011         dma_fence_put(p->ef);
1012
1013         kfd_event_free_process(p);
1014
1015         kfd_pasid_free(p->pasid);
1016         mutex_destroy(&p->mutex);
1017
1018         put_task_struct(p->lead_thread);
1019
1020         kfree(p);
1021 }
1022
1023 static void kfd_process_ref_release(struct kref *ref)
1024 {
1025         struct kfd_process *p = container_of(ref, struct kfd_process, ref);
1026
1027         INIT_WORK(&p->release_work, kfd_process_wq_release);
1028         queue_work(kfd_process_wq, &p->release_work);
1029 }
1030
1031 static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
1032 {
1033         int idx = srcu_read_lock(&kfd_processes_srcu);
1034         struct kfd_process *p = find_process_by_mm(mm);
1035
1036         srcu_read_unlock(&kfd_processes_srcu, idx);
1037
1038         return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
1039 }
1040
1041 static void kfd_process_free_notifier(struct mmu_notifier *mn)
1042 {
1043         kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
1044 }
1045
1046 static void kfd_process_notifier_release(struct mmu_notifier *mn,
1047                                         struct mm_struct *mm)
1048 {
1049         struct kfd_process *p;
1050         int i;
1051
1052         /*
1053          * The kfd_process structure cannot be freed because the
1054          * mmu_notifier SRCU is read-locked
1055          */
1056         p = container_of(mn, struct kfd_process, mmu_notifier);
1057         if (WARN_ON(p->mm != mm))
1058                 return;
1059
1060         mutex_lock(&kfd_processes_mutex);
1061         hash_del_rcu(&p->kfd_processes);
1062         mutex_unlock(&kfd_processes_mutex);
1063         synchronize_srcu(&kfd_processes_srcu);
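        /* From this point no SRCU reader can look up this process any more */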
1064
1065         cancel_delayed_work_sync(&p->eviction_work);
1066         cancel_delayed_work_sync(&p->restore_work);
1067         cancel_delayed_work_sync(&p->svms.restore_work);
1068
1069         mutex_lock(&p->mutex);
1070
1071         /* Iterate over all process device data structures and if the
1072          * pdd is in debug mode, we should first force unregistration,
1073          * then we will be able to destroy the queues
1074          */
1075         for (i = 0; i < p->n_pdds; i++) {
1076                 struct kfd_dev *dev = p->pdds[i]->dev;
1077
1078                 mutex_lock(kfd_get_dbgmgr_mutex());
1079                 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
1080                         if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
1081                                 kfd_dbgmgr_destroy(dev->dbgmgr);
1082                                 dev->dbgmgr = NULL;
1083                         }
1084                 }
1085                 mutex_unlock(kfd_get_dbgmgr_mutex());
1086         }
1087
1088         kfd_process_dequeue_from_all_devices(p);
1089         pqm_uninit(&p->pqm);
1090
1091         /* Indicate to other users that MM is no longer valid */
1092         p->mm = NULL;
1093         /* Signal the eviction fence after user mode queues are
1094          * destroyed. This allows any BOs to be freed without
1095          * triggering pointless evictions or waiting for fences.
1096          */
1097         dma_fence_signal(p->ef);
1098
1099         mutex_unlock(&p->mutex);
1100
1101         mmu_notifier_put(&p->mmu_notifier);
1102 }
1103
1104 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
1105         .release = kfd_process_notifier_release,
1106         .alloc_notifier = kfd_process_alloc_notifier,
1107         .free_notifier = kfd_process_free_notifier,
1108 };
1109
1110 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
1111 {
1112         unsigned long  offset;
1113         int i;
1114
1115         for (i = 0; i < p->n_pdds; i++) {
1116                 struct kfd_dev *dev = p->pdds[i]->dev;
1117                 struct qcm_process_device *qpd = &p->pdds[i]->qpd;
1118
1119                 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
1120                         continue;
1121
1122                 offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
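                /* Encode the mmap type and GPU id into the offset; the KFD mmap handler uses them to map this device's CWSR reserved memory */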
1123                 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
1124                         KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
1125                         MAP_SHARED, offset);
1126
1127                 if (IS_ERR_VALUE(qpd->tba_addr)) {
1128                         int err = qpd->tba_addr;
1129
1130                         pr_err("Failure to set tba address. error %d.\n", err);
1131                         qpd->tba_addr = 0;
1132                         qpd->cwsr_kaddr = NULL;
1133                         return err;
1134                 }
1135
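                /* Copy the CWSR trap handler ISA into the newly mapped TBA page */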
1136                 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1137
1138                 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1139                 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1140                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1141         }
1142
1143         return 0;
1144 }
1145
1146 static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
1147 {
1148         struct kfd_dev *dev = pdd->dev;
1149         struct qcm_process_device *qpd = &pdd->qpd;
1150         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
1151                         | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
1152                         | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1153         void *kaddr;
1154         int ret;
1155
1156         if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
1157                 return 0;
1158
1159         /* cwsr_base is only set for dGPU */
1160         ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
1161                                       KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
1162         if (ret)
1163                 return ret;
1164
1165         qpd->cwsr_kaddr = kaddr;
1166         qpd->tba_addr = qpd->cwsr_base;
1167
1168         memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1169
1170         qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1171         pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1172                  qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1173
1174         return 0;
1175 }
1176
1177 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
1178                                   uint64_t tba_addr,
1179                                   uint64_t tma_addr)
1180 {
1181         if (qpd->cwsr_kaddr) {
1182                 /* KFD trap handler is bound, record as second-level TBA/TMA
1183                  * in first-level TMA. First-level trap will jump to second.
1184                  */
1185                 uint64_t *tma =
1186                         (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1187                 tma[0] = tba_addr;
1188                 tma[1] = tma_addr;
1189         } else {
1190                 /* No trap handler bound, bind as first-level TBA/TMA. */
1191                 qpd->tba_addr = tba_addr;
1192                 qpd->tma_addr = tma_addr;
1193         }
1194 }
1195
1196 bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
1197 {
1198         int i;
1199
1200         /* On most GFXv9 GPUs, the retry mode in the SQ must match the
1201          * boot time retry setting. Mixing processes with different
1202          * XNACK/retry settings can hang the GPU.
1203          *
1204          * Different GPUs can have different noretry settings depending
1205          * on HW bugs or limitations. We need to find at least one
1206          * XNACK mode for this process that's compatible with all GPUs.
1207          * Fortunately GPUs with retry enabled (noretry=0) can run code
1208          * built for XNACK-off. On GFXv9 it may perform slower.
1209          *
1210          * Therefore applications built for XNACK-off can always be
1211          * supported and will be our fallback if any GPU does not
1212          * support retry.
1213          */
1214         for (i = 0; i < p->n_pdds; i++) {
1215                 struct kfd_dev *dev = p->pdds[i]->dev;
1216
1217                 /* Only consider GFXv9 and higher GPUs. Older GPUs don't
1218                  * support the SVM APIs and don't need to be considered
1219                  * for the XNACK mode selection.
1220                  */
1221                 if (dev->device_info->asic_family < CHIP_VEGA10)
1222                         continue;
1223                 /* Aldebaran can always support XNACK because it can support
1224                  * per-process XNACK mode selection. But let the dev->noretry
1225                  * setting still influence the default XNACK mode.
1226                  */
1227                 if (supported &&
1228                     dev->device_info->asic_family == CHIP_ALDEBARAN)
1229                         continue;
1230
1231                 /* GFXv10 and later GPUs do not support shader preemption
1232                  * during page faults. This can lead to poor QoS for queue
1233                  * management and memory-manager-related preemptions or
1234                  * even deadlocks.
1235                  */
1236                 if (dev->device_info->asic_family >= CHIP_NAVI10)
1237                         return false;
1238
1239                 if (dev->noretry)
1240                         return false;
1241         }
1242
1243         return true;
1244 }
1245
1246 /*
1247  * On return the kfd_process is fully operational and will be freed when the
1248  * mm is released
1249  */
1250 static struct kfd_process *create_process(const struct task_struct *thread)
1251 {
1252         struct kfd_process *process;
1253         struct mmu_notifier *mn;
1254         int err = -ENOMEM;
1255
1256         process = kzalloc(sizeof(*process), GFP_KERNEL);
1257         if (!process)
1258                 goto err_alloc_process;
1259
1260         kref_init(&process->ref);
1261         mutex_init(&process->mutex);
1262         process->mm = thread->mm;
1263         process->lead_thread = thread->group_leader;
1264         process->n_pdds = 0;
1265         process->svm_disabled = false;
1266         INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
1267         INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
1268         process->last_restore_timestamp = get_jiffies_64();
1269         kfd_event_init_process(process);
1270         process->is_32bit_user_mode = in_compat_syscall();
1271
1272         process->pasid = kfd_pasid_alloc();
1273         if (process->pasid == 0)
1274                 goto err_alloc_pasid;
1275
1276         err = pqm_init(&process->pqm, process);
1277         if (err != 0)
1278                 goto err_process_pqm_init;
1279
1280         /* init process apertures */
1281         err = kfd_init_apertures(process);
1282         if (err != 0)
1283                 goto err_init_apertures;
1284
1285         /* Check XNACK support after PDDs are created in kfd_init_apertures */
1286         process->xnack_enabled = kfd_process_xnack_mode(process, false);
1287
1288         err = svm_range_list_init(process);
1289         if (err)
1290                 goto err_init_svm_range_list;
1291
1292         /* alloc_notifier needs to find the process in the hash table */
1293         hash_add_rcu(kfd_processes_table, &process->kfd_processes,
1294                         (uintptr_t)process->mm);
1295
1296         /* MMU notifier registration must be the last call that can fail
1297          * because after this point we cannot unwind the process creation.
1298          * After this point, mmu_notifier_put will trigger the cleanup by
1299          * dropping the last process reference in the free_notifier.
1300          */
1301         mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
1302         if (IS_ERR(mn)) {
1303                 err = PTR_ERR(mn);
1304                 goto err_register_notifier;
1305         }
1306         BUG_ON(mn != &process->mmu_notifier);
1307
1308         get_task_struct(process->lead_thread);
1309
1310         return process;
1311
1312 err_register_notifier:
1313         hash_del_rcu(&process->kfd_processes);
1314         svm_range_list_fini(process);
1315 err_init_svm_range_list:
1316         kfd_process_free_outstanding_kfd_bos(process);
1317         kfd_process_destroy_pdds(process);
1318 err_init_apertures:
1319         pqm_uninit(&process->pqm);
1320 err_process_pqm_init:
1321         kfd_pasid_free(process->pasid);
1322 err_alloc_pasid:
1323         mutex_destroy(&process->mutex);
1324         kfree(process);
1325 err_alloc_process:
1326         return ERR_PTR(err);
1327 }
1328
1329 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
1330                         struct kfd_dev *dev)
1331 {
1332         unsigned int i;
1333         int range_start = dev->shared_resources.non_cp_doorbells_start;
1334         int range_end = dev->shared_resources.non_cp_doorbells_end;
1335
1336         if (!KFD_IS_SOC15(dev->device_info->asic_family))
1337                 return 0;
1338
1339         qpd->doorbell_bitmap =
1340                 kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
1341                                      BITS_PER_BYTE), GFP_KERNEL);
1342         if (!qpd->doorbell_bitmap)
1343                 return -ENOMEM;
1344
1345         /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
1346         pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
1347         pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
1348                         range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1349                         range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
1350
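        /* Reserve each doorbell in the non-CP range plus its mirrored counterpart at KFD_QUEUE_DOORBELL_MIRROR_OFFSET */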
1351         for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
1352                 if (i >= range_start && i <= range_end) {
1353                         set_bit(i, qpd->doorbell_bitmap);
1354                         set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1355                                 qpd->doorbell_bitmap);
1356                 }
1357         }
1358
1359         return 0;
1360 }
1361
1362 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
1363                                                         struct kfd_process *p)
1364 {
1365         int i;
1366
1367         for (i = 0; i < p->n_pdds; i++)
1368                 if (p->pdds[i]->dev == dev)
1369                         return p->pdds[i];
1370
1371         return NULL;
1372 }
1373
1374 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
1375                                                         struct kfd_process *p)
1376 {
1377         struct kfd_process_device *pdd = NULL;
1378
1379         if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
1380                 return NULL;
1381         pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
1382         if (!pdd)
1383                 return NULL;
1384
1385         if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
1386                 pr_err("Failed to alloc doorbell for pdd\n");
1387                 goto err_free_pdd;
1388         }
1389
1390         if (init_doorbell_bitmap(&pdd->qpd, dev)) {
1391                 pr_err("Failed to init doorbell for process\n");
1392                 goto err_free_pdd;
1393         }
1394
1395         pdd->dev = dev;
1396         INIT_LIST_HEAD(&pdd->qpd.queues_list);
1397         INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
1398         pdd->qpd.dqm = dev->dqm;
1399         pdd->qpd.pqm = &p->pqm;
1400         pdd->qpd.evicted = 0;
1401         pdd->qpd.mapped_gws_queue = false;
1402         pdd->process = p;
1403         pdd->bound = PDD_UNBOUND;
1404         pdd->already_dequeued = false;
1405         pdd->runtime_inuse = false;
1406         pdd->vram_usage = 0;
1407         pdd->sdma_past_activity_counter = 0;
1408         atomic64_set(&pdd->evict_duration_counter, 0);
1409         p->pdds[p->n_pdds++] = pdd;
1410
1411         /* Init idr used for memory handle translation */
1412         idr_init(&pdd->alloc_idr);
1413
1414         return pdd;
1415
1416 err_free_pdd:
1417         kfree(pdd);
1418         return NULL;
1419 }
1420
1421 /**
1422  * kfd_process_device_init_vm - Initialize a VM for a process-device
1423  *
1424  * @pdd: The process-device
1425  * @drm_file: Optional pointer to a DRM file descriptor
1426  *
1427  * If @drm_file is specified, it will be used to acquire the VM from
1428  * that file descriptor. If successful, the @pdd takes ownership of
1429  * the file descriptor.
1430  *
1431  * If @drm_file is NULL, a new VM is created.
1432  *
1433  * Returns 0 on success, -errno on failure.
1434  */
1435 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
1436                                struct file *drm_file)
1437 {
1438         struct kfd_process *p;
1439         struct kfd_dev *dev;
1440         int ret;
1441
1442         if (!drm_file)
1443                 return -EINVAL;
1444
1445         if (pdd->drm_priv)
1446                 return -EBUSY;
1447
1448         p = pdd->process;
1449         dev = pdd->dev;
1450
1451         ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
1452                 dev->kgd, drm_file, p->pasid,
1453                 &p->kgd_process_info, &p->ef);
1454         if (ret) {
1455                 pr_err("Failed to create process VM object\n");
1456                 return ret;
1457         }
1458         pdd->drm_priv = drm_file->private_data;
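        /* The DRM file's driver-private data identifies the acquired VM; keep it as this pdd's VM handle */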
1459
1460         ret = kfd_process_device_reserve_ib_mem(pdd);
1461         if (ret)
1462                 goto err_reserve_ib_mem;
1463         ret = kfd_process_device_init_cwsr_dgpu(pdd);
1464         if (ret)
1465                 goto err_init_cwsr;
1466
1467         pdd->drm_file = drm_file;
1468
1469         return 0;
1470
1471 err_init_cwsr:
1472 err_reserve_ib_mem:
1473         kfd_process_device_free_bos(pdd);
1474         pdd->drm_priv = NULL;
1475
1476         return ret;
1477 }
1478
1479 /*
1480  * Direct the IOMMU to bind the process (specifically the pasid->mm)
1481  * to the device.
1482  * Unbinding occurs when the process dies or the device is removed.
1483  *
1484  * Assumes that the process lock is held.
1485  */
1486 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
1487                                                         struct kfd_process *p)
1488 {
1489         struct kfd_process_device *pdd;
1490         int err;
1491
1492         pdd = kfd_get_process_device_data(dev, p);
1493         if (!pdd) {
1494                 pr_err("Process device data doesn't exist\n");
1495                 return ERR_PTR(-ENOMEM);
1496         }
1497
1498         if (!pdd->drm_priv)
1499                 return ERR_PTR(-ENODEV);
1500
1501         /*
1502          * Signal the runtime-pm framework to auto-resume this device and
1503          * to prevent further runtime suspend for as long as the pdd
1504          * exists, i.e. from pdd creation until pdd destruction.
1505          */
1506         if (!pdd->runtime_inuse) {
1507                 err = pm_runtime_get_sync(dev->ddev->dev);
1508                 if (err < 0) {
1509                         pm_runtime_put_autosuspend(dev->ddev->dev);
1510                         return ERR_PTR(err);
1511                 }
1512         }
1513
1514         err = kfd_iommu_bind_process_to_device(pdd);
1515         if (err)
1516                 goto out;
1517
1518         /*
1519          * Make sure the runtime-pm usage counter is incremented only
1520          * once per pdd.
1521          */
1522         pdd->runtime_inuse = true;
1523
1524         return pdd;
1525
1526 out:
1527         /* balance runpm reference count and exit with error */
1528         if (!pdd->runtime_inuse) {
1529                 pm_runtime_mark_last_busy(dev->ddev->dev);
1530                 pm_runtime_put_autosuspend(dev->ddev->dev);
1531         }
1532
1533         return ERR_PTR(err);
1534 }
1535
1536 /* Create a handle in the process-local memory idr that maps to @mem.
1537  * Assumes that the process lock is held.
1538  */
1539 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
1540                                         void *mem)
1541 {
1542         return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
1543 }
1544
1545 /* Translate a handle from the process-local memory idr back to its
1546  * memory object. Assumes that the process lock is held.
1547  */
1548 void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
1549                                         int handle)
1550 {
1551         if (handle < 0)
1552                 return NULL;
1553
1554         return idr_find(&pdd->alloc_idr, handle);
1555 }
1556
1557 /* Remove a handle from the process-local memory idr.
1558  * Assumes that the process lock is held.
1559  */
1560 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
1561                                         int handle)
1562 {
1563         if (handle >= 0)
1564                 idr_remove(&pdd->alloc_idr, handle);
1565 }
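
/*
 * Typical handle lifecycle for the three helpers above (an illustrative
 * sketch only; "mem" stands for whatever memory object the caller
 * allocated and is not a variable in this file). A negative return from
 * the create helper is the idr_alloc() error code:
 *
 *	handle = kfd_process_device_create_obj_handle(pdd, mem);
 *	if (handle < 0)
 *		return handle;
 *	...
 *	mem = kfd_process_device_translate_handle(pdd, handle);
 *	...
 *	kfd_process_device_remove_obj_handle(pdd, handle);
 *
 * All three calls assume the caller holds the process lock.
 */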
1566
1567 /* This increments the process->ref counter. */
1568 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
1569 {
1570         struct kfd_process *p, *ret_p = NULL;
1571         unsigned int temp;
1572
1573         int idx = srcu_read_lock(&kfd_processes_srcu);
1574
1575         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1576                 if (p->pasid == pasid) {
1577                         kref_get(&p->ref);
1578                         ret_p = p;
1579                         break;
1580                 }
1581         }
1582
1583         srcu_read_unlock(&kfd_processes_srcu, idx);
1584
1585         return ret_p;
1586 }
1587
1588 /* This increments the process->ref counter. */
1589 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
1590 {
1591         struct kfd_process *p;
1592
1593         int idx = srcu_read_lock(&kfd_processes_srcu);
1594
1595         p = find_process_by_mm(mm);
1596         if (p)
1597                 kref_get(&p->ref);
1598
1599         srcu_read_unlock(&kfd_processes_srcu, idx);
1600
1601         return p;
1602 }
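
/*
 * Both lookup helpers above return a counted reference; callers are
 * expected to drop it with kfd_unref_process() once they are done with
 * the process.
 */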
1603
1604 /* kfd_process_evict_queues - Evict all user queues of a process
1605  *
1606  * Eviction is reference-counted per process-device. This means multiple
1607  * evictions from different sources can be nested safely.
1608  */
1609 int kfd_process_evict_queues(struct kfd_process *p)
1610 {
1611         int r = 0;
1612         int i;
1613         unsigned int n_evicted = 0;
1614
1615         for (i = 0; i < p->n_pdds; i++) {
1616                 struct kfd_process_device *pdd = p->pdds[i];
1617
1618                 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
1619                                                             &pdd->qpd);
1620                 if (r) {
1621                         pr_err("Failed to evict process queues\n");
1622                         goto fail;
1623                 }
1624                 n_evicted++;
1625         }
1626
1627         return r;
1628
1629 fail:
1630         /* To keep state consistent, roll back partial eviction by
1631          * restoring queues
1632          */
1633         for (i = 0; i < p->n_pdds; i++) {
1634                 struct kfd_process_device *pdd = p->pdds[i];
1635
1636                 if (n_evicted == 0)
1637                         break;
1638                 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1639                                                               &pdd->qpd))
1640                         pr_err("Failed to restore queues\n");
1641
1642                 n_evicted--;
1643         }
1644
1645         return r;
1646 }
1647
1648 /* kfd_process_restore_queues - Restore all user queues of a process */
1649 int kfd_process_restore_queues(struct kfd_process *p)
1650 {
1651         int r, ret = 0;
1652         int i;
1653
1654         for (i = 0; i < p->n_pdds; i++) {
1655                 struct kfd_process_device *pdd = p->pdds[i];
1656
1657                 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1658                                                               &pdd->qpd);
1659                 if (r) {
1660                         pr_err("Failed to restore process queues\n");
1661                         if (!ret)
1662                                 ret = r;
1663                 }
1664         }
1665
1666         return ret;
1667 }
1668
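/* Return the index into p->pdds[] of the device whose ID matches @gpu_id,
 * or -EINVAL if this process has no pdd for that GPU.
 */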
1669 int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
1670 {
1671         int i;
1672
1673         for (i = 0; i < p->n_pdds; i++)
1674                 if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
1675                         return i;
1676         return -EINVAL;
1677 }
1678
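/* Look up the GPU ID and pdds[] index of the device identified by the
 * amdgpu_device/kgd handle @adev. Fills in *gpuid and *gpuidx and returns
 * 0 on success, or -EINVAL if the process has no pdd for that device.
 */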
1679 int
1680 kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
1681                            uint32_t *gpuid, uint32_t *gpuidx)
1682 {
1683         struct kgd_dev *kgd = (struct kgd_dev *)adev;
1684         int i;
1685
1686         for (i = 0; i < p->n_pdds; i++)
1687                 if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
1688                         *gpuid = p->pdds[i]->dev->id;
1689                         *gpuidx = i;
1690                         return 0;
1691                 }
1692         return -EINVAL;
1693 }
1694
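/* Delayed-work handler that evicts a process: it waits for any in-flight
 * restore work to finish, evicts all of the process's queues, signals and
 * drops the eviction fence, and schedules restore_process_worker() to run
 * after PROCESS_RESTORE_TIME_MS.
 */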
1695 static void evict_process_worker(struct work_struct *work)
1696 {
1697         int ret;
1698         struct kfd_process *p;
1699         struct delayed_work *dwork;
1700
1701         dwork = to_delayed_work(work);
1702
1703         /* Process termination destroys this work item, so the kfd_process
1704          * p stays valid for the lifetime of this function.
1705          */
1706         p = container_of(dwork, struct kfd_process, eviction_work);
1707         WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
1708                   "Eviction fence mismatch\n");
1709
1710         /* A narrow window of overlap between the restore and evict work
1711          * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
1712          * unreserves the KFD BOs, the process can be evicted again, but
1713          * restore still has a few more steps to finish. So wait for any
1714          * previous restore work to complete first.
1715          */
1716         flush_delayed_work(&p->restore_work);
1717
1718         pr_debug("Started evicting pasid 0x%x\n", p->pasid);
1719         ret = kfd_process_evict_queues(p);
1720         if (!ret) {
1721                 dma_fence_signal(p->ef);
1722                 dma_fence_put(p->ef);
1723                 p->ef = NULL;
1724                 queue_delayed_work(kfd_restore_wq, &p->restore_work,
1725                                 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
1726
1727                 pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
1728         } else
1729                 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
1730 }
1731
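/* Delayed-work handler that restores an evicted process: it revalidates
 * the process's BOs through amdgpu_amdkfd_gpuvm_restore_process_bos() and
 * then restores the user queues. If BO restoration fails, the work
 * reschedules itself after PROCESS_BACK_OFF_TIME_MS.
 */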
1732 static void restore_process_worker(struct work_struct *work)
1733 {
1734         struct delayed_work *dwork;
1735         struct kfd_process *p;
1736         int ret = 0;
1737
1738         dwork = to_delayed_work(work);
1739
1740         /* Process termination destroys this work item, so the kfd_process
1741          * p stays valid for the lifetime of this function.
1742          */
1743         p = container_of(dwork, struct kfd_process, restore_work);
1744         pr_debug("Started restoring pasid 0x%x\n", p->pasid);
1745
1746         /* Set last_restore_timestamp before the restore has actually
1747          * succeeded. Otherwise it would have to be set by KGD
1748          * (restore_process_bos) before the KFD BOs are unreserved; if not,
1749          * the process could be evicted again before the timestamp is set.
1750          * If the restore fails, the timestamp is simply set again on the
1751          * next attempt, so the minimum GPU quantum a process gets is
1752          * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
1753          * following two functions.
1754          */
1755
1756         p->last_restore_timestamp = get_jiffies_64();
1757         ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
1758                                                      &p->ef);
1759         if (ret) {
1760                 pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
1761                          p->pasid, PROCESS_BACK_OFF_TIME_MS);
1762                 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
1763                                 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
1764                 WARN(!ret, "reschedule restore work failed\n");
1765                 return;
1766         }
1767
1768         ret = kfd_process_restore_queues(p);
1769         if (!ret)
1770                 pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
1771         else
1772                 pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
1773 }
1774
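/* Evict every known process (used on device suspend): cancel any pending
 * eviction/restore work, evict all queues, and signal and release each
 * process's eviction fence. The process table is walked under SRCU.
 */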
1775 void kfd_suspend_all_processes(void)
1776 {
1777         struct kfd_process *p;
1778         unsigned int temp;
1779         int idx = srcu_read_lock(&kfd_processes_srcu);
1780
1781         WARN(debug_evictions, "Evicting all processes");
1782         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1783                 cancel_delayed_work_sync(&p->eviction_work);
1784                 cancel_delayed_work_sync(&p->restore_work);
1785
1786                 if (kfd_process_evict_queues(p))
1787                         pr_err("Failed to suspend process 0x%x\n", p->pasid);
1788                 dma_fence_signal(p->ef);
1789                 dma_fence_put(p->ef);
1790                 p->ef = NULL;
1791         }
1792         srcu_read_unlock(&kfd_processes_srcu, idx);
1793 }
1794
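/* Schedule an immediate restore for every known process, the counterpart
 * of kfd_suspend_all_processes(). Returns -EFAULT if any restore work
 * could not be queued, e.g. because it was already pending.
 */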
1795 int kfd_resume_all_processes(void)
1796 {
1797         struct kfd_process *p;
1798         unsigned int temp;
1799         int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
1800
1801         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1802                 if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
1803                         pr_err("Restore process %d failed during resume\n",
1804                                p->pasid);
1805                         ret = -EFAULT;
1806                 }
1807         }
1808         srcu_read_unlock(&kfd_processes_srcu, idx);
1809         return ret;
1810 }
1811
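/* mmap handler for the per-process CWSR (compute wave save/restore) area:
 * allocates a zeroed buffer of KFD_CWSR_TBA_TMA_SIZE bytes and maps it
 * into the given user VMA. The VMA size must match exactly.
 */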
1812 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
1813                           struct vm_area_struct *vma)
1814 {
1815         struct kfd_process_device *pdd;
1816         struct qcm_process_device *qpd;
1817
1818         if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
1819                 pr_err("Incorrect CWSR mapping size.\n");
1820                 return -EINVAL;
1821         }
1822
1823         pdd = kfd_get_process_device_data(dev, process);
1824         if (!pdd)
1825                 return -EINVAL;
1826         qpd = &pdd->qpd;
1827
1828         qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1829                                         get_order(KFD_CWSR_TBA_TMA_SIZE));
1830         if (!qpd->cwsr_kaddr) {
1831                 pr_err("Error allocating per process CWSR buffer.\n");
1832                 return -ENOMEM;
1833         }
1834
1835         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
1836                 | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
1837         /* Map the CWSR pages into the user process's address space */
1838         return remap_pfn_range(vma, vma->vm_start,
1839                                PFN_DOWN(__pa(qpd->cwsr_kaddr)),
1840                                KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
1841 }
1842
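/* Flush GPU TLB entries for this process-device. Without HWS (hardware
 * scheduling) the flush is done by VMID, and only once a VMID has been
 * assigned (i.e. after the first queue was created); with HWS the flush
 * is done by PASID.
 */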
1843 void kfd_flush_tlb(struct kfd_process_device *pdd)
1844 {
1845         struct kfd_dev *dev = pdd->dev;
1846
1847         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1848                 /* Nothing to flush until a VMID is assigned, which
1849                  * only happens when the first queue is created.
1850                  */
1851                 if (pdd->qpd.vmid)
1852                         amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
1853                                                         pdd->qpd.vmid);
1854         } else {
1855                 amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
1856                                         pdd->process->pasid, TLB_FLUSH_LEGACY);
1857         }
1858 }
1859
1860 #if defined(CONFIG_DEBUG_FS)
1861
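/* debugfs helper: dump the MQDs of every process. Walks the process table
 * under SRCU and calls pqm_debugfs_mqds() for each process while holding
 * that process's mutex.
 */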
1862 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
1863 {
1864         struct kfd_process *p;
1865         unsigned int temp;
1866         int r = 0;
1867
1868         int idx = srcu_read_lock(&kfd_processes_srcu);
1869
1870         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1871                 seq_printf(m, "Process %d PASID 0x%x:\n",
1872                            p->lead_thread->tgid, p->pasid);
1873
1874                 mutex_lock(&p->mutex);
1875                 r = pqm_debugfs_mqds(m, &p->pqm);
1876                 mutex_unlock(&p->mutex);
1877
1878                 if (r)
1879                         break;
1880         }
1881
1882         srcu_read_unlock(&kfd_processes_srcu, idx);
1883
1884         return r;
1885 }
1886
1887 #endif
1888